ADD: PATSTAT analysis

main
radvanyimome 1 year ago
parent 732f60d270
commit eadfe5f1f5

File diff suppressed because one or more lines are too long

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -10,7 +10,7 @@
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"CPU times: total: 125 ms\n", "CPU times: total: 125 ms\n",
"Wall time: 247 ms\n" "Wall time: 143 ms\n"
] ]
} }
], ],
@ -21,14 +21,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": "<dask.config.set at 0x215813bc370>" "text/plain": "<dask.config.set at 0x197d05e7220>"
}, },
"execution_count": 3, "execution_count": 2,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -39,14 +39,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": "<dask.config.set at 0x2159c19c580>" "text/plain": "<dask.config.set at 0x197d0c03ac0>"
}, },
"execution_count": 4, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -57,14 +57,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": "{'temporary-directory': 'D:\\\\PATSTAT\\\\dask_temp',\n 'visualization': {'engine': None},\n 'tokenize': {'ensure-deterministic': False},\n 'dataframe': {'backend': 'pandas',\n 'shuffle': {'method': None, 'compression': None},\n 'parquet': {'metadata-task-size-local': 512, 'metadata-task-size-remote': 1},\n 'dtype_backend': 'pandas',\n 'convert_string': False},\n 'array': {'backend': 'numpy',\n 'rechunk': {'method': 'tasks'},\n 'svg': {'size': 120},\n 'slicing': {'split-large-chunks': None}},\n 'optimization': {'annotations': {'fuse': True},\n 'fuse': {'active': None,\n 'ave-width': 1,\n 'max-width': None,\n 'max-height': inf,\n 'max-depth-new-edges': None,\n 'subgraphs': None,\n 'rename-keys': True}}}" "text/plain": "{'temporary-directory': 'D:\\\\PATSTAT\\\\dask_temp',\n 'visualization': {'engine': None},\n 'tokenize': {'ensure-deterministic': False},\n 'dataframe': {'backend': 'pandas',\n 'shuffle': {'method': None, 'compression': None},\n 'parquet': {'metadata-task-size-local': 512, 'metadata-task-size-remote': 1},\n 'dtype_backend': 'pandas',\n 'convert_string': False},\n 'array': {'backend': 'numpy',\n 'rechunk': {'method': 'tasks'},\n 'svg': {'size': 120},\n 'slicing': {'split-large-chunks': None}},\n 'optimization': {'annotations': {'fuse': True},\n 'fuse': {'active': None,\n 'ave-width': 1,\n 'max-width': None,\n 'max-height': inf,\n 'max-depth-new-edges': None,\n 'subgraphs': None,\n 'rename-keys': True}}}"
}, },
"execution_count": 5, "execution_count": 4,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -75,7 +75,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -84,7 +84,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -107,22 +107,22 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"CPU times: total: 39min 33s\n", "CPU times: total: 41min 20s\n",
"Wall time: 20min 19s\n" "Wall time: 18min 32s\n"
] ]
} }
], ],
@ -136,9 +136,21 @@
"tls_206.to_parquet(\"tls_206.parquet\")\n", "tls_206.to_parquet(\"tls_206.parquet\")\n",
"\n", "\n",
"tls_207 = dd.read_csv(\"table_tls207.csv\", low_memory=False)\n", "tls_207 = dd.read_csv(\"table_tls207.csv\", low_memory=False)\n",
"tls_207.to_parquet(\"tls_207.parquet\")" "tls_207.to_parquet(\"tls_207.parquet\")\n",
"\n",
"tls_204 = dd.read_csv(\"table_tls204.csv\", low_memory=False)\n",
"tls_204.to_parquet(\"tls_204.parquet\")"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 11,

Loading…
Cancel
Save