ADD: PATSTAT analysis

main
radvanyimome 1 year ago
parent 732f60d270
commit eadfe5f1f5

File diff suppressed because one or more lines are too long

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [
{
@ -10,7 +10,7 @@
"output_type": "stream",
"text": [
"CPU times: total: 125 ms\n",
"Wall time: 247 ms\n"
"Wall time: 143 ms\n"
]
}
],
@ -21,14 +21,14 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "<dask.config.set at 0x215813bc370>"
"text/plain": "<dask.config.set at 0x197d05e7220>"
},
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@ -39,14 +39,14 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "<dask.config.set at 0x2159c19c580>"
"text/plain": "<dask.config.set at 0x197d0c03ac0>"
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -57,14 +57,14 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "{'temporary-directory': 'D:\\\\PATSTAT\\\\dask_temp',\n 'visualization': {'engine': None},\n 'tokenize': {'ensure-deterministic': False},\n 'dataframe': {'backend': 'pandas',\n 'shuffle': {'method': None, 'compression': None},\n 'parquet': {'metadata-task-size-local': 512, 'metadata-task-size-remote': 1},\n 'dtype_backend': 'pandas',\n 'convert_string': False},\n 'array': {'backend': 'numpy',\n 'rechunk': {'method': 'tasks'},\n 'svg': {'size': 120},\n 'slicing': {'split-large-chunks': None}},\n 'optimization': {'annotations': {'fuse': True},\n 'fuse': {'active': None,\n 'ave-width': 1,\n 'max-width': None,\n 'max-height': inf,\n 'max-depth-new-edges': None,\n 'subgraphs': None,\n 'rename-keys': True}}}"
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@ -75,7 +75,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -84,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@ -107,22 +107,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: total: 39min 33s\n",
"Wall time: 20min 19s\n"
"CPU times: total: 41min 20s\n",
"Wall time: 18min 32s\n"
]
}
],
@ -136,9 +136,21 @@
"tls_206.to_parquet(\"tls_206.parquet\")\n",
"\n",
"tls_207 = dd.read_csv(\"table_tls207.csv\", low_memory=False)\n",
"tls_207.to_parquet(\"tls_207.parquet\")"
"tls_207.to_parquet(\"tls_207.parquet\")\n",
"\n",
"tls_204 = dd.read_csv(\"table_tls204.csv\", low_memory=False)\n",
"tls_207.to_parquet(\"tls_204.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 11,

Loading…
Cancel
Save