|
|
@ -2,7 +2,7 @@
|
|
|
|
"cells": [
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 2,
|
|
|
|
"execution_count": 1,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -10,7 +10,7 @@
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"CPU times: total: 125 ms\n",
|
|
|
|
"CPU times: total: 125 ms\n",
|
|
|
|
"Wall time: 247 ms\n"
|
|
|
|
"Wall time: 143 ms\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -21,14 +21,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 3,
|
|
|
|
"execution_count": 2,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"data": {
|
|
|
|
"text/plain": "<dask.config.set at 0x215813bc370>"
|
|
|
|
"text/plain": "<dask.config.set at 0x197d05e7220>"
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"execution_count": 3,
|
|
|
|
"execution_count": 2,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -39,14 +39,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 4,
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"data": {
|
|
|
|
"text/plain": "<dask.config.set at 0x2159c19c580>"
|
|
|
|
"text/plain": "<dask.config.set at 0x197d0c03ac0>"
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"execution_count": 4,
|
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -57,14 +57,14 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 5,
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"data": {
|
|
|
|
"text/plain": "{'temporary-directory': 'D:\\\\PATSTAT\\\\dask_temp',\n 'visualization': {'engine': None},\n 'tokenize': {'ensure-deterministic': False},\n 'dataframe': {'backend': 'pandas',\n 'shuffle': {'method': None, 'compression': None},\n 'parquet': {'metadata-task-size-local': 512, 'metadata-task-size-remote': 1},\n 'dtype_backend': 'pandas',\n 'convert_string': False},\n 'array': {'backend': 'numpy',\n 'rechunk': {'method': 'tasks'},\n 'svg': {'size': 120},\n 'slicing': {'split-large-chunks': None}},\n 'optimization': {'annotations': {'fuse': True},\n 'fuse': {'active': None,\n 'ave-width': 1,\n 'max-width': None,\n 'max-height': inf,\n 'max-depth-new-edges': None,\n 'subgraphs': None,\n 'rename-keys': True}}}"
|
|
|
|
"text/plain": "{'temporary-directory': 'D:\\\\PATSTAT\\\\dask_temp',\n 'visualization': {'engine': None},\n 'tokenize': {'ensure-deterministic': False},\n 'dataframe': {'backend': 'pandas',\n 'shuffle': {'method': None, 'compression': None},\n 'parquet': {'metadata-task-size-local': 512, 'metadata-task-size-remote': 1},\n 'dtype_backend': 'pandas',\n 'convert_string': False},\n 'array': {'backend': 'numpy',\n 'rechunk': {'method': 'tasks'},\n 'svg': {'size': 120},\n 'slicing': {'split-large-chunks': None}},\n 'optimization': {'annotations': {'fuse': True},\n 'fuse': {'active': None,\n 'ave-width': 1,\n 'max-width': None,\n 'max-height': inf,\n 'max-depth-new-edges': None,\n 'subgraphs': None,\n 'rename-keys': True}}}"
|
|
|
|
},
|
|
|
|
},
|
|
|
|
"execution_count": 5,
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -75,7 +75,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 6,
|
|
|
|
"execution_count": 5,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
@ -84,7 +84,7 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 7,
|
|
|
|
"execution_count": 6,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -107,22 +107,22 @@
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"execution_count": 6,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": []
|
|
|
|
"source": []
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 10,
|
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"text": [
|
|
|
|
"CPU times: total: 39min 33s\n",
|
|
|
|
"CPU times: total: 41min 20s\n",
|
|
|
|
"Wall time: 20min 19s\n"
|
|
|
|
"Wall time: 18min 32s\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
],
|
|
|
@ -136,9 +136,21 @@
|
|
|
|
"tls_206.to_parquet(\"tls_206.parquet\")\n",
|
|
|
|
"tls_206.to_parquet(\"tls_206.parquet\")\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"tls_207 = dd.read_csv(\"table_tls207.csv\", low_memory=False)\n",
|
|
|
|
"tls_207 = dd.read_csv(\"table_tls207.csv\", low_memory=False)\n",
|
|
|
|
"tls_207.to_parquet(\"tls_207.parquet\")"
|
|
|
|
"tls_207.to_parquet(\"tls_207.parquet\")\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"tls_204 = dd.read_csv(\"table_tls204.csv\", low_memory=False)\n",
|
|
|
|
|
|
|
|
"tls_207.to_parquet(\"tls_204.parquet\")"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
|
|
"source": [],
|
|
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
|
|
"collapsed": false
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 11,
|
|
|
|
"execution_count": 11,
|
|
|
|