ADD:
parent
6cfade8002
commit
732f60d270
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,110 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\n",
|
||||
"D:\\PATSTAT\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import dask\n",
|
||||
"import dask.dataframe as dd\n",
|
||||
"\n",
|
||||
"# Point dask's temporary directory at the data drive (set once).\n",
|
||||
"dask.config.set({'temporary_directory': r'D:\\PATSTAT\\dask_temp'})\n",
|
||||
"\n",
|
||||
"print(os.getcwd()) # Prints the current working directory\n",
|
||||
"\n",
|
||||
"# Relative file names used after this point resolve against workdir_path.\n",
|
||||
"workdir_path=r\"D:\\PATSTAT\"\n",
|
||||
"os.chdir(workdir_path)\n",
|
||||
"print(os.getcwd())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Convert the CSV export to parquet only when the parquet dataset is not\n",
|
||||
"# there yet, so re-running the notebook does not redo the conversion.\n",
|
||||
"# Delete \"tls_206.parquet\" to force a rebuild (e.g. after an interrupted write).\n",
|
||||
"if not os.path.exists(\"tls_206.parquet\"):\n",
|
||||
"    tls_206 = dd.read_csv(\"table_tls206.csv\", low_memory=False)\n",
|
||||
"    tls_206.to_parquet(\"tls_206.parquet\")\n",
|
||||
"\n",
|
||||
"#Person data\n",
|
||||
"tls_206_p = dd.read_parquet(\"tls_206.parquet\")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"# The file is read with header=None, so the column names are supplied explicitly.\n",
|
||||
"appln_pers_f = pd.read_csv(r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\\first_round\\first-filings-with-persons-raw.csv\", header=None,\n",
|
||||
"    names=[\"appln_id\",\"appln_auth\",\"person_id\",\"invt_seq_nr\",\"applt_seq_nr\",'person_name',\"person_ctry_code\"])"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Distinct person_id values present in appln_pers_f.\n",
|
||||
"pers_id_scope = appln_pers_f[\"person_id\"].unique()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Keep only the rows of tls_206_p whose person_id is in pers_id_scope.\n",
|
||||
"in_scope = tls_206_p['person_id'].isin(pers_id_scope)\n",
|
||||
"tls_206_scope = tls_206_p[in_scope]\n",
|
||||
"\n",
|
||||
"# compute() evaluates the dask selection; the result is written to a single CSV.\n",
|
||||
"tls_206_scope.compute().to_csv(\n",
|
||||
"    r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\\first_round\\tls_206_scope.csv\",\n",
|
||||
"    index=False,\n",
|
||||
")"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
}
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue