You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
111 lines
2.6 KiB
Plaintext
111 lines
2.6 KiB
Plaintext
1 year ago
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {
|
||
|
"collapsed": true
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\n",
|
||
|
"D:\\PATSTAT\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"\n",
|
||
|
"import dask\n",
|
||
|
"\n",
|
||
|
"dask.config.set(temporary_directory=r'D:\\PATSTAT\\dask_temp')\n",
|
||
|
"dask.config.set({'temporary_directory': r'D:\\PATSTAT\\dask_temp'})\n",
|
||
|
"dask.config.config\n",
|
||
|
"import dask.dataframe as dd\n",
|
||
|
"import os\n",
|
||
|
"\n",
|
||
|
"import os\n",
|
||
|
"print(os.getcwd()) # Prints the current working directory\n",
|
||
|
"\n",
|
||
|
"workdir_path=r\"D:\\PATSTAT\"\n",
|
||
|
"os.chdir(workdir_path)\n",
|
||
|
"print(os.getcwd())"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"tls_206 = dd.read_csv(\"table_tls206.csv\", low_memory=False)\n",
|
||
|
"tls_206.to_parquet(\"tls_206.parquet\")\n",
|
||
|
"# %%time\n",
|
||
|
"# Person data: reload the table lazily from the Parquet copy written above\n",
|
||
|
"tls_206_p = dd.read_parquet(\"tls_206.parquet\")"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"appln_pers_f = pd.read_csv(r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\\first_round\\first-filings-with-persons-raw.csv\", header=None,\n",
|
||
|
" names=[\"appln_id\",\"appln_auth\",\"person_id\",\"invt_seq_nr\",\"applt_seq_nr\",'person_name',\"person_ctry_code\"])"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"pers_id_scope =appln_pers_f[\"person_id\"].unique()"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"tls_206_scope = tls_206_p[tls_206_p['person_id'].isin(pers_id_scope)]\n",
|
||
|
"tls_206_scope.compute().to_csv(r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\\first_round\\tls_206_scope.csv\",index=False)"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false
|
||
|
}
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 2
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython2",
|
||
|
"version": "2.7.6"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 0
|
||
|
}
|