You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ZSI_Reconnect_China/PATSTAT/person_minipipe.ipynb

166 lines
15 KiB
Plaintext

1 year ago
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\n",
"D:\\PATSTAT\n"
]
}
],
"source": [
"import os\n",
"\n",
"import dask\n",
"import dask.dataframe as dd\n",
"\n",
"# Point dask's temporary directory at the PATSTAT data drive.\n",
"dask.config.set({'temporary_directory': r'D:\\PATSTAT\\dask_temp'})\n",
"\n",
"print(os.getcwd())  # working directory before switching\n",
"\n",
"# Relative file names used after this point are resolved against this directory.\n",
"workdir_path = r\"D:\\PATSTAT\"\n",
"os.chdir(workdir_path)\n",
"print(os.getcwd())"
]
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"# Convert the tls206 CSV to parquet only when the parquet output does not exist yet,\n",
"# so a full re-run of the notebook does not repeat the conversion.\n",
"# Remove tls_206.parquet to force a rebuild from the CSV.\n",
"if not os.path.exists(\"tls_206.parquet\"):\n",
"    tls_206 = dd.read_csv(\"table_tls206.csv\", low_memory=False)\n",
"    tls_206.to_parquet(\"tls_206.parquet\")\n",
"\n",
"# Person data\n",
"tls_206_p = dd.read_parquet(\"tls_206.parquet\")"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [],
"source": [
"# import pandas as pd\n",
"# appln_pers_f = pd.read_csv(r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\\first_round\\first-filings-with-persons-raw.csv\", header=None,\n",
"# names=[\"appln_id\",\"appln_auth\",\"person_id\",\" invt_seq_nr\",\"applt_seq_nr\",'person_name',\"person_ctry_code\"])"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"outdir = \"raw_files_csv\"\n",
"\n",
"# Load the persons extract (02_persons_2011_2022.csv) into a pandas DataFrame.\n",
"persons_csv = r\"C:/Users/radvanyi/PycharmProjects/ZSI_analytics/PATSTAT/raw_files_csv/02_persons_2011_2022.csv\"\n",
"appln_pers_f = pd.read_csv(persons_csv, low_memory=False)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [],
"source": [
"# Distinct person_id values present in the persons extract.\n",
"pers_id_scope = appln_pers_f.loc[:, \"person_id\"].unique()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [],
"source": [
"# Keep only the tls206 rows whose person_id occurs in pers_id_scope.\n",
"tls_206_scope = tls_206_p[tls_206_p['person_id'].isin(pers_id_scope)]\n",
"\n",
"# Materialise the filtered frame once and reuse it for the CSV export,\n",
"# instead of running the dask graph a second time.\n",
"df_206 = tls_206_scope.compute()\n",
"df_206.to_csv(r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\PATSTAT\\first_round\\tls_206_scope_v2.csv\", index=False)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"data": {
"text/plain": " person_id person_name \n4023 4025 Meritor Heavy Vehicle Braking Systems (UK) Lim... \\\n4347 4349 Fraser, Stuart \n5627 5629 Xaar Technology Limited \n5811 5813 SIEMENS PLC \n6499 6501 BAE Systems PLC \n... ... ... \n366118 88823960 WARD, Lauren \n366119 88823961 WÄRTSILÄ UK LIMITED \n366130 88823972 Xavier Erdödy \n366135 88823977 Zeg.AI Ltd \n366137 88823979 Zhiyang Pan \n\n person_name_orig_lg \n4023 Meritor Heavy Vehicle Braking Systems (UK) Lim... \\\n4347 Fraser, Stuart \n5627 Xaar Technology Limited \n5811 SIEMENS PLC \n6499 BAE Systems PLC \n... ... \n366118 WARD, Lauren \n366119 WÄRTSILÄ UK LIMITED \n366130 Xavier Erdödy \n366135 Zeg.AI Ltd \n366137 Zhiyang Pan \n\n person_address person_ctry_code \n4023 Grange Road Cwmbran,Gwent NP44 3XU GB \\\n4347 Fernhill Lees Lane,Little Neston Cheshire CH64... GB \n5627 Unit 316, Science Park,Cambridge CB4 0XR GB \n5811 Faraday House Sir William Siemens Square Friml... GB \n6499 6 Carlton Gardens,London SW1Y 5AD GB \n... ... ... \n366118 None GB \n366119 None GB \n366130 None GB \n366135 None GB \n366137 None GB \n\n nuts nuts_level doc_std_name_id \n4023 UKL16 3 25273975 \\\n4347 UKD63 3 3738 \n5627 UKH12 3 4824 \n5811 UKJ25 3 4979 \n6499 UKI32 3 5583 \n... ... ... ... \n366118 UK 0 40301088 \n366119 UK 0 21929085 \n366130 UK 0 40578262 \n366135 UK 0 37017676 \n366137 UK 0 17409767 \n\n doc_std_name psn_id \n4023 MERITOR HEAVY VEHICLE BRAKING SYSTEMS UK LTD 21718818 \\\n4347 FRASER STUART 9243356 \n5627 XAAR TECHNOLOGY LTD 35706185 \n5811 SIEMENS PLC 30138991 \n6499 BAE SYSTEMS PLC 1787059 \n... ... ... \n366118 WARD LAUREN 188823960 \n366119 RTSIL UK LTD W 188823961 \n366130 XAVIER ERDÖDY 188823972 \n366135 ZEG AI LTD 188823977 \n366137 ZHIYANG PAN 188823979 \n\n psn_name psn_level psn_sector \n4023 MERITOR HEAVY V
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>person_id</th>\n <th>person_name</th>\n <th>person_name_orig_lg</th>\n <th>person_address</th>\n <th>person_ctry_code</th>\n <th>nuts</th>\n <th>nuts_level</th>\n <th>doc_std_name_id</th>\n <th>doc_std_name</th>\n <th>psn_id</th>\n <th>psn_name</th>\n <th>psn_level</th>\n <th>psn_sector</th>\n <th>han_id</th>\n <th>han_name</th>\n <th>han_harmonized</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>4023</th>\n <td>4025</td>\n <td>Meritor Heavy Vehicle Braking Systems (UK) Lim...</td>\n <td>Meritor Heavy Vehicle Braking Systems (UK) Lim...</td>\n <td>Grange Road Cwmbran,Gwent NP44 3XU</td>\n <td>GB</td>\n <td>UKL16</td>\n <td>3</td>\n <td>25273975</td>\n <td>MERITOR HEAVY VEHICLE BRAKING SYSTEMS UK LTD</td>\n <td>21718818</td>\n <td>MERITOR HEAVY VEHICLE BRAKING SYSTEMS (UK)</td>\n <td>1</td>\n <td>COMPANY</td>\n <td>1940089</td>\n <td>MERITOR HEAVY VEHICLE BRAKING SYSTEMS UK LTD</td>\n <td>2</td>\n </tr>\n <tr>\n <th>4347</th>\n <td>4349</td>\n <td>Fraser, Stuart</td>\n <td>Fraser, Stuart</td>\n <td>Fernhill Lees Lane,Little Neston Cheshire CH64...</td>\n <td>GB</td>\n <td>UKD63</td>\n <td>3</td>\n <td>3738</td>\n <td>FRASER STUART</td>\n <td>9243356</td>\n <td>FRASER, STUART</td>\n <td>0</td>\n <td>None</td>\n <td>100004349</td>\n <td>Fraser, Stuart</td>\n <td>0</td>\n </tr>\n <tr>\n <th>5627</th>\n <td>5629</td>\n <td>Xaar Technology Limited</td>\n <td>Xaar Technology Limited</td>\n <td>Unit 316, Science Park,Cambridge CB4 0XR</td>\n <td>GB</td>\n <td>UKH12</td>\n <td>3</td>\n <td>4824</td>\n <td>XAAR TECHNOLOGY LTD</td>\n <td>35706185</td>\n <td>XAAR TECHNOLOGY</td>\n <td>1</td>\n <td>COMPANY</td>\n <td>3228426</td>\n <td>XAAR TECH LTD</td>\n 
<td>2</td>\n </tr>\n <tr>\n <th>5811</th>\n <td>5813</td>\n <td>SIEMENS PLC</td>\n <td>SIEMENS PLC</td>\n <td>Faraday House Sir William Siemens Square Friml...</td>\n <td>GB</td>\n <td>UKJ25</td>\n <td>3</td>\n <td>4979</td>\n <td>SIEMENS PLC</td>\n <td>30138991</td>\n <td>SIEMENS</td>\n <td>2</td>\n <td>COMPANY</td>\n <td>2755905</td>\n <td>SIEMENS PLC</td>\n <td>2</td>\n </tr>\n <tr>\n <th>6499</th>\n <td>6501</td>\n <td>BAE Systems PLC</td>\n <td>BAE Systems PLC</td>\n <td>6 Carlton Gardens,London SW1Y 5AD</td>\n <td>GB</td>\n <td>UKI32</td>\n <td>3</td>\n <td>5583</td>\n <td>BAE SYSTEMS PLC</td>\n <td>1787059</td>\n <td>BAE SYSTEMS</td>\n <td>2</td>\n <td>COMPANY</td>\n <td>208539</td>\n <td>BAE SYSTEMS PLC</td>\n <td>2</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>366118</th>\n <td>88823960</td>\n <td>WARD, Lauren</td>\n <td>WARD, Lauren</td>\n <td>None</td>\n <td>GB</td>\n <td>UK</td>\n <td>0</td>\n <td>40301088</td>\n <td>WARD LAUREN</td>\n <td>188823960</td>\n <td>WARD, Lauren</td>\n <td>0</td>\n <td>UNKNOWN</td>\n <td>188823960</td>\n <td>WARD, Lauren<
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the rows whose person_ctry_code equals GB.\n",
"df_206.loc[df_206[\"person_ctry_code\"].eq(\"GB\")]"
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}