{ "cells": [ { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "import janitor\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from matplotlib.ticker import MaxNLocator\n", "import math\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 33, "outputs": [], "source": [ "# Folder that holds the scoped table extracts read below.\n", "outdir = \"EU_CH_scope/v2_\"\n", "\n", "# One CSV per table, stored as tls_<table number>_scope.csv inside outdir.\n", "appln, appln_title, pers, appln_pers, appln_cpc = (\n", "    pd.read_csv(f\"{outdir}/tls_{table_nr}_scope.csv\")\n", "    for table_nr in (201, 202, 206, 207, 224)\n", ")\n", "\n", "# Missing sector values are labelled \"UNKNOWN\" instead of staying NaN.\n", "pers = pers.assign(psn_sector=pers[\"psn_sector\"].fillna(\"UNKNOWN\"))" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 23, "outputs": [ { "data": { "text/plain": " appln_id appln_auth appln_nr appln_kind appln_filing_date \n0 330225325 EP 11150195 A 2011-01-05 \\\n1 330322632 EP 11150485 A 2011-01-10 \n2 330350961 EP 11150683 A 2011-01-12 \n3 330374780 WO 2011050339 W 2011-01-12 \n4 330424360 WO 2011050199 W 2011-01-10 \n... ... ... ... ... ... \n64261 575551871 WO 2020142401 W 2020-12-31 \n64262 575551946 WO 2020142230 W 2020-12-31 \n64263 575553943 WO 2021142692 W 2021-12-29 \n64264 575553975 WO 2021142655 W 2021-12-29 \n64265 575556091 WO 2021064274 W 2021-12-20 \n\n appln_filing_year appln_nr_original ipr_type receiving_office \n0 2011 11150195 PI \\\n1 2011 11150485 PI \n2 2011 11150683 PI \n3 2011 EP2011/050339 PI EP \n4 2011 EP2011/050199 PI EP \n... ... ... ... ... \n64261 2020 CN2020/142401 PI CN \n64262 2020 CN2020/142230 PI CN \n64263 2021 CN2021/142692 PI CN \n64264 2021 CN2021/142655 PI CN \n64265 2021 US2021/064274 PI US \n\n internat_appln_id ... earliest_publn_date earliest_publn_year \n0 0 ... 2011-07-13 2011 \\\n1 0 ... 2012-07-11 2012 \n2 0 ... 
2012-07-18 2012 \n3 0 ... 2011-07-21 2011 \n4 0 ... 2012-07-19 2012 \n... ... ... ... ... \n64261 0 ... 2022-07-07 2022 \n64262 0 ... 2022-07-07 2022 \n64263 0 ... 2022-07-07 2022 \n64264 0 ... 2022-07-07 2022 \n64265 0 ... 2022-07-07 2022 \n\n earliest_pat_publn_id granted docdb_family_id inpadoc_family_id \n0 335277427 Y 43754737 330225325 \\\n1 364719889 Y 43991052 330322632 \n2 364923578 N 43881056 330350961 \n3 335927718 N 43923624 330374780 \n4 365345607 N 43533009 330424360 \n... ... ... ... ... \n64261 575551872 N 82260109 575551871 \n64262 575551947 N 82260125 575551946 \n64263 575553944 N 79460210 564546189 \n64264 575553976 N 82260272 575553975 \n64265 575556092 N 82132815 575038927 \n\n docdb_family_size nb_citing_docdb_fam nb_applicants nb_inventors \n0 4 16 1 1 \n1 2 5 1 2 \n2 7 12 2 5 \n3 2 8 5 4 \n4 4 13 3 2 \n... ... ... ... ... \n64261 1 0 2 1 \n64262 1 0 3 3 \n64263 2 0 2 6 \n64264 1 0 2 7 \n64265 2 0 4 7 \n\n[64266 rows x 26 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
appln_idappln_authappln_nrappln_kindappln_filing_dateappln_filing_yearappln_nr_originalipr_typereceiving_officeinternat_appln_id...earliest_publn_dateearliest_publn_yearearliest_pat_publn_idgranteddocdb_family_idinpadoc_family_iddocdb_family_sizenb_citing_docdb_famnb_applicantsnb_inventors
0330225325EP11150195A2011-01-05201111150195PI0...2011-07-132011335277427Y4375473733022532541611
1330322632EP11150485A2011-01-10201111150485PI0...2012-07-112012364719889Y439910523303226322512
2330350961EP11150683A2011-01-12201111150683PI0...2012-07-182012364923578N4388105633035096171225
3330374780WO2011050339W2011-01-122011EP2011/050339PIEP0...2011-07-212011335927718N439236243303747802854
4330424360WO2011050199W2011-01-102011EP2011/050199PIEP0...2012-07-192012365345607N4353300933042436041332
..................................................................
64261575551871WO2020142401W2020-12-312020CN2020/142401PICN0...2022-07-072022575551872N822601095755518711021
64262575551946WO2020142230W2020-12-312020CN2020/142230PICN0...2022-07-072022575551947N822601255755519461033
64263575553943WO2021142692W2021-12-292021CN2021/142692PICN0...2022-07-072022575553944N794602105645461892026
64264575553975WO2021142655W2021-12-292021CN2021/142655PICN0...2022-07-072022575553976N822602725755539751027
64265575556091WO2021064274W2021-12-202021US2021/064274PIUS0...2022-07-072022575556092N821328155750389272047
\n

64266 rows × 26 columns

\n
" }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "appln" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 22, "outputs": [ { "data": { "text/plain": " appln_id appln_title_lg \n0 330225325 en \\\n1 330322632 en \n2 330350961 en \n3 330374780 en \n4 330424360 en \n... ... ... \n64258 575551871 en \n64259 575551946 en \n64260 575553943 en \n64261 575553975 en \n64262 575556091 en \n\n appln_title \n0 Beverage preparation machine \n1 Method and system for recommending contextual ... \n2 A method and an apparatus for treating at leas... \n3 A METHOD FOR DIAGNOSIS OF FAULT IN VEHICULAR W... \n4 ERROR CONTROL IN A COMMUNICATION SYSTEM \n... ... \n64258 IMAGE STITCHING METHOD AND APPARATUS, AND COMP... \n64259 LOW VOC AND FOOD GRADE RESEALABLE LABEL \n64260 METHOD, DEVICE, COMPUTER READABLE MEDIUM, AND ... \n64261 MULTISPECIFIC ANTIGEN BINDING PROTEINS \n64262 SYSTEM AND METHOD FOR METHANE HYDRATE BASED PR... \n\n[64263 rows x 3 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
appln_idappln_title_lgappln_title
0330225325enBeverage preparation machine
1330322632enMethod and system for recommending contextual ...
2330350961enA method and an apparatus for treating at leas...
3330374780enA METHOD FOR DIAGNOSIS OF FAULT IN VEHICULAR W...
4330424360enERROR CONTROL IN A COMMUNICATION SYSTEM
............
64258575551871enIMAGE STITCHING METHOD AND APPARATUS, AND COMP...
64259575551946enLOW VOC AND FOOD GRADE RESEALABLE LABEL
64260575553943enMETHOD, DEVICE, COMPUTER READABLE MEDIUM, AND ...
64261575553975enMULTISPECIFIC ANTIGEN BINDING PROTEINS
64262575556091enSYSTEM AND METHOD FOR METHANE HYDRATE BASED PR...
\n

64263 rows × 3 columns

\n
" }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "appln_title" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 7, "outputs": [ { "data": { "text/plain": " appln_id appln_auth appln_nr appln_kind appln_filing_date \n0 330225325 EP 11150195 A 2011-01-05 \\\n1 330322632 EP 11150485 A 2011-01-10 \n2 330350961 EP 11150683 A 2011-01-12 \n3 330374780 WO 2011050339 W 2011-01-12 \n4 330424360 WO 2011050199 W 2011-01-10 \n\n appln_filing_year appln_nr_original ipr_type receiving_office \n0 2011 11150195 PI \\\n1 2011 11150485 PI \n2 2011 11150683 PI \n3 2011 EP2011/050339 PI EP \n4 2011 EP2011/050199 PI EP \n\n internat_appln_id ... earliest_pat_publn_id granted docdb_family_id \n0 0 ... 335277427 Y 43754737 \\\n1 0 ... 364719889 Y 43991052 \n2 0 ... 364923578 N 43881056 \n3 0 ... 335927718 N 43923624 \n4 0 ... 365345607 N 43533009 \n\n inpadoc_family_id docdb_family_size nb_citing_docdb_fam nb_applicants \n0 330225325 4 16 1 \\\n1 330322632 2 5 1 \n2 330350961 7 12 2 \n3 330374780 2 8 5 \n4 330424360 4 13 3 \n\n nb_inventors appln_title_lg \n0 1 en \\\n1 2 en \n2 5 en \n3 4 en \n4 2 en \n\n appln_title \n0 Beverage preparation machine \n1 Method and system for recommending contextual ... \n2 A method and an apparatus for treating at leas... \n3 A METHOD FOR DIAGNOSIS OF FAULT IN VEHICULAR W... \n4 ERROR CONTROL IN A COMMUNICATION SYSTEM \n\n[5 rows x 28 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
appln_idappln_authappln_nrappln_kindappln_filing_dateappln_filing_yearappln_nr_originalipr_typereceiving_officeinternat_appln_id...earliest_pat_publn_idgranteddocdb_family_idinpadoc_family_iddocdb_family_sizenb_citing_docdb_famnb_applicantsnb_inventorsappln_title_lgappln_title
0330225325EP11150195A2011-01-05201111150195PI0...335277427Y4375473733022532541611enBeverage preparation machine
1330322632EP11150485A2011-01-10201111150485PI0...364719889Y439910523303226322512enMethod and system for recommending contextual ...
2330350961EP11150683A2011-01-12201111150683PI0...364923578N4388105633035096171225enA method and an apparatus for treating at leas...
3330374780WO2011050339W2011-01-122011EP2011/050339PIEP0...335927718N439236243303747802854enA METHOD FOR DIAGNOSIS OF FAULT IN VEHICULAR W...
4330424360WO2011050199W2011-01-102011EP2011/050199PIEP0...365345607N4353300933042436041332enERROR CONTROL IN A COMMUNICATION SYSTEM
\n

5 rows × 28 columns

\n
" }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Attach the title record of each application; rows are matched on appln_id.\n", "appln_data = pd.merge(appln, appln_title, on=\"appln_id\")\n", "appln_data.head()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 10, "outputs": [ { "data": { "text/plain": "array(['EP', 'WO', 'LU', 'FI', 'FR', 'ES', 'NO', 'US', 'GB', 'DO', 'DE',\n 'CA', 'UY', 'SV', 'KR', 'TR', 'CR', 'TW', 'NL', 'SG', 'CO', 'DK',\n 'CU', 'HR', 'AR', 'RU', 'AU', 'PL', 'BE', 'BR', 'MX', 'AP', 'MC',\n 'EC', 'PE', 'HU', 'EA', 'AT', 'RO', 'PT', 'CZ', 'IS', 'HN', 'MA',\n 'MD', 'CN', 'GT', 'UA', 'CL', 'SK', 'PH', 'MY', 'SI', 'HK', 'RS',\n 'IN', 'VN', 'TN', 'IL', 'GE', 'CY', 'SM', 'ZA', 'SE', 'CH', 'LT',\n 'ME', 'JO', 'NI', 'JP', 'SA', 'LV'], dtype=object)" }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Distinct appln_auth values that occur in the merged applications.\n", "appln_data.appln_auth.unique()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 8, "outputs": [ { "data": { "text/plain": " person_id appln_id applt_seq_nr invt_seq_nr\n0 1 340314532 1 0\n1 1 413601768 1 0\n2 21 332015605 1 0\n3 21 333490084 1 0\n4 21 335903805 1 0\n... ... ... ... ...\n274039 85719932 545918634 0 2\n274040 85720336 569409547 0 4\n274041 85720376 555215896 0 2\n274042 85720469 569304088 0 5\n274043 85720500 569495993 0 5\n\n[274044 rows x 4 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
person_idappln_idapplt_seq_nrinvt_seq_nr
0134031453210
1141360176810
22133201560510
32133349008410
42133590380510
...............
2740398571993254591863402
2740408572033656940954704
2740418572037655521589602
2740428572046956930408805
2740438572050056949599305
\n

274044 rows × 4 columns

\n
" }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "appln_pers" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 37, "outputs": [ { "data": { "text/plain": " person_id person_name person_name_orig_lg \n0 1 Nokia Corporation Nokia Corporation \\\n1 128 Nokia Siemens Networks Oy Nokia Siemens Networks Oy \n2 5217785 Nokia Corporation Nokia Corporation \n3 5217811 Nokia Corporation Nokia Corporation \n4 5232170 Nokia Siemens Networks Oy Nokia Siemens Networks Oy \n... ... ... ... \n112235 85719932 VIKSTREM, Erik ВИКСТРЁМ, Эрик \n112236 85720336 HWANG, LING-CHI HWANG, LING-CHI \n112237 85720376 LI, I Chan LI, I Chan \n112238 85720469 TING, Chia Ching TING, Chia Ching \n112239 85720500 WANG, YU-CHEIH WANG, YU-CHEIH \n\n person_address person_ctry_code nuts nuts_level \n0 Keilalahdentie 4,02150 Espoo FI FI1B1 3 \\\n1 Karaportti 3,02610 Espoo FI FI1B1 3 \n2 Espoo FI FI 0 \n3 NaN FI FI 0 \n4 Espoo FI FI 0 \n... ... ... ... ... \n112235 NaN SE SE 0 \n112236 NaN TW NaN 9 \n112237 NaN TW NaN 9 \n112238 TW TW NaN 9 \n112239 NaN TW NaN 9 \n\n doc_std_name_id doc_std_name psn_id \n0 1 NOKIA CORP 23782051 \\\n1 112 NOKIA SIEMENS NETWORKS OY 23782129 \n2 1 NOKIA CORP 23782051 \n3 1 NOKIA CORP 23782051 \n4 112 NOKIA SIEMENS NETWORKS OY 23782129 \n... ... ... ... \n112235 38919340 VIKSTREM ERIK 185719932 \n112236 35599384 HWANG LING-CHI 185720336 \n112237 38707281 LI I CHAN 185720376 \n112238 23937900 TING CHIA CHING 185720469 \n112239 38204835 WANG YU-CHEIH 185720500 \n\n psn_name psn_level psn_sector han_id han_name \n0 NOKIA CORPORATION 2 COMPANY 2125445 NOKIA CORP \\\n1 NOKIA NETWORKS 2 COMPANY 2125445 NOKIA CORP \n2 NOKIA CORPORATION 2 COMPANY 2125445 NOKIA CORP \n3 NOKIA CORPORATION 2 COMPANY 2125445 NOKIA CORP \n4 NOKIA NETWORKS 2 COMPANY 2125445 NOKIA CORP \n... ... ... ... ... ... 
\n112235 VIKSTREM, Erik 0 UNKNOWN 185719932 VIKSTREM, Erik \n112236 HWANG, LING-CHI 0 UNKNOWN 185720336 HWANG, LING-CHI \n112237 LI, I Chan 0 UNKNOWN 185720376 LI, I Chan \n112238 TING, Chia Ching 0 UNKNOWN 185720469 TING, Chia Ching \n112239 WANG, YU-CHEIH 0 UNKNOWN 185720500 WANG, YU-CHEIH \n\n han_harmonized psn_sector_primary \n0 2 COMPANY \n1 2 COMPANY \n2 2 COMPANY \n3 2 COMPANY \n4 2 COMPANY \n... ... ... \n112235 0 UNKNOWN \n112236 0 UNKNOWN \n112237 0 UNKNOWN \n112238 0 UNKNOWN \n112239 0 UNKNOWN \n\n[112240 rows x 17 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
person_idperson_nameperson_name_orig_lgperson_addressperson_ctry_codenutsnuts_leveldoc_std_name_iddoc_std_namepsn_idpsn_namepsn_levelpsn_sectorhan_idhan_namehan_harmonizedpsn_sector_primary
01Nokia CorporationNokia CorporationKeilalahdentie 4,02150 EspooFIFI1B131NOKIA CORP23782051NOKIA CORPORATION2COMPANY2125445NOKIA CORP2COMPANY
1128Nokia Siemens Networks OyNokia Siemens Networks OyKaraportti 3,02610 EspooFIFI1B13112NOKIA SIEMENS NETWORKS OY23782129NOKIA NETWORKS2COMPANY2125445NOKIA CORP2COMPANY
25217785Nokia CorporationNokia CorporationEspooFIFI01NOKIA CORP23782051NOKIA CORPORATION2COMPANY2125445NOKIA CORP2COMPANY
35217811Nokia CorporationNokia CorporationNaNFIFI01NOKIA CORP23782051NOKIA CORPORATION2COMPANY2125445NOKIA CORP2COMPANY
45232170Nokia Siemens Networks OyNokia Siemens Networks OyEspooFIFI0112NOKIA SIEMENS NETWORKS OY23782129NOKIA NETWORKS2COMPANY2125445NOKIA CORP2COMPANY
......................................................
11223585719932VIKSTREM, ErikВИКСТРЁМ, ЭрикNaNSESE038919340VIKSTREM ERIK185719932VIKSTREM, Erik0UNKNOWN185719932VIKSTREM, Erik0UNKNOWN
11223685720336HWANG, LING-CHIHWANG, LING-CHINaNTWNaN935599384HWANG LING-CHI185720336HWANG, LING-CHI0UNKNOWN185720336HWANG, LING-CHI0UNKNOWN
11223785720376LI, I ChanLI, I ChanNaNTWNaN938707281LI I CHAN185720376LI, I Chan0UNKNOWN185720376LI, I Chan0UNKNOWN
11223885720469TING, Chia ChingTING, Chia ChingTWTWNaN923937900TING CHIA CHING185720469TING, Chia Ching0UNKNOWN185720469TING, Chia Ching0UNKNOWN
11223985720500WANG, YU-CHEIHWANG, YU-CHEIHNaNTWNaN938204835WANG YU-CHEIH185720500WANG, YU-CHEIH0UNKNOWN185720500WANG, YU-CHEIH0UNKNOWN
\n

112240 rows × 17 columns

\n
" }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Primary sector of a han_id = the most frequent psn_sector among its person rows;\n", "# when several sectors tie, the first entry returned by mode() is taken.\n", "pers_sector_primary = (\n", "    pers.groupby(\"han_id\", as_index=False)[\"psn_sector\"]\n", "    .agg(lambda sectors: sectors.mode()[0])\n", "    .rename(columns={\"psn_sector\": \"psn_sector_primary\"})\n", ")\n", "\n", "# Copy the per-han_id primary sector onto every person row.\n", "persn = pd.merge(pers, pers_sector_primary, on=\"han_id\")\n", "persn" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 38, "outputs": [ { "data": { "text/plain": " han_id psn_sector_primary\n0 264 GOV NON-PROFIT UNIVERSITY\n1 627 COMPANY\n2 974 COMPANY\n3 1480 COMPANY\n4 1699 COMPANY\n... ... ...\n106154 185719932 UNKNOWN\n106155 185720336 UNKNOWN\n106156 185720376 UNKNOWN\n106157 185720469 UNKNOWN\n106158 185720500 UNKNOWN\n\n[106159 rows x 2 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
han_idpsn_sector_primary
0264GOV NON-PROFIT UNIVERSITY
1627COMPANY
2974COMPANY
31480COMPANY
41699COMPANY
.........
106154185719932UNKNOWN
106155185720336UNKNOWN
106156185720376UNKNOWN
106157185720469UNKNOWN
106158185720500UNKNOWN
\n

106159 rows × 2 columns

\n
" }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pers_sector_primary" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 20, "outputs": [], "source": [ "# Export applications with their titles. merge() defaults to an inner join,\n", "# so an application without a row in appln_title is left out of the file.\n", "appln_merge = appln.merge(appln_title, on=\"appln_id\")\n", "appln_merge.to_excel(\"appln_data.xlsx\", index=False)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 21, "outputs": [], "source": [ "# Export every application-person link together with that person's attributes.\n", "person_merge = appln_pers.merge(pers, on=\"person_id\")\n", "person_merge.to_excel(\"person_data.xlsx\", index=False)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 18, "outputs": [ { "data": { "text/plain": "array(['FI', 'NL', 'FR', 'DE', 'DK', 'AT', 'SE', 'BE', 'TW', 'LU', 'CN',\n 'IT', 'HU', 'IE', 'SI', 'CZ', 'ES', 'HK', 'PL', 'CY', 'SK', 'PT',\n 'LT', 'EE', 'MT', 'GR', 'RO', 'BG', 'HR', 'MO', 'LV'], dtype=object)" }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pers[\"person_ctry_code\"].unique()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# The country column keeps its pers name, person_ctry_code, after the merge;\n", "# the former key \"cry_code\" does not exist and raised KeyError.\n", "person_merge[\"person_ctry_code\"].unique()" ], "metadata": { "collapsed": false } } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }