diff --git a/PATSTAT/CPC_data/cpc_defs.csv b/PATSTAT/CPC_data/cpc_defs.csv index 98b760b..f6dc1fe 100644 --- a/PATSTAT/CPC_data/cpc_defs.csv +++ b/PATSTAT/CPC_data/cpc_defs.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb826a2dea595fe31335060af00aabe71a6a8f660584f2a1adcb04976e6cfdc9 -size 159308737 +oid sha256:ab0f4acc10a622f8a162a9f1f2aaf39b06799f65feab44412aed2dc2d6f27cf8 +size 159305379 diff --git a/PATSTAT/WESTERN_CH_scope/scope_cpc_defs.csv b/PATSTAT/CPC_data/scope_cpc_defs.csv similarity index 100% rename from PATSTAT/WESTERN_CH_scope/scope_cpc_defs.csv rename to PATSTAT/CPC_data/scope_cpc_defs.csv diff --git a/PATSTAT/WESTERN_CH_scope/cpc_defs.csv b/PATSTAT/WESTERN_CH_scope/cpc_defs.csv deleted file mode 100644 index f6dc1fe..0000000 --- a/PATSTAT/WESTERN_CH_scope/cpc_defs.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab0f4acc10a622f8a162a9f1f2aaf39b06799f65feab44412aed2dc2d6f27cf8 -size 159305379 diff --git a/PATSTAT/patstat_analysis_pipeline.ipynb b/PATSTAT/patstat_analysis_pipeline.ipynb index 4d5fd24..b738328 100644 --- a/PATSTAT/patstat_analysis_pipeline.ipynb +++ b/PATSTAT/patstat_analysis_pipeline.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": { "collapsed": true }, @@ -20,10 +20,10 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 3, "outputs": [], "source": [ - "outdir=\"EU_CH_scope/v2_\"\n", + "outdir=\"WESTERN_CH_scope\"\n", "\n", "appln = pd.read_csv(f\"{outdir}/tls_201_scope.csv\")\n", "\n", @@ -42,20 +42,19 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 5, "outputs": [ { "data": { - "text/plain": " appln_id appln_auth appln_nr appln_kind appln_filing_date \n0 330225325 EP 11150195 A 2011-01-05 \\\n1 330322632 EP 11150485 A 2011-01-10 \n2 330350961 EP 11150683 A 2011-01-12 \n3 330374780 WO 2011050339 W 2011-01-12 \n4 330424360 WO 2011050199 W 2011-01-10 \n... ... ... ... ... ... \n64261 575551871 WO 2020142401 W 2020-12-31 \n64262 575551946 WO 2020142230 W 2020-12-31 \n64263 575553943 WO 2021142692 W 2021-12-29 \n64264 575553975 WO 2021142655 W 2021-12-29 \n64265 575556091 WO 2021064274 W 2021-12-20 \n\n appln_filing_year appln_nr_original ipr_type receiving_office \n0 2011 11150195 PI \\\n1 2011 11150485 PI \n2 2011 11150683 PI \n3 2011 EP2011/050339 PI EP \n4 2011 EP2011/050199 PI EP \n... ... ... ... ... \n64261 2020 CN2020/142401 PI CN \n64262 2020 CN2020/142230 PI CN \n64263 2021 CN2021/142692 PI CN \n64264 2021 CN2021/142655 PI CN \n64265 2021 US2021/064274 PI US \n\n internat_appln_id ... earliest_publn_date earliest_publn_year \n0 0 ... 2011-07-13 2011 \\\n1 0 ... 2012-07-11 2012 \n2 0 ... 2012-07-18 2012 \n3 0 ... 2011-07-21 2011 \n4 0 ... 2012-07-19 2012 \n... ... ... ... ... \n64261 0 ... 2022-07-07 2022 \n64262 0 ... 2022-07-07 2022 \n64263 0 ... 2022-07-07 2022 \n64264 0 ... 2022-07-07 2022 \n64265 0 ... 2022-07-07 2022 \n\n earliest_pat_publn_id granted docdb_family_id inpadoc_family_id \n0 335277427 Y 43754737 330225325 \\\n1 364719889 Y 43991052 330322632 \n2 364923578 N 43881056 330350961 \n3 335927718 N 43923624 330374780 \n4 365345607 N 43533009 330424360 \n... ... ... ... ... \n64261 575551872 N 82260109 575551871 \n64262 575551947 N 82260125 575551946 \n64263 575553944 N 79460210 564546189 \n64264 575553976 N 82260272 575553975 \n64265 575556092 N 82132815 575038927 \n\n docdb_family_size nb_citing_docdb_fam nb_applicants nb_inventors \n0 4 16 1 1 \n1 2 5 1 2 \n2 7 12 2 5 \n3 2 8 5 4 \n4 4 13 3 2 \n... ... ... ... ... \n64261 1 0 2 1 \n64262 1 0 3 3 \n64263 2 0 2 6 \n64264 1 0 2 7 \n64265 2 0 4 7 \n\n[64266 rows x 26 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
appln_idappln_authappln_nrappln_kindappln_filing_dateappln_filing_yearappln_nr_originalipr_typereceiving_officeinternat_appln_id...earliest_publn_dateearliest_publn_yearearliest_pat_publn_idgranteddocdb_family_idinpadoc_family_iddocdb_family_sizenb_citing_docdb_famnb_applicantsnb_inventors
0330225325EP11150195A2011-01-05201111150195PI0...2011-07-132011335277427Y4375473733022532541611
1330322632EP11150485A2011-01-10201111150485PI0...2012-07-112012364719889Y439910523303226322512
2330350961EP11150683A2011-01-12201111150683PI0...2012-07-182012364923578N4388105633035096171225
3330374780WO2011050339W2011-01-122011EP2011/050339PIEP0...2011-07-212011335927718N439236243303747802854
4330424360WO2011050199W2011-01-102011EP2011/050199PIEP0...2012-07-192012365345607N4353300933042436041332
..................................................................
64261575551871WO2020142401W2020-12-312020CN2020/142401PICN0...2022-07-072022575551872N822601095755518711021
64262575551946WO2020142230W2020-12-312020CN2020/142230PICN0...2022-07-072022575551947N822601255755519461033
64263575553943WO2021142692W2021-12-292021CN2021/142692PICN0...2022-07-072022575553944N794602105645461892026
64264575553975WO2021142655W2021-12-292021CN2021/142655PICN0...2022-07-072022575553976N822602725755539751027
64265575556091WO2021064274W2021-12-202021US2021/064274PIUS0...2022-07-072022575556092N821328155750389272047
\n

64266 rows × 26 columns

\n
" + "text/plain": "203873" }, - "execution_count": 23, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "appln" + "len(appln)" ], "metadata": { "collapsed": false @@ -63,20 +62,20 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "outputs": [ { "data": { - "text/plain": " appln_id appln_title_lg \n0 330225325 en \\\n1 330322632 en \n2 330350961 en \n3 330374780 en \n4 330424360 en \n... ... ... \n64258 575551871 en \n64259 575551946 en \n64260 575553943 en \n64261 575553975 en \n64262 575556091 en \n\n appln_title \n0 Beverage preparation machine \n1 Method and system for recommending contextual ... \n2 A method and an apparatus for treating at leas... \n3 A METHOD FOR DIAGNOSIS OF FAULT IN VEHICULAR W... \n4 ERROR CONTROL IN A COMMUNICATION SYSTEM \n... ... \n64258 IMAGE STITCHING METHOD AND APPARATUS, AND COMP... \n64259 LOW VOC AND FOOD GRADE RESEALABLE LABEL \n64260 METHOD, DEVICE, COMPUTER READABLE MEDIUM, AND ... \n64261 MULTISPECIFIC ANTIGEN BINDING PROTEINS \n64262 SYSTEM AND METHOD FOR METHANE HYDRATE BASED PR... \n\n[64263 rows x 3 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
appln_idappln_title_lgappln_title
0330225325enBeverage preparation machine
1330322632enMethod and system for recommending contextual ...
2330350961enA method and an apparatus for treating at leas...
3330374780enA METHOD FOR DIAGNOSIS OF FAULT IN VEHICULAR W...
4330424360enERROR CONTROL IN A COMMUNICATION SYSTEM
............
64258575551871enIMAGE STITCHING METHOD AND APPARATUS, AND COMP...
64259575551946enLOW VOC AND FOOD GRADE RESEALABLE LABEL
64260575553943enMETHOD, DEVICE, COMPUTER READABLE MEDIUM, AND ...
64261575553975enMULTISPECIFIC ANTIGEN BINDING PROTEINS
64262575556091enSYSTEM AND METHOD FOR METHANE HYDRATE BASED PR...
\n

64263 rows × 3 columns

\n
" + "text/plain": " appln_id appln_title_lg \n106316 498640253 en \\\n119852 511974583 en \n193586 577006640 en \n172207 556318748 en \n117620 509549284 en \n... ... ... \n58791 448189845 en \n119362 511604550 en \n73722 471815906 en \n45133 438311946 en \n25978 414431520 en \n\n appln_title \n106316 DRAIN CLEANING DEVICE \n119852 Antenna panel switching and beam indication \n193586 Loft bed \n172207 Winch for Securing a Load \n117620 TEMPERATURE CONTROL APPARATUS FORELECTRIC VEHI... \n... ... \n58791 Collaborative spectrum sensing in cognitive ra... \n119362 CLIP-ON GLASSES WITH REPLACEABLE LENS \n73722 Sensitized, photo-sensitive glass and its prod... \n45133 PREPARATION OF 3,4-DIHYDRO-1,4-BENZOXAZEPIN-5(... \n25978 - CRYSTAL OSCILLATOR WITH LOW-POWER MODE \n\n[100 rows x 3 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
appln_idappln_title_lgappln_title
106316498640253enDRAIN CLEANING DEVICE
119852511974583enAntenna panel switching and beam indication
193586577006640enLoft bed
172207556318748enWinch for Securing a Load
117620509549284enTEMPERATURE CONTROL APPARATUS FORELECTRIC VEHI...
............
58791448189845enCollaborative spectrum sensing in cognitive ra...
119362511604550enCLIP-ON GLASSES WITH REPLACEABLE LENS
73722471815906enSensitized, photo-sensitive glass and its prod...
45133438311946enPREPARATION OF 3,4-DIHYDRO-1,4-BENZOXAZEPIN-5(...
25978414431520en- CRYSTAL OSCILLATOR WITH LOW-POWER MODE
\n

100 rows × 3 columns

\n
" }, - "execution_count": 22, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "appln_title" + "appln_title.sample(100)" ], "metadata": { "collapsed": false @@ -84,14 +83,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "outputs": [ { "data": { - "text/plain": " appln_id appln_auth appln_nr appln_kind appln_filing_date \n0 330225325 EP 11150195 A 2011-01-05 \\\n1 330322632 EP 11150485 A 2011-01-10 \n2 330350961 EP 11150683 A 2011-01-12 \n3 330374780 WO 2011050339 W 2011-01-12 \n4 330424360 WO 2011050199 W 2011-01-10 \n\n appln_filing_year appln_nr_original ipr_type receiving_office \n0 2011 11150195 PI \\\n1 2011 11150485 PI \n2 2011 11150683 PI \n3 2011 EP2011/050339 PI EP \n4 2011 EP2011/050199 PI EP \n\n internat_appln_id ... earliest_pat_publn_id granted docdb_family_id \n0 0 ... 335277427 Y 43754737 \\\n1 0 ... 364719889 Y 43991052 \n2 0 ... 364923578 N 43881056 \n3 0 ... 335927718 N 43923624 \n4 0 ... 365345607 N 43533009 \n\n inpadoc_family_id docdb_family_size nb_citing_docdb_fam nb_applicants \n0 330225325 4 16 1 \\\n1 330322632 2 5 1 \n2 330350961 7 12 2 \n3 330374780 2 8 5 \n4 330424360 4 13 3 \n\n nb_inventors appln_title_lg \n0 1 en \\\n1 2 en \n2 5 en \n3 4 en \n4 2 en \n\n appln_title \n0 Beverage preparation machine \n1 Method and system for recommending contextual ... \n2 A method and an apparatus for treating at leas... \n3 A METHOD FOR DIAGNOSIS OF FAULT IN VEHICULAR W... \n4 ERROR CONTROL IN A COMMUNICATION SYSTEM \n\n[5 rows x 28 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
appln_idappln_authappln_nrappln_kindappln_filing_dateappln_filing_yearappln_nr_originalipr_typereceiving_officeinternat_appln_id...earliest_pat_publn_idgranteddocdb_family_idinpadoc_family_iddocdb_family_sizenb_citing_docdb_famnb_applicantsnb_inventorsappln_title_lgappln_title
0330225325EP11150195A2011-01-05201111150195PI0...335277427Y4375473733022532541611enBeverage preparation machine
1330322632EP11150485A2011-01-10201111150485PI0...364719889Y439910523303226322512enMethod and system for recommending contextual ...
2330350961EP11150683A2011-01-12201111150683PI0...364923578N4388105633035096171225enA method and an apparatus for treating at leas...
3330374780WO2011050339W2011-01-122011EP2011/050339PIEP0...335927718N439236243303747802854enA METHOD FOR DIAGNOSIS OF FAULT IN VEHICULAR W...
4330424360WO2011050199W2011-01-102011EP2011/050199PIEP0...365345607N4353300933042436041332enERROR CONTROL IN A COMMUNICATION SYSTEM
\n

5 rows × 28 columns

\n
" + "text/plain": " appln_id appln_auth appln_nr appln_kind appln_filing_date \n0 330225325 EP 11150195 A 2011-01-05 \\\n1 330225397 EP 11150231 A 2011-01-05 \n2 330322632 EP 11150485 A 2011-01-10 \n3 330326785 EP 11150605 A 2011-01-11 \n4 330350961 EP 11150683 A 2011-01-12 \n\n appln_filing_year appln_nr_original ipr_type receiving_office \n0 2011 11150195 PI \\\n1 2011 11150231 PI \n2 2011 11150485 PI \n3 2011 11150605 PI \n4 2011 11150683 PI \n\n internat_appln_id ... earliest_pat_publn_id granted docdb_family_id \n0 0 ... 335277427 Y 43754737 \\\n1 0 ... 335277736 Y 43619902 \n2 0 ... 364719889 Y 43991052 \n3 0 ... 335277720 N 43023665 \n4 0 ... 364923578 N 43881056 \n\n inpadoc_family_id docdb_family_size nb_citing_docdb_fam nb_applicants \n0 330225325 4 16 1 \\\n1 330225397 6 56 1 \n2 330322632 2 5 1 \n3 328518903 6 9 1 \n4 330350961 7 13 2 \n\n nb_inventors appln_title_lg \n0 1 en \\\n1 9 en \n2 2 en \n3 3 en \n4 5 en \n\n appln_title \n0 Beverage preparation machine \n1 Screwdriving tool having a driving tool with a... \n2 Method and system for recommending contextual ... \n3 Apparatus and method for continuous casting of... \n4 A method and an apparatus for treating at leas... \n\n[5 rows x 28 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
appln_idappln_authappln_nrappln_kindappln_filing_dateappln_filing_yearappln_nr_originalipr_typereceiving_officeinternat_appln_id...earliest_pat_publn_idgranteddocdb_family_idinpadoc_family_iddocdb_family_sizenb_citing_docdb_famnb_applicantsnb_inventorsappln_title_lgappln_title
0330225325EP11150195A2011-01-05201111150195PI0...335277427Y4375473733022532541611enBeverage preparation machine
1330225397EP11150231A2011-01-05201111150231PI0...335277736Y4361990233022539765619enScrewdriving tool having a driving tool with a...
2330322632EP11150485A2011-01-10201111150485PI0...364719889Y439910523303226322512enMethod and system for recommending contextual ...
3330326785EP11150605A2011-01-11201111150605PI0...335277720N430236653285189036913enApparatus and method for continuous casting of...
4330350961EP11150683A2011-01-12201111150683PI0...364923578N4388105633035096171325enA method and an apparatus for treating at leas...
\n

5 rows × 28 columns

\n
" }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -106,13 +105,13 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "outputs": [ { "data": { - "text/plain": "array(['EP', 'WO', 'LU', 'FI', 'FR', 'ES', 'NO', 'US', 'GB', 'DO', 'DE',\n 'CA', 'UY', 'SV', 'KR', 'TR', 'CR', 'TW', 'NL', 'SG', 'CO', 'DK',\n 'CU', 'HR', 'AR', 'RU', 'AU', 'PL', 'BE', 'BR', 'MX', 'AP', 'MC',\n 'EC', 'PE', 'HU', 'EA', 'AT', 'RO', 'PT', 'CZ', 'IS', 'HN', 'MA',\n 'MD', 'CN', 'GT', 'UA', 'CL', 'SK', 'PH', 'MY', 'SI', 'HK', 'RS',\n 'IN', 'VN', 'TN', 'IL', 'GE', 'CY', 'SM', 'ZA', 'SE', 'CH', 'LT',\n 'ME', 'JO', 'NI', 'JP', 'SA', 'LV'], dtype=object)" + "text/plain": "array(['EP', 'WO', 'LU', 'FI', 'NO', 'FR', 'GB', 'KR', 'ES', 'US', 'CA',\n 'DO', 'EC', 'DE', 'UY', 'IL', 'SV', 'PL', 'TR', 'CO', 'CR', 'TW',\n 'MA', 'PE', 'SG', 'CU', 'BE', 'DK', 'AR', 'AP', 'HR', 'MX', 'BR',\n 'EA', 'RU', 'AU', 'MC', 'HU', 'PT', 'NL', 'HN', 'AT', 'RO', 'SM',\n 'CH', 'SI', 'IS', 'CZ', 'HK', 'MD', 'JP', 'CN', 'RS', 'GT', 'UA',\n 'CL', 'SK', 'LT', 'PH', 'MY', 'IN', 'VN', 'TN', 'CY', 'GE', 'ZA',\n 'SE', 'ME', 'JO', 'NI', 'SA'], dtype=object)" }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" }