You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
blabla/PATSTAT/.ipynb_checkpoints/patstat_analysis-checkpoint...

867 lines
29 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"id": "a8be6839",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import janitor\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from matplotlib.ticker import MaxNLocator\n",
"import math\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "211ba466",
"metadata": {},
"outputs": [],
"source": [
"# Directory holding the scoped PATSTAT CSV extracts read below.\n",
"outdir = \"EU_CH_scope\"\n",
"\n",
"# One frame per extract; names presumably follow the PATSTAT tables\n",
"# (201 applications, 202 titles, 206 persons, 207 person-application links, 224 CPC symbols).\n",
"appln = pd.read_csv(f\"{outdir}/tls_201_scope.csv\")\n",
"appln_title = pd.read_csv(f\"{outdir}/tls_202_scope.csv\")\n",
"pers = pd.read_csv(f\"{outdir}/tls_206_scope.csv\")\n",
"appln_pers = pd.read_csv(f\"{outdir}/tls_207_scope.csv\")\n",
"appln_cpc = pd.read_csv(f\"{outdir}/tls_224_scope.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "f878b151",
"metadata": {},
"outputs": [],
"source": [
"# workdir_path=r\"CPCTitleList202302\"\n",
"# # outfile='wos_extract_complete.csv'\n",
"# # with_header=True\n",
"# cpc_ids = pd.DataFrame()\n",
"# for root, dirs, files in os.walk(workdir_path):\n",
"# for filename in files:\n",
"# path=os.path.join(root, filename)\n",
"# section = pd.read_csv(path, sep='\\t', header=None)\n",
"# cpc_ids=pd.concat([cpc_ids,section], ignore_index=True)\n",
"# cpc_ids.columns =[\"cpc_id\",\"idk\",\"cpc_name\"]\n",
"# cpc_ids = cpc_ids.drop(columns=\"idk\")"
]
},
{
"cell_type": "code",
"execution_count": 106,
"id": "95ea20da",
"metadata": {},
"outputs": [],
"source": [
"# Parse the 2023 CPC title lists (one tab-separated text file per section letter)\n",
"# into parallel lists; the key order below fixes the column order of the frame.\n",
"parsed = {x: [] for x in ['code', 'title', 'section', 'class', 'subclass', 'group', 'main_group']}\n",
"for letter in 'ABCDEFGHY':\n",
"    file = f'CPCTitleList202302/cpc-section-{letter}_20230201.txt'\n",
"    # Explicit encoding so parsing does not depend on the platform default codec\n",
"    # (assumes the title list files are UTF-8).\n",
"    with open(file, encoding='utf-8') as f:\n",
"        for line in f:\n",
"            vals = line.strip().split('\\t')\n",
"            # Two fields: code, title. Three fields: code, <middle field>, title.\n",
"            # Lines with any other number of fields are skipped.\n",
"            if len(vals) not in (2, 3):\n",
"                continue\n",
"            code = vals[0]\n",
"            parsed['code'].append(code)\n",
"            parsed['title'].append(vals[-1])\n",
"            # Positional split of the symbol, e.g. 'A01B1/02' -> 'A', '01', 'B', '1', '02'.\n",
"            # Components the symbol is too short to contain are stored as None.\n",
"            parsed['section'].append(code[0] if len(code) >= 1 else None)\n",
"            parsed['class'].append(code[1:3] if len(code) >= 3 else None)\n",
"            parsed['subclass'].append(code[3] if len(code) >= 4 else None)\n",
"            parsed['group'].append(code.split('/')[0][4:] if len(code) >= 5 else None)\n",
"            parsed['main_group'].append(code.split('/')[-1] if '/' in code else None)\n",
"\n",
"cpc_ids2023 = pd.DataFrame.from_dict(parsed)\n",
"cpc_ids2023['cpc_version'] = 2023\n",
"# NOTE(review): the rename below assumes this CSV also uses 'code'/'title' columns - confirm.\n",
"cpc_ids2022 = pd.read_csv(\"CPC_data/cpc_titles_2022.csv\")\n",
"cpc_ids2022['cpc_version'] = 2022\n",
"# 2023 rows are concatenated first, so drop_duplicates (keep='first' by default)\n",
"# keeps the 2023 entry and 2022 only contributes symbols absent from the 2023 list.\n",
"cpc_ids = (\n",
"    pd.concat([cpc_ids2023, cpc_ids2022], ignore_index=True)\n",
"    .rename(columns={\"code\": \"cpc_id\", \"title\": \"cpc_name\"})\n",
"    .drop_duplicates(subset=\"cpc_id\")\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 116,
"id": "907d9c3e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cpc_id</th>\n",
" <th>cpc_name</th>\n",
" <th>section</th>\n",
" <th>class</th>\n",
" <th>subclass</th>\n",
" <th>group</th>\n",
" <th>main_group</th>\n",
" <th>cpc_version</th>\n",
" <th>cpc_taxonomy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>HUMAN NECESSITIES</td>\n",
" <td>A</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES)]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>A01</td>\n",
" <td>AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>A01B</td>\n",
" <td>SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>A01B1/00</td>\n",
" <td>Hand tools (edge trimmers for lawns A01G3/06 ...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>1</td>\n",
" <td>00</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>A01B1/02</td>\n",
" <td>Spades; Shovels {(hand-operated dredgers E02F3...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>1</td>\n",
" <td>02</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cpc_id cpc_name section class \n",
"0 A HUMAN NECESSITIES A None \\\n",
"1 A01 AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI... A 01 \n",
"2 A01B SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS... A 01 \n",
"3 A01B1/00 Hand tools (edge trimmers for lawns A01G3/06 ... A 01 \n",
"4 A01B1/02 Spades; Shovels {(hand-operated dredgers E02F3... A 01 \n",
"\n",
" subclass group main_group cpc_version \n",
"0 None None None 2023 \\\n",
"1 None None None 2023 \n",
"2 B None None 2023 \n",
"3 B 1 00 2023 \n",
"4 B 1 02 2023 \n",
"\n",
" cpc_taxonomy \n",
"0 [(A, HUMAN NECESSITIES)] \n",
"1 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"2 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"3 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"4 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... "
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 108,
"id": "1be8971a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"40 cpc_ids not found\n",
"0.11297201118422912 % lost\n"
]
}
],
"source": [
"# Strip spaces from the CPC symbol and left-join the titles; rows without a match get a NaN cpc_name.\n",
"appln_cpc[\"cpc_id\"] = appln_cpc[\"cpc_class_symbol\"].str.replace(\" \",\"\")\n",
"appln_cpc_tax = appln_cpc.merge(cpc_ids, on=\"cpc_id\", how=\"left\")\n",
"\n",
"# Report how many distinct symbols have no title, in absolute terms and as a share of all distinct symbols.\n",
"unmatched = appln_cpc_tax[\"cpc_name\"].isna()\n",
"n_missing = len(appln_cpc_tax.loc[unmatched, \"cpc_id\"].unique())\n",
"n_total = len(appln_cpc_tax[\"cpc_id\"].unique())\n",
"print(n_missing, \"cpc_ids not found\")\n",
"print(n_missing/n_total*100, \"% lost\")"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "b1274c34",
"metadata": {},
"outputs": [],
"source": [
"# Lookup table: space-free CPC symbol -> title (for a repeated symbol the last title wins).\n",
"cpc_dict = {\n",
"    symbol: title\n",
"    for symbol, title in zip(cpc_ids[\"cpc_id\"].str.replace(\" \",\"\"), cpc_ids[\"cpc_name\"])\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "2a7e39ee",
"metadata": {},
"outputs": [],
"source": [
"def cpc_classifier(id_text):\n",
"    \"\"\"Return the chain of known CPC entries that are prefixes of a symbol.\n",
"\n",
"    Spaces are removed from ``id_text``. Every leading substring, from the\n",
"    first character up to the full symbol, is looked up in the module-level\n",
"    ``cpc_dict``; prefixes whose title is truthy are collected in order of\n",
"    increasing length.\n",
"\n",
"    Returns a list of ``(prefix, title)`` tuples (empty if nothing matched).\n",
"    \"\"\"\n",
"    symbol = id_text.replace(\" \",\"\")\n",
"    taxonomy = []\n",
"    # Start at 1: the empty prefix is never a CPC symbol, so skip that lookup.\n",
"    for end in range(1, len(symbol) + 1):\n",
"        prefix = symbol[:end]\n",
"        tax_name = cpc_dict.get(prefix)\n",
"        if tax_name:\n",
"            taxonomy.append((prefix, tax_name))\n",
"    return taxonomy"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "e31a013f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('A', 'HUMAN NECESSITIES'),\n",
" ('A01',\n",
" 'AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTING; TRAPPING; FISHING'),\n",
" ('A01B',\n",
" 'SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS, DETAILS, OR ACCESSORIES OF AGRICULTURAL MACHINES OR IMPLEMENTS, IN GENERAL (making or covering furrows or holes for sowing, planting, or manuring A01C5/00; soil working for engineering purposes E01, E02, E21; {measuring areas for agricultural purposes G01B})'),\n",
" ('A01B1/06',\n",
" 'Hoes; Hand cultivators {(rakes A01D7/00; forks A01D9/00; picks B25D)}'),\n",
" ('A01B1/065', '{powered}')]"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cpc_classifier(\"A01B1/065\")"
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "f09a616c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cpc_id</th>\n",
" <th>cpc_name</th>\n",
" <th>section</th>\n",
" <th>class</th>\n",
" <th>subclass</th>\n",
" <th>group</th>\n",
" <th>main_group</th>\n",
" <th>cpc_version</th>\n",
" <th>cpc_taxonomy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>HUMAN NECESSITIES</td>\n",
" <td>A</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES)]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>A01</td>\n",
" <td>AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>A01B</td>\n",
" <td>SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>A01B1/00</td>\n",
" <td>Hand tools (edge trimmers for lawns A01G3/06 ...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>1</td>\n",
" <td>00</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>A01B1/02</td>\n",
" <td>Spades; Shovels {(hand-operated dredgers E02F3...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>1</td>\n",
" <td>02</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cpc_id cpc_name section class \n",
"0 A HUMAN NECESSITIES A None \\\n",
"1 A01 AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI... A 01 \n",
"2 A01B SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS... A 01 \n",
"3 A01B1/00 Hand tools (edge trimmers for lawns A01G3/06 ... A 01 \n",
"4 A01B1/02 Spades; Shovels {(hand-operated dredgers E02F3... A 01 \n",
"\n",
" subclass group main_group cpc_version \n",
"0 None None None 2023 \\\n",
"1 None None None 2023 \n",
"2 B None None 2023 \n",
"3 B 1 00 2023 \n",
"4 B 1 02 2023 \n",
"\n",
" cpc_taxonomy \n",
"0 [(A, HUMAN NECESSITIES)] \n",
"1 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"2 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"3 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"4 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... "
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Store, for every CPC entry, the list returned by cpc_classifier for its symbol.\n",
"cpc_ids[\"cpc_taxonomy\"] = cpc_ids[\"cpc_id\"].apply(cpc_classifier)\n",
"cpc_ids.head()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "f3fa8bf3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"40 cpc_ids not found\n",
"0.11297201118422912 % lost\n"
]
}
],
"source": [
"# Rebuild the left join against the current cpc_ids frame; rows without a match get a NaN cpc_name.\n",
"appln_cpc[\"cpc_id\"] = appln_cpc[\"cpc_class_symbol\"].str.replace(\" \",\"\")\n",
"appln_cpc_tax = appln_cpc.merge(cpc_ids, on=\"cpc_id\", how=\"left\")\n",
"# Distinct symbols without a title, in absolute terms and as a share of all distinct symbols.\n",
"unmatched = appln_cpc_tax[\"cpc_name\"].isna()\n",
"n_missing = len(appln_cpc_tax.loc[unmatched, \"cpc_id\"].unique())\n",
"n_total = len(appln_cpc_tax[\"cpc_id\"].unique())\n",
"print(n_missing, \"cpc_ids not found\")\n",
"print(n_missing/n_total*100, \"% lost\")"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "58701721",
"metadata": {},
"outputs": [],
"source": [
"# appln_cpc_tax[appln_cpc_tax[\"cpc_name\"].isna()][\"cpc_id\"].unique()"
]
},
{
"cell_type": "markdown",
"id": "ca631acf",
"metadata": {},
"source": [
"## 'AI' keywords"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "6c3baa5b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cpc_id</th>\n",
" <th>cpc_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12725</th>\n",
" <td>A61B1/000096</td>\n",
" <td>{using artificial intelligence}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13764</th>\n",
" <td>A61B5/7264</td>\n",
" <td>{Classification of physiological signals or da...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45143</th>\n",
" <td>B23K31/006</td>\n",
" <td>{relating to using of neural networks}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47746</th>\n",
" <td>B25J9/161</td>\n",
" <td>{Hardware, e.g. neural networks, fuzzy logic, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53935</th>\n",
" <td>B29C66/965</td>\n",
" <td>{using artificial neural networks}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240426</th>\n",
" <td>H04Q2213/343</td>\n",
" <td>Neural network</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240673</th>\n",
" <td>H04R25/507</td>\n",
" <td>{implemented by neural network or fuzzy logic}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246159</th>\n",
" <td>Y10S128/924</td>\n",
" <td>using artificial intelligence</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246160</th>\n",
" <td>Y10S128/925</td>\n",
" <td>Neural network</td>\n",
" </tr>\n",
" <tr>\n",
" <th>250570</th>\n",
" <td>Y10S706/00</td>\n",
" <td>Data processing: artificial intelligence</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>105 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" cpc_id cpc_name\n",
"12725 A61B1/000096 {using artificial intelligence}\n",
"13764 A61B5/7264 {Classification of physiological signals or da...\n",
"45143 B23K31/006 {relating to using of neural networks}\n",
"47746 B25J9/161 {Hardware, e.g. neural networks, fuzzy logic, ...\n",
"53935 B29C66/965 {using artificial neural networks}\n",
"... ... ...\n",
"240426 H04Q2213/343 Neural network\n",
"240673 H04R25/507 {implemented by neural network or fuzzy logic}\n",
"246159 Y10S128/924 using artificial intelligence\n",
"246160 Y10S128/925 Neural network\n",
"250570 Y10S706/00 Data processing: artificial intelligence\n",
"\n",
"[105 rows x 2 columns]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# CPC entries whose lower-cased title mentions an AI-related term.\n",
"# na=False counts a missing title as a non-match; without it the mask contains NaN\n",
"# and cannot be used for boolean indexing.\n",
"ai_pattern = \"machine learn|neural network|deep learn|deep network|artificial intelligence\"\n",
"cpc_ids[cpc_ids[\"cpc_name\"].str.lower().str.contains(ai_pattern, regex=True, na=False)]"
]
},
{
"cell_type": "code",
"execution_count": 121,
"id": "2e8368b4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>appln_id</th>\n",
" <th>appln_auth</th>\n",
" <th>appln_nr</th>\n",
" <th>appln_kind</th>\n",
" <th>appln_filing_date</th>\n",
" <th>appln_filing_year</th>\n",
" <th>appln_nr_original</th>\n",
" <th>ipr_type</th>\n",
" <th>receiving_office</th>\n",
" <th>internat_appln_id</th>\n",
" <th>...</th>\n",
" <th>earliest_pat_publn_id</th>\n",
" <th>granted</th>\n",
" <th>docdb_family_id</th>\n",
" <th>inpadoc_family_id</th>\n",
" <th>docdb_family_size</th>\n",
" <th>nb_citing_docdb_fam</th>\n",
" <th>nb_applicants</th>\n",
" <th>nb_inventors</th>\n",
" <th>appln_title_lg</th>\n",
" <th>appln_title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>340657036</td>\n",
" <td>EP</td>\n",
" <td>12000117</td>\n",
" <td>A</td>\n",
" <td>2012-01-09</td>\n",
" <td>2012</td>\n",
" <td>12000117</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>407623142</td>\n",
" <td>Y</td>\n",
" <td>45507394</td>\n",
" <td>340657036</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>en</td>\n",
" <td>Rotating membrane filter disc apparatus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>340982410</td>\n",
" <td>EP</td>\n",
" <td>12151915</td>\n",
" <td>A</td>\n",
" <td>2012-01-20</td>\n",
" <td>2012</td>\n",
" <td>12151915</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>365158710</td>\n",
" <td>Y</td>\n",
" <td>45531220</td>\n",
" <td>340982410</td>\n",
" <td>2</td>\n",
" <td>16</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>en</td>\n",
" <td>Heating-Cooling-Capacity measurement controlli...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>341078960</td>\n",
" <td>EP</td>\n",
" <td>12700310</td>\n",
" <td>A</td>\n",
" <td>2012-01-11</td>\n",
" <td>2012</td>\n",
" <td>12700310</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>340778427</td>\n",
" <td>...</td>\n",
" <td>413564969</td>\n",
" <td>Y</td>\n",
" <td>45491582</td>\n",
" <td>340778427</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>en</td>\n",
" <td>TRANSMISSION DEVICE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>341078962</td>\n",
" <td>EP</td>\n",
" <td>12700311</td>\n",
" <td>A</td>\n",
" <td>2012-01-11</td>\n",
" <td>2012</td>\n",
" <td>12700311</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>340778431</td>\n",
" <td>...</td>\n",
" <td>413564970</td>\n",
" <td>Y</td>\n",
" <td>45491583</td>\n",
" <td>340778431</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>en</td>\n",
" <td>TRANSMISSION DEVICE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>341127772</td>\n",
" <td>EP</td>\n",
" <td>12700372</td>\n",
" <td>A</td>\n",
" <td>2012-01-02</td>\n",
" <td>2012</td>\n",
" <td>12700372</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>340460188</td>\n",
" <td>...</td>\n",
" <td>421840120</td>\n",
" <td>Y</td>\n",
" <td>45495923</td>\n",
" <td>340460188</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>en</td>\n",
" <td>POWER CONTROL IN A WIRELESS COMMUNICATION SYST...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" appln_id appln_auth appln_nr appln_kind appln_filing_date \n",
"0 340657036 EP 12000117 A 2012-01-09 \\\n",
"1 340982410 EP 12151915 A 2012-01-20 \n",
"2 341078960 EP 12700310 A 2012-01-11 \n",
"3 341078962 EP 12700311 A 2012-01-11 \n",
"4 341127772 EP 12700372 A 2012-01-02 \n",
"\n",
" appln_filing_year appln_nr_original ipr_type receiving_office \n",
"0 2012 12000117 PI \\\n",
"1 2012 12151915 PI \n",
"2 2012 12700310 PI \n",
"3 2012 12700311 PI \n",
"4 2012 12700372 PI \n",
"\n",
" internat_appln_id ... earliest_pat_publn_id granted docdb_family_id \n",
"0 0 ... 407623142 Y 45507394 \\\n",
"1 0 ... 365158710 Y 45531220 \n",
"2 340778427 ... 413564969 Y 45491582 \n",
"3 340778431 ... 413564970 Y 45491583 \n",
"4 340460188 ... 421840120 Y 45495923 \n",
"\n",
" inpadoc_family_id docdb_family_size nb_citing_docdb_fam nb_applicants \n",
"0 340657036 3 6 1 \\\n",
"1 340982410 2 16 2 \n",
"2 340778427 3 2 1 \n",
"3 340778431 3 3 1 \n",
"4 340460188 4 8 1 \n",
"\n",
" nb_inventors appln_title_lg \n",
"0 2 en \\\n",
"1 6 en \n",
"2 1 en \n",
"3 1 en \n",
"4 2 en \n",
"\n",
" appln_title \n",
"0 Rotating membrane filter disc apparatus \n",
"1 Heating-Cooling-Capacity measurement controlli... \n",
"2 TRANSMISSION DEVICE \n",
"3 TRANSMISSION DEVICE \n",
"4 POWER CONTROL IN A WIRELESS COMMUNICATION SYST... \n",
"\n",
"[5 rows x 28 columns]"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inner join on appln_id (the merge default): applications without a title row are dropped.\n",
"appln_data = pd.merge(appln, appln_title, on=\"appln_id\", how=\"inner\")\n",
"appln_data.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}