init commit

utku_keyword_suggestion
radvanyimome 2 years ago
parent e060613834
commit d6d9b0eaf6

@ -0,0 +1,866 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"id": "a8be6839",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import janitor\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from matplotlib.ticker import MaxNLocator\n",
"import math\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "211ba466",
"metadata": {},
"outputs": [],
"source": [
"# Folder with the scoped table extracts (file names follow a tls_2xx pattern).\n",
"outdir=\"EU_CH_scope\"\n",
"\n",
"# Read the five extracts in a fixed order and bind each one to its own frame.\n",
"appln, appln_title, pers, appln_pers, appln_cpc = map(\n",
"    pd.read_csv,\n",
"    (\n",
"        f\"{outdir}/tls_201_scope.csv\",\n",
"        f\"{outdir}/tls_202_scope.csv\",\n",
"        f\"{outdir}/tls_206_scope.csv\",\n",
"        f\"{outdir}/tls_207_scope.csv\",\n",
"        f\"{outdir}/tls_224_scope.csv\",\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "f878b151",
"metadata": {},
"outputs": [],
"source": [
"# workdir_path=r\"CPCTitleList202302\"\n",
"# # outfile='wos_extract_complete.csv'\n",
"# # with_header=True\n",
"# cpc_ids = pd.DataFrame()\n",
"# for root, dirs, files in os.walk(workdir_path):\n",
"# for filename in files:\n",
"# path=os.path.join(root, filename)\n",
"# section = pd.read_csv(path, sep='\\t', header=None)\n",
"# cpc_ids=pd.concat([cpc_ids,section], ignore_index=True)\n",
"# cpc_ids.columns =[\"cpc_id\",\"idk\",\"cpc_name\"]\n",
"# cpc_ids = cpc_ids.drop(columns=\"idk\")"
]
},
{
"cell_type": "code",
"execution_count": 106,
"id": "95ea20da",
"metadata": {},
"outputs": [],
"source": [
"# Build the CPC title table: 'code' and 'title' are read from the section\n",
"# files, the remaining columns are cut out of each code by position.\n",
"parsed = {x: [] for x in ['code', 'title', 'section', 'class', 'subclass', 'group', 'main_group']}\n",
"for letter in 'ABCDEFGHY':\n",
"    file = f'CPCTitleList202302/cpc-section-{letter}_20230201.txt'\n",
"    # Pin the encoding instead of relying on the platform default;\n",
"    # 'utf-8-sig' additionally drops a leading BOM if a file has one.\n",
"    # NOTE(review): assumes the title lists are UTF-8 - confirm.\n",
"    with open(file, encoding='utf-8-sig') as f:\n",
"        for line in f:\n",
"            vals = line.strip().split('\\t')\n",
"            # Two fields: code + title. Three fields: the title is the last\n",
"            # one and the middle field is not used. Other rows are skipped.\n",
"            if len(vals) == 2:\n",
"                parsed['code'].append(vals[0])\n",
"                parsed['title'].append(vals[1])\n",
"            elif len(vals) == 3:\n",
"                parsed['code'].append(vals[0])\n",
"                parsed['title'].append(vals[2])\n",
"\n",
"# Slice every code by position, e.g. 'A01B1/02' -> 'A', '01', 'B', '1', '02'.\n",
"# A part is None when the code is too short to contain it.\n",
"for code in parsed['code']:\n",
"    parsed['section'].append(code[0] if len(code) >= 1 else None)\n",
"    parsed['class'].append(code[1:3] if len(code) >= 3 else None)\n",
"    parsed['subclass'].append(code[3] if len(code) >= 4 else None)\n",
"    # 'group' is the number before the slash, 'main_group' the part after it.\n",
"    parsed['group'].append(code.split('/')[0][4:] if len(code) >= 5 else None)\n",
"    parsed['main_group'].append(code.split('/')[-1] if \"/\" in code else None)\n",
"\n",
"cpc_ids2023 = pd.DataFrame.from_dict(parsed)\n",
"cpc_ids2023['cpc_version']=2023\n",
"# dtype=str keeps values such as a class of '01' as text, matching the\n",
"# string columns built above (type inference would turn it into 1).\n",
"# NOTE(review): assumes the CSV only holds these text columns - confirm.\n",
"cpc_ids2022 = pd.read_csv(\"CPC_data/cpc_titles_2022.csv\", dtype=str)\n",
"cpc_ids2022['cpc_version']=2022\n",
"# The 2023 rows come first, so drop_duplicates (which keeps the first\n",
"# occurrence) prefers the 2023 title when a code exists in both versions.\n",
"cpc_ids = pd.concat([cpc_ids2023,cpc_ids2022], ignore_index=True)\n",
"cpc_ids = cpc_ids.rename(columns={\"code\":\"cpc_id\",\"title\":\"cpc_name\"}).drop_duplicates(subset=\"cpc_id\")"
]
},
{
"cell_type": "code",
"execution_count": 116,
"id": "907d9c3e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cpc_id</th>\n",
" <th>cpc_name</th>\n",
" <th>section</th>\n",
" <th>class</th>\n",
" <th>subclass</th>\n",
" <th>group</th>\n",
" <th>main_group</th>\n",
" <th>cpc_version</th>\n",
" <th>cpc_taxonomy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>HUMAN NECESSITIES</td>\n",
" <td>A</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES)]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>A01</td>\n",
" <td>AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>A01B</td>\n",
" <td>SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>A01B1/00</td>\n",
" <td>Hand tools (edge trimmers for lawns A01G3/06 ...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>1</td>\n",
" <td>00</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>A01B1/02</td>\n",
" <td>Spades; Shovels {(hand-operated dredgers E02F3...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>1</td>\n",
" <td>02</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cpc_id cpc_name section class \n",
"0 A HUMAN NECESSITIES A None \\\n",
"1 A01 AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI... A 01 \n",
"2 A01B SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS... A 01 \n",
"3 A01B1/00 Hand tools (edge trimmers for lawns A01G3/06 ... A 01 \n",
"4 A01B1/02 Spades; Shovels {(hand-operated dredgers E02F3... A 01 \n",
"\n",
" subclass group main_group cpc_version \n",
"0 None None None 2023 \\\n",
"1 None None None 2023 \n",
"2 B None None 2023 \n",
"3 B 1 00 2023 \n",
"4 B 1 02 2023 \n",
"\n",
" cpc_taxonomy \n",
"0 [(A, HUMAN NECESSITIES)] \n",
"1 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"2 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"3 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"4 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... "
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 108,
"id": "1be8971a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"40 cpc_ids not found\n",
"0.11297201118422912 % lost\n"
]
}
],
"source": [
"# Strip blanks from the application-side symbols so they can be joined on cpc_id.\n",
"appln_cpc[\"cpc_id\"] = appln_cpc[\"cpc_class_symbol\"].str.replace(\" \",\"\")\n",
"appln_cpc_tax = pd.merge(appln_cpc, cpc_ids, how=\"left\", on=\"cpc_id\")\n",
"\n",
"# Distinct symbols that have no title after the left join, and all distinct symbols.\n",
"unmatched_ids = appln_cpc_tax[appln_cpc_tax[\"cpc_name\"].isna()][\"cpc_id\"].unique()\n",
"all_ids = appln_cpc_tax[\"cpc_id\"].unique()\n",
"\n",
"print(len(unmatched_ids), \"cpc_ids not found\")\n",
"print(len(unmatched_ids)/len(all_ids)*100, \"% lost\")"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "b1274c34",
"metadata": {},
"outputs": [],
"source": [
"# Lookup table: CPC symbol with blanks removed -> title.\n",
"cpc_dict = {\n",
"    symbol: title\n",
"    for symbol, title in zip(cpc_ids[\"cpc_id\"].str.replace(\" \",\"\"), cpc_ids[\"cpc_name\"])\n",
"}\n",
"# cpc_dict"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "2a7e39ee",
"metadata": {},
"outputs": [],
"source": [
"def cpc_classifier(id_text):\n",
" taxonomy = []\n",
" iter_text = id_text.replace(\" \",\"\")\n",
" for i in range(len(iter_text)+1):\n",
" tax_id = iter_text[:i]\n",
" tax_name = cpc_dict.get(iter_text[:i])\n",
" if tax_name:\n",
" taxonomy.append((tax_id,tax_name))\n",
" return taxonomy\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "e31a013f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('A', 'HUMAN NECESSITIES'),\n",
" ('A01',\n",
" 'AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTING; TRAPPING; FISHING'),\n",
" ('A01B',\n",
" 'SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS, DETAILS, OR ACCESSORIES OF AGRICULTURAL MACHINES OR IMPLEMENTS, IN GENERAL (making or covering furrows or holes for sowing, planting, or manuring A01C5/00; soil working for engineering purposes E01, E02, E21; {measuring areas for agricultural purposes G01B})'),\n",
" ('A01B1/06',\n",
" 'Hoes; Hand cultivators {(rakes A01D7/00; forks A01D9/00; picks B25D)}'),\n",
" ('A01B1/065', '{powered}')]"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cpc_classifier(\"A01B1/065\")"
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "f09a616c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cpc_id</th>\n",
" <th>cpc_name</th>\n",
" <th>section</th>\n",
" <th>class</th>\n",
" <th>subclass</th>\n",
" <th>group</th>\n",
" <th>main_group</th>\n",
" <th>cpc_version</th>\n",
" <th>cpc_taxonomy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>HUMAN NECESSITIES</td>\n",
" <td>A</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES)]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>A01</td>\n",
" <td>AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>A01B</td>\n",
" <td>SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>A01B1/00</td>\n",
" <td>Hand tools (edge trimmers for lawns A01G3/06 ...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>1</td>\n",
" <td>00</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>A01B1/02</td>\n",
" <td>Spades; Shovels {(hand-operated dredgers E02F3...</td>\n",
" <td>A</td>\n",
" <td>01</td>\n",
" <td>B</td>\n",
" <td>1</td>\n",
" <td>02</td>\n",
" <td>2023</td>\n",
" <td>[(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" cpc_id cpc_name section class \n",
"0 A HUMAN NECESSITIES A None \\\n",
"1 A01 AGRICULTURE; FORESTRY; ANIMAL HUSBANDRY; HUNTI... A 01 \n",
"2 A01B SOIL WORKING IN AGRICULTURE OR FORESTRY; PARTS... A 01 \n",
"3 A01B1/00 Hand tools (edge trimmers for lawns A01G3/06 ... A 01 \n",
"4 A01B1/02 Spades; Shovels {(hand-operated dredgers E02F3... A 01 \n",
"\n",
" subclass group main_group cpc_version \n",
"0 None None None 2023 \\\n",
"1 None None None 2023 \n",
"2 B None None 2023 \n",
"3 B 1 00 2023 \n",
"4 B 1 02 2023 \n",
"\n",
" cpc_taxonomy \n",
"0 [(A, HUMAN NECESSITIES)] \n",
"1 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"2 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"3 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... \n",
"4 [(A, HUMAN NECESSITIES), (A01, AGRICULTURE; FO... "
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cpc_ids[\"cpc_taxonomy\"] = cpc_ids[\"cpc_id\"].map(cpc_classifier)\n",
"cpc_ids.head()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "f3fa8bf3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"40 cpc_ids not found\n",
"0.11297201118422912 % lost\n"
]
}
],
"source": [
"# Same join as earlier in the notebook; in file order it follows the cell that\n",
"# adds cpc_taxonomy to cpc_ids, so the merged frame should carry that column too.\n",
"appln_cpc[\"cpc_id\"] = appln_cpc[\"cpc_class_symbol\"].str.replace(\" \",\"\")\n",
"appln_cpc_tax = pd.merge(appln_cpc, cpc_ids, how=\"left\", on=\"cpc_id\")\n",
"\n",
"# Distinct symbols without a title after the left join, and all distinct symbols.\n",
"ids_without_title = appln_cpc_tax[appln_cpc_tax[\"cpc_name\"].isna()][\"cpc_id\"].unique()\n",
"ids_total = appln_cpc_tax[\"cpc_id\"].unique()\n",
"\n",
"print(len(ids_without_title), \"cpc_ids not found\")\n",
"print(len(ids_without_title)/len(ids_total)*100, \"% lost\")"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "58701721",
"metadata": {},
"outputs": [],
"source": [
"# appln_cpc_tax[appln_cpc_tax[\"cpc_name\"].isna()][\"cpc_id\"].unique()"
]
},
{
"cell_type": "markdown",
"id": "ca631acf",
"metadata": {},
"source": [
"## 'AI' keywords"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "6c3baa5b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cpc_id</th>\n",
" <th>cpc_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12725</th>\n",
" <td>A61B1/000096</td>\n",
" <td>{using artificial intelligence}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13764</th>\n",
" <td>A61B5/7264</td>\n",
" <td>{Classification of physiological signals or da...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45143</th>\n",
" <td>B23K31/006</td>\n",
" <td>{relating to using of neural networks}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47746</th>\n",
" <td>B25J9/161</td>\n",
" <td>{Hardware, e.g. neural networks, fuzzy logic, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53935</th>\n",
" <td>B29C66/965</td>\n",
" <td>{using artificial neural networks}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240426</th>\n",
" <td>H04Q2213/343</td>\n",
" <td>Neural network</td>\n",
" </tr>\n",
" <tr>\n",
" <th>240673</th>\n",
" <td>H04R25/507</td>\n",
" <td>{implemented by neural network or fuzzy logic}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246159</th>\n",
" <td>Y10S128/924</td>\n",
" <td>using artificial intelligence</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246160</th>\n",
" <td>Y10S128/925</td>\n",
" <td>Neural network</td>\n",
" </tr>\n",
" <tr>\n",
" <th>250570</th>\n",
" <td>Y10S706/00</td>\n",
" <td>Data processing: artificial intelligence</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>105 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" cpc_id cpc_name\n",
"12725 A61B1/000096 {using artificial intelligence}\n",
"13764 A61B5/7264 {Classification of physiological signals or da...\n",
"45143 B23K31/006 {relating to using of neural networks}\n",
"47746 B25J9/161 {Hardware, e.g. neural networks, fuzzy logic, ...\n",
"53935 B29C66/965 {using artificial neural networks}\n",
"... ... ...\n",
"240426 H04Q2213/343 Neural network\n",
"240673 H04R25/507 {implemented by neural network or fuzzy logic}\n",
"246159 Y10S128/924 using artificial intelligence\n",
"246160 Y10S128/925 Neural network\n",
"250570 Y10S706/00 Data processing: artificial intelligence\n",
"\n",
"[105 rows x 2 columns]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cpc_ids[cpc_ids[\"cpc_name\"].str.lower().str.contains(\"machine learn|neural network|deep learn|deep network|artificial intelligence\", regex=True)]"
]
},
{
"cell_type": "code",
"execution_count": 121,
"id": "2e8368b4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>appln_id</th>\n",
" <th>appln_auth</th>\n",
" <th>appln_nr</th>\n",
" <th>appln_kind</th>\n",
" <th>appln_filing_date</th>\n",
" <th>appln_filing_year</th>\n",
" <th>appln_nr_original</th>\n",
" <th>ipr_type</th>\n",
" <th>receiving_office</th>\n",
" <th>internat_appln_id</th>\n",
" <th>...</th>\n",
" <th>earliest_pat_publn_id</th>\n",
" <th>granted</th>\n",
" <th>docdb_family_id</th>\n",
" <th>inpadoc_family_id</th>\n",
" <th>docdb_family_size</th>\n",
" <th>nb_citing_docdb_fam</th>\n",
" <th>nb_applicants</th>\n",
" <th>nb_inventors</th>\n",
" <th>appln_title_lg</th>\n",
" <th>appln_title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>340657036</td>\n",
" <td>EP</td>\n",
" <td>12000117</td>\n",
" <td>A</td>\n",
" <td>2012-01-09</td>\n",
" <td>2012</td>\n",
" <td>12000117</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>407623142</td>\n",
" <td>Y</td>\n",
" <td>45507394</td>\n",
" <td>340657036</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>en</td>\n",
" <td>Rotating membrane filter disc apparatus</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>340982410</td>\n",
" <td>EP</td>\n",
" <td>12151915</td>\n",
" <td>A</td>\n",
" <td>2012-01-20</td>\n",
" <td>2012</td>\n",
" <td>12151915</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>365158710</td>\n",
" <td>Y</td>\n",
" <td>45531220</td>\n",
" <td>340982410</td>\n",
" <td>2</td>\n",
" <td>16</td>\n",
" <td>2</td>\n",
" <td>6</td>\n",
" <td>en</td>\n",
" <td>Heating-Cooling-Capacity measurement controlli...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>341078960</td>\n",
" <td>EP</td>\n",
" <td>12700310</td>\n",
" <td>A</td>\n",
" <td>2012-01-11</td>\n",
" <td>2012</td>\n",
" <td>12700310</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>340778427</td>\n",
" <td>...</td>\n",
" <td>413564969</td>\n",
" <td>Y</td>\n",
" <td>45491582</td>\n",
" <td>340778427</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>en</td>\n",
" <td>TRANSMISSION DEVICE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>341078962</td>\n",
" <td>EP</td>\n",
" <td>12700311</td>\n",
" <td>A</td>\n",
" <td>2012-01-11</td>\n",
" <td>2012</td>\n",
" <td>12700311</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>340778431</td>\n",
" <td>...</td>\n",
" <td>413564970</td>\n",
" <td>Y</td>\n",
" <td>45491583</td>\n",
" <td>340778431</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>en</td>\n",
" <td>TRANSMISSION DEVICE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>341127772</td>\n",
" <td>EP</td>\n",
" <td>12700372</td>\n",
" <td>A</td>\n",
" <td>2012-01-02</td>\n",
" <td>2012</td>\n",
" <td>12700372</td>\n",
" <td>PI</td>\n",
" <td></td>\n",
" <td>340460188</td>\n",
" <td>...</td>\n",
" <td>421840120</td>\n",
" <td>Y</td>\n",
" <td>45495923</td>\n",
" <td>340460188</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>en</td>\n",
" <td>POWER CONTROL IN A WIRELESS COMMUNICATION SYST...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 28 columns</p>\n",
"</div>"
],
"text/plain": [
" appln_id appln_auth appln_nr appln_kind appln_filing_date \n",
"0 340657036 EP 12000117 A 2012-01-09 \\\n",
"1 340982410 EP 12151915 A 2012-01-20 \n",
"2 341078960 EP 12700310 A 2012-01-11 \n",
"3 341078962 EP 12700311 A 2012-01-11 \n",
"4 341127772 EP 12700372 A 2012-01-02 \n",
"\n",
" appln_filing_year appln_nr_original ipr_type receiving_office \n",
"0 2012 12000117 PI \\\n",
"1 2012 12151915 PI \n",
"2 2012 12700310 PI \n",
"3 2012 12700311 PI \n",
"4 2012 12700372 PI \n",
"\n",
" internat_appln_id ... earliest_pat_publn_id granted docdb_family_id \n",
"0 0 ... 407623142 Y 45507394 \\\n",
"1 0 ... 365158710 Y 45531220 \n",
"2 340778427 ... 413564969 Y 45491582 \n",
"3 340778431 ... 413564970 Y 45491583 \n",
"4 340460188 ... 421840120 Y 45495923 \n",
"\n",
" inpadoc_family_id docdb_family_size nb_citing_docdb_fam nb_applicants \n",
"0 340657036 3 6 1 \\\n",
"1 340982410 2 16 2 \n",
"2 340778427 3 2 1 \n",
"3 340778431 3 3 1 \n",
"4 340460188 4 8 1 \n",
"\n",
" nb_inventors appln_title_lg \n",
"0 2 en \\\n",
"1 6 en \n",
"2 1 en \n",
"3 1 en \n",
"4 2 en \n",
"\n",
" appln_title \n",
"0 Rotating membrane filter disc apparatus \n",
"1 Heating-Cooling-Capacity measurement controlli... \n",
"2 TRANSMISSION DEVICE \n",
"3 TRANSMISSION DEVICE \n",
"4 POWER CONTROL IN A WIRELESS COMMUNICATION SYST... \n",
"\n",
"[5 rows x 28 columns]"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"appln_data = appln.merge(appln_title, on=\"appln_id\")\n",
"appln_data.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 966 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 963 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 888 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 573 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 371 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 414 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 715 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save