added new keywords including refined syntax

utku_keyword_suggestion
radvanyimome 2 years ago
parent 57952c8dc0
commit fd24b72359

1
.gitignore vendored

@ -1 +1,2 @@
/PATSTAT/EU_CH_scope/cpc_defs.csv /PATSTAT/EU_CH_scope/cpc_defs.csv
/misc_code/

@ -153,11 +153,10 @@ markov chain,
markov process, markov process,
markov decision process, markov decision process,
monte carlo method, monte carlo method,
bayesian interference, bayesian inference,
kernel method, kernel method,
eigendecomposition, eigendecomposition,
eigen decomposition, eigen decomposition,
kernel method,
radial basis function, radial basis function,
QR decomposition, QR decomposition,
LU decomposition, LU decomposition,
@ -169,4 +168,39 @@ convex optimization,
nonlinear optimization, nonlinear optimization,
L? regulari*, L? regulari*,
ridge regression, ridge regression,
gaussian process gaussian process,
manifold learning,
locally linear embedding*,
vector database*,
vector embedding*,
text mining,
human-robot interact*,
semantic web*,
fuzzy set*,
face recognition &! brain,
object detection &! brain,
multi agent system*,
speech recognition &! brain,
brain computer interface,
intelligent robot*,
remote sensing,
image reconstruction,
representation learning,
data augmentation,
adversarial robustness,
meta learning,
learning system,
adversarial training,
adversarial example*,
generative model*,
large language model*,
few shot learning,
image representation,
optimization algorithm,
swarm optimization,
variational inference,
kalman network*,
knowledge distillation,
kernel learning,
classifier,
lasso regression

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 72, "execution_count": 1,
"metadata": { "metadata": {
"collapsed": true "collapsed": true
}, },
@ -16,7 +16,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 2,
"outputs": [], "outputs": [],
"source": [ "source": [
"agg_df = pd.DataFrame()\n", "agg_df = pd.DataFrame()\n",
@ -39,7 +39,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 3,
"outputs": [], "outputs": [],
"source": [ "source": [
"agg_df[\"region\"] = agg_df[\"query\"].apply(lambda x: \"EU+China\" if \"CU\" in x else \"Global\")\n", "agg_df[\"region\"] = agg_df[\"query\"].apply(lambda x: \"EU+China\" if \"CU\" in x else \"Global\")\n",
@ -52,7 +52,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 83, "execution_count": 4,
"outputs": [], "outputs": [],
"source": [ "source": [
"agg_df = agg_df[~agg_df[\"Record Count\"].isna()]" "agg_df = agg_df[~agg_df[\"Record Count\"].isna()]"
@ -63,14 +63,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 62, "execution_count": 5,
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": " query Record Count\n0 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 972.0\n1 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 451.0\n2 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 12.0\n3 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 5.0\n4 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 2631.0\n.. ... ...\n275 TS=(\"ubiquitous computing\") AND PY=(2011-2022) 3655.0\n276 TS=(\"unstructured data*\") AND PY=(2011-2022) 3386.0\n277 TS=(\"unsupervised deep learning\") AND PY=(2011... 728.0\n278 TS=(\"word embedding*\") AND PY=(2011-2022) 7068.0\n279 TS=(\"word vector*\") AND PY=(2011-2022) 1747.0\n\n[280 rows x 2 columns]", "text/plain": " query Record Count\n0 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 972.0\n1 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 451.0\n2 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 30.0\n3 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 12.0\n4 CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST... 5.0\n.. ... ...\n384 TS=(\"word embedding*\") AND PY=(2011-2022) 7068.0\n385 TS=(\"word vector*\") AND PY=(2011-2022) 1747.0\n386 TS=((\"face recognition\" NOT \"brain\")) AND PY=(... 19690.0\n387 TS=((\"object detection\" NOT \"brain\")) AND PY=(... 28989.0\n388 TS=((\"speech recognition\" NOT \"brain\")) AND PY... 19912.0\n\n[389 rows x 2 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>query</th>\n <th>Record Count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>972.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>451.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>12.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>2631.0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>275</th>\n <td>TS=(\"ubiquitous computing\") AND PY=(2011-2022)</td>\n <td>3655.0</td>\n </tr>\n <tr>\n <th>276</th>\n <td>TS=(\"unstructured data*\") AND PY=(2011-2022)</td>\n <td>3386.0</td>\n </tr>\n <tr>\n <th>277</th>\n <td>TS=(\"unsupervised deep learning\") AND PY=(2011...</td>\n <td>728.0</td>\n </tr>\n <tr>\n <th>278</th>\n <td>TS=(\"word embedding*\") AND PY=(2011-2022)</td>\n <td>7068.0</td>\n </tr>\n <tr>\n <th>279</th>\n <td>TS=(\"word vector*\") AND PY=(2011-2022)</td>\n <td>1747.0</td>\n </tr>\n </tbody>\n</table>\n<p>280 rows × 2 columns</p>\n</div>" "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>query</th>\n <th>Record Count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>972.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>451.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>30.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>12.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>CU=(PEOPLES R CHINA OR HONG KONG) AND CU=(AUST...</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>384</th>\n <td>TS=(\"word embedding*\") AND PY=(2011-2022)</td>\n <td>7068.0</td>\n </tr>\n <tr>\n <th>385</th>\n <td>TS=(\"word vector*\") AND PY=(2011-2022)</td>\n <td>1747.0</td>\n </tr>\n <tr>\n <th>386</th>\n <td>TS=((\"face recognition\" NOT \"brain\")) AND PY=(...</td>\n <td>19690.0</td>\n </tr>\n <tr>\n <th>387</th>\n <td>TS=((\"object detection\" NOT \"brain\")) AND PY=(...</td>\n <td>28989.0</td>\n </tr>\n <tr>\n <th>388</th>\n <td>TS=((\"speech recognition\" NOT \"brain\")) AND PY...</td>\n <td>19912.0</td>\n </tr>\n </tbody>\n</table>\n<p>389 rows × 2 columns</p>\n</div>"
}, },
"execution_count": 62, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -84,7 +84,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 63, "execution_count": 6,
"outputs": [], "outputs": [],
"source": [ "source": [
"# agg_df = agg_df[agg_df[\"Publication Years\"].str.startswith(\"20\", na=False)].copy()\n", "# agg_df = agg_df[agg_df[\"Publication Years\"].str.startswith(\"20\", na=False)].copy()\n",
@ -97,19 +97,10 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 84, "execution_count": 7,
"outputs": [ "outputs": [],
{
"data": {
"text/plain": "Publication Years\n2022 314\n2019 305\n2021 305\n2020 302\n2018 296\n2017 287\n2016 281\n2015 271\n2014 258\n2013 251\n2012 233\n2011 224\n2023 52\n2017 4\n2014 4\n2019 4\n2021 4\n2018 4\n2020 4\n2022 4\n2016 3\n2015 3\n2013 3\n2012 3\n2011 3\n2023 2\nName: count, dtype: int64"
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"agg_df[\"Publication Years\"].value_counts()" "# agg_df[\"Publication Years\"].value_counts()"
], ],
"metadata": { "metadata": {
"collapsed": false "collapsed": false
@ -117,20 +108,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 64, "execution_count": 8,
"outputs": [], "outputs": [],
"source": [], "source": [
"agg_df.to_excel(r'C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\WOS\\wos_processed_data\\query_yearly_agg.xlsx', index=False)"
],
"metadata": { "metadata": {
"collapsed": false "collapsed": false
} }
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 85, "execution_count": 64,
"outputs": [], "outputs": [],
"source": [ "source": [],
"agg_df.to_excel(r'C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\WOS\\wos_processed_data\\query_yearly_agg.xlsx', index=False)"
],
"metadata": { "metadata": {
"collapsed": false "collapsed": false
} }

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save