"\"\"\"(TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\")) AND (CU=PEOPLES R CHINA AND (CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=REPUBLIC OF CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN))\"\"\""
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"other keywords: pattern recognition, computer vision, image classification, reinforcement learning, support vector machines, recommender system, random forest, ensemble models, image processing, generative network, ai ethic, natural language processing, clustering algorithm, feature extraction, time series forecast, anomaly detection, identity fraud detection, dimensionality reduction, feature elicitation, chatbot, clustering, unsupervised learning, supervised learning, convolutional network, adversarial network\n",
"\n",
"# AI ETHICS keyword!!!"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Only CPC classification? Or some basic PTC? (ASEAN analysis had some)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Baseline of co-publications\n",
"#\n",
"# Use address instead of CU?\n",
"# plus countries UK Norway Switzerland | Turkey Serbia\n",
"# (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"computer vision\") OR TS=(\"pattern recognition\")) AND"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 53,
"outputs": [
{
"data": {
"text/plain": "'TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\")'"
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"keywords = [c.strip() for c in keywords[0].split(\",\")]\n",
"\n",
"keywords_str = ' OR '.join('TS=(\"'+k+'\")' for k in keywords)\n",
"keywords_str"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 54,
"outputs": [
{
"data": {
"text/plain": "'CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND'"
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coop_countries = [c.strip().upper() for c in coop_countries[0].split(\",\")]\n",
"focal_countries = [c.strip().upper() for c in focal_countries_list]\n",
"\n",
"foc_str = ' OR '.join([country_mode+'='+c for c in focal_countries])\n",
"coop_str = ' OR '.join([country_mode+'='+c for c in coop_countries])\n",
"\n",
"coop_str"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 55,
"outputs": [
{
"data": {
"text/plain": "'CU=PEOPLES R CHINA OR CU=HONG KONG'"
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"foc_str"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 58,
"outputs": [
{
"data": {
"text/plain": "'(CU=PEOPLES R CHINA OR CU=HONG KONG) AND (CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\"))'"
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scope_query = f'({foc_str}) AND ({coop_str}) AND ({keywords_str})'\n",
"scope_query"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 60,
"outputs": [
{
"data": {
"text/plain": "'(CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\"))'"
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ch_scope_query = f'({coop_str}) AND ({keywords_str})'\n",
"ch_scope_query"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [
{
"ename": "ChunkedEncodingError",
"evalue": "(\"Connection broken: ConnectionResetError(10054, 'A létező kapcsolatot a távoli állomás kényszerítetten bezárta', None, 10054, None)\", ConnectionResetError(10054, 'A létező kapcsolatot a távoli állomás kényszerítetten bezárta', None, 10054, None))",
"output_type": "error",
"traceback": [
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\site-packages\\urllib3\\response.py:444\u001B[0m, in \u001B[0;36mHTTPResponse._error_catcher\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 443\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 444\u001B[0m \u001B[38;5;28;01myield\u001B[39;00m\n\u001B[0;32m 446\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m SocketTimeout:\n\u001B[0;32m 447\u001B[0m \u001B[38;5;66;03m# FIXME: Ideally we'd like to include the url in the ReadTimeoutError but\u001B[39;00m\n\u001B[0;32m 448\u001B[0m \u001B[38;5;66;03m# there is yet no clean way to get at it from this context.\u001B[39;00m\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\site-packages\\urllib3\\response.py:567\u001B[0m, in \u001B[0;36mHTTPResponse.read\u001B[1;34m(self, amt, decode_content, cache_content)\u001B[0m\n\u001B[0;32m 566\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_error_catcher():\n\u001B[1;32m--> 567\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_fp_read\u001B[49m\u001B[43m(\u001B[49m\u001B[43mamt\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m fp_closed \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;124mb\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m\"\u001B[39m\n\u001B[0;32m 568\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m amt \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\site-packages\\urllib3\\response.py:533\u001B[0m, in \u001B[0;36mHTTPResponse._fp_read\u001B[1;34m(self, amt)\u001B[0m\n\u001B[0;32m 531\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m 532\u001B[0m \u001B[38;5;66;03m# StringIO doesn't like amt=None\u001B[39;00m\n\u001B[1;32m--> 533\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_fp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mread\u001B[49m\u001B[43m(\u001B[49m\u001B[43mamt\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mif\u001B[39;00m amt \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_fp\u001B[38;5;241m.\u001B[39mread()\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\http\\client.py:463\u001B[0m, in \u001B[0;36mHTTPResponse.read\u001B[1;34m(self, amt)\u001B[0m\n\u001B[0;32m 462\u001B[0m b \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mbytearray\u001B[39m(amt)\n\u001B[1;32m--> 463\u001B[0m n \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mreadinto\u001B[49m\u001B[43m(\u001B[49m\u001B[43mb\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 464\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mmemoryview\u001B[39m(b)[:n]\u001B[38;5;241m.\u001B[39mtobytes()\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\http\\client.py:507\u001B[0m, in \u001B[0;36mHTTPResponse.readinto\u001B[1;34m(self, b)\u001B[0m\n\u001B[0;32m 504\u001B[0m \u001B[38;5;66;03m# we do not use _safe_read() here because this may be a .will_close\u001B[39;00m\n\u001B[0;32m 505\u001B[0m \u001B[38;5;66;03m# connection, and the user is reading more bytes than will be provided\u001B[39;00m\n\u001B[0;32m 506\u001B[0m \u001B[38;5;66;03m# (for example, reading in 1k chunks)\u001B[39;00m\n\u001B[1;32m--> 507\u001B[0m n \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mreadinto\u001B[49m\u001B[43m(\u001B[49m\u001B[43mb\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 508\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m n \u001B[38;5;129;01mand\u001B[39;00m b:\n\u001B[0;32m 509\u001B[0m \u001B[38;5;66;03m# Ideally, we would raise IncompleteRead if the content-length\u001B[39;00m\n\u001B[0;32m 510\u001B[0m \u001B[38;5;66;03m# wasn't satisfied, but it might break compatibility.\u001B[39;00m\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\site-packages\\urllib3\\response.py:628\u001B[0m, in \u001B[0;36mHTTPResponse.stream\u001B[1;34m(self, amt, decode_content)\u001B[0m\n\u001B[0;32m 627\u001B[0m \u001B[38;5;28;01mwhile\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m is_fp_closed(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_fp):\n\u001B[1;32m--> 628\u001B[0m data \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mread\u001B[49m\u001B[43m(\u001B[49m\u001B[43mamt\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mamt\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdecode_content\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mdecode_content\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 630\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m data:\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\site-packages\\urllib3\\response.py:593\u001B[0m, in \u001B[0;36mHTTPResponse.read\u001B[1;34m(self, amt, decode_content, cache_content)\u001B[0m\n\u001B[0;32m 584\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39menforce_content_length \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlength_remaining \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m (\n\u001B[0;32m 585\u001B[0m \u001B[38;5;241m0\u001B[39m,\n\u001B[0;32m 586\u001B[0m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 591\u001B[0m \u001B[38;5;66;03m# raised during streaming, so all calls with incorrect\u001B[39;00m\n\u001B[0;32m 592\u001B[0m \u001B[38;5;66;03m# Content-Length are caught.\u001B[39;00m\n\u001B[1;32m--> 593\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m IncompleteRead(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_fp_bytes_read, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mlength_remaining)\n\u001B[0;32m 595\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m data:\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\contextlib.py:137\u001B[0m, in \u001B[0;36m_GeneratorContextManager.__exit__\u001B[1;34m(self, typ, value, traceback)\u001B[0m\n\u001B[0;32m 136\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[1;32m--> 137\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mgen\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mthrow\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtyp\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mvalue\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtraceback\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 138\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mStopIteration\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m exc:\n\u001B[0;32m 139\u001B[0m \u001B[38;5;66;03m# Suppress StopIteration *unless* it's the same exception that\u001B[39;00m\n\u001B[0;32m 140\u001B[0m \u001B[38;5;66;03m# was passed to throw(). This prevents a StopIteration\u001B[39;00m\n\u001B[0;32m 141\u001B[0m \u001B[38;5;66;03m# raised inside the \"with\" statement from being suppressed.\u001B[39;00m\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\site-packages\\urllib3\\response.py:461\u001B[0m, in \u001B[0;36mHTTPResponse._error_catcher\u001B[1;34m(self)\u001B[0m\n\u001B[0;32m 459\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m (HTTPException, SocketError) \u001B[38;5;28;01mas\u001B[39;00m e:\n\u001B[0;32m 460\u001B[0m \u001B[38;5;66;03m# This includes IncompleteRead.\u001B[39;00m\n\u001B[1;32m--> 461\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m ProtocolError(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mConnection broken: \u001B[39m\u001B[38;5;132;01m%r\u001B[39;00m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;241m%\u001B[39m e, e)\n\u001B[0;32m 463\u001B[0m \u001B[38;5;66;03m# If no exception is thrown, we should avoid cleaning up\u001B[39;00m\n\u001B[0;32m 464\u001B[0m \u001B[38;5;66;03m# unnecessarily.\u001B[39;00m\n",
"\u001B[1;31mProtocolError\u001B[0m: (\"Connection broken: ConnectionResetError(10054, 'A létező kapcsolatot a távoli állomás kényszerítetten bezárta', None, 10054, None)\", ConnectionResetError(10054, 'A létező kapcsolatot a távoli állomás kényszerítetten bezárta', None, 10054, None))",
"\nDuring handling of the above exception, another exception occurred:\n",
"Cell \u001B[1;32mIn[39], line 7\u001B[0m\n\u001B[0;32m 4\u001B[0m \u001B[38;5;66;03m# Uses stopwords for english from NLTK, and all puntuation characters by\u001B[39;00m\n\u001B[0;32m 5\u001B[0m \u001B[38;5;66;03m# default\u001B[39;00m\n\u001B[0;32m 6\u001B[0m r \u001B[38;5;241m=\u001B[39m Rake()\n\u001B[1;32m----> 7\u001B[0m kw_model \u001B[38;5;241m=\u001B[39m \u001B[43mKeyBERT\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmodel\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mall-mpnet-base-v2\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\site-packages\\keybert\\backend\\_utils.py:49\u001B[0m, in \u001B[0;36mselect_backend\u001B[1;34m(embedding_model)\u001B[0m\n\u001B[0;32m 47\u001B[0m \u001B[38;5;66;03m# Create a Sentence Transformer model based on a string\u001B[39;00m\n\u001B[0;32m 48\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(embedding_model, \u001B[38;5;28mstr\u001B[39m):\n\u001B[1;32m---> 49\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mSentenceTransformerBackend\u001B[49m\u001B[43m(\u001B[49m\u001B[43membedding_model\u001B[49m\u001B[43m)\u001B[49m\n\u001B[0;32m 51\u001B[0m \u001B[38;5;66;03m# Hugging Face embeddings\u001B[39;00m\n\u001B[0;32m 52\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(embedding_model, Pipeline):\n",
"File \u001B[1;32m~\\Anaconda3\\envs\\MOME_BIGDATA\\lib\\site-packages\\sentence_transformers\\util.py:491\u001B[0m, in \u001B[0;36msnapshot_download\u001B[1;34m(repo_id, revision, cache_dir, library_name, library_version, user_agent, ignore_files, use_auth_token)\u001B[0m\n\u001B[0;32m 486\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m version\u001B[38;5;241m.\u001B[39mparse(huggingface_hub\u001B[38;5;241m.\u001B[39m__version__) \u001B[38;5;241m>\u001B[39m\u001B[38;5;241m=\u001B[39m version\u001B[38;5;241m.\u001B[39mparse(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m0.8.1\u001B[39m\u001B[38;5;124m\"\u001B[39m):\n\u001B[0;32m 487\u001B[0m \u001B[38;5;66;03m# huggingface_hub v0.8.1 introduces a new cache layout. We sill use a manual layout\u001B[39;00m\n\u001B[0;32m 488\u001B[0m \u001B[38;5;66;03m# And need to pass legacy_cache_layout=True to avoid that a warning will be printed\u001B[39;00m\n\u001B[0;32m 489\u001B[0m cached_download_args[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mlegacy_cache_layout\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mTrue\u001B[39;00m\n\u001B[1;32m--> 491\u001B[0m path \u001B[38;5;241m=\u001B[39m cached_download(\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mcached_download_args)\n\u001B[0;32m 493\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m os\u001B[38;5;241m.\u001B[39mpath\u001B[38;5;241m.\u001B[39mexists(path \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m.lock\u001B[39m\u001B[38;5;124m\"\u001B[39m):\n\u001B[0;32m 494\u001B[0m os\u001B[38;5;241m.\u001B[39mremove(path \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m.lock\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n",
"\u001B[1;31mChunkedEncodingError\u001B[0m: (\"Connection broken: ConnectionResetError(10054, 'A létező kapcsolatot a távoli állomás kényszerítetten bezárta', None, 10054, None)\", ConnectionResetError(10054, 'A létező kapcsolatot a távoli állomás kényszerítetten bezárta', None, 10054, None))"
]
}
],
"source": [
"from rake_nltk import Rake\n",
"from keybert import KeyBERT\n",
"\n",
"# Uses stopwords for english from NLTK, and all puntuation characters by\n",
"# default\n",
"r = Rake()\n",
"kw_model = KeyBERT(model='all-mpnet-base-v2')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 32,
"outputs": [
{
"ename": "AttributeError",
"evalue": "'NoneType' object has no attribute 'get_ranked_phrases'",
"output_type": "error",
"traceback": [
"Cell \u001B[1;32mIn[32], line 1\u001B[0m\n\u001B[1;32m----> 1\u001B[0m \u001B[43mRake\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mextract_keywords_from_text\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mmy time to shine\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_ranked_phrases\u001B[49m()\n",
"\u001B[1;31mAttributeError\u001B[0m: 'NoneType' object has no attribute 'get_ranked_phrases'"
]
}
],
"source": [
"Rake().extract_keywords_from_text(\"my time to shine\").get_ranked_phrases()"