{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import os\n", "\n", "import pandas as pd\n", "focal_countries_list = [\"Peoples R china\", \"Hong Kong\"]" ] }, { "cell_type": "code", "execution_count": 2, "outputs": [], "source": [ "country_mode = \"CU\" #CU-country-region AU-address" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 3, "outputs": [], "source": [ "# (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"computer vision\") OR TS=(\"pattern recognition\")) AND" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 4, "outputs": [ { "data": { "text/plain": "'TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\")'" }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "keywords_source = r'..\\ai_scope_keywords.txt'\n", "with open(keywords_source,'r') as f:\n", " keywords = f.readlines()\n", "\n", "keywords = [c.strip() for c in keywords[0].split(\",\")]\n", "\n", "keywords_str = ' OR '.join('TS=(\\\"'+k+'\\\")' for k in keywords)\n", "keywords_str" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 5, "outputs": [], "source": [ "scope_country_source = r'..\\eu_scope_countries.txt'\n", "\n", "with open(scope_country_source,'r') as f:\n", " coop_countries = f.readlines()\n", "coop_countries = [c.strip().upper() for c in coop_countries[0].split(\",\")]\n", "focal_countries = [c.strip().upper() for c in focal_countries_list]\n", "eu_countries = coop_countries[0:-7]\n", "assoc_countries = coop_countries[-7:]\n", "\n", "nor_c = [coop_countries[-7],]\n", "swi_c = [coop_countries[-6],]\n", "uk_c = coop_countries[-5:]\n", "\n", "foc_str = ' OR '.join([country_mode+'='+c for c in focal_countries])\n", "coop_str = ' OR '.join([country_mode+'='+c for c in coop_countries])\n", "eu_str = ' OR '.join([country_mode+'='+c for c in eu_countries])\n", "assoc_str = ' OR '.join([country_mode+'='+c for c in assoc_countries])\n", "\n", "nor_str =' OR '.join([country_mode+'='+c for c in nor_c])\n", "swi_str =' OR '.join([country_mode+'='+c for c in swi_c])\n", "uk_str =' OR '.join([country_mode+'='+c for c in uk_c])\n", "eu_sub_str = eu_str.split(' OR ')\n", "# eu_sub_str" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 5, "outputs": [], "source": [], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 6, "outputs": [ { "data": { "text/plain": "['UNITED KINGDOM', 'ENGLAND', 'WALES', 'SCOTLAND', 'N IRELAND']" }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "coop_countries[-5:]" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 7, "outputs": [ { "data": { "text/plain": "'CU=PEOPLES R CHINA OR CU=HONG KONG'" }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "foc_str" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 8, "outputs": [ { "data": { "text/plain": "'(CU=PEOPLES R CHINA OR CU=HONG KONG) AND (CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\")) AND PY=(2011-2022)'" }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scope_query = f'({foc_str}) AND ({coop_str}) AND ({keywords_str}) AND PY=(2011-2022)'\n", "scope_query" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 9, "outputs": [ { "data": { "text/plain": "'(CU=PEOPLES R CHINA OR CU=HONG KONG) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\"))'" }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ch_scope_query = f'({foc_str}) AND ({keywords_str})'\n", "ch_scope_query" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 10, "outputs": [ { "data": { "text/plain": "'(CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\"))'" }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eu_scope_query = f'({eu_str}) AND ({keywords_str})'\n", "eu_scope_query" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 11, "outputs": [], "source": [ "sub_queries = [f'PY=(2011-2022) AND ({i_str}) AND ({keywords_str})' for i_str in [foc_str,eu_str,assoc_str,nor_str,swi_str,uk_str]+eu_sub_str]" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 12, "outputs": [], "source": [ "from wossel_miners import wos_fetch_entries,wos_fetch_yearly_output" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 13, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 33/33 [12:49<00:00, 23.31s/it]\n" ] } ], "source": [ "wos_fetch_yearly_output(query_str_list=sub_queries)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 14, "outputs": [ { "data": { "text/plain": "'(CU=PEOPLES R CHINA OR CU=HONG KONG) AND (CU=AUSTRIA OR CU=BELGIUM OR CU=BULGARIA OR CU=CROATIA OR CU=CYPRUS OR CU=CZECH REPUBLIC OR CU=DENMARK OR CU=ESTONIA OR CU=FINLAND OR CU=FRANCE OR CU=GERMANY OR CU=GREECE OR CU=HUNGARY OR CU=IRELAND OR CU=ITALY OR CU=LATVIA OR CU=LITHUANIA OR CU=LUXEMBOURG OR CU=MALTA OR CU=NETHERLANDS OR CU=POLAND OR CU=PORTUGAL OR CU=ROMANIA OR CU=SLOVAKIA OR CU=SLOVENIA OR CU=SPAIN OR CU=SWEDEN OR CU=NORWAY OR CU=SWITZERLAND OR CU=UNITED KINGDOM OR CU=ENGLAND OR CU=WALES OR CU=SCOTLAND OR CU=N IRELAND) AND (TS=(\"artificial intelligence\") OR TS=(\"machine learning\") OR TS=(\"neural network\") OR TS=(\"big data\") OR TS=(\"deep learning\") OR TS=(\"pattern recognition\") OR TS=(\"computer vision\") OR TS=(\"image classification\") OR TS=(\"reinforcement learning\") OR TS=(\"support vector machines\") OR TS=(\"recommender system\") OR TS=(\"random forest\") OR TS=(\"ensemble model\") OR TS=(\"image processing\") OR TS=(\"generative network\") OR TS=(\"ai ethic\") OR TS=(\"natural language processing\") OR TS=(\"clustering algorithm\") OR TS=(\"feature extraction\") OR TS=(\"time series forecast\") OR TS=(\"anomaly detection\") OR TS=(\"identity fraud detection\") OR TS=(\"dimensionality reduction\") OR TS=(\"feature elicitation\") OR TS=(\"chatbot\") OR TS=(\"clustering\") OR TS=(\"unsupervised learning\") OR TS=(\"supervised learning\") OR TS=(\"convolutional network\") OR TS=(\"adversarial network\")) AND PY=(2011-2022)'" }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scope_query" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 16, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Hoooold...\n", "27672 records found! Here we go in 93 steps...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 92/92 [09:38<00:00, 6.29s/it]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "final batch of 27601-27672\n" ] } ], "source": [ "wos_fetch_entries(query_str=scope_query)" ], "metadata": { "collapsed": false } } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }