{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Web of Science record preprocessing\n",
    "\n",
    "Loads the concatenated WoS export, attaches the journal classification from `sm_journal_classification.xlsx`, recovers unmatched records through their WoS categories / research areas, and derives per-record category and research-area tables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "import hashlib\n",
    "import os\n",
    "import re\n",
    "import shutil\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from flashgeotext.geotext import GeoText"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "def md5hash(s: str) -> str:\n",
    "    \"\"\"Return the hexadecimal MD5 digest of ``s`` encoded as UTF-8.\"\"\"\n",
    "    return hashlib.md5(s.encode('utf-8')).hexdigest()\n",
    "\n",
    "\n",
    "def _most_common(values: pd.Series):\n",
    "    \"\"\"Return the most frequent non-null value of ``values`` (NaN if there is none).\"\"\"\n",
    "    modes = values.mode()\n",
    "    return modes.iat[0] if not modes.empty else np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "record_col = \"UT (Unique WOS ID)\"\n",
    "\n",
    "# Concatenated WoS export (tab-separated). Set the WOS_RECORDS_CSV environment\n",
    "# variable to use another copy; the fallback is the original machine-specific path.\n",
    "outfile = os.environ.get(\n",
    "    \"WOS_RECORDS_CSV\",\n",
    "    r\"C:\\Users\\radvanyi\\PycharmProjects\\ZSI_analytics\\WOS\\wos_extract\\wos_records_concat.csv\",\n",
    ")\n",
    "\n",
    "# Publication years kept for the analysis (both bounds inclusive).\n",
    "FIRST_YEAR, LAST_YEAR = 2011, 2022"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data and attach the journal classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wos_period = pd.read_csv(outfile, sep=\"\\t\", low_memory=False)\n",
    "wos_period = wos_period[wos_period[\"Publication Year\"].between(FIRST_YEAR, LAST_YEAR)].copy()\n",
    "print(f'Number of initial (valid interval) records: {len(wos_period)}')\n",
    "\n",
    "metrix_raw = pd.read_excel(\"sm_journal_classification.xlsx\", sheet_name=\"Journal_Classification\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Long format: one row per (journal, ISSN variant). The stacked level has no\n",
    "# name, so reset_index() calls it \"level_<number of index columns>\"; deriving\n",
    "# that label keeps the rename valid when the number of columns changes.\n",
    "metrix_id_cols = [c for c in metrix_raw.columns if \"issn\" not in c]\n",
    "metrix = metrix_raw.set_index(metrix_id_cols).stack().reset_index()\n",
    "metrix = metrix.rename(columns={f\"level_{len(metrix_id_cols)}\": \"issn_type\", 0: \"issn\"})\n",
    "metrix[\"issn\"] = metrix[\"issn\"].str.replace(\"-\", \"\", regex=False).str.lower().str.strip()\n",
    "\n",
    "# Same reshaping for the records: the normalised ISSN / eISSN become rows.\n",
    "# The match is case-sensitive, so the original \"ISSN\" / \"eISSN\" columns stay identifiers.\n",
    "wos_long = wos_period.assign(\n",
    "    issn=wos_period[\"ISSN\"].str.replace(\"-\", \"\", regex=False).str.lower().str.strip(),\n",
    "    eissn=wos_period[\"eISSN\"].str.replace(\"-\", \"\", regex=False).str.lower().str.strip(),\n",
    ")\n",
    "wos_id_cols = [c for c in wos_long.columns if \"issn\" not in c]\n",
    "wos_long = wos_long.set_index(wos_id_cols).stack().reset_index()\n",
    "wos_long = wos_long.rename(columns={f\"level_{len(wos_id_cols)}\": \"issn_var\", 0: \"issn\"})\n",
    "\n",
    "wos_merge = wos_long.merge(metrix, on=\"issn\", how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wos_indexed = wos_merge[~wos_merge[\"Domain_English\"].isna()]\n",
    "wos_unindexed = wos_merge[~wos_merge[record_col].isin(wos_indexed[record_col])]\n",
    "\n",
    "# Keep one row per record; the descending sort puts the \"issn\" row before \"eissn\".\n",
    "wos_unindexed = wos_unindexed.sort_values(by=[\"issn_var\"], ascending=False).drop_duplicates(subset=record_col)\n",
    "wos = wos_indexed.sort_values(by=[\"issn_var\"], ascending=False).drop_duplicates(subset=record_col)\n",
    "\n",
    "wos_postmerge = wos.copy()\n",
    "print(f'Number of METRIX filtered records: {len(wos)}')\n",
    "print(f'Number of unindexed records: {len(wos_unindexed)}')\n",
    "\n",
    "# Remove every record whose DOI occurs more than once (keep=False drops all\n",
    "# copies, not only the repeats); records without a DOI are kept.\n",
    "wos = wos[~(wos[\"DOI\"].notna() & wos[\"DOI\"].duplicated(keep=False))]\n",
    "wos = wos.drop_duplicates(subset=[\"Publication Type\",\"Document Type\",\"Authors\",\"Article Title\",\"Source Title\",\"Publication Year\"])\n",
    "print(f'Number of filtered records (dropping duplicates): {len(wos)}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "wos[\"Domain_English\"].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Recover records without a journal match\n",
    "\n",
    "Records whose ISSN is not in the classification inherit the most frequent domain / field / subfield of matched records that share the same WoS categories and research areas."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "wos_classifier = wos[[\"WoS Categories\",\"Research Areas\"]+list(metrix.columns)].copy().drop_duplicates()\n",
    "wos_classifier = wos_classifier.groupby([\"WoS Categories\",\"Research Areas\"], as_index=False)[\n",
    "    [\"Domain_English\",\"Field_English\",\"SubField_English\"]\n",
    "].agg(_most_common)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "wos_to_reindex = wos_unindexed.drop(columns=list(metrix.columns))\n",
    "wos_found = wos_to_reindex.merge(wos_classifier, on=[\"WoS Categories\",\"Research Areas\"], how=\"inner\")\n",
    "wos_stillost = wos_unindexed[~wos_unindexed[record_col].isin(wos_found[record_col])]\n",
    "\n",
    "print(\"Found:\", wos_found[record_col].nunique(),\"\\nLost forever:\", wos_stillost[record_col].nunique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "# Excluding ids that are already present makes the cell safe to re-run:\n",
    "# the recovered records are never appended twice.\n",
    "wos = pd.concat([wos[~wos[record_col].isin(wos_found[record_col])], wos_found], ignore_index=True)\n",
    "print(f'Number of records (after remerge): {len(wos)}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "wos[\"Domain_English\"].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## WoS categories and research areas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "# group_keys=True keeps the record id in the index on every pandas version;\n",
    "# the unnamed inner level becomes \"level_1\" after reset_index().\n",
    "wos_cat = (\n",
    "    wos.groupby(record_col, group_keys=True)[\"WoS Categories\"]\n",
    "    .apply(lambda x: x.str.split(';'))\n",
    "    .explode()\n",
    "    .reset_index()\n",
    "    .drop(columns=\"level_1\")\n",
    ")\n",
    "wos_cat[\"WoS Categories\"] = wos_cat[\"WoS Categories\"].str.strip()\n",
    "wos_cat[\"WoS Categories\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "# Split \"Category, SubCategory\" at the first comma only.\n",
    "wos_subcat = wos_cat.copy()\n",
    "wos_subcat[['WoS Category', 'WoS SubCategory']] = wos_subcat[\"WoS Categories\"].str.split(\",\", expand = True, n=1)\n",
    "for c in ['WoS Category', 'WoS SubCategory',\"WoS Categories\"]:\n",
    "    wos_subcat[c] = wos_subcat[c].str.strip()\n",
    "wos_subcat.drop_duplicates(subset=[record_col,'WoS Category'])[\"WoS Category\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "wos_areas = (\n",
    "    wos.groupby(record_col, group_keys=True)[\"Research Areas\"]\n",
    "    .apply(lambda x: x.str.split(';'))\n",
    "    .explode()\n",
    "    .reset_index()\n",
    "    .drop(columns=\"level_1\")\n",
    ")\n",
    "wos_areas[\"Research Areas\"] = wos_areas[\"Research Areas\"].str.strip()\n",
    "wos_areas[\"Research Areas\"].value_counts()"
   ]
  },
  {
"cell_type": "code", "execution_count": null, "outputs": [], "source": [ "wos[[\"Article Title\",\"Keywords Plus\",\"Author Keywords\"]].sample(100)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "kw_df = pd.DataFrame()\n", "for c in [\"Keywords Plus\",\"Author Keywords\"]:\n", " kwp = wos.groupby(record_col)[c].apply(lambda x: x.str.split(';')).explode().str.strip().str.upper()\n", " kwp.name = 'keyword_all'\n", " kw_df = pd.concat([kwp.reset_index(),kw_df],ignore_index=True)\n", "kw_df = kw_df[~kw_df[\"keyword_all\"].isna()].copy().drop(columns=\"level_1\").drop_duplicates()\n", "kw_df[\"keyword_all\"] = kw_df[\"keyword_all\"].apply(lambda x: re.sub(\"[\\(\\[].*?[\\)\\]]\", \"\", x))\n", "kw_df.head(100)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "wos_kwd_concat = kw_df.groupby(record_col, as_index=False).agg({'keyword_all': '; '.join})\n", "wos_kwd_concat.head()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "wos.columns" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "geotext = GeoText()\n", "\n", "def extract_location(input_text, key='countries'):\n", " anomalies = {\"Malta\":\"Malta\",\n", " \"Mongolia\":\"Mongolia\",\n", " \"Quatar\":\"Qatar\",\n", " \"Qatar\":\"Qatar\",\n", " \"Ethiop\":\"Ethiopia\",\n", " \"Nigeria\":\"Nigeria\",\n", " \"BELAR\":\"Belarus\",\n", " \"Venezuela\":\"Venezuela\",\n", " \"Cyprus\":\"Cyprus\",\n", " \"Ecuador\":\"Ecuador\",\n", " \"U Arab\":\"United Arab Emirates\",\n", " \"Syria\":\"Syria\",\n", " \"Uganda\":\"Uganda\",\n", " \"Yemen\":\"Yemen\",\n", " \"Mali\":\"Mali\",\n", " \"Senegal\":\"Senegal\",\n", " 
\"Vatican\":\"Vatican\",\n", " \"Uruguay\":\"Uruguay\",\n", " \"Panama\":\"Panama\",\n", " \"Fiji\":\"Fiji\",\n", " \"Faroe\":\"Faroe Islands\",\n", " \"Macedonia\":\"Macedonia\",\n", " 'Mozambique':'Mozambique',\n", " \"Kuwait\":\"Kuwait\",\n", " \"Libya\":\"Libya\",\n", " \"Turkiy\":\"Turkey\",\n", " \"Liberia\":\"Liberia\",\n", " \"Namibia\":\"Namibia\",\n", " \"Ivoire\":\"Ivory Coast\",\n", " \"Guatemala\":\"Gutemala\",\n", " \"Paraguay\":\"Paraguay\",\n", " \"Honduras\":\"Honduras\",\n", " \"Nicaragua\":\"Nicaragua\",\n", " \"Trinidad\":\"Trinidad & Tobago\",\n", " \"Liechtenstein\":\"Liechtenstein\",\n", " \"Greenland\":\"Denmark\"}\n", "\n", " extracted = geotext.extract(input_text=input_text)\n", " found = extracted[key].keys()\n", " if len(sorted(found))>0:\n", " return sorted(found)[0]\n", " elif key=='countries':\n", " for i in ['Scotland','Wales','England', 'N Ireland']:\n", " if i in input_text:\n", " return 'United Kingdom'\n", " for j in anomalies.keys():\n", " if j in input_text:\n", " return anomalies.get(j)\n", " else:\n", " return None\n", "\n", "with open('../eu_members.txt',\"r\") as f:\n", " eu_countries=f.readline().split(\",\")\n", " eu_countries=[i.strip() for i in eu_countries]\n", "\n", "def country_cleanup(country):\n", " if \"USA\" in country:\n", " return \"USA\"\n", " elif \"China\" in country:\n", " return \"China\"\n", " elif country in [\"England\", \"Northern Ireland\", \"Wales\", \"Scotland\",\"N Ireland\"]:\n", " return \"United Kingdom\"\n", " else:\n", " return country\n", "\n", "\n", "def country_type(country):\n", " if country in eu_countries:\n", " return \"EU\"\n", " elif country==\"China\":\n", " return \"China\"\n", " elif country in [\"Switzerland\", 'Norway','United Kingdom']:\n", " return \"Non-EU associate\"\n", " else:\n", " return \"Other\"\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "locations = wos.groupby(record_col)[\"Addresses\"].apply(lambda x: 
x.str.split('[')).explode().reset_index().drop(columns=\"level_1\")\n", "\n", "\n", "locations = locations[locations[\"Addresses\"]!=\"\"].copy()\n", "locations[\"Address\"] = locations[\"Addresses\"].apply(lambda x:x.split(\"]\")[-1])\n", "locations[\"Authors_of_address\"] = locations[\"Addresses\"].apply(lambda x:x.split(\"]\")[0])" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "len(locations)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "locations[\"Address\"] = locations[\"Address\"].str.strip().str.strip(\";\")\n", "locations = locations.groupby([record_col,\"Authors_of_address\"])[\"Address\"].apply(lambda x: x.str.split(';')).explode().reset_index().drop(columns=\"level_2\")\n", "locations.head(100)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# import dask.dataframe as dd\n", "#\n", "# locations_ddf = dd.from_pandas(locations, npartitions=4) # convert pandas DataFrame to Dask DataFrame\n", "# loc_compute = locations_ddf.groupby([record_col,\"Authors_of_address\"])[\"Address\"].apply(lambda x: x.str.split(';')).explode().compute() # compute the result" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# locations_test = locations.head(1000)\n", "# locations_test = locations_test.groupby([record_col,\"Authors_of_address\"])[\"Address\"].str.split(';').explode()\n", "# locations_test" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "\n", "# locations[\"Country\"]=locations['Address'].apply(lambda x: extract_location(input_text=x, key='countries'))\n", "locations[\"Country\"]=locations['Address'].apply(lambda x: x.split(\",\")[-1].strip(\" \").strip(\";\").strip(\" \"))\n", "locations[\"Country\"]=locations['Country'].apply(lambda x: 
country_cleanup(x))\n", "locations[\"City\"]=locations['Address'].apply(lambda x: extract_location(input_text=x, key='cities'))\n", "locations[\"Country_Type\"] = locations[\"Country\"].apply(lambda x: country_type(x))" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "scope_types = [\"EU\",\"China\",\"Non-EU associate\"]\n", "locations=locations[locations[\"Country_Type\"].isin(scope_types)]" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "univ_locations = locations[[record_col,\"Address\",\"Country\",\"City\",\"Country_Type\"]].copy()\n", "univ_locations[\"Institution\"] = univ_locations[\"Address\"].apply(lambda x: x.split(\",\")[0])\n", "univ_locations = univ_locations.drop_duplicates()\n", "univ_locations.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "author_locations = locations.groupby([record_col,\"Country\",\"Country_Type\"])[\"Authors_of_address\"].apply(lambda x: x.str.split(';')).explode().reset_index().drop(columns=\"level_3\")\n", "author_locations[\"Author_name\"] = author_locations[\"Authors_of_address\"].str.strip()\n", "author_locations = author_locations.drop(columns=\"Authors_of_address\")\n", "author_locations[\"author_str_id\"] = author_locations[\"Author_name\"].apply(lambda x:''.join(filter(str.isalnum, x.lower())))\n", "author_locations[\"author_str_id\"] = author_locations[\"author_str_id\"].apply(md5hash)\n", "author_locations = author_locations.drop(columns=\"Author_name\")\n", "author_locations.head()" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "author_locations[author_locations['author_str_id'].duplicated(False)]" ], "metadata": { "collapsed": false } }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "author_primary_region = author_locations.sort_values(by=\"Country_Type\").drop_duplicates(subset=[record_col,\"author_str_id\"])\n", "# author_primary_region\n", "\n", "china=author_primary_region[author_primary_region[\"Country_Type\"]==\"China\"][record_col].unique()\n", "eu=author_primary_region[author_primary_region[\"Country_Type\"]==\"EU\"][record_col].unique()\n", "assoc=author_primary_region[author_primary_region[\"Country_Type\"]==\"Non-EU associate\"][record_col].unique()\n", "\n", "\n", "# records that have distinct authors with different country affiliations\n", "valid_scope = wos[((wos[record_col].isin(china))\n", " &\n", " ((wos[record_col].isin(eu))\n", " |\n", " (wos[record_col].isin(assoc))))][record_col].unique()" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "author_primary_region.head()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(f'Number of records: {len(wos)}')\n", "print(f'Number of valid cooperation records: {len(valid_scope)}')" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "wos = wos[wos[record_col].isin(valid_scope)]\n", "locations = locations[locations[record_col].isin(valid_scope)]\n", "univ_locations = univ_locations[univ_locations[record_col].isin(valid_scope)]\n", "author_locations = author_locations[author_locations[record_col].isin(valid_scope)]\n", "author_primary_region = author_locations[author_locations[record_col].isin(valid_scope)]" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "affiliations = wos.groupby(record_col)[\"Affiliations\"].apply(lambda x: x.str.split(';')).explode().reset_index().drop(columns=\"level_1\")\n", "affiliations[\"Affiliations\"] = 
affiliations[\"Affiliations\"].str.strip().str.upper().fillna(\"UNKNOWN\")\n", "affiliations = affiliations.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "affiliations[\"Affiliations\"].value_counts()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "univ_locations[\"Institution\"].value_counts()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "univ_locations[record_col].nunique()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "affiliations[record_col].nunique()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "univ_locations[\"Institution\"].value_counts().sum()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "affiliations[\"Affiliations\"].value_counts().sum()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wos_cat = wos.groupby(record_col)[\"WoS Categories\"].apply(lambda x: x.str.split(';')).explode().reset_index().drop(columns=\"level_1\")\n", "wos_cat[\"WoS Categories\"].value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wos_areas = wos.groupby(record_col)[\"Research Areas\"].apply(lambda x: x.str.split(';')).explode().reset_index().drop(columns=\"level_1\")\n", "wos_areas[\"Research Areas\"] = wos_areas[\"Research Areas\"].str.strip()\n", "wos_areas[\"Research Areas\"].value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "[c for c in wos.columns if \"_English\" in c]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ 
"metrix_levels = [c for c in wos.columns if \"_English\" in c]\n", "for m in metrix_levels:\n", " wos[m] = wos[m].replace({\"article-level classification\":\"Multidisciplinary\"})\n" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "wos" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "metrix_levels" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "record_countries = locations[[record_col,\"Country\"]].drop_duplicates()\n", "record_author_locations = author_locations[[record_col,\"author_str_id\",\"Country\"]].drop_duplicates()\n", "record_institution = univ_locations[[record_col,\"Institution\",\"Country\"]].drop_duplicates()\n", "country_types = locations[[\"Country\",\"Country_Type\"]].drop_duplicates()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# Basic network layout" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "country_collabs = record_countries.merge(record_countries, on=record_col)\n", "country_collabs = country_collabs[country_collabs[\"Country_x\"]!=country_collabs[\"Country_y\"]]\n", "country_collabs[\"weight\"] = 0.5" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "inst_collabs = record_institution.merge(record_institution, on=record_col)\n", "inst_collabs = inst_collabs[inst_collabs[\"Institution_x\"]!=inst_collabs[\"Institution_y\"]]\n", "inst_collabs[\"weight\"] = 0.5" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "wos.columns" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], 
"source": [ "drop_cols = [ws for ws in wos.columns if ((\"uthor\" in ws or \"ddress\" in ws or \"ORCID\" in\n", " ws or \"esearcher\" in ws or \"ditor\" in ws or \"name\" in ws or 'SEQ' in ws) and \"eyword\" not in ws)]\n", "drop_cols" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "outdir=\"wos_processed_data\"" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "os.makedirs(outdir, exist_ok=True)\n", "\n", "wos.drop(columns=drop_cols).to_excel(f\"{outdir}/wos_processed.xlsx\", index=False)\n", "\n", "record_countries.to_excel(f\"{outdir}/wos_countries.xlsx\", index=False)\n", "\n", "record_author_locations.to_excel(f\"{outdir}/wos_author_locations.xlsx\", index=False)\n", "\n", "record_institution.to_excel(f\"{outdir}/wos_institution_locations.xlsx\", index=False)\n", "\n", "kw_df.to_excel(f\"{outdir}/wos_keywords.xlsx\", index=False)\n", "\n", "country_types.to_excel(f\"{outdir}/wos_country_types.xlsx\", index=False)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "wos.drop(columns=drop_cols).to_csv(f\"{outdir}/wos_processed.csv\", index=False, sep='\\t')\n", "\n", "record_countries.to_csv(f\"{outdir}/wos_countries.csv\", index=False, sep='\\t')\n", "\n", "record_author_locations.to_csv(f\"{outdir}/wos_author_locations.csv\", index=False, sep='\\t')\n", "\n", "record_institution.to_csv(f\"{outdir}/wos_institution_locations.csv\", index=False, sep='\\t')\n", "\n", "kw_df.to_csv(f\"{outdir}/wos_keywords.csv\", index=False, sep='\\t')\n", "\n", "country_types.to_csv(f\"{outdir}/wos_country_types.csv\", index=False, sep='\\t')\n", "\n", "inst_collabs.to_csv(f\"{outdir}/wos_inst_collabs.csv\", index=False, sep='\\t')\n", "\n", "country_collabs.to_csv(f\"{outdir}/wos_country_collabs.csv\", index=False, sep='\\t')" ], "metadata": { "collapsed": false } }, { "cell_type": 
"code", "execution_count": null, "outputs": [], "source": [ "wos_areas.to_csv(f\"{outdir}/wos_research_areas.csv\", index=False, sep='\\t')\n", "\n", "wos_subcat.to_csv(f\"{outdir}/wos_categories.csv\", index=False, sep='\\t')" ], "metadata": { "collapsed": false } } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 1 }