ZSI_Reconnect_China/WOS/wos_analysis/wos_analyses.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "40038234",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "        <script type=\"text/javascript\">\n        window.PlotlyConfig = {MathJaxConfig: 'local'};\n        if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n        if (typeof require !== 'undefined') {\n        require.undef(\"plotly\");\n        define('plotly', function(require, exports, module) {\n            /**\n* plotly.js v2.20.0\n* Copyright 2012-2023, Plotly, Inc.\n* All rights reserved.\n* Licensed under the MIT license\n*/\n/*! For license information please see plotly.min.js.LICENSE.txt */\n!function(t,e){\"object\"==typeof exports&&\"object\"==typeof module?module.exports=e():\"function\"==typeof define&&define.amd?define([],e):\"object\"==typeof exports?exports.Plotly=e():t.Plotly=e()}(self,(function(){return function(){var t={98847:function(t,e,r){\"use strict\";var n=r(71828),i={\"X,X div\":'direction:ltr;font-family:\"Open Sans\",verdana,arial,sans-serif;margin:0;padding:0;',\"X input,X button\":'font-family:\"Open Sans\",verdana,arial,sans-serif;',\"X input:focus,X button:focus\":\"outline:none;\",\"X a\":\"text-decoration:none;\",\"X a:hover\":\"text-decoration:none;\",\"X .crisp\":\"shape-rendering:crispEdges;\",\"X .user-select-none\":\"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;\",\"X svg\":\"overflow:hidden;\",\"X svg a\":\"fill:#447adb;\",\"X svg a:hover\":\"fill:#3c6dc5;\",\"X .main-svg\":\"position:absolute;top:0;left:0;pointer-events:none;\",\"X .main-svg .draglayer\":\"pointer-events:all;\",\"X .cursor-default\":\"cursor:default;\",\"X .cursor-pointer\":\"cursor:pointer;\",\"X .cursor-crosshair\":\"cursor:crosshair;\",\"X .cursor-move\":\"cursor:move;\",\"X .cursor-col-resize\":\"cursor:col-resize;\",\"X .cursor-row-resize\":\"cursor:row-resize;\",\"X .cursor-ns-resize\":\"cursor:ns-resize;\",\"X .cursor-ew-resize\":\"cursor:ew-resize;\",\"X 
.cursor-sw-resize\":\"cursor:sw-resize;\",\"X .cursor-s-resize\":\"cursor:s-resize;\",\"X .cursor-se-resize\":\"cursor:se-resize;\",\"X .cursor-w-resize\":\"cursor:w-resize;\",\"X .cursor-e-resize\":\"cursor:e-resize;\",\"X .cursor-nw-resize\":\"cursor:nw-resize;\",\"X .cursor-n-resize\":\"cursor:n-resize;\",\"X .cursor-ne-resize\":\"cursor:ne-resize;\",\"X .cursor-grab\":\"cursor:-webkit-grab;cursor:grab;\",\"X .modebar\":\"position:absolute;top:2px;right:2px;\",\"X .ease-bg\":\"-webkit-transition:background-color .3s ease 0s;-moz-transition:background-color .3s ease 0s;-ms-transition:background-color .3s ease 0s;-o-transition:background-color .3s ease 0s;transition:background-color .3s ease 0s;\",\"X .modebar--hover>:not(.watermark)\":\"opacity:0;-webkit-transition:opacity .3s ease 0s;-moz-transition:opacity .3s ease 0s;-ms-transition:opacity .3s ease 0s;-o-transition:opacity .3s ease 0s;transition:opacity .3s ease 0s;\",\"X:hover .modebar--hover .modebar-group\":\"opacity:1;\",\"X .modebar-group\":\"float:left;display:inline-block;box-sizing:border-box;padding-left:8px;position:relative;vertical-align:middle;white-space:nowrap;\",\"X .modebar-btn\":\"position:relative;font-size:16px;padding:3px 4px;height:22px;cursor:pointer;line-height:normal;box-sizing:border-box;\",\"X .modebar-btn svg\":\"position:relative;top:2px;\",\"X .modebar.vertical\":\"display:flex;flex-direction:column;flex-wrap:wrap;align-content:flex-end;max-height:100%;\",\"X .modebar.vertical svg\":\"top:-1px;\",\"X .modebar.vertical .modebar-group\":\"display:block;float:none;padding-left:0px;padding-bottom:8px;\",\"X .modebar.vertical .modebar-group .modebar-btn\":\"display:block;text-align:center;\",\"X [data-title]:before,X [data-title]:after\":\"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 
0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;\",\"X [data-title]:hover:before,X [data-title]:hover:after\":\"display:block;opacity:1;\",\"X [data-title]:before
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import janitor\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from matplotlib.ticker import MaxNLocator\n",
    "import math\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "import plotly.offline as pyo\n",
    "from plotly.subplots import make_subplots\n",
    "import plotly.graph_objects as go\n",
    "pyo.init_notebook_mode()\n",
    "\n",
    "import plotly.io as pio\n",
    "pio.renderers.default = \"plotly_mimetype+notebook\"\n",
    "\n",
    "import country_converter as coco\n",
    "cc = coco.CountryConverter()\n",
    "\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ea3629f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seaborn palette\n",
    "# sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)\n",
    "# sns.palplot(sns.color_palette())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fb7baf32",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder (one level up from this notebook) that holds the processed WoS workbooks.\n",
    "outdir = \"wos_processed_data\"\n",
    "\n",
    "# Read the processed WoS table and the harmonized institution-location table.\n",
    "wos, wos_univ = (\n",
    "    pd.read_excel(f\"../{outdir}/{workbook}\")\n",
    "    for workbook in (\"wos_processed.xlsx\", \"wos_institution_locations_harmonized.xlsx\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4dd8e081",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Region -> countries table, built once at definition time rather than on every call.\n",
    "_EUROVOC_CLASSIFICATION = {\n",
    "    \"Eastern Europe\": [\"Bulgaria\", \"Czech Republic\", \"Croatia\", \"Hungary\", \"Poland\", \"Romania\", \"Slovakia\", \"Slovenia\"],\n",
    "    \"Northern Europe\": [\"Denmark\", \"Estonia\", \"Finland\", \"Latvia\", \"Lithuania\", \"Sweden\", \"Norway\", \"Iceland\"],\n",
    "    \"Southern Europe\": [\"Cyprus\", \"Greece\", \"Italy\", \"Portugal\", \"Spain\", \"Malta\"],\n",
    "    \"Western Europe\": [\"Austria\", \"Belgium\", \"France\", \"Germany\", \"Luxembourg\", \"Netherlands\", \"Switzerland\", \"United Kingdom\", \"Ireland\"],\n",
    "}\n",
    "\n",
    "\n",
    "def eurovoc_classer(x):\n",
    "    \"\"\"Return the region label for a country name.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x : object\n",
    "        Country name to classify (compared by equality / list membership).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str or None\n",
    "        ``'China'`` is returned unchanged; a country listed in\n",
    "        ``_EUROVOC_CLASSIFICATION`` yields its region name; anything else\n",
    "        yields ``None``.\n",
    "    \"\"\"\n",
    "    if x == 'China':\n",
    "        return x\n",
    "    for region, countries in _EUROVOC_CLASSIFICATION.items():\n",
    "        if x in countries:\n",
    "            return region\n",
    "    # Unlisted country: make the \"no region\" result explicit.\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "eb933d66",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Country list and country-type table, read from the same processed-data folder.\n",
    "wos_country, wos_country_types = (\n",
    "    pd.read_excel(f\"../{outdir}/{stem}.xlsx\")\n",
    "    for stem in (\"wos_countries\", \"wos_country_types\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "cd0b0efa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "           Country      Country_Type    Eurovoc_Class\n0          Belgium                EU   Western Europe\n1            China             China            China\n2       Luxembourg                EU   Western Europe\n3      Netherlands                EU   Western Europe\n4           Norway  Non-EU associate  Northern Europe\n5   United Kingdom  Non-EU associate   Western Europe\n6           France                EU   Western Europe\n7           Sweden                EU  Northern Europe\n8            Italy                EU  Southern Europe\n9          Denmark                EU  Northern Europe\n10         Germany                EU   Western Europe\n11        Slovenia                EU   Eastern Europe\n12         Estonia                EU  Northern Europe\n13         Finland                EU  Northern Europe\n14        Bulgaria                EU   Eastern Europe\n15        Slovakia                EU   Eastern Europe\n16           Spain                EU  Southern Europe\n17          Poland                EU   Eastern Europe\n18  Czech Republic                EU   Eastern Europe\n19          Greece                EU  Southern Europe\n20           Malta                EU  Southern Europe\n21         Austria                EU   Western Europe\n22     Switzerland  Non-EU associate   Western Europe\n23         Ireland                EU   Western Europe\n24        Portugal                EU  Southern Europe\n25         Romania                EU   Eastern Europe\n26         Hungary                EU   Eastern Europe\n27          Cyprus                EU  Southern Europe\n28         Croatia                EU   Eastern Europe\n29       Lithuania                EU  Northern Europe\n30          Latvia                EU  Northern Europe",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Country</th>\n      <th>Country_Type</th>\n      <th>Eurovoc_Class</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Belgium</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>China</td>\n      <td>China</td>\n      <td>China</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Luxembourg</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Netherlands</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Norway</td>\n      <td>Non-EU associate</td>\n      <td>Northern Europe</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>United Kingdom</td>\n      <td>Non-EU associate</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>France</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>Sweden</td>\n      <td>EU</td>\n      <td>Northern Europe</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>Italy</td>\n      <td>EU</td>\n      <td>Southern Europe</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>Denmark</td>\n      <td>EU</td>\n      <td>Northern Europe</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>Germany</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>Slovenia</td>\n      <td>EU</td>\n      <td>Eastern Europe</td>\n    </tr>\n    <tr>\n      <th>12</th>\n      <td>Estonia</td>\n      <td>EU</td>\n      <td>Northern Europe</td>\n    
</tr>\n    <tr>\n      <th>13</th>\n      <td>Finland</td>\n      <td>EU</td>\n      <td>Northern Europe</td>\n    </tr>\n    <tr>\n      <th>14</th>\n      <td>Bulgaria</td>\n      <td>EU</td>\n      <td>Eastern Europe</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>Slovakia</td>\n      <td>EU</td>\n      <td>Eastern Europe</td>\n    </tr>\n    <tr>\n      <th>16</th>\n      <td>Spain</td>\n      <td>EU</td>\n      <td>Southern Europe</td>\n    </tr>\n    <tr>\n      <th>17</th>\n      <td>Poland</td>\n      <td>EU</td>\n      <td>Eastern Europe</td>\n    </tr>\n    <tr>\n      <th>18</th>\n      <td>Czech Republic</td>\n      <td>EU</td>\n      <td>Eastern Europe</td>\n    </tr>\n    <tr>\n      <th>19</th>\n      <td>Greece</td>\n      <td>EU</td>\n      <td>Southern Europe</td>\n    </tr>\n    <tr>\n      <th>20</th>\n      <td>Malta</td>\n      <td>EU</td>\n      <td>Southern Europe</td>\n    </tr>\n    <tr>\n      <th>21</th>\n      <td>Austria</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>22</th>\n      <td>Switzerland</td>\n      <td>Non-EU associate</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>23</th>\n      <td>Ireland</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n    </tr>\n    <tr>\n      <th>24</th>\n      <td>Portugal</td>\n      <td>EU</td>\n      <td>Southern Europe</td>\n    </tr>\n    <tr>\n      <th>25</th>\n      <td>Romania</td>\n      <td>EU</td>\n      <td>Eastern Europe</td>\n    </tr>\n    <tr>\n      <th>26</th>\n      <td>Hungary</td>\n      <td>EU</td>\n      <td>Eastern Europe</td>\n    </tr>\n    <tr>\n      <th>27</th>\n      <td>Cyprus</td>\n      <td>EU</td>\n      <td>Southern Europe</td>\n    </tr>\n    <tr>\n      <th>28</th>\n      <td>Croatia</td>\n      <td>EU</td>\n      <td>Eastern Europe</td>\n    </tr>\n    <tr>\n      <th>29</th>\n      <td>Lithuania</td>\n      <td>EU</td>\n      <td>Northern Europe</td>\n    </tr>\n    <tr>\n      
<th>30</th>\n      <td>Latvia</td>\n      <td>EU</td>\n      <td>Northern Europe</td>\n    </tr>\n  </tbody>\
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label each country with its region; re-running the cell simply recomputes the column.\n",
    "wos_country_types = wos_country_types.assign(\n",
    "    Eurovoc_Class=wos_country_types[\"Country\"].map(eurovoc_classer)\n",
    ")\n",
    "wos_country_types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "1e737dbf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name of the WoS record-identifier column; every publication count below is a distinct count of it.\n",
    "record_col = \"UT (Unique WOS ID)\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b1aa7f2d",
   "metadata": {},
   "source": [
    "# Analysis by METRIX classification"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a97f1cbb",
   "metadata": {},
   "source": [
    "## Distribution of topics via the METRIX classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "id": "f39cb21d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "        Domain_English                             Field_English   \n37    Applied Sciences  Information & Communication Technologies  \\\n44    Applied Sciences  Information & Communication Technologies   \n32    Applied Sciences                               Engineering   \n33    Applied Sciences                               Engineering   \n15    Applied Sciences         Enabling & Strategic Technologies   \n..                 ...                                       ...   \n11    Applied Sciences                     Economics & Business    \n46    Applied Sciences                           Social Sciences   \n54   Arts & Humanities                     Philosophy & Theology   \n52   Arts & Humanities                        Historical Studies   \n129    Health Sciences           Psychology & Cognitive Sciences   \n\n                               SubField_English  UT (Unique WOS ID)    percent  \n37   Artificial Intelligence & Image Processing                7915  17.184108  \n44              Networking & Telecommunications                5360  11.636995  \n32           Geological & Geomatics Engineering                2576   5.592705  \n33          Industrial Engineering & Automation                2316   5.028224  \n15                                       Energy                1965   4.266175  \n..                                          ...                 ...        ...  \n11                        Business & Management                   1   0.002171  \n46                                 Anthropology                   1   0.002171  \n54                                   Philosophy                   1   0.002171  \n52                   History of Social Sciences                   1   0.002171  \n129     General Psychology & Cognitive Sciences                   1   0.002171  \n\n[175 rows x 5 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Domain_English</th>\n      <th>Field_English</th>\n      <th>SubField_English</th>\n      <th>UT (Unique WOS ID)</th>\n      <th>percent</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>37</th>\n      <td>Applied Sciences</td>\n      <td>Information &amp; Communication Technologies</td>\n      <td>Artificial Intelligence &amp; Image Processing</td>\n      <td>7915</td>\n      <td>17.184108</td>\n    </tr>\n    <tr>\n      <th>44</th>\n      <td>Applied Sciences</td>\n      <td>Information &amp; Communication Technologies</td>\n      <td>Networking &amp; Telecommunications</td>\n      <td>5360</td>\n      <td>11.636995</td>\n    </tr>\n    <tr>\n      <th>32</th>\n      <td>Applied Sciences</td>\n      <td>Engineering</td>\n      <td>Geological &amp; Geomatics Engineering</td>\n      <td>2576</td>\n      <td>5.592705</td>\n    </tr>\n    <tr>\n      <th>33</th>\n      <td>Applied Sciences</td>\n      <td>Engineering</td>\n      <td>Industrial Engineering &amp; Automation</td>\n      <td>2316</td>\n      <td>5.028224</td>\n    </tr>\n    <tr>\n      <th>15</th>\n      <td>Applied Sciences</td>\n      <td>Enabling &amp; Strategic Technologies</td>\n      <td>Energy</td>\n      <td>1965</td>\n      <td>4.266175</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>11</th>\n      <td>Applied Sciences</td>\n      <td>Economics &amp; Business</td>\n      <td>Business &amp; Management</td>\n      <td>1</td>\n      <td>0.002171</td>\n    </tr>\n    <tr>\n      
<th>46</th>\n      <td>Applied Sciences</td>\n      <td>Social Sciences</td>\n      <td>Anthropology</td>\n      <td>1</td>\n      <td>0.002171</td>\n    </tr>\n    <tr>\n      <th>54</th>\n      <td>Arts &amp; Humanities</td>\n      <td>Philosophy &amp; Theology</td>\n      <td>Philosophy</td>\n      <td>1</td>\n      <td>0.002171</td>\n    </tr>\n    <tr>\n      <th>52</th>\n      <td>Arts &amp; Humanities</td>\n      <td>Historical Studies</td>\n      <td>History of Social Sciences</td>\n      <td>1</td>\n      <td>0.002171</td>\n    </tr>\n    <tr>\n      <th>129</th>\n      <td>Health Sciences</td>\n      <td>Psychology &amp; Cognitive Sciences</td>\n      <td>General Psychology &amp; Cognitive Sciences</td>\n      <td>1</td>\n      <td>0.002171</td>\n    </tr>\n  </tbody>\n</table>\n<p>175 rows × 5 columns</p>\n</div>"
     },
     "execution_count": 203,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def replace_nth(s, sub=\" \", repl=\"<br>\", n=2):\n",
    "    \"\"\"Swap every ``n``-th ``sub`` separator of ``s`` for ``repl``.\n",
    "\n",
    "    ``s`` is split on ``sub``; the pieces are grouped ``n`` at a time, joined with\n",
    "    ``sub`` inside a group and with ``repl`` between groups. Finally every\n",
    "    ``\"<br>&\"`` is rewritten to ``\" &<br>\"`` so that an ampersand ends a line\n",
    "    instead of starting the next one.\n",
    "    \"\"\"\n",
    "    pieces = s.split(sub)\n",
    "    n_groups = -(-len(pieces) // n)  # ceiling division\n",
    "    wrapped = repl.join(\n",
    "        sub.join(pieces[g * n:(g + 1) * n]) for g in range(n_groups)\n",
    "    )\n",
    "    return wrapped.replace(\"<br>&\", \" &<br>\")\n",
    "\n",
    "\n",
    "# Classification levels, outermost first; the same list is used as the sunburst path.\n",
    "groups = ['Domain_English', \"Field_English\", 'SubField_English']\n",
    "\n",
    "# Distinct records per (domain, field, subfield), largest first, plus the share in percent.\n",
    "data = (\n",
    "    wos.groupby(groups, as_index=False)[record_col]\n",
    "    .nunique()\n",
    "    .sort_values(by=record_col, ascending=False)\n",
    ")\n",
    "data[\"percent\"] = data[record_col] / data[record_col].sum() * 100\n",
    "\n",
    "# Wrap long labels with <br>. Series.map per column is used instead of\n",
    "# DataFrame.applymap, which newer pandas releases deprecate.\n",
    "data[groups] = data[groups].apply(lambda labels: labels.map(replace_nth))\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 223,
   "id": "2c9d6d5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sunburst over the classification hierarchy, coloured by top-level domain.\n",
    "fig = px.sunburst(\n",
    "    data,\n",
    "    path=groups,\n",
    "    values=record_col,\n",
    "    color='Domain_English',\n",
    "    title=\"Distribution of topics<br>(METRIX taxonomy)\",\n",
    "    template='plotly',\n",
    ")\n",
    "# Sector text shows label, count and share of the root; hover shows the full id path and count.\n",
    "fig.update_traces(\n",
    "    textinfo=\"label+value+percent root\",\n",
    "    hovertemplate='%{id}<br>%{value}<extra></extra>',\n",
    ")\n",
    "metrix_distr = go.Figure(fig)\n",
    "# metrix_distr.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "outputs": [],
   "source": [
    "# metrix_distr.show(config= dict(displayModeBar = False))\n",
    "# Distinct co-publications per publication year, oldest year first.\n",
    "data = (\n",
    "    wos.groupby(['Publication Year'])[record_col]\n",
    "    .nunique(dropna=False)\n",
    "    .reset_index()\n",
    "    .sort_values(by=[\"Publication Year\"], ascending=True)\n",
    ")\n",
    "\n",
    "# Baseline = count of the earliest year that has at least one record.\n",
    "# Positional .iloc[0] is used on purpose: a bare [0] is a label lookup and only\n",
    "# works while the earliest row happens to carry index label 0.\n",
    "baseline = data.loc[data[record_col] > 0, record_col].iloc[0]\n",
    "data[record_col + \"_relative_growth\"] = (data[record_col] - baseline) / baseline\n",
    "\n",
    "# Running total over the years (rows are already in chronological order).\n",
    "data[record_col + \"_cumsum\"] = data[record_col].cumsum()\n",
    "\n",
    "# Stand-alone yearly charts; only their traces are reused in the combined figure below.\n",
    "year_output = px.line(data, x=\"Publication Year\", y=record_col, markers=True)\n",
    "year_output.update_traces(hovertemplate='Year:%{x:d}<br>Number of co-publications:%{y:d}')\n",
    "\n",
    "year_rel_output = px.line(data, x=\"Publication Year\", y=record_col+\"_relative_growth\", markers=True)\n",
    "year_rel_output.update_traces(hovertemplate='Year:%{x:d}<br>Rel.growth in co-publications:%{y:.0%}')\n",
    "\n",
    "year_rel_cumsum = px.area(data, x=\"Publication Year\", y=record_col+\"_cumsum\")\n",
    "year_rel_cumsum.update_traces(hovertemplate='Year:%{x:d}<br>Cumulative number co-publications:%{y:d}')\n",
    "\n",
    "# 3x2 grid: the sunburst spans all three rows of column 1, the yearly charts stack in column 2.\n",
    "figsuper = make_subplots(\n",
    "    rows=3, cols=2,\n",
    "    subplot_titles=[\n",
    "        \"Distribution of topics\",\n",
    "        \"Co-publications per year\",\n",
    "        \"Relative growth of co-publications\",\n",
    "        \"Cumulative sum of co-publications\",\n",
    "    ],\n",
    "    specs=[\n",
    "        [{\"type\": \"domain\", \"rowspan\": 3}, {\"type\": \"xy\"}],\n",
    "        [None, {\"type\": \"xy\"}],\n",
    "        [None, {\"type\": \"xy\"}],\n",
    "    ],\n",
    ")\n",
    "\n",
    "# (source figure, row, col). The row-spanning sunburst subplot is addressed by\n",
    "# its first cell, i.e. row 1 / col 1.\n",
    "panels = [\n",
    "    (metrix_distr, 1, 1),\n",
    "    (year_output, 1, 2),\n",
    "    (year_rel_output, 2, 2),\n",
    "    (year_rel_cumsum, 3, 2),\n",
    "]\n",
    "for source_fig, row, col in panels:\n",
    "    for trace in source_fig.select_traces():\n",
    "        figsuper.add_trace(trace, row=row, col=col)\n",
    "\n",
    "figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n",
    "\n",
    "# Boxed axes with grid lines and outside ticks on every xy subplot.\n",
    "axis_style = dict(showgrid=True, showline=True, linewidth=1, linecolor='black', mirror=True, ticks=\"outside\")\n",
    "figsuper.update_yaxes(**axis_style)\n",
    "figsuper.update_xaxes(**axis_style)\n",
    "figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n",
    "\n",
    "# yaxis2 is the y-axis of the relative-growth chart (second xy subplot): show fractions as percentages.\n",
    "figsuper['layout']['yaxis2'].update(zerolinecolor='grey',tickformat=\".0%\")\n",
    "\n",
    "# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n",
    "figsuper.write_html(\"plot_html/Overall_distr&trends.html\",config= dict(displayModeBar = False, responsive = True))"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "outputs": [],
   "source": [
    "# data\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "id": "66fca444",
   "metadata": {},
   "source": [
    "## Domains, distribution, yearly trends"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "id": "14e82a73",
   "metadata": {},
   "outputs": [],
   "source": [
    "group = 'Domain_English'\n",
    "\n",
    "# Distinct records per domain, largest first.\n",
    "data = (\n",
    "    wos.groupby(group, as_index=False)[record_col]\n",
    "    .nunique()\n",
    "    .sort_values(by=record_col, ascending=False)\n",
    ")\n",
    "\n",
    "# Horizontal bars, one colour per domain; rows are fed in alphabetical domain order.\n",
    "fig = px.bar(\n",
    "    data.sort_values(by=group),\n",
    "    x=record_col,\n",
    "    y=group,\n",
    "    color=group,\n",
    "    barmode='relative',\n",
    "    labels={record_col: 'Number of co-publications', group: \"\"},\n",
    "    title=\"Distribution of Domains\",\n",
    "    template='plotly',\n",
    ")\n",
    "fig.update_traces(hovertemplate='%{x:d}')\n",
    "fig.update_layout(\n",
    "    showlegend=False,\n",
    "    xaxis_tickformat='d',\n",
    "    font_family=\"Montserrat\",\n",
    "    yaxis={'categoryorder':'total ascending'},\n",
    ")\n",
    "\n",
    "# Thin black rectangle in paper coordinates = border around the plotting area.\n",
    "border = dict(type=\"rect\", xref=\"paper\", yref=\"paper\", x0=0, y0=0, x1=1.0, y1=1.0,\n",
    "              line=dict(color=\"black\", width=0.5))\n",
    "fig.add_shape(**border)\n",
    "\n",
    "for update_axis in (fig.update_yaxes, fig.update_xaxes):\n",
    "    update_axis(showgrid=True, ticks=\"outside\")\n",
    "\n",
    "dom_distr = go.Figure(fig)\n",
    "# dom_distr.show(config= dict(displayModeBar = False, responsive = True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "id": "8cbe20ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "group = ['Publication Year','Domain_English']\n",
    "\n",
    "# Distinct records per (year, domain). The unstack -> fillna(0) -> stack round trip\n",
    "# adds an explicit zero row for every year/domain pair that has no records.\n",
    "counts_per_pair = wos.groupby(['Publication Year','Domain_English'])[record_col].nunique(dropna=False)\n",
    "data = (\n",
    "    counts_per_pair\n",
    "    .unstack()\n",
    "    .fillna(0)\n",
    "    .stack()\n",
    "    .reset_index()\n",
    "    .rename(columns={0:record_col})\n",
    ")\n",
    "\n",
    "# Per domain, the row of the first year with a non-zero count is the growth baseline;\n",
    "# after the merge its count sits in the \"_relative_growth\" column until it is overwritten.\n",
    "first_active_year = (\n",
    "    data[data[record_col]>0]\n",
    "    .sort_values(by=[\"Publication Year\"], ascending=True)\n",
    "    .drop_duplicates(subset='Domain_English')\n",
    ")\n",
    "data = data.merge(first_active_year, on='Domain_English', suffixes=[None,\"_relative_growth\"])\n",
    "growth_col = record_col+\"_relative_growth\"\n",
    "data[growth_col] = (data[record_col]-data[growth_col])/data[growth_col]\n",
    "\n",
    "# Running total per domain, accumulated in chronological order.\n",
    "data = data.sort_values(by =[\"Domain_English\",\"Publication Year\"], ascending=[True,True])\n",
    "data[record_col+\"_cumsum\"] = data.groupby('Domain_English',as_index=False)[record_col].cumsum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "id": "05d0922a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _frame_and_grid(figure):\n",
    "    \"\"\"Draw a thin black border around the plotting area and enable grid lines\n",
    "    with outside ticks on both axes. Returns the same figure.\"\"\"\n",
    "    figure.add_shape(\n",
    "        type=\"rect\", xref=\"paper\", yref=\"paper\",\n",
    "        x0=0, y0=0, x1=1.0, y1=1.0,\n",
    "        line=dict(color=\"black\", width=0.5),\n",
    "    )\n",
    "    figure.update_yaxes(showgrid=True, ticks=\"outside\")\n",
    "    figure.update_xaxes(showgrid=True, ticks=\"outside\")\n",
    "    return figure\n",
    "\n",
    "\n",
    "# All three charts plot the same rows, ordered by year and then by domain.\n",
    "year_col, domain_col = group[0], group[-1]\n",
    "ordered = data.sort_values(ascending=[True,True], by=[year_col, domain_col])\n",
    "\n",
    "# 1) Yearly number of co-publications per domain.\n",
    "fig = px.line(ordered, y=record_col, x=year_col, color=domain_col, markers=True,\n",
    "              labels={\n",
    "                  record_col: 'Number of co-publications',\n",
    "                  domain_col: \"Domain\",\n",
    "              },\n",
    "              title=\"Yearly output of co-publications\", template='plotly')\n",
    "fig.update_traces(hovertemplate='%{y:d}')\n",
    "fig.update_layout(hovermode='x unified')\n",
    "year_output_by_domain = go.Figure(_frame_and_grid(fig))\n",
    "\n",
    "# 2) Growth relative to each domain's first non-zero year.\n",
    "fig = px.line(ordered, y=record_col+\"_relative_growth\", x=year_col, color=domain_col, markers=True,\n",
    "              labels={\n",
    "                  record_col+\"_relative_growth\": 'Rel. growth<br>in co-publications (%)',\n",
    "                  domain_col: \"Domain\",\n",
    "              },\n",
    "              title=\"Relative growth in the output of co-publications\", template='plotly')\n",
    "# The growth column is a fraction (1.0 means +100 %). The d3 '%' format type multiplies\n",
    "# by 100 and appends the sign, so ticks and hover text show the true percentage.\n",
    "fig.update_layout(hovermode='x unified', yaxis_tickformat='.0%', font_family=\"Montserrat\")\n",
    "fig.update_traces(hovertemplate='%{y:.0%}')\n",
    "rel_output_by_domain = go.Figure(_frame_and_grid(fig))\n",
    "\n",
    "# 3) Cumulative number of co-publications per domain (stacked areas).\n",
    "fig = px.area(ordered, y=record_col+\"_cumsum\", x=year_col, color=domain_col, line_group=domain_col,\n",
    "              labels={\n",
    "                  record_col+\"_cumsum\": 'Cumulative number of co-publications',\n",
    "                  domain_col: \"Domain\",\n",
    "              },\n",
    "              title=\"Cumulative number of co-publications\", template='plotly')\n",
    "fig.update_traces(hovertemplate='%{y:d}')\n",
    "fig.update_layout(hovermode='x unified')\n",
    "cumsum_by_domain = go.Figure(_frame_and_grid(fig))\n",
    "# cumsum_by_domain.show(config= dict(displayModeBar = False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "id": "3a07c24d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from plotly.subplots import make_subplots\n",
    "import plotly.graph_objects as go\n",
    "\n",
    "# Assemble the four domain-level figures into one 2x2 dashboard.\n",
    "panel_titles = [\"Distribution of domains\", \"Cumulative sum of co-publications\",\n",
    "                \"Co-publications per year\", \"Relative growth of co-publications\"]\n",
    "figsuper = make_subplots(rows=2, cols=2, subplot_titles=panel_titles)\n",
    "\n",
    "# (source figure, row, col, hide legend entries). Only the cumulative-sum panel keeps\n",
    "# its legend entries; the traces of the other panels are flagged showlegend=False.\n",
    "panel_specs = [\n",
    "    (dom_distr, 1, 1, True),\n",
    "    (cumsum_by_domain, 1, 2, False),\n",
    "    (year_output_by_domain, 2, 1, True),\n",
    "    (rel_output_by_domain, 2, 2, True),\n",
    "]\n",
    "for source_fig, panel_row, panel_col, hide_legend in panel_specs:\n",
    "    for trace in list(source_fig.select_traces()):\n",
    "        if hide_legend:\n",
    "            trace.showlegend = False\n",
    "        figsuper.add_trace(trace, row=panel_row, col=panel_col)\n",
    "\n",
    "# Frame shared by every axis: grid, black mirrored border line, outside ticks.\n",
    "axis_frame = dict(showgrid=True, showline=True, linewidth=1, linecolor='black',\n",
    "                  mirror=True, ticks=\"outside\")\n",
    "figsuper.update_layout(yaxis={'categoryorder': 'total ascending'}, barmode='relative')\n",
    "figsuper.update_yaxes(**axis_frame)\n",
    "figsuper.update_xaxes(**axis_frame)\n",
    "figsuper.update_layout({'template': \"plotly\", \"font_family\": \"Montserrat\"})\n",
    "# yaxis4 is the y axis of the fourth subplot (row 2, col 2, relative growth):\n",
    "# percent tick labels plus a thin grey zero line.\n",
    "figsuper['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',\n",
    "                                    tickformat=\".0%\")\n",
    "\n",
    "# figsuper.show(config=dict(displayModeBar=False, responsive=True))\n",
    "figsuper.write_html(\"plot_html/Domains_distr&trends.html\",\n",
    "                    config=dict(displayModeBar=False, responsive=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "outputs": [],
   "source": [
    "# figsuper['layout']"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "329b6889",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "Publication Year            2011  2012  2013  2014  2015  2016  2017  2018   \nDomain_English                                                               \nApplied Sciences             490   593   738  1031  1201  1535  1920  2808  \\\nArts & Humanities              0     0     0     4     1     3     7     4   \nEconomic & Social Sciences    20    22    29    28    34    40    84   105   \nHealth Sciences              116   120   155   184   216   243   321   403   \nMultidisciplinary             15    21    43    52    57    64    75    76   \nNatural Sciences             181   223   298   318   380   437   568   753   \n\nPublication Year            2019  2020  2021  2022  \nDomain_English                                      \nApplied Sciences            3729  4446  5295  6199  \nArts & Humanities             11    11    16    13  \nEconomic & Social Sciences   160   211   252   375  \nHealth Sciences              611   755  1035  1182  \nMultidisciplinary             83    97   115   149  \nNatural Sciences             999  1232  1403  1665  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th>Publication Year</th>\n      <th>2011</th>\n      <th>2012</th>\n      <th>2013</th>\n      <th>2014</th>\n      <th>2015</th>\n      <th>2016</th>\n      <th>2017</th>\n      <th>2018</th>\n      <th>2019</th>\n      <th>2020</th>\n      <th>2021</th>\n      <th>2022</th>\n    </tr>\n    <tr>\n      <th>Domain_English</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Applied Sciences</th>\n      <td>490</td>\n      <td>593</td>\n      <td>738</td>\n      <td>1031</td>\n      <td>1201</td>\n      <td>1535</td>\n      <td>1920</td>\n      <td>2808</td>\n      <td>3729</td>\n      <td>4446</td>\n      <td>5295</td>\n      <td>6199</td>\n    </tr>\n    <tr>\n      <th>Arts &amp; Humanities</th>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>4</td>\n      <td>1</td>\n      <td>3</td>\n      <td>7</td>\n      <td>4</td>\n      <td>11</td>\n      <td>11</td>\n      <td>16</td>\n      <td>13</td>\n    </tr>\n    <tr>\n      <th>Economic &amp; Social Sciences</th>\n      <td>20</td>\n      <td>22</td>\n      <td>29</td>\n      <td>28</td>\n      <td>34</td>\n      <td>40</td>\n      <td>84</td>\n      <td>105</td>\n      <td>160</td>\n      <td>211</td>\n      <td>252</td>\n      <td>375</td>\n    </tr>\n    <tr>\n      <th>Health Sciences</th>\n      <td>116</td>\n      <td>120</td>\n      <td>155</td>\n      <td>184</td>\n      <td>216</td>\n      <td>243</td>\n      
<td>321</td>\n      <td>403</td>\n      <td>611</td>\n      <td>755</td>\n      <td>1035</td>\n      <td>1182</td>\n    </tr>\n    <tr>\n      <th>Multidisciplinary</th>\n      <td>15</td>\n      <td>21</td>\n      <td>43</td>\n      <td>52</td>\n      <td>57</td>\n      <td>64</td>\n      <td>75</td>\n      <td>76</td>\n      <td>83</td>\n      <td>97</td>\n      <td>115</td>\n      <td>149</td>\n    </tr>\n    <tr>\n      <th>Natural Sciences</th>\n      <td>181</td>\n      <td>223</td>\n      <td>298</td>\n      <td>318</td>\n      <td>380</td>\n      <td>437</td>\n      <td>568</td>\n      <td>753</td>\n      <td>999</td>\n      <td>1232</td>\n      <td>1403</td>\n      <td>1665</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Wide table: one row per domain, one column per publication year.\n",
    "# Domain/year pairs without any row in `data` are filled with 0.\n",
    "pivot_data = data.pivot_table(\n",
    "    values=record_col,\n",
    "    index=['Domain_English'],\n",
    "    columns=['Publication Year'],\n",
    "    fill_value=0,\n",
    ")\n",
    "pivot_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "100f3002",
   "metadata": {},
   "outputs": [],
   "source": [
    "# f, ax = plt.subplots(figsize=(9, 6))\n",
    "# g = sns.heatmap(pivot_data, annot=True, fmt=\"d\", linewidths=.5, ax=ax)\n",
    "# g.set(xlabel=\"\", ylabel=\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "a8d24046",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "Publication Year                 2011       2012       2013       2014   \nDomain_English                                                           \nApplied Sciences            59.610706  60.572012  58.432304  63.760049  \\\nArts & Humanities            0.000000   0.000000   0.000000   0.247372   \nEconomic & Social Sciences   2.433090   2.247191   2.296120   1.731602   \nHealth Sciences             14.111922  12.257406  12.272367  11.379097   \nMultidisciplinary            1.824818   2.145046   3.404592   3.215832   \nNatural Sciences            22.019465  22.778345  23.594616  19.666048   \n\nPublication Year                 2015       2016       2017       2018   \nDomain_English                                                           \nApplied Sciences            63.578613  66.106804  64.537815  67.678959  \\\nArts & Humanities            0.052938   0.129199   0.235294   0.096409   \nEconomic & Social Sciences   1.799894   1.722653   2.823529   2.530730   \nHealth Sciences             11.434621  10.465116  10.789916   9.713184   \nMultidisciplinary            3.017470   2.756245   2.521008   1.831767   \nNatural Sciences            20.116464  18.819983  19.092437  18.148952   \n\nPublication Year                 2019       2020       2021       2022  \nDomain_English                                                          \nApplied Sciences            66.672626  65.847156  65.241498  64.687467  \nArts & Humanities            0.196674   0.162915   0.197141   0.135657  \nEconomic & Social Sciences   2.860719   3.125000   3.104978   3.913180  \nHealth Sciences             10.924370  11.181872  12.752587  12.334342  \nMultidisciplinary            1.483998   1.436611   1.416954   1.554837  \nNatural Sciences            17.861613  18.246445  17.286841  17.374517  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th>Publication Year</th>\n      <th>2011</th>\n      <th>2012</th>\n      <th>2013</th>\n      <th>2014</th>\n      <th>2015</th>\n      <th>2016</th>\n      <th>2017</th>\n      <th>2018</th>\n      <th>2019</th>\n      <th>2020</th>\n      <th>2021</th>\n      <th>2022</th>\n    </tr>\n    <tr>\n      <th>Domain_English</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Applied Sciences</th>\n      <td>59.610706</td>\n      <td>60.572012</td>\n      <td>58.432304</td>\n      <td>63.760049</td>\n      <td>63.578613</td>\n      <td>66.106804</td>\n      <td>64.537815</td>\n      <td>67.678959</td>\n      <td>66.672626</td>\n      <td>65.847156</td>\n      <td>65.241498</td>\n      <td>64.687467</td>\n    </tr>\n    <tr>\n      <th>Arts &amp; Humanities</th>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>0.247372</td>\n      <td>0.052938</td>\n      <td>0.129199</td>\n      <td>0.235294</td>\n      <td>0.096409</td>\n      <td>0.196674</td>\n      <td>0.162915</td>\n      <td>0.197141</td>\n      <td>0.135657</td>\n    </tr>\n    <tr>\n      <th>Economic &amp; Social Sciences</th>\n      <td>2.433090</td>\n      <td>2.247191</td>\n      <td>2.296120</td>\n      <td>1.731602</td>\n      <td>1.799894</td>\n      <td>1.722653</td>\n      <td>2.823529</td>\n      <td>2.530730</td>\n      <td>2.860719</td>\n      <td>3.125000</td>\n      <td>3.104978</td>\n      
<td>3.913180</td>\n    </tr>\n    <tr>\n      <th>Health Sciences</th>\n      <td>14.111922</td>\n      <td>12.257406</td>\n      <td>12.272367</td>\n      <td>11.379097</td>\n      <td>11.434621</td>\n      <td>10.465116</td>\n      <td>10.789916</td>\n      <td>9.713184</td>\n      <td>10.924370</td>\n      <td>11.181872</td>\n      <td>12.752587</td>\n      <td>12.334342</td>\n    </tr>\n    <tr>\n      <th>Multidisciplinary</th>\n      <td>1.824818</td>\n      <td>2.145046</td>\n      <td>3.404592</td>\n      <td>3.215832</td>\n      <td>3.017470</td>\n      <td>2.756245</td>\n      <td>2.521008</td>\n      <td>1.831767</td>\n      <td>1.483998</td>\n      <td>1.436611</td>\n      <td>1.416954</td>\n      <td>1.554837</td>\n    </tr>\n    <tr>\n      <th>Natural Sciences</th>\n      <td>22.019465</td>\n      <td>22.778345</td>\n      <td>23.594616</td>\n      <td>19.666048</td>\n      <td>20.116464</td>\n      <td>18.819983</td>\n      <td>19.092437</td>\n      <td>18.148952</td>\n      <td>17.861613</td>\n      <td>18.246445</td>\n      <td>17.286841</td>\n      <td>17.374517</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "# Share of each domain within a publication year, in percent: normalize='columns'\n",
    "# divides by the column (year) total, so every year column sums to 100.\n",
    "# The aggregation is named as the string \"sum\": handing the NumPy callable np.sum to a\n",
    "# pandas aggregation is deprecated (FutureWarning since pandas 2.1) and \"sum\" selects\n",
    "# the same reduction.\n",
    "percent_pivot = pd.crosstab(data['Domain_English'], data['Publication Year'], values=data[record_col], aggfunc=\"sum\", normalize='columns')*100\n",
    "percent_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "3bda79fb",
   "metadata": {},
   "outputs": [],
   "source": [
    " # f, ax = plt.subplots(figsize=(15, 6))\n",
    "# # g = sns.heatmap(percent_pivot, annot=True, fmt='.2f', linewidths=.5, ax=ax, cbar=False)\n",
    "# # for t in ax.texts: t.set_text(t.get_text() + \" %\")\n",
    "# g.set(xlabel=\"\", ylabel=\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "01024cc0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# percent_pivot.T.plot(kind='bar',\n",
    "#                     stacked=True,\n",
    "#                     figsize=(10, 6))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "4caa215d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# percent_pivot.T.plot(kind='bar',\n",
    "#                         stacked=True,\n",
    "#                         figsize=(15, 8))\n",
    "#\n",
    "# plt.legend(loc=\"lower left\", ncol=2)\n",
    "# # plt.ylabel(\"Release Year\")\n",
    "# # plt.xlabel(\"Proportion\")\n",
    "#\n",
    "#\n",
    "# for n, x in enumerate([*pivot_data.T.index.values]):\n",
    "#     for (proportion, count, y_loc) in zip(percent_pivot.T.loc[x],\n",
    "#                                           pivot_data.T.loc[x],\n",
    "#                                           percent_pivot.T.loc[x].cumsum()):\n",
    "#\n",
    "#         plt.text(y=(y_loc - proportion) + (proportion / 2),\n",
    "#                  x=n - 0.11,\n",
    "#                  s=f'{count}',# ({np.round(proportion, 1)}%)',\n",
    "#                  color=\"black\",\n",
    "#                  fontsize=8,\n",
    "#                  fontweight=\"bold\")\n",
    "#\n",
    "# plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dcae04bd",
   "metadata": {},
   "source": [
    "## Field"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "id": "d3807072",
   "metadata": {},
   "outputs": [],
   "source": [
    "# group = ['Publication Year',\"Domain_English\",'Field_English']\n",
    "# # data = wos.groupby(['Publication Year',\"Domain_English\",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n",
    "#\n",
    "#\n",
    "# data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()\n",
    "#         .fillna(0)\n",
    "#         .stack()\n",
    "#         .reset_index()\n",
    "#         .rename(columns={0:record_col}))\n",
    "#\n",
    "# data = data.merge(wos[[\"Domain_English\",'Field_English']].drop_duplicates(),on=\"Field_English\")\n",
    "#\n",
    "# data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='Field_English'),\n",
    "#                   on='Field_English', suffixes=[None,\"_relative_growth\"])\n",
    "# data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]*100\n",
    "#\n",
    "# data = data.sort_values(by =[\"Field_English\",\"Publication Year\"], ascending=[True,True])\n",
    "# data[record_col+\"_cumsum\"] = (data.groupby('Domain_English',as_index=False)[record_col].cumsum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "id": "756513b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# data_complete = pd.DataFrame()\n",
    "#\n",
    "# for cat in sorted(data[group[-2]].unique()):\n",
    "#     #data segment\n",
    "#     sub_data = data[data[group[-2]]==cat]\n",
    "#     sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n",
    "#                                  ,group[-1],fill_value=0)\n",
    "#     data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n",
    "\n",
    "\n",
    "    # seaborn version plot\n",
    "    # g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),\n",
    "    #                y=record_col,x=group[0], hue=group[-1], marker=\"o\")\n",
    "    # g.set(xticks=list(range(2012,2022+1,2)))\n",
    "    # g.legend(title=None)\n",
    "    # g.set_title(cat)\n",
    "    # g.yaxis.set_major_locator(MaxNLocator(integer=True))\n",
    "    # plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "id": "d09c080a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# data_complete = pd.DataFrame()\n",
    "#\n",
    "# # Creating subplot axes\n",
    "# fig, axes = plt.subplots(nrows=3,ncols=2,figsize=(15, 15))\n",
    "#\n",
    "# for cat,ax in zip(sorted(data[group[-2]].unique()),axes.flatten()):\n",
    "#     #data segment\n",
    "#     sub_data = data[data[group[-2]]==cat]\n",
    "#     sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n",
    "#                                  ,group[-1],fill_value=0)\n",
    "#     data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n",
    "#     #plot\n",
    "#     g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),\n",
    "#                    y=record_col,x=group[0], hue=group[-1], marker=\"o\", ax=ax)\n",
    "#     g.set(xticks=list(range(2012,2022+1,2)))\n",
    "#     g.legend(title=None)\n",
    "#     g.set_title(cat)\n",
    "#     g.set_xlabel(None)\n",
    "#     g.set_ylabel(None)\n",
    "#     g.yaxis.set_major_locator(MaxNLocator(integer=True))\n",
    "# fig.suptitle(\"Number of co-publications in domains and respective fields\", y=0.92)\n",
    "# plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09a6de71",
   "metadata": {},
   "source": [
    "## SubField"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "0397eb85",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct records per (year, domain, field, subfield), sorted in descending order\n",
    "# on the grouping keys first and on the count last.\n",
    "group = ['Publication Year', \"Domain_English\", 'Field_English', \"SubField_English\"]\n",
    "data = (\n",
    "    wos.groupby(group, as_index=False)[record_col]\n",
    "    .nunique()\n",
    "    .sort_values(by=group + [record_col], ascending=False)\n",
    ")\n",
    "# data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "846596cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk over the fields (group[-2]) in alphabetical order and, for each one, take its rows\n",
    "# of `data` and pass them through `.complete` with the full integer year range of `data`\n",
    "# (min..max of group[0]), the subfield column (group[-1]) and fill_value=0.\n",
    "# NOTE(review): `.complete` is not core pandas - presumably pyjanitor's DataFrame.complete,\n",
    "# which would add the missing year/subfield combinations as zero rows; confirm.\n",
    "# NOTE(review): the seaborn plotting below is commented out, so `sub_data` is rebuilt on\n",
    "# every iteration and not used again inside this cell.\n",
    "for cat in sorted(data[group[-2]].unique()):\n",
    "    sub_data = data[data[group[-2]]==cat]\n",
    "    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n",
    "                                 ,group[-1],fill_value=0)\n",
    "    # g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0],\n",
    "    #                hue=group[-1], marker=\"o\", errorbar=None)\n",
    "    # g.set(xticks=list(range(2012,2022+1,2)))\n",
    "    # g.legend(title=None,bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncols=math.ceil(len(g.legend_.texts)/12))\n",
    "    # g.set_title(f'Number or co-publications in {cat}')\n",
    "    # g.set_ylabel(None)\n",
    "    # plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "27c90aaf",
   "metadata": {},
   "outputs": [],
   "source": [
    "from  matplotlib.ticker import FuncFormatter\n",
    "import math\n",
    "\n",
    "\n",
    "def orderOfMagnitude(number):\n",
    "    \"\"\"Return the base-10 order of magnitude of a positive number.\n",
    "\n",
    "    This is floor(log10(number)): 3 for 1000, 0 for 7, -2 for 0.05.\n",
    "    Raises ValueError (from math.log10) when number is zero or negative.\n",
    "    \"\"\"\n",
    "    # math.log10 instead of math.log(number, 10): the two-argument form divides two\n",
    "    # natural logarithms and lands just below the true value for some exact powers of\n",
    "    # ten (math.log(1000, 10) == 2.9999999999999996), which floor() then turns into 2.\n",
    "    return math.floor(math.log10(number))\n",
    "\n",
    "\n",
    "def roundToNearest(number):\n",
    "    \"\"\"Round a positive number up to the next multiple of its own order of magnitude.\n",
    "\n",
    "    Examples: 1234 -> 2000, 87 -> 90, 5 -> 5.\n",
    "    Raises ValueError for zero or negative input (see orderOfMagnitude).\n",
    "    \"\"\"\n",
    "    step = 10**orderOfMagnitude(number)\n",
    "    return math.ceil(number/step)*step"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "91d2cc8a",
   "metadata": {},
   "source": [
    "## Country contributions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "id": "b3adb06a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per (record, country); countries whose Country_Type is \"Other\" are dropped.\n",
    "wos_univ_locations = wos_univ.merge(wos_country_types, on=\"Country\")\n",
    "wos_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\"]].drop_duplicates()\n",
    "\n",
    "# Per non-China country: its number of records (\"count\"), and that number divided by\n",
    "# the distinct records of wos_collabs and by the number of (record, country) rows of\n",
    "# wos_collabs. Both ratios are kept as 0-1 fractions and rendered as percentages by the axes.\n",
    "collab_desc = wos_collabs[wos_collabs[\"Country\"]!=\"China\"][\"Country\"].value_counts().reset_index()\n",
    "collab_desc[\"percent_of_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].nunique()\n",
    "collab_desc[\"percent_contrib_in_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].size\n",
    "collab_desc = collab_desc.merge(wos_country_types, on=\"Country\")\n",
    "\n",
    "# Metric column -> axis label / panel title.\n",
    "c_dict = {\"count\":\"Number of co-publications\",\n",
    "          \"percent_of_copubs\":\"Percent of co-publications\",\n",
    "          \"percent_contrib_in_copubs\":\"Contribution to co-publications\"}\n",
    "\n",
    "color_discrete_map= {'China': '#EF553B',\n",
    "                    'EU': '#636EFA',\n",
    "                    'Non-EU associate': '#00CC96'}\n",
    "\n",
    "# One stand-alone horizontal bar chart per metric; their traces are reused below.\n",
    "fig_dict = dict()\n",
    "for c in c_dict.keys():\n",
    "    data = collab_desc[[\"Country\",c,\"Country_Type\"]]\n",
    "    col_by=\"Country_Type\"\n",
    "    y_lab=\"Country\"\n",
    "    fig = px.bar(data, x=c, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,\n",
    "                 labels=dict({\n",
    "                     record_col: 'Number of co-publications',\n",
    "                     \"Institution_harm\": \"Institution\",\n",
    "                     \"Institution_harm_label\": \"Institution\",\n",
    "                     \"Country_Type\":\"Country type\",\n",
    "                     \"Eurovoc_Class\":\"Region\"\n",
    "                 },**c_dict),\n",
    "                 title=c_dict[c], template='plotly')\n",
    "    fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",\n",
    "                      yaxis={'categoryorder':'total ascending'},\n",
    "                      width=1000, height=1000,)\n",
    "    if \"percent\" in c:\n",
    "        fig.update_traces(hovertemplate='%{y}<br>%{x}')\n",
    "        fig.update_xaxes(tickformat=\".1%\")\n",
    "    else:\n",
    "        fig.update_traces(hovertemplate='%{y}<br>%{x:d}')\n",
    "    fig_dict[c] = go.Figure(fig)\n",
    "\n",
    "# Combined figure: one panel per metric, side by side.\n",
    "figsuper = make_subplots(rows=1, cols=3, subplot_titles =list(c_dict.values()))\n",
    "for i,f in enumerate(fig_dict.keys()):\n",
    "    sfig = fig_dict[f]\n",
    "    for trace in list(sfig.select_traces()):\n",
    "        trace.showlegend=False\n",
    "        figsuper.add_trace(trace,\n",
    "            row=1, col=i+1)\n",
    "    # add_trace copies the traces only, not the source figure's axis settings. Carry the\n",
    "    # x tick format over ('d' for counts, '.1%' for the two ratio metrics); otherwise the\n",
    "    # \"Percent ...\" panels would label their axis with raw 0-1 fractions.\n",
    "    figsuper.update_xaxes(tickformat=sfig.layout.xaxis.tickformat, row=1, col=i+1)\n",
    "\n",
    "figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative',yaxis2={'categoryorder':'total ascending'},yaxis3={'categoryorder':'total ascending'})\n",
    "figsuper.update_yaxes(\n",
    "    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
    "    ticks=\"outside\")\n",
    "figsuper.update_xaxes(\n",
    "    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
    "    ticks=\"outside\")\n",
    "figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n",
    "# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n",
    "figsuper.write_html(\"plot_html/europe_contribution_bar.html\",config= dict(displayModeBar = False, responsive = True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "id": "140395ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n",
    "# wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n",
    "# EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique', normalize='all').fillna(0)\n",
    "#\n",
    "# # Generate a mask for the upper triangle\n",
    "# mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n",
    "#\n",
    "# # Set up the matplotlib figure\n",
    "# f, ax = plt.subplots(figsize=(11, 9))\n",
    "#\n",
    "# # Draw the heatmap with the mask and correct aspect ratio\n",
    "# g = sns.heatmap(EU_co_occur, mask=mask,\n",
    "#             square=True, linewidths=.5)\n",
    "#\n",
    "# g.set_ylabel(None)\n",
    "# g.set_xlabel(None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "id": "c959287e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct (record, country) rows for every country type except \"Other\" and \"China\",\n",
    "# self-merged on the record id so that each row pairs two countries sharing a record\n",
    "# (a country is also paired with itself).\n",
    "wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n",
    "wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n",
    "# Country x country matrix holding the number of distinct shared records.\n",
    "EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
    "\n",
    "# Order rows and columns by each country's number of pair rows, largest first.\n",
    "eu_list = wos_collabs_EU.groupby(['Country_x'])[record_col].count().sort_values(ascending=False).index\n",
    "EU_co_occur = EU_co_occur.reindex(index = eu_list, columns=eu_list)\n",
    "\n",
    "# np.triu marks the upper triangle including the diagonal; those cells are blanked\n",
    "# (None) so that every country pair is drawn once and self-pairs are hidden.\n",
    "mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n",
    "data = np.where(mask,None,EU_co_occur)\n",
    "\n",
    "fig = px.imshow(data,\n",
    "                labels=dict(x=\"Country\", y=\"Country\", color=\"Co-publication with China\"),\n",
    "                x=list(EU_co_occur.columns),\n",
    "                y=list(EU_co_occur.index), title=\"Intra-European patterns<br>Co-occurrences of countries in Chinese co-publications\"\n",
    "               )\n",
    "fig.update_layout(title_x=0.5,\n",
    "                   width=1000, height=1000,\n",
    "                   xaxis_showgrid=False,\n",
    "                   yaxis_showgrid=False,\n",
    "                   yaxis_autorange='reversed', template='plotly_white')\n",
    "# fig.update_traces(hovertemplate='<b>%{y}</b><br>%{x}<br>Co-publications: %{hovertext}')\n",
    "fig.update_xaxes(tickangle= -90, ticks=\"outside\")\n",
    "fig.update_yaxes(\n",
    "    ticks=\"outside\")\n",
    "# fig.show(config= dict(displayModeBar = False,responsive=True))\n",
    "fig.write_html(\"plot_html/intraeurope_collabs.html\",config= dict(displayModeBar = False, responsive = True))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "df1f03ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n",
    "# collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n",
    "# data = collab_year.groupby([\"Publication Year\",'Country_Type'],as_index=False)[record_col].nunique()\n",
    "#\n",
    "#\n",
    "# g=sns.lineplot(data,y=record_col,x=\"Publication Year\", hue=\"Country_Type\", marker=\"o\")\n",
    "# g.set(xticks=list(range(2012,2022+1,2)))\n",
    "# g.legend(title=None)\n",
    "# g.set_xlabel(None)\n",
    "# g.set_ylabel(None)\n",
    "# g.set_title(\"Yearly output of co-publications with China\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "122d0260",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "id": "f19501a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Non-China (record, country) rows, enriched with the country-type columns and the\n",
    "# publication year of each record.\n",
    "collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n",
    "collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n",
    "\n",
    "# Distinct records per (year, country). The unstack -> fillna(0) -> stack round trip adds\n",
    "# an explicit 0 row for every year/country pair that has no record; stacking leaves the\n",
    "# value column named 0, which is renamed back to record_col.\n",
    "data = (collab_year.groupby(['Publication Year',\"Country\"])[record_col]\n",
    "        .nunique(dropna=False).unstack()\n",
    "        .fillna(0)\n",
    "        .stack()\n",
    "        .reset_index()\n",
    "        .rename(columns={0:record_col}))\n",
    "# Baseline per country = its earliest year with a non-zero count (rows > 0, sorted by year,\n",
    "# first row per country kept). The merge attaches it under the \"_relative_growth\" suffix.\n",
    "data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset=\"Country\"),\n",
    "                  on=[\"Country\"], suffixes=[None,\"_relative_growth\"])\n",
    "# Overwrite the baseline column with the change relative to that baseline, in percent.\n",
    "data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]*100\n",
    "# Sort chronologically within each country so the running total accumulates year by year.\n",
    "data = data.sort_values(by =[\"Country\",\"Publication Year\"], ascending=[True,True])\n",
    "data[record_col+\"_cumsum\"] = (data.groupby('Country',as_index=False)[record_col].cumsum())\n",
    "data = data.merge(wos_country_types, on='Country')\n",
    "# data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "id": "b9585045",
   "metadata": {},
   "outputs": [],
   "source": [
    "# data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n",
    "# fig = px.choropleth(data, locations=\"ISO3\", color=record_col, hover_name=\"Country\",\n",
    "#                     animation_frame='Publication Year', scope=\"europe\", template='plotly', range_color=[data[record_col].min(),data[record_col].max()])\n",
    "# fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "id": "952bdbfe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ISO3 codes are what the choropleth uses as locations.\n",
    "# NOTE(review): `cc` is presumably the country_converter module imported earlier - confirm.\n",
    "data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n",
    "# Map of the cumulative count in the 2022 rows; the colour range spans the cumulative\n",
    "# column over all years of `data`.\n",
    "fig = px.choropleth(data[data[\"Publication Year\"] == 2022], locations=\"ISO3\", color=record_col+\"_cumsum\", hover_name=\"Country\",\n",
    "                    scope=\"europe\", template='plotly',\n",
    "                    range_color=[data[record_col+\"_cumsum\"].min(),data[record_col+\"_cumsum\"].max()],hover_data=[\"Eurovoc_Class\"])\n",
    "# original: '<b>%{hovertext}</b><br><br>ISO3=%{location}<br>Eurovoc_Class=%{customdata[0]}<br>UT (Unique WOS ID)_cumsum=%{z}<extra></extra>'\n",
    "\n",
    "# Hover text: country name, region (first hover_data column) and the integer count.\n",
    "fig.update_traces(hovertemplate='<b>%{hovertext}</b>'\n",
    "                                '<br>Region: %{customdata[0]}<br>'\n",
    "                                'Co-publications: %{z:d}<extra></extra>')\n",
    "\n",
    "cumsum_country = go.Figure(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "id": "ae3cb8e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col,x='Publication Year', color=\"Eurovoc_Class\",line_group=\"Country\", markers=True,\n",
    "#               labels={\n",
    "#                      record_col: 'Number of co-publications',\n",
    "#                   \"Eurovoc_Class\": \"Region\"\n",
    "#                  },\n",
    "#                 title=\"Yearly output of co-publications\", template='plotly',hover_name= \"Country\")\n",
    "# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')\n",
    "# # fig.update_layout(hovermode='x unified')\n",
    "# fig.add_shape(\n",
    "#         # Rectangle with reference to the plot\n",
    "#             type=\"rect\",\n",
    "#             xref=\"paper\",\n",
    "#             yref=\"paper\",\n",
    "#             x0=0,\n",
    "#             y0=0,\n",
    "#             x1=1.0,\n",
    "#             y1=1.0,\n",
    "#             line=dict(\n",
    "#                 color=\"black\",\n",
    "#                  width=0.5,\n",
    "#              )\n",
    "#          )\n",
    "# fig.update_yaxes(\n",
    "#     showgrid=True,\n",
    "#     ticks=\"outside\")\n",
    "# fig.update_xaxes(\n",
    "#     showgrid=True,\n",
    "#     ticks=\"outside\")\n",
    "# fig.show(config= dict(displayModeBar = False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "id": "dd72ad3f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# fig.data[0].hovertemplate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "600d7459",
   "metadata": {},
   "outputs": [],
   "source": [
    "# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
    "#               y=record_col+\"_relative_growth\",\n",
    "#               x='Publication Year',\n",
    "#               color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n",
    "#               labels={\n",
    "#                      record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n",
    "#                  },\n",
    "#                 title=\"Relative growth of co-publications<br>(baseline: 2011)\", template='plotly',hover_name= \"Country\")\n",
    "# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')\n",
    "# fig.add_shape(\n",
    "#         # Rectangle with reference to the plot\n",
    "#             type=\"rect\",\n",
    "#             xref=\"paper\",\n",
    "#             yref=\"paper\",\n",
    "#             x0=0,\n",
    "#             y0=0,\n",
    "#             x1=1.0,\n",
    "#             y1=1.0,\n",
    "#             line=dict(\n",
    "#                 color=\"black\",\n",
    "#                  width=0.5,\n",
    "#              )\n",
    "#          )\n",
    "# fig.update_yaxes(\n",
    "#     showgrid=True,\n",
    "#     ticks=\"outside\")\n",
    "# fig.update_xaxes(\n",
    "#     showgrid=True,\n",
    "#     ticks=\"outside\")\n",
    "# fig.show(config= dict(displayModeBar = False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "id": "0ee76d32",
   "metadata": {},
   "outputs": [],
   "source": [
    "from plotly.subplots import make_subplots\n",
    "import plotly.graph_objects as go\n",
    "\n",
    "figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Number of publications (2022)\",\"Cumulative number of co-publications\",\n",
    "                                                         \"Yearly output of co-publications\",\"Relative growth of co-publications\"],\n",
    "                         specs=[\n",
    "                            [{\"type\": \"geo\", \"rowspan\":3}, {\"type\": \"xy\"}],\n",
    "                            [None,{\"type\": \"xy\"}],\n",
    "                            [None, {\"type\": \"xy\"}]\n",
    "                         ])\n",
    "\n",
    "for trace in list(cumsum_country.select_traces()):\n",
    "    figsuper.add_trace(trace,\n",
    "        row=1, col=1\n",
    "    )\n",
    "\n",
    "fig = px.area(data.sort_values(ascending=True, by='Publication Year'),  y=record_col+\"_cumsum\",\n",
    "              x='Publication Year',\n",
    "              color=\"Eurovoc_Class\",\n",
    "              line_group=\"Country\",\n",
    "              labels={\n",
    "                     record_col: 'Number of co-publications',\n",
    "                  \"Eurovoc_Class\": \"Region\"\n",
    "                 },\n",
    "                title=\"Cumulative number of co-publications\",\n",
    "              hover_name= \"Country\")\n",
    "fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')\n",
    "\n",
    "for trace in list(fig.select_traces()):\n",
    "    figsuper.add_trace(trace,\n",
    "        row=1, col=2\n",
    "    )\n",
    "\n",
    "\n",
    "fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
    "              y=record_col,\n",
    "              x='Publication Year',\n",
    "              color=\"Eurovoc_Class\",\n",
    "              line_group=\"Country\",\n",
    "              markers=True,\n",
    "              labels={\n",
    "                     record_col: 'Number of co-publications',\n",
    "                  \"Eurovoc_Class\": \"Region\"\n",
    "                 },\n",
    "                title=\"Yearly output of co-publications\",hover_name= \"Country\")\n",
    "fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')\n",
    "\n",
    "for trace in list(fig.select_traces()):\n",
    "    trace.showlegend=False\n",
    "    figsuper.add_trace(trace,\n",
    "        row=2, col=2\n",
    "    )\n",
    "\n",
    "fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
    "              y=record_col+\"_relative_growth\",\n",
    "              x='Publication Year',\n",
    "              color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n",
    "              labels={\n",
    "                     record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n",
    "                 },\n",
    "                title=\"Relative growth of co-publications\", template='plotly',hover_name= \"Country\")\n",
    "fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')\n",
    "fig.add_shape(\n",
    "        # Rectangle with reference to the plot\n",
    "            type=\"rect\",\n",
    "            xref=\"paper\",\n",
    "            yref=\"paper\",\n",
    "            x0=0,\n",
    "            y0=0,\n",
    "            x1=1.0,\n",
    "            y1=1.0,\n",
    "            line=dict(\n",
    "                color=\"black\",\n",
    "                 width=0.5,\n",
    "             )\n",
    "         )\n",
    "\n",
    "for trace in list(fig.select_traces()):\n",
    "    trace.showlegend=False\n",
    "    figsuper.add_trace(trace,\n",
    "        row=3, col=2\n",
    "    )\n",
    "\n",
    "figsuper.update_yaxes(\n",
    "    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
    "    ticks=\"outside\")\n",
    "figsuper.update_xaxes(\n",
    "    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
    "    ticks=\"outside\")\n",
    "figsuper.update_layout({'template':\"plotly\"})\n",
    "figsuper.layout[\"geo\"][\"scope\"] = 'europe'\n",
    "figsuper.update_coloraxes(colorbar=dict(lenmode='fraction',len=0.55, orientation=\"v\",yanchor='top', title=\"Co-publications\",\n",
    "                                        ticks=\"outside\", ticksuffix=\" \",outlinewidth=0.5))\n",
    "# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n",
    "figsuper.write_html(f\"plot_html/country_trends_overall.html\",config= dict(displayModeBar = False, responsive = True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "e4c50e14",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "Publication Year  2011  2012  2013  2014  2015  2016  2017  2018  2019  2020   \nCountry                                                                        \nAustria             22    24    26    39    50    57    72    89   138   137  \\\nBelgium             34    38    40    65    71    81    90   133   179   213   \nBulgaria             4     5     8     9     7    19    21    18    10    25   \nCroatia              1     2     6     8    10     7    10    19    27    29   \nCyprus               2     1     5     5     5     5     8     7    15    28   \nCzech Republic      13    15    16    21    20    36    37    56    64    81   \nDenmark             35    33    40    59    68    74   101   195   234   245   \nEstonia              3     3     7    10    12    10    15    15    16    38   \nFinland             31    35    44    82   100   125   126   198   241   256   \nFrance             117   130   174   231   269   325   348   491   648   691   \nGermany            123   172   192   273   310   365   456   604   801   907   \nGreece              15    18    19    32    35    50    47    81   114   122   \nHungary             11    11    21    16    20    38    34    47    61    61   \nIreland             13    16    22    31    27    45    66    72    84   116   \nItaly               51    70    84   116   178   187   247   325   441   571   \nLatvia               0     0     1     0     1     8    10    15    10     9   \nLithuania            1     2    10     4     4    13    12    23    38    36   \nLuxembourg           2     3     3     1     8     9    13    15    18    22   \nMalta                1     0     0     0     1     1     0     0     6     2   \nNetherlands         72    64    77   103   139   166   220   297   408   470   \nNorway              30    42    60    76    67    88   104   134   222   253   \nPoland              17    31    37    57    73    82    98   110   138   181   \nPortugal            16    23    
35    41    45    58    79   119   136   147   \nRomania              7    15    13    16    25    26    37    57    64    55   \nSlovakia             9     6     6    10    12    22    18    27    27    34   \nSlovenia             7     7    10    12    17    27    22    47    54    31   \nSpain               50    49    69   112   138   185   232   273   356   386   \nSweden              34    50    59    83   113   170   233   232   385   359   \nSwitzerland         37    50    54    74    74    95   155   195   233   263   \nUnited Kingdom     363   417   531   660   781   979  1350  1837  2430  3108   \n\nPublication Year  2021  2022  \nCountry                       \nAustria            185   205  \nBelgium            242   292  \nBulgaria            32    19  \nCroatia             33    35  \nCyprus              36    43  \nCzech Republic      93   123  \nDenmark            293   343  \nEstonia             45    39  \nFinland            289   380  \nFrance             807   858  \nGermany           1210  1386  \nGreece             139   181  \nHungary             83    90  \nIreland            167   187  \nItaly              641   811  \nLatvia              13    18  \nLithuania           38    38  \nLuxembourg          35    51  \nMalta                7    10  \nNetherlands        529   655  \nNorway             304   311  \nPoland             276   353  \nPortugal           204   212  \nRomania             48    62  \nSlovakia            36    45  \nSlovenia            48    40  \nSpain              473   640  \nSweden             428   510  \nSwitzerland        349   447  \nUnited Kingdom    3718  4245  ",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th>Publication Year</th>\n      <th>2011</th>\n      <th>2012</th>\n      <th>2013</th>\n      <th>2014</th>\n      <th>2015</th>\n      <th>2016</th>\n      <th>2017</th>\n      <th>2018</th>\n      <th>2019</th>\n      <th>2020</th>\n      <th>2021</th>\n      <th>2022</th>\n    </tr>\n    <tr>\n      <th>Country</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Austria</th>\n      <td>22</td>\n      <td>24</td>\n      <td>26</td>\n      <td>39</td>\n      <td>50</td>\n      <td>57</td>\n      <td>72</td>\n      <td>89</td>\n      <td>138</td>\n      <td>137</td>\n      <td>185</td>\n      <td>205</td>\n    </tr>\n    <tr>\n      <th>Belgium</th>\n      <td>34</td>\n      <td>38</td>\n      <td>40</td>\n      <td>65</td>\n      <td>71</td>\n      <td>81</td>\n      <td>90</td>\n      <td>133</td>\n      <td>179</td>\n      <td>213</td>\n      <td>242</td>\n      <td>292</td>\n    </tr>\n    <tr>\n      <th>Bulgaria</th>\n      <td>4</td>\n      <td>5</td>\n      <td>8</td>\n      <td>9</td>\n      <td>7</td>\n      <td>19</td>\n      <td>21</td>\n      <td>18</td>\n      <td>10</td>\n      <td>25</td>\n      <td>32</td>\n      <td>19</td>\n    </tr>\n    <tr>\n      <th>Croatia</th>\n      <td>1</td>\n      <td>2</td>\n      <td>6</td>\n      <td>8</td>\n      <td>10</td>\n      <td>7</td>\n      <td>10</td>\n      <td>19</td>\n      <td>27</td>\n      <td>29</td>\n      <td>33</td>\n      
<td>35</td>\n    </tr>\n    <tr>\n      <th>Cyprus</th>\n      <td>2</td>\n      <td>1</td>\n      <td>5</td>\n      <td>5</td>\n      <td>5</td>\n      <td>5</td>\n      <td>8</td>\n      <td>7</td>\n      <td>15</td>\n      <td>28</td>\n      <td>36</td>\n      <td>43</td>\n    </tr>\n    <tr>\n      <th>Czech Republic</th>\n      <td>13</td>\n      <td>15</td>\n      <td>16</td>\n      <td>21</td>\n      <td>20</td>\n      <td>36</td>\n      <td>37</td>\n      <td>56</td>\n      <td>64</td>\n      <td>81</td>\n      <td>93</td>\n      <td>123</td>\n    </tr>\n    <tr>\n      <th>Denmark</th>\n      <td>35</td>\n      <td>33</td>\n      <td>40</td>\n      <td>59</td>\n      <td>68</td>\n      <td>74</td>\n      <td>101</td>\n      <td>195</td>\n      <td>234</td>\n      <td>245</td>\n      <td>293</td>\n      <td>343</td>\n    </tr>\n    <tr>\n      <th>Estonia</th>\n      <td>3</td>\n      <td>3</td>\n      <td>7</td>\n      <td>10</td>\n      <td>12</td>\n      <td>10</td>\n      <td>15</td>\n      <td>15</td>\n      <td>16</td>\n      <td>38</td>\n      <td>45</td>\n      <td>39</td>\n    </tr>\n    <tr>\n      <th>Finland</th>\n      <td>31</td>\n      <td>35</td>\n      <td>44</td>\n      <td>82</td>\n      <td>100</td>\n      <td>125</td>\n      <td>126</td>\n      <td>198</td>\n      <td>241</td>\n      <td>256</td>\n      <td>289</td>\n      <td>380</td>\n    </tr>\n    <tr>\n      <th>France</th>\n      <td>117</td>\n      <td>130</td>\n      <td>174</td>\n      <td>231</td>\n      <td>269</td>\n      <td>325</td>\n      <td>348</td>\n      <td>491</td>\n      <td>648</td>\n      <td>691</td>\n      <td>807</td>\n      <td>858</td>\n    </tr>\n    <tr>\n      <th>Germany</th>\n      <td>123</td>\n      <td>172</td>\n      <td>192</td>\n      <td>273</td>\n      <td>310</td>\n      <td>365</td>\n      <td>456</td>\n      <td>604</td>\n      <td>801</td>\n      <td>907</td>\n      <td>1210</td>\n      <td>1386</td>\n    </tr>\n    <tr>\n      
<th>Greece</th>\n      <td>15</td>\n      <td>18</td>\n      <td>19</td>\n      <td>32</td>\n      <td>35</td>\n
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "year_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
    "year_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "e4e82db7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 1500x1500 with 2 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABL8AAASuCAYAAAAj9oupAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzddVhU6fvH8TeKhIEYiCIGoYCNCqJigN3d4tqJrt26dscqJnbn2pi7aweKgYUiihISotgCKvz+QGcdMb7DTz0D3K/rOtflnBg+cztx5pnneY5OQkJCAkIIIYQQQgghhBBCpELplA4ghBBCCCGEEEIIIcTPIo1fQgghhBBCCCGEECLVksYvIYQQQgghhBBCCJFqSeOXEEIIIYQQQgghhEi1pPFLCCGEEEIIIYQQQqRa0vglhBBCCCGEEEIIIVItafwSQgghhBBCCCGEEKmWNH4JIYQQQgghhBBCiFRLV+kAQgghhBBCCCGEECnJ26h7Skf46TLktFQ6wg8jPb+EEEIIIYQQQgghRKoljV9CCCGEEEIIIYQQItWSxi8hhBBCCCGEEEIIkWpJ45cQQgghhBBCCCGESLVkwnshhBBCCCGEEEIITcS/VzqB0IA0fqVyz5+/UDpCimJklEVqlgxSN81JzZJH6qY5qVnySN00JzVLHqmb5qRmyWNklIUXz18qHSNFyWKUWWqWDFmMMisdQYgkdBISEhKUDiF+ngx6eZWOkKK8jQuVmiWD1E1zb+NC0dM3VzpGihMXG4K+QT6lY6QosTHBUrNkiI0JxsAgv9IxUpSYmCAMDQsoHSPFefPmAUaZUs+l5H+F56/uSc2S4fmre2TPUkjpGCnKkxd3yGlUWOkYKU7Uc3+lI/wSbyPvKB3hp8uQK/W8Z8icX0IIIYQQQgghhBAi1ZJhj0IIIYQQQgghhBCaSIhXOoHQgPT8EkIIIYQQQgghhBCpljR+CSGEEEIIIYQQQohUSxq/hBBCCCGEEEIIIUSqJY1fX7Fjxw5sbGzYtm3bD7vPs2fPcvfu3a9ud3Nzw8PD44f9PaWZmeVm82ZPIsKvcz/Qh5kz/kBfXx+Aco6lOXF8N9FP/Ll+/QSdO7VROK32+FbdPjIyysL9QB86uLVUKKV2+VbN8uUzY8/utTx7GoDfzVM0b95A4bTaw8qqIPv2refJ49sE3PFm4MCeqm329sU5cXw3Tx7f5uSJPTg6llYwqfZo2LA2sTHBasumjUsAKFmyKCdP7CH6iT+nT+3D3r64wmm1x9fqdvjw1iTrY2OCWbp0ltKRFeXm1pyYmKAky+vX9wGoXdsVb+8DREX5ceHCIerVq6FsYC1ibp6Hv/5aSUTEdW7dOoW7e2fVtmrVKuHtfYBHj27i5bWBQoXS9tUC9fT0OHfhAM6VyqnWVateidPnvIiIusnpc17UqFlF7ZjOXdrie/0YIWG+7Ni1ioIF096VZDWt27WbJ3j+6l6SZdjwvkrEV4Senh6nvb2o6OyoWudUoSz/nthJcLgvx0/voUrVCmrHBAZf5MmLO2pLpkwZf3V0ReTOY8rKtfO58+A8126dZOKUEejr6wGQv4A5f+1ezYOwK5w+v5+qrhXVjj12eg9Rz/3VFlu71HO1PiH+P2TC+6/w8vIif/787N69mxYtWvyQ++zYsSNr167Fysrqi9s9PDzIkCHDD/lb2mDLZk+io5/i4tqUbNmMWeY5h/fv3zP3z6Xs3buOpZ7r6NylP6VLF2f5sjmEhUdy4MA/SsdW3NfqNnzEJNU+U6eMIm/ePAqm1C5fq9mo0VPZs3st9wKDcHCsRZXK5Vmzej5+fv7cuHFb6diK0tHRYfeuNfj4+OJYrjbW1hasW7uAh6Hh/PPvSQ4d3Mz2v/bRrftAatVy4cD+jZSydyU4+KHS0RVlZ1eIffuO0LvPMNW6mJhYMmY0ZPeuNWzevItu3QbSrVt7du1cjV0RZ16/fqNgYu3wtbqlS5cOPb3/
PvccHezZsGERS5euVSKm1ti2bS+HDx9X3c6QQZeDBzezf/8/FCtmy5YtSxkxYgoHD/5LjRpV2LRpMRUrNuDaNT8FU2uH9esXERQUQoUK9bGzK8Tq1fMJCgrlzp177Ny5ipkzF7F58y46dmzFwYObKFHChVevXisd+5fT19djxap5FClio1pnaVmADZuWMHH8bLz2HaFeg5ps3LyEMqWqExQUSrXqlZgwaRhdOw0gICCQP8YPYcPmJVR0qqfgI/m1klO3qpUbkz79f/0NGjeuw+ixg9i44S8lHsIvp6+vh+fKudgVKaxalzNndjZtWcqcWYvZs/sQTZvXY/3mxZQrXYuHD8PJk8eUrMZG2Bd35c0nn6Fp5bW6at18nj59Tv1abcmWzZj5i6bw/v17xo2ZwdqNi/C7eZvqVZpRt3511mxYSAWHOoSGhJEuXTqsrAvSoHZb7gbcV93f48fRyj2Y1C5eJrxPSaTx6wseP37M2bNnmTJlCsOHDyc4OJh8+X7+L1vGxsY//W/8KjY2Vjg5lSGveUkiI6MAGD9hJtOnjeHevQeERzxizJhpAAQEBFK1SkXatG6c5hu/vlW3j41fFSs44OLiTFhYhJJRtca3anbqtDfm5mZUrtKYFy9e4u9/l1q1XSjvVDbNN36Zmprg63sD974jePnyFQEBgRw9epoKFR3IY2bK4yfRuLuPID4+ntu371K9ehV6dO/A6A+v27TK1taaGzdvExHxSG39b7+1IiYmRvU6HTR4HLVru9KsWX3WrftxPYhTqq/V7VPp0qVjwoShzJ6zhEuXrv7CdNonJiaWmJj/ajVkSB90dHQYPXoaY8YM5NixMyxatAqApUvXUr9+DZo3r5/mG7+MjY0oV640vXsP4+7d+9y9e58jR47j4lIRF5eKnDt3kYkT5wAwatRU6tSpRuvWjVmxYqPCyX8tG1trVqz6Ex0dHbX1Znlzs3rVZhYuWAnAQo8VDB3ahzJlSxIUFErNWlX5959THDz4LwBTp8zj3PkDZM+RjSdp4Mt1cuv2OOqJal8joywMG9GXUSOmpIkfk2xsrPFcOYfPSka58mV49/4dHvOWAzB31hL69O1MWYdS7Nl9kMI2VoSFRfDgfrACqZVlXcgSB0d77KzK8+jRYwCmTZ7H+EnD+efICQpa5KNujVa8fv2GeXPuUrlKedq5NWfGVA8KFDRHTy8Dly5eJTY2TuFHIoT2kWGPX3Dw4EGyZMlCw4YNyZUrF7t371Ztc3V1ZceOHarb3t7e2Nj89+vP2rVrcXFxoXjx4jRt2hQfHx/VcQAdOnTAw8ODHTt20Lp1a/r06UOZMmXYs2eP2rDHuLg4pk6dSqVKlShatCiurq5s2bLlVzz8HyI8/BF167VVNUZ8lDWrEYcOH6Vb14FJjjEyMvpV8bTWt+oGid3GFy+ZSb/fR8qH2gffqlmVyhX49+gpXrx4qVrfvHkXlq/Y8Ktjap3w8Ejate/Ny5evAChfvizOzuU4cfwsFhb5uXzpGvGf/Jp1/Zof5cqVUSqu1rCzLcSdO/eSrC/naM/pMxfU1p0564NTORkuCl+v26c6dGhBtmzGzJq16BelShmyZcvKoEE9GT16GnFxcaxfv53Ro5M2QhsZZVEgnXZ58yaWV69e06FDS3R1dSlUyBInpzJcuXKDggXzc+HCFbX9b9y4Rbk0+Bp1di7HyRPnqO7STG39qZPeDB86EQBdXV3cOrRET1+Piz6+ADx5/JSKzg4UKmxJ+vTpadO2CffvB/M0+tkvfwxKSG7dPtXv966Ehz9ifRr5UaSCsyOnTpyjVjX1aTqePHlKjhzZqd+wJgB161cnc+ZM3LyZ+MOkja21Ws+ltCQy8hEtmnRWNXx9lMUoM2UcSnHN96Zaj3Lvcxcp61gKSGxsDA0Jk+8IQnyFNH59gZeXF1WrViVdunS4urqya9cuEhISvnvczZs3mTFjBn/88QcHDhygbNmy9O/fn/j4eLZv3w4kDm3s3Dlx
/onLly9jbW3N1q1bcXZ2VrsvT09Pjh07hoeHBwcPHqRx48ZMnDiRqKioJH9XGz179pwjR/4bsqGjo0PvXp349+gpHjwIwfv
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "f, ax = plt.subplots(figsize=(15, 15))\n",
    "g = sns.heatmap(year_pivot, annot=True, fmt=\"d\", linewidths=.5, ax=ax)\n",
    "g.set(xlabel=\"\", ylabel=\"\")\n",
    "for i in range(year_pivot.shape[0]+1):\n",
    "    ax.axhline(i, color='white', lw=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "78bb0b4e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "Publication Year       2011       2012       2013       2014       2015   \nCountry                                                                   \nAustria            1.962533   1.801802   1.557819   1.736420   1.865672  \\\nBelgium            3.033006   2.852853   2.396645   2.894034   2.649254   \nBulgaria           0.356824   0.375375   0.479329   0.400712   0.261194   \nCroatia            0.089206   0.150150   0.359497   0.356189   0.373134   \nCyprus             0.178412   0.075075   0.299581   0.222618   0.186567   \nCzech Republic     1.159679   1.126126   0.958658   0.934996   0.746269   \nDenmark            3.122212   2.477477   2.396645   2.626892   2.537313   \nEstonia            0.267618   0.225225   0.419413   0.445236   0.447761   \nFinland            2.765388   2.627628   2.636309   3.650935   3.731343   \nFrance            10.437110   9.759760  10.425404  10.284951  10.037313   \nGermany           10.972346  12.912913  11.503895  12.154942  11.567164   \nGreece             1.338091   1.351351   1.138406   1.424755   1.305970   \nHungary            0.981267   0.825826   1.258238   0.712378   0.746269   \nIreland            1.159679   1.201201   1.318155   1.380232   1.007463   \nItaly              4.549509   5.255255   5.032954   5.164737   6.641791   \nLatvia             0.000000   0.000000   0.059916   0.000000   0.037313   \nLithuania          0.089206   0.150150   0.599161   0.178094   0.149254   \nLuxembourg         0.178412   0.225225   0.179748   0.044524   0.298507   \nMalta              0.089206   0.000000   0.000000   0.000000   0.037313   \nNetherlands        6.422837   4.804805   4.613541   4.585931   5.186567   \nNorway             2.676182   3.153153   3.594967   3.383793   2.500000   \nPoland             1.516503   2.327327   2.216896   2.537845   2.723881   \nPortugal           1.427297   1.726727   2.097064   1.825467   1.679104   \nRomania            0.624442   1.126126   0.778910   0.712378   0.932836   
\nSlovakia           0.802855   0.450450   0.359497   0.445236   0.447761   \nSlovenia           0.624442   0.525526   0.599161   0.534283   0.634328   \nSpain              4.460303   3.678679   4.134212   4.986643   5.149254   \nSweden             3.033006   3.753754   3.535051   3.695459   4.216418   \nSwitzerland        3.300624   3.753754   3.235470   3.294746   2.761194   \nUnited Kingdom    32.381802  31.306306  31.815458  29.385574  29.141791   \n\nPublication Year       2016       2017       2018       2019       2020   \nCountry                                                                   \nAustria            1.699970   1.689744   1.552958   1.816267   1.543488  \\\nBelgium            2.415747   2.112180   2.320712   2.355883   2.399730   \nBulgaria           0.566657   0.492842   0.314081   0.131614   0.281658   \nCroatia            0.208768   0.234687   0.331530   0.355357   0.326724   \nCyprus             0.149120   0.187749   0.122143   0.197420   0.315457   \nCzech Republic     1.073665   0.868341   0.977142   0.842327   0.912573   \nDenmark            2.206979   2.370336   3.402548   3.079758   2.760252   \nEstonia            0.298240   0.352030   0.261734   0.210582   0.428121   \nFinland            3.728005   2.957052   3.454894   3.171887   2.884182   \nFrance             9.692812   8.167097   8.567440   8.528560   7.785038   \nGermany           10.885774  10.701713  10.539173  10.542248  10.218567   \nGreece             1.491202   1.103027   1.413366   1.500395   1.374493   \nHungary            1.133313   0.797935   0.820101   0.802843   0.687247   \nIreland            1.342082   1.548932   1.256325   1.105554   1.306895   \nItaly              5.577095   5.796761   5.670913   5.804159   6.433078   \nLatvia             0.238592   0.234687   0.261734   0.131614   0.101397   \nLithuania          0.387712   0.281624   0.401326   0.500132   0.405588   \nLuxembourg         0.268416   0.305093   0.261734   0.236904   0.247859   \nMalta              
0.029824   0.000000   0.000000   0.078968   0.022533   \nNetherlands        4.950790   5.163107   5
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th>Publication Year</th>\n      <th>2011</th>\n      <th>2012</th>\n      <th>2013</th>\n      <th>2014</th>\n      <th>2015</th>\n      <th>2016</th>\n      <th>2017</th>\n      <th>2018</th>\n      <th>2019</th>\n      <th>2020</th>\n      <th>2021</th>\n      <th>2022</th>\n    </tr>\n    <tr>\n      <th>Country</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Austria</th>\n      <td>1.962533</td>\n      <td>1.801802</td>\n      <td>1.557819</td>\n      <td>1.736420</td>\n      <td>1.865672</td>\n      <td>1.699970</td>\n      <td>1.689744</td>\n      <td>1.552958</td>\n      <td>1.816267</td>\n      <td>1.543488</td>\n      <td>1.712804</td>\n      <td>1.623248</td>\n    </tr>\n    <tr>\n      <th>Belgium</th>\n      <td>3.033006</td>\n      <td>2.852853</td>\n      <td>2.396645</td>\n      <td>2.894034</td>\n      <td>2.649254</td>\n      <td>2.415747</td>\n      <td>2.112180</td>\n      <td>2.320712</td>\n      <td>2.355883</td>\n      <td>2.399730</td>\n      <td>2.240533</td>\n      <td>2.312139</td>\n    </tr>\n    <tr>\n      <th>Bulgaria</th>\n      <td>0.356824</td>\n      <td>0.375375</td>\n      <td>0.479329</td>\n      <td>0.400712</td>\n      <td>0.261194</td>\n      <td>0.566657</td>\n      <td>0.492842</td>\n      <td>0.314081</td>\n      <td>0.131614</td>\n      <td>0.281658</td>\n      <td>0.296269</td>\n      <td>0.150447</td>\n    </tr>\n    <tr>\n      <th>Croatia</th>\n 
     <td>0.089206</td>\n      <td>0.150150</td>\n      <td>0.359497</td>\n      <td>0.356189</td>\n      <td>0.373134</td>\n      <td>0.208768</td>\n      <td>0.234687</td>\n      <td>0.331530</td>\n      <td>0.355357</td>\n      <td>0.326724</td>\n      <td>0.305527</td>\n      <td>0.277140</td>\n    </tr>\n    <tr>\n      <th>Cyprus</th>\n      <td>0.178412</td>\n      <td>0.075075</td>\n      <td>0.299581</td>\n      <td>0.222618</td>\n      <td>0.186567</td>\n      <td>0.149120</td>\n      <td>0.187749</td>\n      <td>0.122143</td>\n      <td>0.197420</td>\n      <td>0.315457</td>\n      <td>0.333302</td>\n      <td>0.340486</td>\n    </tr>\n    <tr>\n      <th>Czech Republic</th>\n      <td>1.159679</td>\n      <td>1.126126</td>\n      <td>0.958658</td>\n      <td>0.934996</td>\n      <td>0.746269</td>\n      <td>1.073665</td>\n      <td>0.868341</td>\n      <td>0.977142</td>\n      <td>0.842327</td>\n      <td>0.912573</td>\n      <td>0.861031</td>\n      <td>0.973949</td>\n    </tr>\n    <tr>\n      <th>Denmark</th>\n      <td>3.122212</td>\n      <td>2.477477</td>\n      <td>2.396645</td>\n      <td>2.626892</td>\n      <td>2.537313</td>\n      <td>2.206979</td>\n      <td>2.370336</td>\n      <td>3.402548</td>\n      <td>3.079758</td>\n      <td>2.760252</td>\n      <td>2.712712</td>\n      <td>2.715971</td>\n    </tr>\n    <tr>\n      <th>Estonia</th>\n      <td>0.267618</td>\n      <td>0.225225</td>\n      <td>0.419413</td>\n      <td>0.445236</td>\n      <td>0.447761</td>\n      <td>0.298240</td>\n      <td>0.352030</td>\n      <td>0.261734</td>\n      <td>0.210582</td>\n      <td>0.428121</td>\n      <td>0.416628</td>\n      <td>0.308813</td>\n    </tr>\n    <tr>\n      <th>Finland</th>\n      <td>2.765388</td>\n      <td>2.627628</td>\n      <td>2.636309</td>\n      <td>3.650935</td>\n      <td>3.731343</td>\n      <td>3.728005</td>\n      <td>2.957052</td>\n      <td>3.454894</td>\n      <td>3.171887</td>\n      <td>2.884182</td>\n      
<td>2.675678</td>\n      <td>3.008948</td>\n    </tr>\n    <tr>\n      <th>France</th>\n      <td>10.437110</
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "year_percent_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique', normalize='columns').fillna(0)*100\n",
    "year_percent_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "42dc8be7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "<Figure size 1500x1500 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAABQsAAASuCAYAAABlZX8qAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1QVRwPG4R+IoEaNsRsroCJWsNdEsWOJmphoEpOoMfbeS+yIvWHvxpqoiL33rth77xVs2DB+ge8P4AbkUgPZIO9zDidh7+zu7H2dmWXu7l6LwMDAQERERERERERERCTRszS6AiIiIiIiIiIiIvLfoMlCERERERERERERATRZKCIiIiIiIiIiIsE0WSgiIiIiIiIiIiKAJgtFREREREREREQkmCYLRUREREREREREBNBkoYiIiIiIiIiIiATTZKGIiIiIiIiIiIgAmiwUERERERERERGRYFZGV0D+fS/8XpI7e3Gjq/FBuHLbm1SpU8ZonRd+L7HLXiyeapT4XLt9NFYZ5MzqHE81Snxu3j0eqwxssxWNpxolPtfvHFNfZLDY9kVqB3Entu1AGcSd2GagMTnuxHZM1ngQN3ReajydlxpPY4HxYtMO3qcrC0VERERERERERATQZKGIiIiIiIiIiIgE02ShiIiIiIiIiIiIAJosFBERERERERERkWCaLBQRERERERERERFAk4UiIiIiIiIiIiISTJOFIiIiIiIiIiIiAmiyUERERERERERERIJpslBEREREREREREQATRaKiIiIiIiIiIhIME0WioiIiIiIiIiICKDJwgh5enri4ODAsmXL4mybBw4c4OrVqxG+3qRJEzw8POJsf0bKnCUjs36bwIUbBzlxfheD3HphY2MdYfmChR3ZsO13rt8/zsYdyyjsVCDCsiVKOXPoxGbOXt3P9z82DPPazPnjqVmrcpwdx4fA2jopuw+soWz5kpGWc61dhX2H13Pj7jHWblxM4SL5Iyxbq05Vzlzcw4mzO6lWo1KY1zZu+4NChR3jpO4fCmtra/YdWke5KDJwzJ+X9ZuXcPfRafYeXEv5CqUiLFu7bjXOXd7H6fO7qV7TJcxrW7Yvp1DhiPNLjKytk7Ln4NpIM1i1bgG+fpfC/UyYPMxs+Vp1qnH20l5OnttF9ffawabty9QO3hPdvqiiSzl27F3FjbvHWL5qLva5bSMsW7JUUQ6f2ML5qwfCjQez50/QePAetQPjKQNjaTw2ns5LjRfddrBw6VSevLgc5uf99zeE2kHMaCwwnsaDqFkZXYH/qnXr1pEjRw5WrVpFw4YNo14hGn766Sd+++037O3tzb7u4eFB0qRJ42RfRpv920SePXvOFzW+J80nHzN+sht/BfzF4F9HhSubIkVyFi+bzopla+nQujc/NmvEoj+mUcqpGq9fvwlX3n30ryyY9wcnj59hwe/TWL9mC0+ePCOfYx5y5srOhnXb/o1DTBBsbKyZNnsMjvnzRlrOIV9ups0aQ7dO/Tl86Bit2vzE4j+mU8KpKm/e+Icpa2lpyZgJQ+jb0w1LS0smTnEnn11pAKpU/YyHD3w4fep8vB1TQmNjY82MOeOizCBV6pR4rp7HxvXbaNuqJ980qseCxVMo4VwVX98nYcpaWloybuIQevdww9LSgklT3cmTK2jgqlLtcx48fMTpU+fi7ZgSGhsba6bPHhtlBj9+3w7rUH1wseJFmDV/AnNnLQ5X1tLSkrETh9C351AsLC2ZOHU4DrZ/Z6B2EFZM+qLFf0xnwtgZLP9jDd/98BUr18ynTPEavHr1Olz54cHjwYnjp1n0+3TWr9nKkydPg8eDbBoPQlE7MJ4yMJbGY+PpvNR40W0HAA4OufmleVd279xvWvbsmV+4cmoHMaOxwHgaD6JHVxaa8fjxYw4cOEDbtm3x9vbm9u3b/8p+06RJw0cfffSv7Cs+5c5jS/GSTnRq04eLF65w6MBRRrp50OCr2mbL
f9GgJv7+bxnUbySXL12jX69hvHz5ijr1akSwfTvWr9nKnl0H8XvuR85c2QHo0qM140ZOibfjSmjyOtizcesf5MqVI8qyFV3KcfHCFf5Yuoob128zZNBYMmXOSN58ucOVTZfuE9Kl+4TVXhtZtXI96dJ9Qvr0aQHo2rMto0dMivNjSagcHHKzeftybG2zR1m28bcNePXyNV07DeD6tVsMHzaRq1dv4FS0ULiyQRmkZdXKDXh5biBdurSmDHr0asdId2UQIq+DPRu3LSOXbdTt4NnT5zx65MujR774+j6h74AueEyYyYnjZ8KVDWkHq1ZuZJVn2HbQrWdbRg3/MK4Sjwsx6YuaNm/MkUPHGTFsIlevXGdw/1H4+b3gy6/rmC2fO48d69ZsCTUeZAOga482jNF4YKJ2YDxlYCyNx8bTeanxYtIOrK2tyZkrG8ePnTL1R48e+fLnn3+GK6t2EH0aC4yn8SD6NFloxsaNG0mVKhV169YlY8aMrFq1yvSai4sLnp6ept8PHTqEg4OD6ffffvuNSpUqUahQIRo0aIC3t7dpPYAffvgBDw8PPD09adSoEW3btqVYsWKsXr06zG3If/75J+7u7lSoUIECBQrg4uLC77///m8c/j/26JEv3zT4GR+fx2GWp06d0mz5YiWcOHTgaJhlhw8ep3hJJ7Pl7965T2Gn/GTL/ikfp/mYe/cektfBnlx2OXUVSShly5dk755DuFb9JsqyT588wyFfbkqWKoqFhQXfftcAv+cvuHH9Vriyjx8/5dWr1xQukp8iTgV59fIVT548w6VyBXwePdanVqGULV+SvbsPUr3y11GWLVehFBvWbyUgIMC0rErFL9m6eVe4siEZFHEqgJNzAV4GZ1C5SgUePfJNcJ9axady5Uuyb89BalaJOoPQGn/XgE8++RiPcTPNvm5qB05B7SAkA5cqFfB55Kt2EEpM+qKcubJz9OipMMvOn7tEiRJOZstHNB7Y2uXQeBCK2oHxlIGxNB4bT+elxotJO8idx5bAwEBuXI/6ohm1g+jTWGA8jQfRp9uQzVi3bh0VK1bE0tISFxcXvLy8aNu2LRYWFpGud+7cOUaOHMmkSZPInTs3v/32G506dWL37t0sX76cMmXK4OHhQbly5di0aRPHjx+nVatWdOnShU8++STM8xFnzJjBzp078fDwIF26dKxcuZIhQ4ZQuXJl0qdPH99vwT/i9/wFO7ftNf1uYWFBs1++Y8+ug2bLZ8qUgYsXLodZ5uPjSz7HPGbLuw0ay+QZI7G2TsqEMdN5+OARg9x6Mm7U1Lg7iA/AvNlLol3Wy3M9NVxdWLd5Cf/73/8ICAjg269b8tzMrQYBAQEMGTCaNRsXERAQSO8eQwgICKBrzzb06jY4Lg8hwZs7O/xtAhHJlSs7x46eZNzEIdRwrcztW3f5tY87hw4eC1c2ICCAQf1HsW7TYgICAunVfTABAQF079WO7l0GxeUhJHhzY9AOQuvQuQXTpsw3e+srBGUweMBo1m4MyqB396B20K1nW3p2VQahxaQv8nnkS5YsmcIs+zRrZp49fW62/NBBY5gyYxTW1kkZHzweDHbryViNB2GoHRhPGRhL47HxdF5qvJi0AwcHe/z8XjBt5mjKVSjJ3Tv3GTFsIlu37A5XVu0g+jQWGE/jQfTpysL33L9/n2PHjlGlShUAqlWrxu3btzl69GgUa8Ldu3exsLDg008/JVu2bHTq1IlRo0YREBBA2rRBl6B+/PHHpluNLSwsaN26Nfb29qbXQ+TLlw83NzecnJzInj07rVq14t27d9y4cSNuD/hf0H9IdwoVyY/7kPFmX0+eIhlv374Ls+zPt39G+IUo69dswdG2NI52ZRg5zIPceWyxz5OLzRt2MGLsAI6d3YHHtOGRfqGKhPVJ2k/ImDEDPbsOonrlr/lj6SomTnY3XTr9vtkzF5EnVykcbEuxYN4fVHQpx2Pfp1y9coNZ88Zz/MwOBrv1+pePImH76KMU
dOrckocPfPi6wc/s23uY5V5zyZo1s9nys2YsxD5HCXLnLMH8ub9TyaU8vr5PuHrlOnPmT+DUuV0Mde/9Lx/Fh6F8hVJk+TQ
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "f, ax = plt.subplots(figsize=(15, 15))\n",
    "g = sns.heatmap(year_percent_pivot, annot=True, fmt='.1f', linewidths=(.5), ax=ax, cbar=False)\n",
    "for t in ax.texts: t.set_text(t.get_text() + \" %\")\n",
    "g.set(xlabel=\"\", ylabel=\"\")\n",
    "for i in range(year_percent_pivot.shape[1]+1):\n",
    "    ax.axvline(i, color='white', lw=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "id": "e7b754ea",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "48f2898f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Institutional collab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "id": "3a9538e1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "         UT (Unique WOS ID)         Country              Institution_harm   \n15207   WOS:000389385100008           China         Natl Univ Def Technol  \\\n31500   WOS:000474277900004           China              Xian Shiyou Univ   \n139850  WOS:000867238100001           Italy                   Univ Trento   \n83586   WOS:000365372900001     Netherlands            Delft Univ Technol   \n1377    WOS:000577327400001         Belgium                 Flanders Make   \n...                     ...             ...                           ...   \n55889   WOS:000661354600002           China                    Fudan Univ   \n7735    WOS:000337842700006           China         Natl Univ Def Technol   \n99512   WOS:000453778900009  United Kingdom                   Univ London   \n31184   WOS:000472596200056           China        Guangdong Univ Technol   \n149165  WOS:000549676600001         Germany  German Res Ctr Environm Hlth   \n\n            Country_Type    Eurovoc_Class ISO3   \n15207              China            China  CHN  \\\n31500              China            China  CHN   \n139850                EU  Southern Europe  ITA   \n83586                 EU   Western Europe  NLD   \n1377                  EU   Western Europe  BEL   \n...                  ...              ...  ...   \n55889              China            China  CHN   \n7735               China            China  CHN   \n99512   Non-EU associate   Western Europe  GBR   \n31184              China            China  CHN   \n149165                EU   Western Europe  DEU   \n\n                    Institution_harm_label  \n15207          Natl Univ Def Technol (CHN)  \n31500               Xian Shiyou Univ (CHN)  \n139850                   Univ Trento (ITA)  \n83586             Delft Univ Technol (NLD)  \n1377                   Flanders Make (BEL)  \n...                                    ...  
\n55889                     Fudan Univ (CHN)  \n7735           Natl Univ Def Technol (CHN)  \n99512                    Univ London (GBR)  \n31184         Guangdong Univ Technol (CHN)  \n149165  German Res Ctr Environm Hlth (DEU)  \n\n[100 rows x 7 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>UT (Unique WOS ID)</th>\n      <th>Country</th>\n      <th>Institution_harm</th>\n      <th>Country_Type</th>\n      <th>Eurovoc_Class</th>\n      <th>ISO3</th>\n      <th>Institution_harm_label</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>15207</th>\n      <td>WOS:000389385100008</td>\n      <td>China</td>\n      <td>Natl Univ Def Technol</td>\n      <td>China</td>\n      <td>China</td>\n      <td>CHN</td>\n      <td>Natl Univ Def Technol (CHN)</td>\n    </tr>\n    <tr>\n      <th>31500</th>\n      <td>WOS:000474277900004</td>\n      <td>China</td>\n      <td>Xian Shiyou Univ</td>\n      <td>China</td>\n      <td>China</td>\n      <td>CHN</td>\n      <td>Xian Shiyou Univ (CHN)</td>\n    </tr>\n    <tr>\n      <th>139850</th>\n      <td>WOS:000867238100001</td>\n      <td>Italy</td>\n      <td>Univ Trento</td>\n      <td>EU</td>\n      <td>Southern Europe</td>\n      <td>ITA</td>\n      <td>Univ Trento (ITA)</td>\n    </tr>\n    <tr>\n      <th>83586</th>\n      <td>WOS:000365372900001</td>\n      <td>Netherlands</td>\n      <td>Delft Univ Technol</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n      <td>NLD</td>\n      <td>Delft Univ Technol (NLD)</td>\n    </tr>\n    <tr>\n      <th>1377</th>\n      <td>WOS:000577327400001</td>\n      <td>Belgium</td>\n      <td>Flanders Make</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n      <td>BEL</td>\n      <td>Flanders Make (BEL)</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      
<td>...</td>\n    </tr>\n    <tr>\n      <th>55889</th>\n      <td>WOS:000661354600002</td>\n      <td>China</td>\n      <td>Fudan Univ</td>\n      <td>China</td>\n      <td>China</td>\n      <td>CHN</td>\n      <td>Fudan Univ (CHN)</td>\n    </tr>\n    <tr>\n      <th>7735</th>\n      <td>WOS:000337842700006</td>\n      <td>China</td>\n      <td>Natl Univ Def Technol</td>\n      <td>China</td>\n      <td>China</td>\n      <td>CHN</td>\n      <td>Natl Univ Def Technol (CHN)</td>\n    </tr>\n    <tr>\n      <th>99512</th>\n      <td>WOS:000453778900009</td>\n      <td>United Kingdom</td>\n      <td>Univ London</td>\n      <td>Non-EU associate</td>\n      <td>Western Europe</td>\n      <td>GBR</td>\n      <td>Univ London (GBR)</td>\n    </tr>\n    <tr>\n      <th>31184</th>\n      <td>WOS:000472596200056</td>\n      <td>China</td>\n      <td>Guangdong Univ Technol</td>\n      <td>China</td>\n      <td>China</td>\n      <td>CHN</td>\n      <td>Guangdong Univ Technol (CHN)</td>\n    </tr>\n    <tr>\n      <th>149165</th>\n      <td>WOS:000549676600001</td>\n      <td>Germany</td>\n      <td>German Res Ctr Environm Hlth</td>\n      <td>EU</td>\n      <td>Western Europe</td>\n      <td>DEU</td>\n      <td>German Res Ctr Environm Hlth (DEU)</td>\n    </tr>\n  </tbody>\n</table>\n<p>100 rows × 7 columns</p>\n</div>"
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wos_univ_locations = wos_univ.merge(wos_country_types, on=\"Country\")\n",
    "wos_univ_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]].drop_duplicates()\n",
    "wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n",
    "wos_univ_collabs[\"Institution_harm_label\"] = wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n",
    "wos_univ_collabs.sample(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "6bb0e68d",
   "metadata": {},
   "outputs": [],
   "source": [
    "color_discrete_map= {'China': '#EF553B',\n",
    "                    'EU': '#636EFA',\n",
    "                    'Non-EU associate': '#00CC96'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "id": "df8701eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "TOPN = 25\n",
    "\n",
    "\n",
    "wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n",
    "wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n",
    "\n",
    "wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"EU\"]\n",
    "\n",
    "data_eu = (wos_univ_eu.groupby([\"Country\",\"Institution_harm_label\",\"Country_Type\"], as_index=False)[record_col].nunique()\n",
    "           .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by=\"Country_Type\")\n",
    "\n",
    "data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm_label\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n",
    "           .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n",
    "\n",
    "data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n",
    "           .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n",
    "\n",
    "\n",
    "for data,c_scope, y_lab, col_by, pat in zip([data_eu,data_eu_strict,data_ch],\n",
    "                        [\"European countries in scope\",\"EU-28 only\",\"China\"],\n",
    "                        [\"Institution_harm_label\",\"Institution_harm_label\",\"Institution_harm\"],\n",
    "                        [\"Country\",\"Eurovoc_Class\",\"Country_Type\"],\n",
    "                                       [\"Country_Type\",None,None]):\n",
    "    fig = px.bar(data, x=record_col, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,pattern_shape=pat,\n",
    "                              labels={\n",
    "                     record_col: 'Number of co-publications',\n",
    "                     \"Institution_harm\": \"Institution\",\n",
    "                                  \"Institution_harm_label\": \"Institution\",\n",
    "                                  \"Country_Type\":\"Country type\",\n",
    "                                  \"Eurovoc_Class\":\"Region\"\n",
    "                 },\n",
    "                title=f\"Most visible institutions (top {TOPN} within {c_scope})\", template='plotly')\n",
    "    fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n",
    "                                         width=1000, height=1000,)\n",
    "    fig.update_traces(hovertemplate='%{x:d}')\n",
    "    fig.add_shape(\n",
    "            # Rectangle with reference to the plot\n",
    "                type=\"rect\",\n",
    "                xref=\"paper\",\n",
    "                yref=\"paper\",\n",
    "                x0=0,\n",
    "                y0=0,\n",
    "                x1=1.0,\n",
    "                y1=1.0,\n",
    "                line=dict(\n",
    "                    color=\"black\",\n",
    "                     width=0.5,\n",
    "                 )\n",
    "             )\n",
    "    fig.update_yaxes(\n",
    "        showgrid=True,\n",
    "        ticks=\"outside\")\n",
    "    fig.update_xaxes(\n",
    "        showgrid=True,\n",
    "        ticks=\"outside\")\n",
    "    # fig.show(config= dict(displayModeBar = False))\n",
    "    fig.write_html(f\"plot_html/overall_inst_collab_bar_{c_scope}.html\",config= dict(displayModeBar = False, responsive = True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "31a0769d",
   "metadata": {},
   "outputs": [],
   "source": [
    "wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n",
    "wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n",
    "\n",
    "wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "id": "606e1af0",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = px.parallel_categories(wos_univ_dipol[[\"Country_eu\",\"Domain_English\",\"Country_ch\"]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "ea0951e9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "Index(['Country', 'Institution_harm', 'Country_Type', 'UT (Unique WOS ID)'], dtype='object')"
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_ch.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "dd4210b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n",
    "             (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
    "\n",
    "fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Domain_English\",\"Country_ch\"]])\n",
    "# fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "2c5d1d94",
   "metadata": {},
   "outputs": [],
   "source": [
    "subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n",
    "             (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
    "\n",
    "fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n",
    "# fig.show()\n",
    "sub_df =wos_univ_dipol[subfilter]\n",
    "\n",
    "inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n",
    "                            values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
    "\n",
    "eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n",
    "ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n",
    "\n",
    "inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n",
    "\n",
    "mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n",
    "data = np.where(mask,inst_co_occur,inst_co_occur)\n",
    "\n",
    "fig = px.imshow(data,\n",
    "                labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n",
    "                x=list(inst_co_occur.columns),\n",
    "                y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within Europe)\"\n",
    "               )\n",
    "fig.update_layout(title_x=0.5,\n",
    "                   width=1000, height=1000,\n",
    "                   xaxis_showgrid=False,\n",
    "                   yaxis_showgrid=False,\n",
    "                   yaxis_autorange='reversed',\n",
    "                  template='plotly_white',\n",
    "                  coloraxis_colorbar=dict(\n",
    "                            thicknessmode=\"pixels\", thickness=25,\n",
    "                            ticks=\"outside\", ticksuffix=\" \",\n",
    "                            dtick=20,outlinewidth=1,\n",
    "                        ))\n",
    "fig.update_xaxes(tickangle= -45)\n",
    "fig.update_yaxes(\n",
    "    ticks=\"outside\")\n",
    "fig.update_xaxes(\n",
    "    ticks=\"outside\")\n",
    "\n",
    "fig.write_html(f\"plot_html/overall_inst_collab_europe.html\",config= dict(displayModeBar = False, responsive = True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "id": "7bd7d149",
   "metadata": {},
   "outputs": [],
   "source": [
    "subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu_strict[\"Institution_harm_label\"]))&\n",
    "             (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
    "\n",
    "fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n",
    "# fig.show()\n",
    "sub_df =wos_univ_dipol[subfilter]\n",
    "\n",
    "inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n",
    "                            values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
    "\n",
    "eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n",
    "ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n",
    "\n",
    "inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n",
    "\n",
    "mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n",
    "data = np.where(mask,inst_co_occur,inst_co_occur)\n",
    "fig = px.imshow(data,\n",
    "                labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n",
    "                x=list(inst_co_occur.columns),\n",
    "                y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within EU-28)\"\n",
    "               )\n",
    "fig.update_layout(title_x=0.5,\n",
    "                   width=1000, height=1000,\n",
    "                   xaxis_showgrid=False,\n",
    "                   yaxis_showgrid=False,\n",
    "                   yaxis_autorange='reversed',\n",
    "                  template='plotly_white',\n",
    "                  coloraxis_colorbar=dict(\n",
    "                            thicknessmode=\"pixels\", thickness=25,\n",
    "                            ticks=\"outside\", ticksuffix=\" \",\n",
    "                            dtick=20,outlinewidth=1,\n",
    "                        ))\n",
    "fig.update_xaxes(tickangle= -45)\n",
    "fig.update_yaxes(\n",
    "    ticks=\"outside\")\n",
    "fig.update_xaxes(\n",
    "    ticks=\"outside\")\n",
    "\n",
    "# fig.show(config= dict(displayModeBar = False))\n",
    "fig.write_html(f\"plot_html/overall_inst_collab_eu28.html\",config= dict(displayModeBar = False, responsive = True))"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Drilldown to field"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "outputs": [],
   "source": [
    "group = ['Publication Year',\"Domain_English\",'Field_English']\n",
    "# data = wos.groupby(['Publication Year',\"Domain_English\",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n",
    "\n",
    "\n",
    "data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()\n",
    "        .fillna(0)\n",
    "        .stack()\n",
    "        .reset_index()\n",
    "        .rename(columns={0:record_col}))\n",
    "\n",
    "data = data.merge(wos[[\"Domain_English\",'Field_English']].drop_duplicates(),on=\"Field_English\")\n",
    "\n",
    "data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='Field_English'),\n",
    "                  on='Field_English', suffixes=[None,\"_relative_growth\"])\n",
    "data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n",
    "\n",
    "data = data.sort_values(by =[\"Field_English\",\"Publication Year\"], ascending=[True,True])\n",
    "data[record_col+\"_cumsum\"] = (data.groupby('Field_English',as_index=False)[record_col].cumsum())"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "outputs": [
    {
     "data": {
      "text/plain": "     Publication Year                      Field_English  UT (Unique WOS ID)   \n0                2011  Agriculture, Fisheries & Forestry                 9.0  \\\n1                2012  Agriculture, Fisheries & Forestry                18.0   \n2                2013  Agriculture, Fisheries & Forestry                15.0   \n3                2014  Agriculture, Fisheries & Forestry                26.0   \n4                2015  Agriculture, Fisheries & Forestry                12.0   \n..                ...                                ...                 ...   \n255              2018                    Social Sciences                25.0   \n257              2019                    Social Sciences                37.0   \n259              2020                    Social Sciences                57.0   \n261              2021                    Social Sciences                65.0   \n263              2022                    Social Sciences                60.0   \n\n       Domain_English  Publication Year_relative_growth   \n0    Applied Sciences                              2011  \\\n1    Applied Sciences                              2011   \n2    Applied Sciences                              2011   \n3    Applied Sciences                              2011   \n4    Applied Sciences                              2011   \n..                ...                               ...   
\n255  Applied Sciences                              2011   \n257  Applied Sciences                              2011   \n259  Applied Sciences                              2011   \n261  Applied Sciences                              2011   \n263  Applied Sciences                              2011   \n\n     UT (Unique WOS ID)_relative_growth Domain_English_relative_growth   \n0                              0.000000               Applied Sciences  \\\n1                              1.000000               Applied Sciences   \n2                              0.666667               Applied Sciences   \n3                              1.888889               Applied Sciences   \n4                              0.333333               Applied Sciences   \n..                                  ...                            ...   \n255                            1.272727               Applied Sciences   \n257                            2.363636               Applied Sciences   \n259                            4.181818               Applied Sciences   \n261                            4.909091               Applied Sciences   \n263                            4.454545               Applied Sciences   \n\n     UT (Unique WOS ID)_cumsum  \n0                          9.0  \n1                         27.0  \n2                         42.0  \n3                         68.0  \n4                         80.0  \n..                         ...  \n255                      216.0  \n257                      290.0  \n259                      404.0  \n261                      534.0  \n263                      654.0  \n\n[84 rows x 8 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Publication Year</th>\n      <th>Field_English</th>\n      <th>UT (Unique WOS ID)</th>\n      <th>Domain_English</th>\n      <th>Publication Year_relative_growth</th>\n      <th>UT (Unique WOS ID)_relative_growth</th>\n      <th>Domain_English_relative_growth</th>\n      <th>UT (Unique WOS ID)_cumsum</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>2011</td>\n      <td>Agriculture, Fisheries &amp; Forestry</td>\n      <td>9.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>0.000000</td>\n      <td>Applied Sciences</td>\n      <td>9.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2012</td>\n      <td>Agriculture, Fisheries &amp; Forestry</td>\n      <td>18.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>1.000000</td>\n      <td>Applied Sciences</td>\n      <td>27.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>2013</td>\n      <td>Agriculture, Fisheries &amp; Forestry</td>\n      <td>15.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>0.666667</td>\n      <td>Applied Sciences</td>\n      <td>42.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>2014</td>\n      <td>Agriculture, Fisheries &amp; Forestry</td>\n      <td>26.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>1.888889</td>\n      <td>Applied Sciences</td>\n      <td>68.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>2015</td>\n      <td>Agriculture, Fisheries &amp; Forestry</td>\n      <td>12.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      
<td>0.333333</td>\n      <td>Applied Sciences</td>\n      <td>80.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>255</th>\n      <td>2018</td>\n      <td>Social Sciences</td>\n      <td>25.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>1.272727</td>\n      <td>Applied Sciences</td>\n      <td>216.0</td>\n    </tr>\n    <tr>\n      <th>257</th>\n      <td>2019</td>\n      <td>Social Sciences</td>\n      <td>37.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>2.363636</td>\n      <td>Applied Sciences</td>\n      <td>290.0</td>\n    </tr>\n    <tr>\n      <th>259</th>\n      <td>2020</td>\n      <td>Social Sciences</td>\n      <td>57.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>4.181818</td>\n      <td>Applied Sciences</td>\n      <td>404.0</td>\n    </tr>\n    <tr>\n      <th>261</th>\n      <td>2021</td>\n      <td>Social Sciences</td>\n      <td>65.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>4.909091</td>\n      <td>Applied Sciences</td>\n      <td>534.0</td>\n    </tr>\n    <tr>\n      <th>263</th>\n      <td>2022</td>\n      <td>Social Sciences</td>\n      <td>60.0</td>\n      <td>Applied Sciences</td>\n      <td>2011</td>\n      <td>4.454545</td>\n      <td>Applied Sciences</td>\n      <td>654.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>84 rows × 8 columns</p>\n</div>"
     },
     "execution_count": 172,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data[\"Domain_English\"]==\"Applied Sciences\"]"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "outputs": [
    {
     "data": {
      "text/plain": "                              Field_English  UT (Unique WOS ID)\n5  Information & Communication Technologies               15648\n4                               Engineering                9232\n3         Enabling & Strategic Technologies                3940\n0         Agriculture, Fisheries & Forestry                 612\n1                Built Environment & Design                 537\n2                     Economics & Business                   15\n6                           Social Sciences                   1",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Field_English</th>\n      <th>UT (Unique WOS ID)</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>5</th>\n      <td>Information &amp; Communication Technologies</td>\n      <td>15648</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Engineering</td>\n      <td>9232</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Enabling &amp; Strategic Technologies</td>\n      <td>3940</td>\n    </tr>\n    <tr>\n      <th>0</th>\n      <td>Agriculture, Fisheries &amp; Forestry</td>\n      <td>612</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Built Environment &amp; Design</td>\n      <td>537</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Economics &amp; Business</td>\n      <td>15</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Social Sciences</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 168,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "wos[wos[\"Domain_English\"]==\"Applied Sciences\"].groupby(\"Field_English\", as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "outputs": [],
   "source": [
    "data_complete = pd.DataFrame()\n",
    "\n",
    "for cat in sorted(data[\"Domain_English\"].unique()):\n",
    "\n",
    "    bar_data = wos[wos[\"Domain_English\"]==cat].groupby(\"Field_English\", as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)\n",
    "\n",
    "    fig = px.bar(bar_data.sort_values(by=\"Field_English\"), x=record_col, y=\"Field_English\", color=\"Field_English\",barmode='relative',\n",
    "                                  labels={\n",
    "                         record_col: 'Number of co-publications',\n",
    "                     },\n",
    "                    title=\"Distribution of Domains\", template='plotly')\n",
    "    fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family=\"Montserrat\")\n",
    "    fig.update_traces(hovertemplate='%{x:d}')\n",
    "    fig.add_shape(\n",
    "            # Rectangle with reference to the plot\n",
    "                type=\"rect\",\n",
    "                xref=\"paper\",\n",
    "                yref=\"paper\",\n",
    "                x0=0,\n",
    "                y0=0,\n",
    "                x1=1.0,\n",
    "                y1=1.0,\n",
    "                line=dict(\n",
    "                    color=\"black\",\n",
    "                     width=0.5,\n",
    "                 )\n",
    "             )\n",
    "    fig.update_layout(yaxis={'categoryorder':'total ascending'})\n",
    "    fig.update_yaxes(\n",
    "        showgrid=True,\n",
    "        ticks=\"outside\")\n",
    "    fig.update_xaxes(\n",
    "        showgrid=True,\n",
    "        ticks=\"outside\")\n",
    "    dom_distr = go.Figure(fig)\n",
    "\n",
    "\n",
    "    #data segment\n",
    "    sub_data = data[data[\"Domain_English\"]==cat]\n",
    "    # data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n",
    "    fig = px.line(sub_data.sort_values(ascending=[True,True], by=[\"Publication Year\",\"Field_English\"]),y=record_col,x=\"Publication Year\", color=\"Field_English\", markers=True,\n",
    "                  labels={\n",
    "                     record_col: 'Number of co-publications',\n",
    "                     group[-1]: \"Domain\",\n",
    "                 },\n",
    "                title=\"Yearly output of co-publications\", template='plotly')\n",
    "    fig.update_traces(hovertemplate='%{y:d}')\n",
    "    fig.update_layout(hovermode='x unified')\n",
    "    fig.add_shape(\n",
    "            # Rectangle with reference to the plot\n",
    "                type=\"rect\",\n",
    "                xref=\"paper\",\n",
    "                yref=\"paper\",\n",
    "                x0=0,\n",
    "                y0=0,\n",
    "                x1=1.0,\n",
    "                y1=1.0,\n",
    "                line=dict(\n",
    "                    color=\"black\",\n",
    "                     width=0.5,\n",
    "                 )\n",
    "             )\n",
    "    fig.update_yaxes(\n",
    "        showgrid=True,\n",
    "        ticks=\"outside\")\n",
    "    fig.update_xaxes(\n",
    "        showgrid=True,\n",
    "        ticks=\"outside\")\n",
    "\n",
    "    year_output_by_domain = go.Figure(fig)\n",
    "\n",
    "    # Relative growth of co-publications, one line per Field_English value.\n",
    "    growth_col = record_col + \"_relative_growth\"\n",
    "    fig = px.line(\n",
    "        sub_data.sort_values(by=[\"Publication Year\", \"Field_English\"], ascending=[True, True]),\n",
    "        x=\"Publication Year\",\n",
    "        y=growth_col,\n",
    "        color=\"Field_English\",\n",
    "        markers=True,\n",
    "        labels={\n",
    "            growth_col: 'Rel. growth<br>in co-publications (%)',\n",
    "            group[-1]: \"Domain\",\n",
    "        },\n",
    "        title=\"Relative growth in the output of co-publications\",\n",
    "        template='plotly',\n",
    "    )\n",
    "\n",
    "    # Use the d3 percent type for both the axis ticks and the hover text: '.0%'\n",
    "    # multiplies the value by 100 and appends '%'. The earlier specs did not do that:\n",
    "    # '.0f%' is not a valid d3-format specifier, and '%{y:.0f}00%' rounded the value\n",
    "    # to an integer before appending the literal text '00%' (0.37 was shown as 000%).\n",
    "    # NOTE(review): assumes the growth column holds fractions (1.0 == 100 %), which is\n",
    "    # what the '.0%' tick format set on yaxis4 of the combined figure implies - confirm.\n",
    "    fig.update_layout(hovermode='x unified', yaxis_tickformat='.0%', font_family=\"Montserrat\")\n",
    "    fig.update_traces(hovertemplate='%{y:.0%}')\n",
    "\n",
    "    # Thin black frame around the plot, given in paper coordinates (0..1 on both axes).\n",
    "    fig.add_shape(\n",
    "        type=\"rect\",\n",
    "        xref=\"paper\",\n",
    "        yref=\"paper\",\n",
    "        x0=0,\n",
    "        y0=0,\n",
    "        x1=1.0,\n",
    "        y1=1.0,\n",
    "        line=dict(color=\"black\", width=0.5),\n",
    "    )\n",
    "    fig.update_yaxes(showgrid=True, ticks=\"outside\")\n",
    "    fig.update_xaxes(showgrid=True, ticks=\"outside\")\n",
    "\n",
    "    # Keep the finished figure under its own name; fig is reassigned further down.\n",
    "    rel_output_by_domain = go.Figure(fig)\n",
    "\n",
    "    # Area chart of the cumulative co-publication counts, grouped and coloured by Field_English.\n",
    "    fig = px.area(\n",
    "        sub_data.sort_values(by=[\"Publication Year\", \"Field_English\"], ascending=[True, True]),\n",
    "        x=\"Publication Year\",\n",
    "        y=record_col+\"_cumsum\",\n",
    "        color=\"Field_English\",\n",
    "        line_group=\"Field_English\",\n",
    "        labels={record_col+\"_cumsum\": 'Cumulative number of co-publications'},\n",
    "        title=\"Cumulative number of co-publications\",\n",
    "        template='plotly',\n",
    "    )\n",
    "    fig.update_layout(hovermode='x unified')\n",
    "    # Hover text shows the running total formatted as an integer.\n",
    "    fig.update_traces(hovertemplate='%{y:d}')\n",
    "    # Thin black frame around the plot, given in paper coordinates (0..1 on both axes).\n",
    "    fig.add_shape(\n",
    "        type=\"rect\",\n",
    "        xref=\"paper\",\n",
    "        yref=\"paper\",\n",
    "        x0=0,\n",
    "        y0=0,\n",
    "        x1=1.0,\n",
    "        y1=1.0,\n",
    "        line={\"color\": \"black\", \"width\": 0.5},\n",
    "    )\n",
    "    fig.update_yaxes(showgrid=True, ticks=\"outside\")\n",
    "    fig.update_xaxes(showgrid=True, ticks=\"outside\")\n",
    "\n",
    "    # Keep the finished figure under its own name for the combined figure below.\n",
    "    cumsum_by_domain = go.Figure(fig)\n",
    "    # cumsum_by_domain.show(config= dict(displayModeBar = False))\n",
    "\n",
    "    # dom_distr\n",
    "    # year_output_by_domain\n",
    "    # rel_output_by_domain\n",
    "    # cumsum_by_domain\n",
    "\n",
    "    # 2x2 overview figure; the subplot titles are listed row by row.\n",
    "    figsuper = make_subplots(\n",
    "        rows=2,\n",
    "        cols=2,\n",
    "        subplot_titles=[\n",
    "            \"Distribution of domains\",\n",
    "            \"Cumulative sum of co-publications\",\n",
    "            \"Co-publications per year\",\n",
    "            \"Relative growth of co-publications\",\n",
    "        ],\n",
    "    )\n",
    "\n",
    "    # (source figure, row, col, hide legend entries) in the order the traces are copied over.\n",
    "    # Only the traces of the cumulative panel keep their legend entries.\n",
    "    panel_specs = [\n",
    "        (dom_distr, 1, 1, True),\n",
    "        (cumsum_by_domain, 1, 2, False),\n",
    "        (year_output_by_domain, 2, 1, True),\n",
    "        (rel_output_by_domain, 2, 2, True),\n",
    "    ]\n",
    "    for panel_fig, panel_row, panel_col, hide_legend in panel_specs:\n",
    "        for trace in list(panel_fig.select_traces()):\n",
    "            if hide_legend:\n",
    "                trace.showlegend = False\n",
    "            figsuper.add_trace(trace, row=panel_row, col=panel_col)\n",
    "\n",
    "    # layout.yaxis gets its categories ordered by total; bars use the 'relative' mode.\n",
    "    figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n",
    "    # Identical grid, border line and tick styling for every y- and x-axis.\n",
    "    axis_style = dict(showgrid=True, showline=True, linewidth=1, linecolor='black', mirror=True, ticks=\"outside\")\n",
    "    figsuper.update_yaxes(**axis_style)\n",
    "    figsuper.update_xaxes(**axis_style)\n",
    "    figsuper.update_layout(template=\"plotly\", font_family=\"Montserrat\")\n",
    "    # Zero line and percent ticks on yaxis4.\n",
    "    # NOTE(review): presumably the y-axis of the relative-growth panel (fourth subplot) - confirm.\n",
    "    figsuper.layout.yaxis4.update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey', tickformat=\".0%\")\n",
    "    figsuper.update_layout(title_text=f\"{cat}\")\n",
    "\n",
    "    # Write the combined figure to an HTML file, with the mode bar hidden and responsive sizing on.\n",
    "    figsuper.write_html(f\"plot_html/{cat}_distr&trends.html\", config=dict(displayModeBar=False, responsive=True))"
   ],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}