You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ZSI_Reconnect_China/WOS/wos_analysis/wos_analyses.ipynb

1904 lines
4.1 MiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "40038234",
"metadata": {},
"outputs": [
{
"data": {
"text/html": " <script type=\"text/javascript\">\n window.PlotlyConfig = {MathJaxConfig: 'local'};\n if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n if (typeof require !== 'undefined') {\n require.undef(\"plotly\");\n define('plotly', function(require, exports, module) {\n /**\n* plotly.js v2.20.0\n* Copyright 2012-2023, Plotly, Inc.\n* All rights reserved.\n* Licensed under the MIT license\n*/\n/*! For license information please see plotly.min.js.LICENSE.txt */\n!function(t,e){\"object\"==typeof exports&&\"object\"==typeof module?module.exports=e():\"function\"==typeof define&&define.amd?define([],e):\"object\"==typeof exports?exports.Plotly=e():t.Plotly=e()}(self,(function(){return function(){var t={98847:function(t,e,r){\"use strict\";var n=r(71828),i={\"X,X div\":'direction:ltr;font-family:\"Open Sans\",verdana,arial,sans-serif;margin:0;padding:0;',\"X input,X button\":'font-family:\"Open Sans\",verdana,arial,sans-serif;',\"X input:focus,X button:focus\":\"outline:none;\",\"X a\":\"text-decoration:none;\",\"X a:hover\":\"text-decoration:none;\",\"X .crisp\":\"shape-rendering:crispEdges;\",\"X .user-select-none\":\"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;\",\"X svg\":\"overflow:hidden;\",\"X svg a\":\"fill:#447adb;\",\"X svg a:hover\":\"fill:#3c6dc5;\",\"X .main-svg\":\"position:absolute;top:0;left:0;pointer-events:none;\",\"X .main-svg .draglayer\":\"pointer-events:all;\",\"X .cursor-default\":\"cursor:default;\",\"X .cursor-pointer\":\"cursor:pointer;\",\"X .cursor-crosshair\":\"cursor:crosshair;\",\"X .cursor-move\":\"cursor:move;\",\"X .cursor-col-resize\":\"cursor:col-resize;\",\"X .cursor-row-resize\":\"cursor:row-resize;\",\"X .cursor-ns-resize\":\"cursor:ns-resize;\",\"X .cursor-ew-resize\":\"cursor:ew-resize;\",\"X .cursor-sw-resize\":\"cursor:sw-resize;\",\"X .cursor-s-resize\":\"cursor:s-resize;\",\"X 
.cursor-se-resize\":\"cursor:se-resize;\",\"X .cursor-w-resize\":\"cursor:w-resize;\",\"X .cursor-e-resize\":\"cursor:e-resize;\",\"X .cursor-nw-resize\":\"cursor:nw-resize;\",\"X .cursor-n-resize\":\"cursor:n-resize;\",\"X .cursor-ne-resize\":\"cursor:ne-resize;\",\"X .cursor-grab\":\"cursor:-webkit-grab;cursor:grab;\",\"X .modebar\":\"position:absolute;top:2px;right:2px;\",\"X .ease-bg\":\"-webkit-transition:background-color .3s ease 0s;-moz-transition:background-color .3s ease 0s;-ms-transition:background-color .3s ease 0s;-o-transition:background-color .3s ease 0s;transition:background-color .3s ease 0s;\",\"X .modebar--hover>:not(.watermark)\":\"opacity:0;-webkit-transition:opacity .3s ease 0s;-moz-transition:opacity .3s ease 0s;-ms-transition:opacity .3s ease 0s;-o-transition:opacity .3s ease 0s;transition:opacity .3s ease 0s;\",\"X:hover .modebar--hover .modebar-group\":\"opacity:1;\",\"X .modebar-group\":\"float:left;display:inline-block;box-sizing:border-box;padding-left:8px;position:relative;vertical-align:middle;white-space:nowrap;\",\"X .modebar-btn\":\"position:relative;font-size:16px;padding:3px 4px;height:22px;cursor:pointer;line-height:normal;box-sizing:border-box;\",\"X .modebar-btn svg\":\"position:relative;top:2px;\",\"X .modebar.vertical\":\"display:flex;flex-direction:column;flex-wrap:wrap;align-content:flex-end;max-height:100%;\",\"X .modebar.vertical svg\":\"top:-1px;\",\"X .modebar.vertical .modebar-group\":\"display:block;float:none;padding-left:0px;padding-bottom:8px;\",\"X .modebar.vertical .modebar-group .modebar-btn\":\"display:block;text-align:center;\",\"X [data-title]:before,X [data-title]:after\":\"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;\",\"X [data-title]:hover:before,X 
[data-title]:hover:after\":\"display:block;opacity:1;\",\"X [data-title]:before
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import janitor\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from matplotlib.ticker import MaxNLocator\n",
"import math\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"import plotly.offline as pyo\n",
"from plotly.subplots import make_subplots\n",
"import plotly.graph_objects as go\n",
"pyo.init_notebook_mode()\n",
"\n",
"import plotly.io as pio\n",
"pio.renderers.default = \"plotly_mimetype+notebook\"\n",
"\n",
"import country_converter as coco\n",
"cc = coco.CountryConverter()\n",
"\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ea3629f5",
"metadata": {},
"outputs": [],
"source": [
"# Seaborn palette\n",
"# sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)\n",
"# sns.palplot(sns.color_palette())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "fb7baf32",
"metadata": {},
"outputs": [],
"source": [
"# Name of the folder with the processed spreadsheets; it is addressed as \"../<outdir>\",\n",
"# i.e. relative to the parent of the current working directory.\n",
"outdir=\"wos_processed_data\"\n",
"\n",
"# Read the processed records table and the harmonized institution-location table from Excel.\n",
"wos = pd.read_excel(f\"../{outdir}/wos_processed.xlsx\")\n",
"wos_univ = pd.read_excel(f\"../{outdir}/wos_institution_locations_harmonized.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "4dd8e081",
"metadata": {},
"outputs": [],
"source": [
"def eurovoc_classer(x):\n",
"    \"\"\"Return the EuroVoc region label for the country name ``x``.\n",
"\n",
"    'China' is passed through unchanged. A name listed under one of the four\n",
"    European regions yields that region's label. Any other value falls\n",
"    through and yields None.\n",
"    \"\"\"\n",
"    if x == 'China':\n",
"        return x\n",
"    members_by_region = {\n",
"        \"Eastern Europe\": [\"Bulgaria\", \"Czech Republic\", \"Croatia\", \"Hungary\", \"Poland\", \"Romania\", \"Slovakia\", \"Slovenia\"],\n",
"        \"Northern Europe\": [\"Denmark\", \"Estonia\", \"Finland\", \"Latvia\", \"Lithuania\", \"Sweden\", \"Norway\", \"Iceland\"],\n",
"        \"Southern Europe\": [\"Cyprus\", \"Greece\", \"Italy\", \"Portugal\", \"Spain\", \"Malta\"],\n",
"        \"Western Europe\": [\"Austria\", \"Belgium\", \"France\", \"Germany\", \"Luxembourg\", \"Netherlands\", \"Switzerland\", \"United Kingdom\", \"Ireland\"],\n",
"    }\n",
"    # First region (in declaration order) whose member list contains x, else None.\n",
"    hits = (region for region, members in members_by_region.items() if x in members)\n",
"    return next(hits, None)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "eb933d66",
"metadata": {},
"outputs": [],
"source": [
"# Read the two country spreadsheets from the same \"../<outdir>\" folder as the tables above\n",
"# (relies on `outdir` being defined by an earlier cell).\n",
"wos_country = pd.read_excel(f\"../{outdir}/wos_countries.xlsx\")\n",
"wos_country_types = pd.read_excel(f\"../{outdir}/wos_country_types.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "cd0b0efa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " Country Country_Type Eurovoc_Class\n0 Belgium EU Western Europe\n1 China China China\n2 Luxembourg EU Western Europe\n3 Netherlands EU Western Europe\n4 Norway Non-EU associate Northern Europe\n5 United Kingdom Non-EU associate Western Europe\n6 France EU Western Europe\n7 Sweden EU Northern Europe\n8 Italy EU Southern Europe\n9 Denmark EU Northern Europe\n10 Germany EU Western Europe\n11 Slovenia EU Eastern Europe\n12 Estonia EU Northern Europe\n13 Finland EU Northern Europe\n14 Bulgaria EU Eastern Europe\n15 Slovakia EU Eastern Europe\n16 Spain EU Southern Europe\n17 Poland EU Eastern Europe\n18 Czech Republic EU Eastern Europe\n19 Greece EU Southern Europe\n20 Malta EU Southern Europe\n21 Austria EU Western Europe\n22 Switzerland Non-EU associate Western Europe\n23 Ireland EU Western Europe\n24 Portugal EU Southern Europe\n25 Romania EU Eastern Europe\n26 Hungary EU Eastern Europe\n27 Cyprus EU Southern Europe\n28 Croatia EU Eastern Europe\n29 Lithuania EU Northern Europe\n30 Latvia EU Northern Europe",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>Country_Type</th>\n <th>Eurovoc_Class</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Belgium</td>\n <td>EU</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>1</th>\n <td>China</td>\n <td>China</td>\n <td>China</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Luxembourg</td>\n <td>EU</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Netherlands</td>\n <td>EU</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Norway</td>\n <td>Non-EU associate</td>\n <td>Northern Europe</td>\n </tr>\n <tr>\n <th>5</th>\n <td>United Kingdom</td>\n <td>Non-EU associate</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>6</th>\n <td>France</td>\n <td>EU</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Sweden</td>\n <td>EU</td>\n <td>Northern Europe</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Italy</td>\n <td>EU</td>\n <td>Southern Europe</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Denmark</td>\n <td>EU</td>\n <td>Northern Europe</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Germany</td>\n <td>EU</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Slovenia</td>\n <td>EU</td>\n <td>Eastern Europe</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Estonia</td>\n <td>EU</td>\n <td>Northern Europe</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Finland</td>\n <td>EU</td>\n <td>Northern Europe</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Bulgaria</td>\n <td>EU</td>\n <td>Eastern Europe</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Slovakia</td>\n <td>EU</td>\n <td>Eastern Europe</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Spain</td>\n <td>EU</td>\n <td>Southern Europe</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Poland</td>\n <td>EU</td>\n 
<td>Eastern Europe</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Czech Republic</td>\n <td>EU</td>\n <td>Eastern Europe</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Greece</td>\n <td>EU</td>\n <td>Southern Europe</td>\n </tr>\n <tr>\n <th>20</th>\n <td>Malta</td>\n <td>EU</td>\n <td>Southern Europe</td>\n </tr>\n <tr>\n <th>21</th>\n <td>Austria</td>\n <td>EU</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>22</th>\n <td>Switzerland</td>\n <td>Non-EU associate</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>23</th>\n <td>Ireland</td>\n <td>EU</td>\n <td>Western Europe</td>\n </tr>\n <tr>\n <th>24</th>\n <td>Portugal</td>\n <td>EU</td>\n <td>Southern Europe</td>\n </tr>\n <tr>\n <th>25</th>\n <td>Romania</td>\n <td>EU</td>\n <td>Eastern Europe</td>\n </tr>\n <tr>\n <th>26</th>\n <td>Hungary</td>\n <td>EU</td>\n <td>Eastern Europe</td>\n </tr>\n <tr>\n <th>27</th>\n <td>Cyprus</td>\n <td>EU</td>\n <td>Southern Europe</td>\n </tr>\n <tr>\n <th>28</th>\n <td>Croatia</td>\n <td>EU</td>\n <td>Eastern Europe</td>\n </tr>\n <tr>\n <th>29</th>\n <td>Lithuania</td>\n <td>EU</td>\n <td>Northern Europe</td>\n </tr>\n <tr>\n <th>30</th>\n <td>Latvia</td>\n <td>EU</td>\n <td>Northern Europe</td>\n </tr>\n </tbody>\
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Add an \"Eurovoc_Class\" column by running every \"Country\" value through eurovoc_classer\n",
"# (modifies wos_country_types in place), then display the resulting table.\n",
"wos_country_types[\"Eurovoc_Class\"] = wos_country_types[\"Country\"].map(eurovoc_classer)\n",
"wos_country_types"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "1e737dbf",
"metadata": {},
"outputs": [],
"source": [
"# Name of the column whose distinct values are counted (via nunique) in the groupby cells below.\n",
"record_col = \"UT (Unique WOS ID)\""
]
},
{
"cell_type": "markdown",
"id": "b1aa7f2d",
"metadata": {},
"source": [
"# Analysis by METRIX classification"
]
},
{
"cell_type": "markdown",
"id": "a97f1cbb",
"metadata": {},
"source": [
"## Distribution of topics via the METRIX classification"
]
},
{
"cell_type": "code",
"execution_count": 203,
"id": "f39cb21d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " Domain_English Field_English \n37 Applied Sciences Information & Communication Technologies \\\n44 Applied Sciences Information & Communication Technologies \n32 Applied Sciences Engineering \n33 Applied Sciences Engineering \n15 Applied Sciences Enabling & Strategic Technologies \n.. ... ... \n11 Applied Sciences Economics & Business \n46 Applied Sciences Social Sciences \n54 Arts & Humanities Philosophy & Theology \n52 Arts & Humanities Historical Studies \n129 Health Sciences Psychology & Cognitive Sciences \n\n SubField_English UT (Unique WOS ID) percent \n37 Artificial Intelligence & Image Processing 7915 17.184108 \n44 Networking & Telecommunications 5360 11.636995 \n32 Geological & Geomatics Engineering 2576 5.592705 \n33 Industrial Engineering & Automation 2316 5.028224 \n15 Energy 1965 4.266175 \n.. ... ... ... \n11 Business & Management 1 0.002171 \n46 Anthropology 1 0.002171 \n54 Philosophy 1 0.002171 \n52 History of Social Sciences 1 0.002171 \n129 General Psychology & Cognitive Sciences 1 0.002171 \n\n[175 rows x 5 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Domain_English</th>\n <th>Field_English</th>\n <th>SubField_English</th>\n <th>UT (Unique WOS ID)</th>\n <th>percent</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>37</th>\n <td>Applied Sciences</td>\n <td>Information &amp; Communication Technologies</td>\n <td>Artificial Intelligence &amp; Image Processing</td>\n <td>7915</td>\n <td>17.184108</td>\n </tr>\n <tr>\n <th>44</th>\n <td>Applied Sciences</td>\n <td>Information &amp; Communication Technologies</td>\n <td>Networking &amp; Telecommunications</td>\n <td>5360</td>\n <td>11.636995</td>\n </tr>\n <tr>\n <th>32</th>\n <td>Applied Sciences</td>\n <td>Engineering</td>\n <td>Geological &amp; Geomatics Engineering</td>\n <td>2576</td>\n <td>5.592705</td>\n </tr>\n <tr>\n <th>33</th>\n <td>Applied Sciences</td>\n <td>Engineering</td>\n <td>Industrial Engineering &amp; Automation</td>\n <td>2316</td>\n <td>5.028224</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Applied Sciences</td>\n <td>Enabling &amp; Strategic Technologies</td>\n <td>Energy</td>\n <td>1965</td>\n <td>4.266175</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Applied Sciences</td>\n <td>Economics &amp; Business</td>\n <td>Business &amp; Management</td>\n <td>1</td>\n <td>0.002171</td>\n </tr>\n <tr>\n <th>46</th>\n <td>Applied Sciences</td>\n <td>Social Sciences</td>\n <td>Anthropology</td>\n <td>1</td>\n <td>0.002171</td>\n </tr>\n <tr>\n <th>54</th>\n <td>Arts &amp; Humanities</td>\n <td>Philosophy &amp; Theology</td>\n <td>Philosophy</td>\n <td>1</td>\n <td>0.002171</td>\n </tr>\n <tr>\n <th>52</th>\n <td>Arts &amp; Humanities</td>\n 
<td>Historical Studies</td>\n <td>History of Social Sciences</td>\n <td>1</td>\n <td>0.002171</td>\n </tr>\n <tr>\n <th>129</th>\n <td>Health Sciences</td>\n <td>Psychology &amp; Cognitive Sciences</td>\n <td>General Psychology &amp; Cognitive Sciences</td>\n <td>1</td>\n <td>0.002171</td>\n </tr>\n </tbody>\n</table>\n<p>175 rows × 5 columns</p>\n</div>"
},
"execution_count": 203,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def replace_nth(s, sub=\" \", repl=\"<br>\", n=2):\n",
"    \"\"\"Replace every ``n``-th occurrence of ``sub`` in ``s`` with ``repl``.\n",
"\n",
"    ``s`` is split at ``sub``, the pieces are regrouped ``n`` at a time and\n",
"    re-joined with ``sub``, and the groups are glued together with ``repl``.\n",
"    Finally every literal \"<br>&\" in the result is rewritten to \" &<br>\".\n",
"    \"\"\"\n",
"    pieces = s.split(sub)\n",
"    n_groups = -(-len(pieces) // n)  # ceiling division\n",
"    joined_groups = [sub.join(pieces[k * n:(k + 1) * n]) for k in range(n_groups)]\n",
"    return repl.join(joined_groups).replace(\"<br>&\", \" &<br>\")\n",
"\n",
"\n",
"groups = ['Domain_English', 'Field_English', 'SubField_English']\n",
"\n",
"# Distinct records per (domain, field, subfield), largest first, plus each row's\n",
"# share of the summed counts in percent.\n",
"data = (\n",
"    wos.groupby(groups, as_index=False)[record_col]\n",
"    .nunique()\n",
"    .sort_values(by=record_col, ascending=False)\n",
")\n",
"data[\"percent\"] = data[record_col] / data[record_col].sum() * 100\n",
"\n",
"# Break the three label columns into multiple lines (\"<br>\" after every second word).\n",
"data[groups] = data[groups].applymap(replace_nth)\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 223,
"id": "2c9d6d5a",
"metadata": {},
"outputs": [],
"source": [
"# Sunburst of the record counts along the three classification levels, coloured by domain.\n",
"fig = px.sunburst(\n",
"    data,\n",
"    path=groups,\n",
"    values=record_col,\n",
"    color='Domain_English',\n",
"    title=\"Distribution of topics<br>(METRIX taxonomy)\",\n",
"    template='plotly',\n",
")\n",
"# Sector text shows label, count and share of the root; hover shows the sector id and count.\n",
"fig.update_traces(\n",
"    textinfo=\"label+value+percent root\",\n",
"    hovertemplate='%{id}<br>%{value}<extra></extra>',\n",
")\n",
"metrix_distr = go.Figure(fig)"
]
},
{
"cell_type": "code",
"execution_count": 224,
"outputs": [],
"source": [
"# Distinct records per publication year.\n",
"data = (wos.groupby(['Publication Year'])[record_col].nunique(dropna=False)\n",
"        .reset_index()\n",
"        .rename(columns={0:record_col}))\n",
"\n",
"# Baseline for the growth rate: the count of the earliest year that has records.\n",
"# .iloc[0] takes the first row by position; a bare [0] is a label lookup that only\n",
"# hits the earliest year while that row happens to carry the index label 0.\n",
"data[record_col+\"_relative_growth\"] = (\n",
"    data[data[record_col]>0]\n",
"    .sort_values(by=[\"Publication Year\"], ascending=True)[record_col]\n",
"    .iloc[0]\n",
")\n",
"# Relative growth as a fraction of the baseline (0.5 means +50 %).\n",
"data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n",
"\n",
"# Running total over the years (requires chronological order).\n",
"data = data.sort_values(by =[\"Publication Year\"], ascending=[True])\n",
"data[record_col+\"_cumsum\"] = (data[record_col].cumsum())\n",
"\n",
"year_output = px.line(data,x=\"Publication Year\", y=record_col, markers=True)\n",
"year_output.update_traces(hovertemplate='Year:%{x:d}<br>Number of co-publications:%{y:d}')\n",
"\n",
"year_rel_output = px.line(data,x=\"Publication Year\", y=record_col+\"_relative_growth\", markers=True)\n",
"year_rel_output.update_traces(hovertemplate='Year:%{x:d}<br>Rel.growth in co-publications:%{y:.0%}')\n",
"\n",
"year_rel_cumsum = px.area(data,x=\"Publication Year\", y=record_col+\"_cumsum\")\n",
"year_rel_cumsum.update_traces(hovertemplate='Year:%{x:d}<br>Cumulative number co-publications:%{y:d}')\n",
"\n",
"\n",
"# Layout: the sunburst spans all three rows of the left column, the three\n",
"# yearly charts are stacked in the right column.\n",
"figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Distribution of topics\",\n",
"                                                         \"Co-publications per year\",\"Relative growth of co-publications\",\n",
"                                                         \"Cumulative sum of co-publications\",],\n",
"                         specs=[\n",
"                             [{\"type\": \"domain\", \"rowspan\":3}, {\"type\": \"xy\"}],\n",
"                             [None,{\"type\": \"xy\"}],\n",
"                             [None, {\"type\": \"xy\"}]\n",
"                         ])\n",
"\n",
"# (source figure, target row(s), target column) for every panel, in insertion order.\n",
"panel_sources = [\n",
"    (metrix_distr, [1,2,3], 1),\n",
"    (year_output, 1, 2),\n",
"    (year_rel_output, 2, 2),\n",
"    (year_rel_cumsum, 3, 2),\n",
"]\n",
"for panel_fig, panel_row, panel_col in panel_sources:\n",
"    for trace in list(panel_fig.select_traces()):\n",
"        figsuper.add_trace(trace, row=panel_row, col=panel_col)\n",
"\n",
"figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n",
"figsuper.update_yaxes(\n",
"    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"    ticks=\"outside\")\n",
"figsuper.update_xaxes(\n",
"    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"    ticks=\"outside\")\n",
"figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n",
"# yaxis2 is the y axis of the second xy subplot (relative growth): show fractions as percent.\n",
"figsuper['layout']['yaxis2'].update(zerolinecolor='grey',tickformat=\".0%\")\n",
"\n",
"figsuper.write_html(\"plot_html/Overall_distr&trends.html\",config= dict(displayModeBar = False, responsive = True))"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 61,
"outputs": [],
"source": [
"# data\n"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"id": "66fca444",
"metadata": {},
"source": [
"## Domains, distribution, yearly trends"
]
},
{
"cell_type": "code",
"execution_count": 195,
"id": "14e82a73",
"metadata": {},
"outputs": [],
"source": [
"group = 'Domain_English'\n",
"\n",
"# Distinct records per domain, largest first.\n",
"data = (\n",
"    wos.groupby(group, as_index=False)[record_col]\n",
"    .nunique()\n",
"    .sort_values(by=record_col, ascending=False)\n",
")\n",
"\n",
"# Horizontal bar per domain; rows are fed in alphabetical order so the colour\n",
"# assignment follows the domain names, while the axis itself is ordered by total.\n",
"fig = px.bar(\n",
"    data.sort_values(by=group),\n",
"    x=record_col,\n",
"    y=group,\n",
"    color=group,\n",
"    barmode='relative',\n",
"    labels={record_col: 'Number of co-publications', group: \"\"},\n",
"    title=\"Distribution of Domains\",\n",
"    template='plotly',\n",
")\n",
"fig.update_layout(\n",
"    showlegend=False,\n",
"    xaxis_tickformat='d',\n",
"    font_family=\"Montserrat\",\n",
"    yaxis={'categoryorder':'total ascending'},\n",
")\n",
"fig.update_traces(hovertemplate='%{x:d}')\n",
"# Thin black rectangle around the whole plotting area (paper coordinates 0..1).\n",
"fig.add_shape(\n",
"    type=\"rect\", xref=\"paper\", yref=\"paper\",\n",
"    x0=0, y0=0, x1=1.0, y1=1.0,\n",
"    line=dict(color=\"black\", width=0.5),\n",
")\n",
"fig.update_yaxes(showgrid=True, ticks=\"outside\")\n",
"fig.update_xaxes(showgrid=True, ticks=\"outside\")\n",
"dom_distr = go.Figure(fig)"
]
},
{
"cell_type": "code",
"execution_count": 196,
"id": "8cbe20ab",
"metadata": {},
"outputs": [],
"source": [
"group = ['Publication Year', 'Domain_English']\n",
"\n",
"# Distinct records per (year, domain). The unstack/fillna/stack round trip adds\n",
"# explicit zero rows for year-domain combinations that have no records.\n",
"data = (\n",
"    wos.groupby(group)[record_col]\n",
"    .nunique(dropna=False)\n",
"    .unstack()\n",
"    .fillna(0)\n",
"    .stack()\n",
"    .reset_index()\n",
"    .rename(columns={0: record_col})\n",
")\n",
"\n",
"# Attach to every row its domain's baseline: the count of the earliest year in\n",
"# which that domain has at least one record (right-hand columns get the\n",
"# \"_relative_growth\" suffix).\n",
"data = data.merge(\n",
"    data[data[record_col] > 0]\n",
"    .sort_values(by=[\"Publication Year\"], ascending=True)\n",
"    .drop_duplicates(subset='Domain_English'),\n",
"    on='Domain_English',\n",
"    suffixes=[None, \"_relative_growth\"],\n",
")\n",
"# Turn the baseline column into growth relative to that baseline (as a fraction).\n",
"data[record_col + \"_relative_growth\"] = (\n",
"    (data[record_col] - data[record_col + \"_relative_growth\"])\n",
"    / data[record_col + \"_relative_growth\"]\n",
")\n",
"\n",
"# Running total per domain, accumulated in chronological order.\n",
"data = data.sort_values(by=[\"Domain_English\", \"Publication Year\"], ascending=True)\n",
"data[record_col + \"_cumsum\"] = data.groupby('Domain_English', as_index=False)[record_col].cumsum()"
]
},
{
"cell_type": "code",
"execution_count": 197,
"id": "05d0922a",
"metadata": {},
"outputs": [],
"source": [
"def _frame_and_grid(figure):\n",
"    \"\"\"Outline the plotting area with a thin black rectangle and switch on grid\n",
"    lines and outside ticks for both axes. Returns the same figure object.\"\"\"\n",
"    figure.add_shape(\n",
"        type=\"rect\", xref=\"paper\", yref=\"paper\",\n",
"        x0=0, y0=0, x1=1.0, y1=1.0,\n",
"        line=dict(color=\"black\", width=0.5),\n",
"    )\n",
"    figure.update_yaxes(showgrid=True, ticks=\"outside\")\n",
"    figure.update_xaxes(showgrid=True, ticks=\"outside\")\n",
"    return figure\n",
"\n",
"\n",
"# All three charts use the same row order: by group[0], then by group[-1].\n",
"trend_data = data.sort_values(ascending=[True,True], by=[group[0],group[-1]])\n",
"\n",
"# 1) Yearly number of co-publications, one line per domain.\n",
"fig = px.line(trend_data, y=record_col, x=group[0], color=group[-1], markers=True,\n",
"              labels={\n",
"                  record_col: 'Number of co-publications',\n",
"                  group[-1]: \"Domain\",\n",
"              },\n",
"              title=\"Yearly output of co-publications\", template='plotly')\n",
"fig.update_traces(hovertemplate='%{y:d}')\n",
"fig.update_layout(hovermode='x unified')\n",
"year_output_by_domain = go.Figure(_frame_and_grid(fig))\n",
"\n",
"# 2) Growth relative to each domain's baseline year.\n",
"fig = px.line(trend_data, y=record_col+\"_relative_growth\", x=group[0], color=group[-1], markers=True,\n",
"              labels={\n",
"                  record_col+\"_relative_growth\": 'Rel. growth<br>in co-publications (%)',\n",
"                  group[-1]: \"Domain\",\n",
"              },\n",
"              title=\"Relative growth in the output of co-publications\", template='plotly')\n",
"# The growth column holds fractions (1.5 == +150 %), so the d3 '%' format type is\n",
"# used: it multiplies by 100 and appends the percent sign. The previous\n",
"# '%{y:.0f}00%' rounded the fraction to an integer first (0.4 was shown as \"000%\").\n",
"fig.update_layout(hovermode='x unified', yaxis_tickformat='.0%', font_family=\"Montserrat\")\n",
"fig.update_traces(hovertemplate='%{y:.0%}')\n",
"rel_output_by_domain = go.Figure(_frame_and_grid(fig))\n",
"\n",
"# 3) Cumulative number of co-publications as stacked areas.\n",
"fig = px.area(trend_data, y=record_col+\"_cumsum\", x=group[0], color=group[-1], line_group=group[-1],\n",
"              labels={\n",
"                  record_col+\"_cumsum\": 'Cumulative number of co-publications',\n",
"                  group[-1]: \"Domain\",\n",
"              },\n",
"              title=\"Cumulative number of co-publications\", template='plotly')\n",
"fig.update_traces(hovertemplate='%{y:d}')\n",
"fig.update_layout(hovermode='x unified')\n",
"cumsum_by_domain = go.Figure(_frame_and_grid(fig))"
]
},
{
"cell_type": "code",
"execution_count": 199,
"id": "3a07c24d",
"metadata": {},
"outputs": [],
"source": [
"# 2x2 dashboard assembled from the four per-domain figures built above.\n",
"# (make_subplots and go are already imported by the notebook's first cell.)\n",
"figsuper = make_subplots(rows=2, cols=2, subplot_titles=[\"Distribution of domains\",\"Cumulative sum of co-publications\",\n",
"                                                         \"Co-publications per year\",\"Relative growth of co-publications\"])\n",
"\n",
"# (source figure, row, column, keep legend entries?) in insertion order.\n",
"# Only the cumulative-sum traces keep their legend entries; the legend is\n",
"# switched off on the traces of the other three figures.\n",
"panel_sources = [\n",
"    (dom_distr, 1, 1, False),\n",
"    (cumsum_by_domain, 1, 2, True),\n",
"    (year_output_by_domain, 2, 1, False),\n",
"    (rel_output_by_domain, 2, 2, False),\n",
"]\n",
"for panel_fig, panel_row, panel_col, keep_legend in panel_sources:\n",
"    for trace in list(panel_fig.select_traces()):\n",
"        if not keep_legend:\n",
"            trace.showlegend = False\n",
"        figsuper.add_trace(trace, row=panel_row, col=panel_col)\n",
"\n",
"# First subplot is the bar chart: order its categories by total, stack bars relatively.\n",
"figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n",
"figsuper.update_yaxes(\n",
"    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"    ticks=\"outside\")\n",
"figsuper.update_xaxes(\n",
"    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"    ticks=\"outside\")\n",
"figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n",
"# yaxis4 is the y axis of the fourth subplot (relative growth): show fractions as percent.\n",
"figsuper['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n",
"\n",
"figsuper.write_html(\"plot_html/Domains_distr&trends.html\",config= dict(displayModeBar = False, responsive = True))"
]
},
{
"cell_type": "code",
"execution_count": 68,
"outputs": [],
"source": [
"# figsuper['layout']"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 69,
"id": "329b6889",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "Publication Year 2011 2012 2013 2014 2015 2016 2017 2018 \nDomain_English \nApplied Sciences 490 593 738 1031 1201 1535 1920 2808 \\\nArts & Humanities 0 0 0 4 1 3 7 4 \nEconomic & Social Sciences 20 22 29 28 34 40 84 105 \nHealth Sciences 116 120 155 184 216 243 321 403 \nMultidisciplinary 15 21 43 52 57 64 75 76 \nNatural Sciences 181 223 298 318 380 437 568 753 \n\nPublication Year 2019 2020 2021 2022 \nDomain_English \nApplied Sciences 3729 4446 5295 6199 \nArts & Humanities 11 11 16 13 \nEconomic & Social Sciences 160 211 252 375 \nHealth Sciences 611 755 1035 1182 \nMultidisciplinary 83 97 115 149 \nNatural Sciences 999 1232 1403 1665 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>Publication Year</th>\n <th>2011</th>\n <th>2012</th>\n <th>2013</th>\n <th>2014</th>\n <th>2015</th>\n <th>2016</th>\n <th>2017</th>\n <th>2018</th>\n <th>2019</th>\n <th>2020</th>\n <th>2021</th>\n <th>2022</th>\n </tr>\n <tr>\n <th>Domain_English</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>Applied Sciences</th>\n <td>490</td>\n <td>593</td>\n <td>738</td>\n <td>1031</td>\n <td>1201</td>\n <td>1535</td>\n <td>1920</td>\n <td>2808</td>\n <td>3729</td>\n <td>4446</td>\n <td>5295</td>\n <td>6199</td>\n </tr>\n <tr>\n <th>Arts &amp; Humanities</th>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>4</td>\n <td>1</td>\n <td>3</td>\n <td>7</td>\n <td>4</td>\n <td>11</td>\n <td>11</td>\n <td>16</td>\n <td>13</td>\n </tr>\n <tr>\n <th>Economic &amp; Social Sciences</th>\n <td>20</td>\n <td>22</td>\n <td>29</td>\n <td>28</td>\n <td>34</td>\n <td>40</td>\n <td>84</td>\n <td>105</td>\n <td>160</td>\n <td>211</td>\n <td>252</td>\n <td>375</td>\n </tr>\n <tr>\n <th>Health Sciences</th>\n <td>116</td>\n <td>120</td>\n <td>155</td>\n <td>184</td>\n <td>216</td>\n <td>243</td>\n <td>321</td>\n <td>403</td>\n <td>611</td>\n <td>755</td>\n <td>1035</td>\n <td>1182</td>\n </tr>\n <tr>\n <th>Multidisciplinary</th>\n <td>15</td>\n <td>21</td>\n <td>43</td>\n <td>52</td>\n <td>57</td>\n <td>64</td>\n <td>75</td>\n <td>76</td>\n <td>83</td>\n <td>97</td>\n <td>115</td>\n <td>149</td>\n </tr>\n <tr>\n <th>Natural Sciences</th>\n <td>181</td>\n <td>223</td>\n <td>298</td>\n <td>318</td>\n <td>380</td>\n <td>437</td>\n <td>568</td>\n 
<td>753</td>\n <td>999</td>\n <td>1232</td>\n <td>1403</td>\n <td>1665</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Domain-by-year matrix of the per-year counts; combinations missing from `data` become 0.\n",
"pivot_data = data.pivot_table(\n",
"    values=record_col,\n",
"    index=['Domain_English'],\n",
"    columns=['Publication Year'],\n",
"    fill_value=0,\n",
")\n",
"pivot_data"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "100f3002",
"metadata": {},
"outputs": [],
"source": [
"# f, ax = plt.subplots(figsize=(9, 6))\n",
"# g = sns.heatmap(pivot_data, annot=True, fmt=\"d\", linewidths=.5, ax=ax)\n",
"# g.set(xlabel=\"\", ylabel=\"\")"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "a8d24046",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "Publication Year 2011 2012 2013 2014 \nDomain_English \nApplied Sciences 59.610706 60.572012 58.432304 63.760049 \\\nArts & Humanities 0.000000 0.000000 0.000000 0.247372 \nEconomic & Social Sciences 2.433090 2.247191 2.296120 1.731602 \nHealth Sciences 14.111922 12.257406 12.272367 11.379097 \nMultidisciplinary 1.824818 2.145046 3.404592 3.215832 \nNatural Sciences 22.019465 22.778345 23.594616 19.666048 \n\nPublication Year 2015 2016 2017 2018 \nDomain_English \nApplied Sciences 63.578613 66.106804 64.537815 67.678959 \\\nArts & Humanities 0.052938 0.129199 0.235294 0.096409 \nEconomic & Social Sciences 1.799894 1.722653 2.823529 2.530730 \nHealth Sciences 11.434621 10.465116 10.789916 9.713184 \nMultidisciplinary 3.017470 2.756245 2.521008 1.831767 \nNatural Sciences 20.116464 18.819983 19.092437 18.148952 \n\nPublication Year 2019 2020 2021 2022 \nDomain_English \nApplied Sciences 66.672626 65.847156 65.241498 64.687467 \nArts & Humanities 0.196674 0.162915 0.197141 0.135657 \nEconomic & Social Sciences 2.860719 3.125000 3.104978 3.913180 \nHealth Sciences 10.924370 11.181872 12.752587 12.334342 \nMultidisciplinary 1.483998 1.436611 1.416954 1.554837 \nNatural Sciences 17.861613 18.246445 17.286841 17.374517 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>Publication Year</th>\n <th>2011</th>\n <th>2012</th>\n <th>2013</th>\n <th>2014</th>\n <th>2015</th>\n <th>2016</th>\n <th>2017</th>\n <th>2018</th>\n <th>2019</th>\n <th>2020</th>\n <th>2021</th>\n <th>2022</th>\n </tr>\n <tr>\n <th>Domain_English</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>Applied Sciences</th>\n <td>59.610706</td>\n <td>60.572012</td>\n <td>58.432304</td>\n <td>63.760049</td>\n <td>63.578613</td>\n <td>66.106804</td>\n <td>64.537815</td>\n <td>67.678959</td>\n <td>66.672626</td>\n <td>65.847156</td>\n <td>65.241498</td>\n <td>64.687467</td>\n </tr>\n <tr>\n <th>Arts &amp; Humanities</th>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.000000</td>\n <td>0.247372</td>\n <td>0.052938</td>\n <td>0.129199</td>\n <td>0.235294</td>\n <td>0.096409</td>\n <td>0.196674</td>\n <td>0.162915</td>\n <td>0.197141</td>\n <td>0.135657</td>\n </tr>\n <tr>\n <th>Economic &amp; Social Sciences</th>\n <td>2.433090</td>\n <td>2.247191</td>\n <td>2.296120</td>\n <td>1.731602</td>\n <td>1.799894</td>\n <td>1.722653</td>\n <td>2.823529</td>\n <td>2.530730</td>\n <td>2.860719</td>\n <td>3.125000</td>\n <td>3.104978</td>\n <td>3.913180</td>\n </tr>\n <tr>\n <th>Health Sciences</th>\n <td>14.111922</td>\n <td>12.257406</td>\n <td>12.272367</td>\n <td>11.379097</td>\n <td>11.434621</td>\n <td>10.465116</td>\n <td>10.789916</td>\n <td>9.713184</td>\n <td>10.924370</td>\n <td>11.181872</td>\n <td>12.752587</td>\n <td>12.334342</td>\n </tr>\n <tr>\n <th>Multidisciplinary</th>\n <td>1.824818</td>\n <td>2.145046</td>\n 
<td>3.404592</td>\n <td>3.215832</td>\n <td>3.017470</td>\n <td>2.756245</td>\n <td>2.521008</td>\n <td>1.831767</td>\n <td>1.483998</td>\n <td>1.436611</td>\n <td>1.416954</td>\n <td>1.554837</td>\n </tr>\n <tr>\n <th>Natural Sciences</th>\n <td>22.019465</td>\n <td>22.778345</td>\n <td>23.594616</td>\n <td>19.666048</td>\n <td>20.116464</td>\n <td>18.819983</td>\n <td>19.092437</td>\n <td>18.148952</td>\n <td>17.861613</td>\n <td>18.246445</td>\n <td>17.286841</td>\n <td>17.374517</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"# Share (in %) of each domain within a publication year: normalize='columns' makes every\n",
"# year column sum to 1, and the result is scaled to 100.\n",
"# The aggregation is given by name ('sum'), like the other crosstab calls in this notebook;\n",
"# passing the NumPy callable np.sum is deprecated in recent pandas releases.\n",
"percent_pivot = pd.crosstab(data['Domain_English'], data['Publication Year'],\n",
"                            values=data[record_col], aggfunc='sum',\n",
"                            normalize='columns') * 100\n",
"percent_pivot"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "3bda79fb",
"metadata": {},
"outputs": [],
"source": [
" # f, ax = plt.subplots(figsize=(15, 6))\n",
"# # g = sns.heatmap(percent_pivot, annot=True, fmt='.2f', linewidths=.5, ax=ax, cbar=False)\n",
"# # for t in ax.texts: t.set_text(t.get_text() + \" %\")\n",
"# g.set(xlabel=\"\", ylabel=\"\")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "01024cc0",
"metadata": {},
"outputs": [],
"source": [
"# percent_pivot.T.plot(kind='bar',\n",
"# stacked=True,\n",
"# figsize=(10, 6))"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "4caa215d",
"metadata": {},
"outputs": [],
"source": [
"# percent_pivot.T.plot(kind='bar',\n",
"# stacked=True,\n",
"# figsize=(15, 8))\n",
"#\n",
"# plt.legend(loc=\"lower left\", ncol=2)\n",
"# # plt.ylabel(\"Release Year\")\n",
"# # plt.xlabel(\"Proportion\")\n",
"#\n",
"#\n",
"# for n, x in enumerate([*pivot_data.T.index.values]):\n",
"# for (proportion, count, y_loc) in zip(percent_pivot.T.loc[x],\n",
"# pivot_data.T.loc[x],\n",
"# percent_pivot.T.loc[x].cumsum()):\n",
"#\n",
"# plt.text(y=(y_loc - proportion) + (proportion / 2),\n",
"# x=n - 0.11,\n",
"# s=f'{count}',# ({np.round(proportion, 1)}%)',\n",
"# color=\"black\",\n",
"# fontsize=8,\n",
"# fontweight=\"bold\")\n",
"#\n",
"# plt.show()"
]
},
{
"cell_type": "markdown",
"id": "dcae04bd",
"metadata": {},
"source": [
"## Field"
]
},
{
"cell_type": "code",
"execution_count": 193,
"id": "d3807072",
"metadata": {},
"outputs": [],
"source": [
"# group = ['Publication Year',\"Domain_English\",'Field_English']\n",
"# # data = wos.groupby(['Publication Year',\"Domain_English\",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n",
"#\n",
"#\n",
"# data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()\n",
"# .fillna(0)\n",
"# .stack()\n",
"# .reset_index()\n",
"# .rename(columns={0:record_col}))\n",
"#\n",
"# data = data.merge(wos[[\"Domain_English\",'Field_English']].drop_duplicates(),on=\"Field_English\")\n",
"#\n",
"# data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='Field_English'),\n",
"# on='Field_English', suffixes=[None,\"_relative_growth\"])\n",
"# data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]*100\n",
"#\n",
"# data = data.sort_values(by =[\"Field_English\",\"Publication Year\"], ascending=[True,True])\n",
"# data[record_col+\"_cumsum\"] = (data.groupby('Domain_English',as_index=False)[record_col].cumsum())"
]
},
{
"cell_type": "code",
"execution_count": 192,
"id": "756513b5",
"metadata": {},
"outputs": [],
"source": [
"# data_complete = pd.DataFrame()\n",
"#\n",
"# for cat in sorted(data[group[-2]].unique()):\n",
"# #data segment\n",
"# sub_data = data[data[group[-2]]==cat]\n",
"# sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n",
"# ,group[-1],fill_value=0)\n",
"# data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n",
"\n",
"\n",
" # seaborn version plot\n",
" # g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),\n",
" # y=record_col,x=group[0], hue=group[-1], marker=\"o\")\n",
" # g.set(xticks=list(range(2012,2022+1,2)))\n",
" # g.legend(title=None)\n",
" # g.set_title(cat)\n",
" # g.yaxis.set_major_locator(MaxNLocator(integer=True))\n",
" # plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 191,
"id": "d09c080a",
"metadata": {},
"outputs": [],
"source": [
"# data_complete = pd.DataFrame()\n",
"#\n",
"# # Creating subplot axes\n",
"# fig, axes = plt.subplots(nrows=3,ncols=2,figsize=(15, 15))\n",
"#\n",
"# for cat,ax in zip(sorted(data[group[-2]].unique()),axes.flatten()):\n",
"# #data segment\n",
"# sub_data = data[data[group[-2]]==cat]\n",
"# sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n",
"# ,group[-1],fill_value=0)\n",
"# data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n",
"# #plot\n",
"# g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),\n",
"# y=record_col,x=group[0], hue=group[-1], marker=\"o\", ax=ax)\n",
"# g.set(xticks=list(range(2012,2022+1,2)))\n",
"# g.legend(title=None)\n",
"# g.set_title(cat)\n",
"# g.set_xlabel(None)\n",
"# g.set_ylabel(None)\n",
"# g.yaxis.set_major_locator(MaxNLocator(integer=True))\n",
"# fig.suptitle(\"Number of co-publications in domains and respective fields\", y=0.92)\n",
"# plt.show()"
]
},
{
"cell_type": "markdown",
"id": "09a6de71",
"metadata": {},
"source": [
"## SubField"
]
},
{
"cell_type": "code",
"execution_count": 78,
"id": "0397eb85",
"metadata": {},
"outputs": [],
"source": [
"group = ['Publication Year',\"Domain_English\",'Field_English',\"SubField_English\"]\n",
"data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n",
"# data"
]
},
{
"cell_type": "code",
"execution_count": 79,
"id": "846596cf",
"metadata": {},
"outputs": [],
"source": [
"# For every value of group[-2], take its rows and pad them so that each year between the overall\n",
"# minimum and maximum of group[0] is present for every group[-1] value (missing counts become 0).\n",
"# NOTE(review): `sub_data` is rebuilt on each iteration but never consumed while the plotting code\n",
"# below stays commented out, so this loop currently has no visible effect.\n",
"for cat in sorted(data[group[-2]].unique()):\n",
"    sub_data = data[data[group[-2]]==cat]\n",
"    # `.complete` is not a core pandas method - presumably provided by pyjanitor; confirm in the imports cell.\n",
"    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n",
"                                 ,group[-1],fill_value=0)\n",
"    # g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0],\n",
"    # hue=group[-1], marker=\"o\", errorbar=None)\n",
"    # g.set(xticks=list(range(2012,2022+1,2)))\n",
"    # g.legend(title=None,bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncols=math.ceil(len(g.legend_.texts)/12))\n",
"    # g.set_title(f'Number or co-publications in {cat}')\n",
"    # g.set_ylabel(None)\n",
"    # plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "27c90aaf",
"metadata": {},
"outputs": [],
"source": [
"from matplotlib.ticker import FuncFormatter\n",
"import math\n",
"\n",
"\n",
"def orderOfMagnitude(number):\n",
"    \"\"\"Return the base-10 exponent of a positive number, i.e. floor(log10(number)).\n",
"\n",
"    math.log10 is used instead of math.log(number, 10): the two-argument form divides\n",
"    two natural logarithms and is inexact at powers of ten\n",
"    (math.log(1000, 10) == 2.9999999999999996, which floors to 2 instead of 3).\n",
"\n",
"    Raises:\n",
"        ValueError: if number <= 0, where the logarithm is undefined.\n",
"    \"\"\"\n",
"    return math.floor(math.log10(number))\n",
"\n",
"\n",
"def roundToNearest(number):\n",
"    \"\"\"Round a positive number up to the next multiple of its own order of magnitude.\n",
"\n",
"    Examples: 37 -> 40, 4245 -> 5000, 1000 -> 1000.\n",
"\n",
"    Raises:\n",
"        ValueError: if number <= 0 (propagated from orderOfMagnitude).\n",
"    \"\"\"\n",
"    order = orderOfMagnitude(number)\n",
"    # if order!=0:\n",
"    #     order+=1\n",
"    near = math.ceil(number/10**order)*10**order\n",
"    return near"
]
},
{
"cell_type": "markdown",
"id": "91d2cc8a",
"metadata": {},
"source": [
"## Country contributions"
]
},
{
"cell_type": "code",
"execution_count": 190,
"id": "b3adb06a",
"metadata": {},
"outputs": [],
"source": [
"# `go` and `make_subplots` are imported here because the only import of them visible nearby sits\n",
"# in a later cell; without this the cell depends on out-of-order execution.\n",
"import plotly.graph_objects as go\n",
"from plotly.subplots import make_subplots\n",
"\n",
"# One row per (record, country) for every institution whose country type is not \"Other\".\n",
"wos_univ_locations = wos_univ.merge(wos_country_types, on=\"Country\")\n",
"wos_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\"]].drop_duplicates()\n",
"\n",
"# Per-country totals for all partners other than China. The two \"percent_*\" columns hold\n",
"# fractions (the *100 is commented out); the x-axis tick format below renders them as percentages.\n",
"# NOTE(review): the \"count\" column name is what value_counts().reset_index() yields in pandas >= 2.0.\n",
"collab_desc = wos_collabs[wos_collabs[\"Country\"]!=\"China\"][\"Country\"].value_counts().reset_index()\n",
"collab_desc[\"percent_of_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].nunique()#*100\n",
"collab_desc[\"percent_contrib_in_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].size#*100\n",
"collab_desc = collab_desc.merge(wos_country_types, on=\"Country\")\n",
"# collab_desc\n",
"\n",
"# Metric column -> display title (also used as the subplot titles of the combined figure).\n",
"c_dict = {\"count\":\"Number of co-publications\",\n",
"          \"percent_of_copubs\":\"Percent of co-publications\",\n",
"          \"percent_contrib_in_copubs\":\"Contribution to co-publications\"}\n",
"\n",
"color_discrete_map= {'China': '#EF553B',\n",
"                     'EU': '#636EFA',\n",
"                     'Non-EU associate': '#00CC96'}\n",
"\n",
"fig_dict = dict()\n",
"# Creating subplot axes\n",
"# fig, axes = plt.subplots(ncols=3,figsize=(15, 15))\n",
"# for c,ax in zip(c_dict.keys(),axes.flatten()):\n",
"# Build one stand-alone bar chart per metric; their traces are reused in the combined figure below.\n",
"for c in c_dict.keys():\n",
"    data = collab_desc[[\"Country\",c,\"Country_Type\"]]\n",
"    # plt.figure(figsize=(9,12))\n",
"    col_by=\"Country_Type\"\n",
"    y_lab=\"Country\"\n",
"    # g = sns.barplot(data, x=c, y=\"Country\", hue=\"Country_Type\", dodge=False)\n",
"    fig = px.bar(data, x=c, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,\n",
"                 labels=dict({\n",
"                     record_col: 'Number of co-publications',\n",
"                     \"Institution_harm\": \"Institution\",\n",
"                     \"Institution_harm_label\": \"Institution\",\n",
"                     \"Country_Type\":\"Country type\",\n",
"                     \"Eurovoc_Class\":\"Region\"\n",
"                 },**c_dict),\n",
"                 title=c_dict[c], template='plotly')\n",
"    fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",\n",
"                      yaxis={'categoryorder':'total ascending'},\n",
"                      width=1000, height=1000,)\n",
"    if \"percent\" in c:\n",
"        fig.update_traces(hovertemplate='%{y}<br>%{x}')\n",
"        fig.update_xaxes(tickformat=\".1%\")\n",
"    else:\n",
"        fig.update_traces(hovertemplate='%{y}<br>%{x:d}')\n",
"    fig_dict[c] = go.Figure(fig)\n",
"    # fig.show(config= dict(displayModeBar = False, responsive = True))\n",
"    # g.set_xlim(0,roundToNearest(data[c].max()))\n",
"    # g.set_ylabel(None)\n",
"    # g.set_xlabel(c_dict.get(c))\n",
"    # g.set_title(c_dict.get(c))\n",
"    # g.legend(title=None, loc=\"right\")\n",
"    # for i in g.containers:\n",
"    # g.bar_label(i,fontsize=10, fmt='%.1f%%' if 'percent' in c else '%.0f')\n",
"    # if 'percent' in c:\n",
"    # g.xaxis.set_major_locator(MaxNLocator(integer=True))\n",
"    # vals = g.get_xticks()\n",
"    # g.set_xticklabels([str(int(val))+'%' for val in vals])\n",
"    # plt.show()\n",
"\n",
"# Combined figure: one column per metric. Only the traces of the per-metric figures are copied,\n",
"# and their legends are switched off.\n",
"figsuper = make_subplots(rows=1, cols=3, subplot_titles =list(c_dict.values()))\n",
"for i,f in enumerate(fig_dict.keys()):\n",
"    sfig = fig_dict[f]\n",
"    for trace in list(sfig.select_traces()):\n",
"        trace.showlegend=False\n",
"        figsuper.add_trace(trace,\n",
"                           row=1, col=i+1)\n",
"\n",
"figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative',yaxis2={'categoryorder':'total ascending'},yaxis3={'categoryorder':'total ascending'})\n",
"figsuper.update_yaxes(\n",
"    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"    ticks=\"outside\")\n",
"figsuper.update_xaxes(\n",
"    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"    ticks=\"outside\")\n",
"figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n",
"# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n",
"# The path contains no placeholders, so a plain string is used instead of an f-string.\n",
"figsuper.write_html(\"plot_html/europe_contribution_bar.html\",config= dict(displayModeBar = False, responsive = True))"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "140395ac",
"metadata": {},
"outputs": [],
"source": [
"# wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n",
"# wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n",
"# EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique', normalize='all').fillna(0)\n",
"#\n",
"# # Generate a mask for the upper triangle\n",
"# mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n",
"#\n",
"# # Set up the matplotlib figure\n",
"# f, ax = plt.subplots(figsize=(11, 9))\n",
"#\n",
"# # Draw the heatmap with the mask and correct aspect ratio\n",
"# g = sns.heatmap(EU_co_occur, mask=mask,\n",
"# square=True, linewidths=.5)\n",
"#\n",
"# g.set_ylabel(None)\n",
"# g.set_xlabel(None)"
]
},
{
"cell_type": "code",
"execution_count": 186,
"id": "c959287e",
"metadata": {},
"outputs": [],
"source": [
"# Pair up the countries that appear on the same record: the (record, country) rows of all\n",
"# country types except \"Other\" and \"China\" are self-joined on the record id, and the crosstab\n",
"# counts the distinct records for each (Country_x, Country_y) pair.\n",
"wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n",
"wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n",
"EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
"\n",
"\n",
"# Order rows and columns by the number of rows each country has in the self-joined table (descending).\n",
"eu_list = wos_collabs_EU.groupby(['Country_x'])[record_col].count().sort_values(ascending=False).index\n",
"# pre_fig = sns.clustermap(EU_co_occur)\n",
"# re_index = [i.get_text() for i in pre_fig.ax_heatmap.yaxis.get_majorticklabels()]\n",
"# re_column = [i.get_text() for i in pre_fig.ax_heatmap.xaxis.get_majorticklabels()]\n",
"\n",
"EU_co_occur = EU_co_occur.reindex(index = eu_list, columns=eu_list)\n",
"\n",
"# Generate a mask for the upper triangle (np.triu with its default k=0 also covers the diagonal),\n",
"# so only the cells strictly below the diagonal keep their value.\n",
"mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n",
"# NOTE(review): this rebinds the notebook-wide name `data` to a NumPy array.\n",
"data = np.where(mask,None,EU_co_occur)\n",
"\n",
"fig = px.imshow(data,\n",
"                labels=dict(x=\"Country\", y=\"Country\", color=\"Co-publication with China\"),\n",
"                x=list(EU_co_occur.columns),\n",
"                y=list(EU_co_occur.index), title=\"Intraeuropean patterns<br>Co-occurrences of countries in Chinese co-publications\"\n",
"                )\n",
"fig.update_layout(title_x=0.5,\n",
"                  width=1000, height=1000,\n",
"                  xaxis_showgrid=False,\n",
"                  yaxis_showgrid=False,\n",
"                  yaxis_autorange='reversed', template='plotly_white')\n",
"# fig.update_traces(hovertemplate='<b>%{y}</b><br>%{x}<br>Co-publications: %{hovertext}')\n",
"fig.update_xaxes(tickangle=-90, ticks=\"outside\")\n",
"fig.update_yaxes(ticks=\"outside\")\n",
"# fig.show(config= dict(displayModeBar = False,responsive=True))\n",
"fig.write_html(\"plot_html/intraeurope_collabs.html\",config= dict(displayModeBar = False, responsive = True))\n"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "df1f03ea",
"metadata": {},
"outputs": [],
"source": [
"# collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n",
"# collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n",
"# data = collab_year.groupby([\"Publication Year\",'Country_Type'],as_index=False)[record_col].nunique()\n",
"#\n",
"#\n",
"# g=sns.lineplot(data,y=record_col,x=\"Publication Year\", hue=\"Country_Type\", marker=\"o\")\n",
"# g.set(xticks=list(range(2012,2022+1,2)))\n",
"# g.legend(title=None)\n",
"# g.set_xlabel(None)\n",
"# g.set_ylabel(None)\n",
"# g.set_title(\"Yearly output of co-publications with China\")"
]
},
{
"cell_type": "markdown",
"id": "122d0260",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 182,
"id": "f19501a9",
"metadata": {},
"outputs": [],
"source": [
"# (record, country) rows for every partner country except China, with the country-type columns\n",
"# and the publication year of the record attached.\n",
"collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n",
"collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n",
"\n",
"# Distinct records per (year, country). The unstack -> fillna(0) -> stack round trip inserts an\n",
"# explicit 0 for every year/country combination that has no records.\n",
"data = (collab_year.groupby(['Publication Year',\"Country\"])[record_col]\n",
"        .nunique(dropna=False).unstack()\n",
"        .fillna(0)\n",
"        .stack()\n",
"        .reset_index()\n",
"        .rename(columns={0:record_col}))\n",
"# Baseline per country: its earliest year with at least one record. The merge attaches that row's\n",
"# values under the \"_relative_growth\" suffix ...\n",
"data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset=\"Country\"),\n",
"                  on=[\"Country\"], suffixes=[None,\"_relative_growth\"])\n",
"# ... and the suffixed count column is then overwritten with the percentage change against that baseline.\n",
"data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]*100\n",
"# Sort by country and year first so that the running total below accumulates in chronological order.\n",
"data = data.sort_values(by =[\"Country\",\"Publication Year\"], ascending=[True,True])\n",
"data[record_col+\"_cumsum\"] = (data.groupby('Country',as_index=False)[record_col].cumsum())\n",
"# Re-attach the country-type columns, which the groupby above dropped.\n",
"data = data.merge(wos_country_types, on='Country')\n",
"# data"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "b9585045",
"metadata": {},
"outputs": [],
"source": [
"# data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n",
"# fig = px.choropleth(data, locations=\"ISO3\", color=record_col, hover_name=\"Country\",\n",
"# animation_frame='Publication Year', scope=\"europe\", template='plotly', range_color=[data[record_col].min(),data[record_col].max()])\n",
"# fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 183,
"id": "952bdbfe",
"metadata": {},
"outputs": [],
"source": [
"# `go` is imported here because the only import of it visible nearby sits in a later cell;\n",
"# without this the cell depends on out-of-order execution.\n",
"import plotly.graph_objects as go\n",
"\n",
"# Map of Europe coloured by each country's cumulative count as of the 2022 rows.\n",
"# NOTE(review): `cc` is presumably a country_converter instance created in an earlier cell - confirm.\n",
"data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n",
"fig = px.choropleth(data[data[\"Publication Year\"] == 2022], locations=\"ISO3\", color=record_col+\"_cumsum\", hover_name=\"Country\",\n",
"                    scope=\"europe\", template='plotly',\n",
"                    range_color=[data[record_col+\"_cumsum\"].min(),data[record_col+\"_cumsum\"].max()],hover_data=[\"Eurovoc_Class\"])\n",
"# original: '<b>%{hovertext}</b><br><br>ISO3=%{location}<br>Eurovoc_Class=%{customdata[0]}<br>UT (Unique WOS ID)_cumsum=%{z}<extra></extra>'\n",
"\n",
"# Custom hover text (spelling of \"Co-publications\" corrected).\n",
"fig.update_traces(hovertemplate='<b>%{hovertext}</b>'\n",
"                                '<br>Region: %{customdata[0]}<br>'\n",
"                                'Co-publications: %{z:d}<extra></extra>')\n",
"\n",
"# Kept as its own Figure so a later cell can copy its traces into a combined figure.\n",
"cumsum_country = go.Figure(fig)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"id": "ae3cb8e1",
"metadata": {},
"outputs": [],
"source": [
"# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col,x='Publication Year', color=\"Eurovoc_Class\",line_group=\"Country\", markers=True,\n",
"# labels={\n",
"# record_col: 'Number of co-publications',\n",
"# \"Eurovoc_Class\": \"Region\"\n",
"# },\n",
"# title=\"Yearly output of co-publications\", template='plotly',hover_name= \"Country\")\n",
"# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')\n",
"# # fig.update_layout(hovermode='x unified')\n",
"# fig.add_shape(\n",
"# # Rectangle with reference to the plot\n",
"# type=\"rect\",\n",
"# xref=\"paper\",\n",
"# yref=\"paper\",\n",
"# x0=0,\n",
"# y0=0,\n",
"# x1=1.0,\n",
"# y1=1.0,\n",
"# line=dict(\n",
"# color=\"black\",\n",
"# width=0.5,\n",
"# )\n",
"# )\n",
"# fig.update_yaxes(\n",
"# showgrid=True,\n",
"# ticks=\"outside\")\n",
"# fig.update_xaxes(\n",
"# showgrid=True,\n",
"# ticks=\"outside\")\n",
"# fig.show(config= dict(displayModeBar = False))"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "dd72ad3f",
"metadata": {},
"outputs": [],
"source": [
"# fig.data[0].hovertemplate"
]
},
{
"cell_type": "code",
"execution_count": 91,
"id": "600d7459",
"metadata": {},
"outputs": [],
"source": [
"# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
"# y=record_col+\"_relative_growth\",\n",
"# x='Publication Year',\n",
"# color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n",
"# labels={\n",
"# record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n",
"# },\n",
"# title=\"Relative growth of co-publications<br>(baseline: 2011)\", template='plotly',hover_name= \"Country\")\n",
"# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')\n",
"# fig.add_shape(\n",
"# # Rectangle with reference to the plot\n",
"# type=\"rect\",\n",
"# xref=\"paper\",\n",
"# yref=\"paper\",\n",
"# x0=0,\n",
"# y0=0,\n",
"# x1=1.0,\n",
"# y1=1.0,\n",
"# line=dict(\n",
"# color=\"black\",\n",
"# width=0.5,\n",
"# )\n",
"# )\n",
"# fig.update_yaxes(\n",
"# showgrid=True,\n",
"# ticks=\"outside\")\n",
"# fig.update_xaxes(\n",
"# showgrid=True,\n",
"# ticks=\"outside\")\n",
"# fig.show(config= dict(displayModeBar = False))"
]
},
{
"cell_type": "code",
"execution_count": 184,
"id": "0ee76d32",
"metadata": {},
"outputs": [],
"source": [
"from plotly.subplots import make_subplots\n",
"import plotly.graph_objects as go\n",
"\n",
"# Combined figure: a geo subplot spanning all three rows on the left and three xy charts stacked\n",
"# on the right (cumulative, yearly, relative growth).\n",
"# NOTE(review): the first subplot title reads \"Number of publications (2022)\", while the map traces\n",
"# come from `cumsum_country`, which an earlier cell colours by the cumulative count - confirm the wording.\n",
"figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Number of publications (2022)\",\"Cumulative number of co-publications\",\n",
"                                                         \"Yearly output of co-publications\",\"Relative growth of co-publications\"],\n",
"                         specs=[\n",
"                             [{\"type\": \"geo\", \"rowspan\":3}, {\"type\": \"xy\"}],\n",
"                             [None,{\"type\": \"xy\"}],\n",
"                             [None, {\"type\": \"xy\"}]\n",
"                         ])\n",
"\n",
"# Left panel: the choropleth trace(s) prepared in `cumsum_country`.\n",
"for trace in list(cumsum_country.select_traces()):\n",
"    figsuper.add_trace(trace,\n",
"                       row=1, col=1\n",
"                       )\n",
"\n",
"# NOTE(review): for each px figure below only the traces are copied into `figsuper`; the title,\n",
"# labels and template passed to px (and any layout shapes) are not transferred by add_trace.\n",
"# Right panel, row 1: cumulative counts per country, coloured by region.\n",
"fig = px.area(data.sort_values(ascending=True, by='Publication Year'), y=record_col+\"_cumsum\",\n",
"              x='Publication Year',\n",
"              color=\"Eurovoc_Class\",\n",
"              line_group=\"Country\",\n",
"              labels={\n",
"                  record_col: 'Number of co-publications',\n",
"                  \"Eurovoc_Class\": \"Region\"\n",
"              },\n",
"              title=\"Cumulative number of co-publications\",\n",
"              hover_name= \"Country\")\n",
"fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')\n",
"\n",
"# These traces keep their legend entries; the two line charts below switch theirs off.\n",
"for trace in list(fig.select_traces()):\n",
"    figsuper.add_trace(trace,\n",
"                       row=1, col=2\n",
"                       )\n",
"\n",
"\n",
"# Right panel, row 2: yearly counts per country.\n",
"fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
"              y=record_col,\n",
"              x='Publication Year',\n",
"              color=\"Eurovoc_Class\",\n",
"              line_group=\"Country\",\n",
"              markers=True,\n",
"              labels={\n",
"                  record_col: 'Number of co-publications',\n",
"                  \"Eurovoc_Class\": \"Region\"\n",
"              },\n",
"              title=\"Yearly output of co-publications\",hover_name= \"Country\")\n",
"fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')\n",
"\n",
"for trace in list(fig.select_traces()):\n",
"    trace.showlegend=False\n",
"    figsuper.add_trace(trace,\n",
"                       row=2, col=2\n",
"                       )\n",
"\n",
"# Right panel, row 3: the \"_relative_growth\" column per country (values are in percent).\n",
"fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n",
"              y=record_col+\"_relative_growth\",\n",
"              x='Publication Year',\n",
"              color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n",
"              labels={\n",
"                  record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n",
"              },\n",
"              title=\"Relative growth of co-publications\", template='plotly',hover_name= \"Country\")\n",
"fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')\n",
"# NOTE(review): this rectangle is added to the intermediate `fig` only; it does not reach `figsuper`.\n",
"fig.add_shape(\n",
"    # Rectangle with reference to the plot\n",
"    type=\"rect\",\n",
"    xref=\"paper\",\n",
"    yref=\"paper\",\n",
"    x0=0,\n",
"    y0=0,\n",
"    x1=1.0,\n",
"    y1=1.0,\n",
"    line=dict(\n",
"        color=\"black\",\n",
"        width=0.5,\n",
"    )\n",
")\n",
"\n",
"for trace in list(fig.select_traces()):\n",
"    trace.showlegend=False\n",
"    figsuper.add_trace(trace,\n",
"                       row=3, col=2\n",
"                       )\n",
"\n",
"# Shared axis styling: black mirrored axis lines with outside ticks on every xy subplot.\n",
"figsuper.update_yaxes(\n",
"    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"    ticks=\"outside\")\n",
"figsuper.update_xaxes(\n",
"    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"    ticks=\"outside\")\n",
"figsuper.update_layout({'template':\"plotly\"})\n",
"# Restrict the geo subplot to Europe.\n",
"figsuper.layout[\"geo\"][\"scope\"] = 'europe'\n",
"figsuper.update_coloraxes(colorbar=dict(lenmode='fraction',len=0.55, orientation=\"v\",yanchor='top', title=\"Co-publications\",\n",
"                                        ticks=\"outside\", ticksuffix=\" \",outlinewidth=0.5))\n",
"# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n",
"figsuper.write_html(f\"plot_html/country_trends_overall.html\",config= dict(displayModeBar = False, responsive = True))"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "e4c50e14",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "Publication Year 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 \nCountry \nAustria 22 24 26 39 50 57 72 89 138 137 \\\nBelgium 34 38 40 65 71 81 90 133 179 213 \nBulgaria 4 5 8 9 7 19 21 18 10 25 \nCroatia 1 2 6 8 10 7 10 19 27 29 \nCyprus 2 1 5 5 5 5 8 7 15 28 \nCzech Republic 13 15 16 21 20 36 37 56 64 81 \nDenmark 35 33 40 59 68 74 101 195 234 245 \nEstonia 3 3 7 10 12 10 15 15 16 38 \nFinland 31 35 44 82 100 125 126 198 241 256 \nFrance 117 130 174 231 269 325 348 491 648 691 \nGermany 123 172 192 273 310 365 456 604 801 907 \nGreece 15 18 19 32 35 50 47 81 114 122 \nHungary 11 11 21 16 20 38 34 47 61 61 \nIreland 13 16 22 31 27 45 66 72 84 116 \nItaly 51 70 84 116 178 187 247 325 441 571 \nLatvia 0 0 1 0 1 8 10 15 10 9 \nLithuania 1 2 10 4 4 13 12 23 38 36 \nLuxembourg 2 3 3 1 8 9 13 15 18 22 \nMalta 1 0 0 0 1 1 0 0 6 2 \nNetherlands 72 64 77 103 139 166 220 297 408 470 \nNorway 30 42 60 76 67 88 104 134 222 253 \nPoland 17 31 37 57 73 82 98 110 138 181 \nPortugal 16 23 35 41 45 58 79 119 136 147 \nRomania 7 15 13 16 25 26 37 57 64 55 \nSlovakia 9 6 6 10 12 22 18 27 27 34 \nSlovenia 7 7 10 12 17 27 22 47 54 31 \nSpain 50 49 69 112 138 185 232 273 356 386 \nSweden 34 50 59 83 113 170 233 232 385 359 \nSwitzerland 37 50 54 74 74 95 155 195 233 263 \nUnited Kingdom 363 417 531 660 781 979 1350 1837 2430 3108 \n\nPublication Year 2021 2022 \nCountry \nAustria 185 205 \nBelgium 242 292 \nBulgaria 32 19 \nCroatia 33 35 \nCyprus 36 43 \nCzech Republic 93 123 \nDenmark 293 343 \nEstonia 45 39 \nFinland 289 380 \nFrance 807 858 \nGermany 1210 1386 \nGreece 139 181 \nHungary 83 90 \nIreland 167 187 \nItaly 641 811 \nLatvia 13 18 \nLithuania 38 38 \nLuxembourg 35 51 \nMalta 7 10 \nNetherlands 529 655 \nNorway 304 311 \nPoland 276 353 \nPortugal 204 212 \nRomania 48 62 \nSlovakia 36 45 \nSlovenia 48 40 \nSpain 473 640 \nSweden 428 510 \nSwitzerland 349 447 \nUnited Kingdom 3718 4245 ",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>Publication Year</th>\n <th>2011</th>\n <th>2012</th>\n <th>2013</th>\n <th>2014</th>\n <th>2015</th>\n <th>2016</th>\n <th>2017</th>\n <th>2018</th>\n <th>2019</th>\n <th>2020</th>\n <th>2021</th>\n <th>2022</th>\n </tr>\n <tr>\n <th>Country</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>Austria</th>\n <td>22</td>\n <td>24</td>\n <td>26</td>\n <td>39</td>\n <td>50</td>\n <td>57</td>\n <td>72</td>\n <td>89</td>\n <td>138</td>\n <td>137</td>\n <td>185</td>\n <td>205</td>\n </tr>\n <tr>\n <th>Belgium</th>\n <td>34</td>\n <td>38</td>\n <td>40</td>\n <td>65</td>\n <td>71</td>\n <td>81</td>\n <td>90</td>\n <td>133</td>\n <td>179</td>\n <td>213</td>\n <td>242</td>\n <td>292</td>\n </tr>\n <tr>\n <th>Bulgaria</th>\n <td>4</td>\n <td>5</td>\n <td>8</td>\n <td>9</td>\n <td>7</td>\n <td>19</td>\n <td>21</td>\n <td>18</td>\n <td>10</td>\n <td>25</td>\n <td>32</td>\n <td>19</td>\n </tr>\n <tr>\n <th>Croatia</th>\n <td>1</td>\n <td>2</td>\n <td>6</td>\n <td>8</td>\n <td>10</td>\n <td>7</td>\n <td>10</td>\n <td>19</td>\n <td>27</td>\n <td>29</td>\n <td>33</td>\n <td>35</td>\n </tr>\n <tr>\n <th>Cyprus</th>\n <td>2</td>\n <td>1</td>\n <td>5</td>\n <td>5</td>\n <td>5</td>\n <td>5</td>\n <td>8</td>\n <td>7</td>\n <td>15</td>\n <td>28</td>\n <td>36</td>\n <td>43</td>\n </tr>\n <tr>\n <th>Czech Republic</th>\n <td>13</td>\n <td>15</td>\n <td>16</td>\n <td>21</td>\n <td>20</td>\n <td>36</td>\n <td>37</td>\n <td>56</td>\n <td>64</td>\n <td>81</td>\n <td>93</td>\n <td>123</td>\n </tr>\n <tr>\n <th>Denmark</th>\n <td>35</td>\n 
<td>33</td>\n <td>40</td>\n <td>59</td>\n <td>68</td>\n <td>74</td>\n <td>101</td>\n <td>195</td>\n <td>234</td>\n <td>245</td>\n <td>293</td>\n <td>343</td>\n </tr>\n <tr>\n <th>Estonia</th>\n <td>3</td>\n <td>3</td>\n <td>7</td>\n <td>10</td>\n <td>12</td>\n <td>10</td>\n <td>15</td>\n <td>15</td>\n <td>16</td>\n <td>38</td>\n <td>45</td>\n <td>39</td>\n </tr>\n <tr>\n <th>Finland</th>\n <td>31</td>\n <td>35</td>\n <td>44</td>\n <td>82</td>\n <td>100</td>\n <td>125</td>\n <td>126</td>\n <td>198</td>\n <td>241</td>\n <td>256</td>\n <td>289</td>\n <td>380</td>\n </tr>\n <tr>\n <th>France</th>\n <td>117</td>\n <td>130</td>\n <td>174</td>\n <td>231</td>\n <td>269</td>\n <td>325</td>\n <td>348</td>\n <td>491</td>\n <td>648</td>\n <td>691</td>\n <td>807</td>\n <td>858</td>\n </tr>\n <tr>\n <th>Germany</th>\n <td>123</td>\n <td>172</td>\n <td>192</td>\n <td>273</td>\n <td>310</td>\n <td>365</td>\n <td>456</td>\n <td>604</td>\n <td>801</td>\n <td>907</td>\n <td>1210</td>\n <td>1386</td>\n </tr>\n <tr>\n <th>Greece</th>\n <td>15</td>\n <td>18</td>\n <td>19</td>\n <td>32</td>\n <td>35</td>\n
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Country x year table of distinct record counts; combinations with no records become 0.\n",
"year_pivot = (\n",
"    pd.crosstab(index=collab_year['Country'],\n",
"                columns=collab_year['Publication Year'],\n",
"                values=collab_year[record_col],\n",
"                aggfunc='nunique')\n",
"    .fillna(0)\n",
"    .astype(int)\n",
")\n",
"year_pivot"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "e4e82db7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "<Figure size 1500x1500 with 2 Axes>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAABL8AAASuCAYAAAAj9oupAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzddVhU6fvH8TeKhIEYiCIGoYCNCqJigN3d4tqJrt26dscqJnbn2pi7aweKgYUiihISotgCKvz+QGcdMb7DTz0D3K/rOtflnBg+cztx5pnneY5OQkJCAkIIIYQQQgghhBBCpELplA4ghBBCCCGEEEIIIcTPIo1fQgghhBBCCCGEECLVksYvIYQQQgghhBBCCJFqSeOXEEIIIYQQQgghhEi1pPFLCCGEEEIIIYQQQqRa0vglhBBCCCGEEEIIIVItafwSQgghhBBCCCGEEKmWNH4JIYQQQgghhBBCiFRLV+kAQgghhBBCCCGEECnJ26h7Skf46TLktFQ6wg8jPb+EEEIIIYQQQgghRKoljV9CCCGEEEIIIYQQItWSxi8hhBBCCCGEEEIIkWpJ45cQQgghhBBCCCGESLVkwnshhBBCCCGEEEIITcS/VzqB0IA0fqVyz5+/UDpCimJklEVqlgxSN81JzZJH6qY5qVnySN00JzVLHqmb5qRmyWNklIUXz18qHSNFyWKUWWqWDFmMMisdQYgkdBISEhKUDiF+ngx6eZWOkKK8jQuVmiWD1E1zb+NC0dM3VzpGihMXG4K+QT6lY6QosTHBUrNkiI0JxsAgv9IxUpSYmCAMDQsoHSPFefPmAUaZUs+l5H+F56/uSc2S4fmre2TPUkjpGCnKkxd3yGlUWOkYKU7Uc3+lI/wSbyPvKB3hp8uQK/W8Z8icX0IIIYQQQgghhBAi1ZJhj0IIIYQQQgghhBCaSIhXOoHQgPT8EkIIIYQQQgghhBCpljR+CSGEEEIIIYQQQohUSxq/hBBCCCGEEEIIIUSqJY1fX7Fjxw5sbGzYtm3bD7vPs2fPcvfu3a9ud3Nzw8PD44f9PaWZmeVm82ZPIsKvcz/Qh5kz/kBfXx+Aco6lOXF8N9FP/Ll+/QSdO7VROK32+FbdPjIyysL9QB86uLVUKKV2+VbN8uUzY8/utTx7GoDfzVM0b95A4bTaw8qqIPv2refJ49sE3PFm4MCeqm329sU5cXw3Tx7f5uSJPTg6llYwqfZo2LA2sTHBasumjUsAKFmyKCdP7CH6iT+nT+3D3r64wmm1x9fqdvjw1iTrY2OCWbp0ltKRFeXm1pyYmKAky+vX9wGoXdsVb+8DREX5ceHCIerVq6FsYC1ibp6Hv/5aSUTEdW7dOoW7e2fVtmrVKuHtfYBHj27i5bWBQoXS9tUC9fT0OHfhAM6VyqnWVateidPnvIiIusnpc17UqFlF7ZjOXdrie/0YIWG+7Ni1ioIF096VZDWt27WbJ3j+6l6SZdjwvkrEV4Senh6nvb2o6OyoWudUoSz/nthJcLgvx0/voUrVCmrHBAZf5MmLO2pLpkwZf3V0ReTOY8rKtfO58+A8126dZOKUEejr6wGQv4A5f+1ezYOwK5w+v5+qrhXVjj12eg9Rz/3VFlu71HO1PiH+P2TC+6/w8vIif/787N69mxYtWvyQ++zYsSNr167Fysrqi9s9PDzIkCHDD/lb2mDLZk+io5/i4tqUbNmMWeY5h/fv3zP3z6Xs3buOpZ7r6NylP6VLF2f5sjmEhUdy4MA/SsdW3NfqNnzEJNU+U6eMIm/ePAqm1C5fq9mo0VPZs3st9wKDcHCsRZXK5Vmzej5+fv7cuHFb6diK0tHRYfeuNfj4+OJYrjbW1hasW7uAh6Hh/PPvSQ4d3Mz2v/bRrftAatVy4cD+jZSydyU4+KHS0RVlZ1eIffuO0LvPMNW6mJhYMmY0ZPeuNWzevItu3QbSrVt7du1cjV0RZ16/fqNgYu3wtbqlS5cOPb3/PvccHe
zZsGERS5euVSKm1ti2bS+HDx9X3c6QQZeDBzezf/8/FCtmy5YtSxkxYgoHD/5LjRpV2LRpMRUrNuDaNT8FU2uH9esXERQUQoUK9bGzK8Tq1fMJCgrlzp177Ny5ipkzF7F58y46dmzFwYObKFHChVevXisd+5fT19djxap5FClio1pnaVmADZuWMHH8bLz2HaFeg5ps3LyEMqWqExQUSrXqlZgwaRhdOw0gICCQP8YPYcPmJVR0qqfgI/m1klO3qpUbkz79f/0NGjeuw+ixg9i44S8lHsIvp6+vh+fKudgVKaxalzNndjZtWcqcWYvZs/sQTZvXY/3mxZQrXYuHD8PJk8eUrMZG2Bd35c0nn6Fp5bW6at18nj59Tv1abcmWzZj5i6bw/v17xo2ZwdqNi/C7eZvqVZpRt3511mxYSAWHOoSGhJEuXTqsrAvSoHZb7gbcV93f48fRyj2Y1C5eJrxPSaTx6wseP37M2bNnmTJlCsOHDyc4OJh8+X7+L1vGxsY//W/8KjY2Vjg5lSGveUkiI6MAGD9hJtOnjeHevQeERzxizJhpAAQEBFK1SkXatG6c5hu/vlW3j41fFSs44OLiTFhYhJJRtca3anbqtDfm5mZUrtKYFy9e4u9/l1q1XSjvVDbNN36Zmprg63sD974jePnyFQEBgRw9epoKFR3IY2bK4yfRuLuPID4+ntu371K9ehV6dO/A6A+v27TK1taaGzdvExHxSG39b7+1IiYmRvU6HTR4HLVru9KsWX3WrftxPYhTqq/V7VPp0qVjwoShzJ6zhEuXrv7CdNonJiaWmJj/ajVkSB90dHQYPXoaY8YM5NixMyxatAqApUvXUr9+DZo3r5/mG7+MjY0oV640vXsP4+7d+9y9e58jR47j4lIRF5eKnDt3kYkT5wAwatRU6tSpRuvWjVmxYqPCyX8tG1trVqz6Ex0dHbX1Znlzs3rVZhYuWAnAQo8VDB3ahzJlSxIUFErNWlX5959THDz4LwBTp8zj3PkDZM+RjSdp4Mt1cuv2OOqJal8joywMG9GXUSOmpIkfk2xsrPFcOYfPSka58mV49/4dHvOWAzB31hL69O1MWYdS7Nl9kMI2VoSFRfDgfrACqZVlXcgSB0d77KzK8+jRYwCmTZ7H+EnD+efICQpa5KNujVa8fv2GeXPuUrlKedq5NWfGVA8KFDRHTy8Dly5eJTY2TuFHIoT2kWGPX3Dw4EGyZMlCw4YNyZUrF7t371Ztc3V1ZceOHarb3t7e2Nj89+vP2rVrcXFxoXjx4jRt2hQfHx/VcQAdOnTAw8ODHTt20Lp1a/r06UOZMmXYs2eP2rDHuLg4pk6dSqVKlShatCiurq5s2bLlVzz8HyI8/BF167VVNUZ8lDWrEYcOH6Vb14FJjjEyMvpV8bTWt+oGid3GFy+ZSb/fR8qH2gffqlmVyhX49+gpXrx4qVrfvHkXlq/Y8Ktjap3w8Ejate/Ny5evAChfvizOzuU4cfwsFhb5uXzpGvGf/Jp1/Zof5cqVUSqu1rCzLcSdO/eSrC/naM/pMxfU1p0564NTORkuCl+v26c6dGhBtmzGzJq16BelShmyZcvKoEE9GT16GnFxcaxfv53Ro5M2QhsZZVEgnXZ58yaWV69e06FDS3R1dSlUyBInpzJcuXKDggXzc+HCFbX9b9y4Rbk0+Bp1di7HyRPnqO7STG39qZPeDB86EQBdXV3cOrRET1+Piz6+ADx5/JSKzg4UKmxJ+vTpadO2CffvB/M0+tkvfwxKSG7dPtXv966Ehz9ifRr5UaSCsyOnTpyjVjX1aTqePHlKjhzZqd+wJgB161cnc+ZM3LyZ+MOkja21Ws+ltCQy8hEtmnRWNXx9lMUoM2UcSnHN96Zaj3Lvcxcp61gKSGxsDA0Jk+8IQnyFNH59gZeXF1WrViVdunS4urqya9cuEhISvnvczZs3mTFjBn/88QcHDhygbNmy9O/fn/j4eLZv3w4kDm3s3Dlx/onLly
9jbW3N1q1bcXZ2VrsvT09Pjh07hoeHBwcPHqRx48ZMnDiRqKioJH9XGz179pwjR/4bsqGjo0PvXp349+gpHjwIwfv
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"f, ax = plt.subplots(figsize=(15, 15))\n",
"g = sns.heatmap(year_pivot, annot=True, fmt=\"d\", linewidths=.5, ax=ax)\n",
"g.set(xlabel=\"\", ylabel=\"\")\n",
"for i in range(year_pivot.shape[0]+1):\n",
" ax.axhline(i, color='white', lw=10)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"id": "78bb0b4e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "Publication Year 2011 2012 2013 2014 2015 \nCountry \nAustria 1.962533 1.801802 1.557819 1.736420 1.865672 \\\nBelgium 3.033006 2.852853 2.396645 2.894034 2.649254 \nBulgaria 0.356824 0.375375 0.479329 0.400712 0.261194 \nCroatia 0.089206 0.150150 0.359497 0.356189 0.373134 \nCyprus 0.178412 0.075075 0.299581 0.222618 0.186567 \nCzech Republic 1.159679 1.126126 0.958658 0.934996 0.746269 \nDenmark 3.122212 2.477477 2.396645 2.626892 2.537313 \nEstonia 0.267618 0.225225 0.419413 0.445236 0.447761 \nFinland 2.765388 2.627628 2.636309 3.650935 3.731343 \nFrance 10.437110 9.759760 10.425404 10.284951 10.037313 \nGermany 10.972346 12.912913 11.503895 12.154942 11.567164 \nGreece 1.338091 1.351351 1.138406 1.424755 1.305970 \nHungary 0.981267 0.825826 1.258238 0.712378 0.746269 \nIreland 1.159679 1.201201 1.318155 1.380232 1.007463 \nItaly 4.549509 5.255255 5.032954 5.164737 6.641791 \nLatvia 0.000000 0.000000 0.059916 0.000000 0.037313 \nLithuania 0.089206 0.150150 0.599161 0.178094 0.149254 \nLuxembourg 0.178412 0.225225 0.179748 0.044524 0.298507 \nMalta 0.089206 0.000000 0.000000 0.000000 0.037313 \nNetherlands 6.422837 4.804805 4.613541 4.585931 5.186567 \nNorway 2.676182 3.153153 3.594967 3.383793 2.500000 \nPoland 1.516503 2.327327 2.216896 2.537845 2.723881 \nPortugal 1.427297 1.726727 2.097064 1.825467 1.679104 \nRomania 0.624442 1.126126 0.778910 0.712378 0.932836 \nSlovakia 0.802855 0.450450 0.359497 0.445236 0.447761 \nSlovenia 0.624442 0.525526 0.599161 0.534283 0.634328 \nSpain 4.460303 3.678679 4.134212 4.986643 5.149254 \nSweden 3.033006 3.753754 3.535051 3.695459 4.216418 \nSwitzerland 3.300624 3.753754 3.235470 3.294746 2.761194 \nUnited Kingdom 32.381802 31.306306 31.815458 29.385574 29.141791 \n\nPublication Year 2016 2017 2018 2019 2020 \nCountry \nAustria 1.699970 1.689744 1.552958 1.816267 1.543488 \\\nBelgium 2.415747 2.112180 2.320712 2.355883 2.399730 \nBulgaria 0.566657 0.492842 0.314081 0.131614 0.281658 \nCroatia 0.208768 
0.234687 0.331530 0.355357 0.326724 \nCyprus 0.149120 0.187749 0.122143 0.197420 0.315457 \nCzech Republic 1.073665 0.868341 0.977142 0.842327 0.912573 \nDenmark 2.206979 2.370336 3.402548 3.079758 2.760252 \nEstonia 0.298240 0.352030 0.261734 0.210582 0.428121 \nFinland 3.728005 2.957052 3.454894 3.171887 2.884182 \nFrance 9.692812 8.167097 8.567440 8.528560 7.785038 \nGermany 10.885774 10.701713 10.539173 10.542248 10.218567 \nGreece 1.491202 1.103027 1.413366 1.500395 1.374493 \nHungary 1.133313 0.797935 0.820101 0.802843 0.687247 \nIreland 1.342082 1.548932 1.256325 1.105554 1.306895 \nItaly 5.577095 5.796761 5.670913 5.804159 6.433078 \nLatvia 0.238592 0.234687 0.261734 0.131614 0.101397 \nLithuania 0.387712 0.281624 0.401326 0.500132 0.405588 \nLuxembourg 0.268416 0.305093 0.261734 0.236904 0.247859 \nMalta 0.029824 0.000000 0.000000 0.078968 0.022533 \nNetherlands 4.950790 5.163107 5
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>Publication Year</th>\n <th>2011</th>\n <th>2012</th>\n <th>2013</th>\n <th>2014</th>\n <th>2015</th>\n <th>2016</th>\n <th>2017</th>\n <th>2018</th>\n <th>2019</th>\n <th>2020</th>\n <th>2021</th>\n <th>2022</th>\n </tr>\n <tr>\n <th>Country</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>Austria</th>\n <td>1.962533</td>\n <td>1.801802</td>\n <td>1.557819</td>\n <td>1.736420</td>\n <td>1.865672</td>\n <td>1.699970</td>\n <td>1.689744</td>\n <td>1.552958</td>\n <td>1.816267</td>\n <td>1.543488</td>\n <td>1.712804</td>\n <td>1.623248</td>\n </tr>\n <tr>\n <th>Belgium</th>\n <td>3.033006</td>\n <td>2.852853</td>\n <td>2.396645</td>\n <td>2.894034</td>\n <td>2.649254</td>\n <td>2.415747</td>\n <td>2.112180</td>\n <td>2.320712</td>\n <td>2.355883</td>\n <td>2.399730</td>\n <td>2.240533</td>\n <td>2.312139</td>\n </tr>\n <tr>\n <th>Bulgaria</th>\n <td>0.356824</td>\n <td>0.375375</td>\n <td>0.479329</td>\n <td>0.400712</td>\n <td>0.261194</td>\n <td>0.566657</td>\n <td>0.492842</td>\n <td>0.314081</td>\n <td>0.131614</td>\n <td>0.281658</td>\n <td>0.296269</td>\n <td>0.150447</td>\n </tr>\n <tr>\n <th>Croatia</th>\n <td>0.089206</td>\n <td>0.150150</td>\n <td>0.359497</td>\n <td>0.356189</td>\n <td>0.373134</td>\n <td>0.208768</td>\n <td>0.234687</td>\n <td>0.331530</td>\n <td>0.355357</td>\n <td>0.326724</td>\n <td>0.305527</td>\n <td>0.277140</td>\n </tr>\n <tr>\n <th>Cyprus</th>\n <td>0.178412</td>\n <td>0.075075</td>\n <td>0.299581</td>\n <td>0.222618</td>\n <td>0.186567</td>\n <td>0.149120</td>\n 
<td>0.187749</td>\n <td>0.122143</td>\n <td>0.197420</td>\n <td>0.315457</td>\n <td>0.333302</td>\n <td>0.340486</td>\n </tr>\n <tr>\n <th>Czech Republic</th>\n <td>1.159679</td>\n <td>1.126126</td>\n <td>0.958658</td>\n <td>0.934996</td>\n <td>0.746269</td>\n <td>1.073665</td>\n <td>0.868341</td>\n <td>0.977142</td>\n <td>0.842327</td>\n <td>0.912573</td>\n <td>0.861031</td>\n <td>0.973949</td>\n </tr>\n <tr>\n <th>Denmark</th>\n <td>3.122212</td>\n <td>2.477477</td>\n <td>2.396645</td>\n <td>2.626892</td>\n <td>2.537313</td>\n <td>2.206979</td>\n <td>2.370336</td>\n <td>3.402548</td>\n <td>3.079758</td>\n <td>2.760252</td>\n <td>2.712712</td>\n <td>2.715971</td>\n </tr>\n <tr>\n <th>Estonia</th>\n <td>0.267618</td>\n <td>0.225225</td>\n <td>0.419413</td>\n <td>0.445236</td>\n <td>0.447761</td>\n <td>0.298240</td>\n <td>0.352030</td>\n <td>0.261734</td>\n <td>0.210582</td>\n <td>0.428121</td>\n <td>0.416628</td>\n <td>0.308813</td>\n </tr>\n <tr>\n <th>Finland</th>\n <td>2.765388</td>\n <td>2.627628</td>\n <td>2.636309</td>\n <td>3.650935</td>\n <td>3.731343</td>\n <td>3.728005</td>\n <td>2.957052</td>\n <td>3.454894</td>\n <td>3.171887</td>\n <td>2.884182</td>\n <td>2.675678</td>\n <td>3.008948</td>\n </tr>\n <tr>\n <th>France</th>\n <td>10.437110</
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"year_percent_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique', normalize='columns').fillna(0)*100\n",
"year_percent_pivot"
]
},
{
"cell_type": "code",
"execution_count": 96,
"id": "42dc8be7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "<Figure size 1500x1500 with 1 Axes>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAABQsAAASuCAYAAABlZX8qAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1QVRwPG4R+IoEaNsRsroCJWsNdEsWOJmphoEpOoMfbeS+yIvWHvxpqoiL33rth77xVs2DB+ge8P4AbkUgPZIO9zDidh7+zu7H2dmWXu7l6LwMDAQERERERERERERCTRszS6AiIiIiIiIiIiIvLfoMlCERERERERERERATRZKCIiIiIiIiIiIsE0WSgiIiIiIiIiIiKAJgtFREREREREREQkmCYLRUREREREREREBNBkoYiIiIiIiIiIiATTZKGIiIiIiIiIiIgAmiwUERERERERERGRYFZGV0D+fS/8XpI7e3Gjq/FBuHLbm1SpU8ZonRd+L7HLXiyeapT4XLt9NFYZ5MzqHE81Snxu3j0eqwxssxWNpxolPtfvHFNfZLDY9kVqB3Entu1AGcSd2GagMTnuxHZM1ngQN3ReajydlxpPY4HxYtMO3qcrC0VERERERERERATQZKGIiIiIiIiIiIgE02ShiIiIiIiIiIiIAJosFBERERERERERkWCaLBQRERERERERERFAk4UiIiIiIiIiIiISTJOFIiIiIiIiIiIiAmiyUERERERERERERIJpslBEREREREREREQATRaKiIiIiIiIiIhIME0WioiIiIiIiIiICKDJwgh5enri4ODAsmXL4mybBw4c4OrVqxG+3qRJEzw8POJsf0bKnCUjs36bwIUbBzlxfheD3HphY2MdYfmChR3ZsO13rt8/zsYdyyjsVCDCsiVKOXPoxGbOXt3P9z82DPPazPnjqVmrcpwdx4fA2jopuw+soWz5kpGWc61dhX2H13Pj7jHWblxM4SL5Iyxbq05Vzlzcw4mzO6lWo1KY1zZu+4NChR3jpO4fCmtra/YdWke5KDJwzJ+X9ZuXcPfRafYeXEv5CqUiLFu7bjXOXd7H6fO7qV7TJcxrW7Yvp1DhiPNLjKytk7Ln4NpIM1i1bgG+fpfC/UyYPMxs+Vp1qnH20l5OnttF9ffawabty9QO3hPdvqiiSzl27F3FjbvHWL5qLva5bSMsW7JUUQ6f2ML5qwfCjQez50/QePAetQPjKQNjaTw2ns5LjRfddrBw6VSevLgc5uf99zeE2kHMaCwwnsaDqFkZXYH/qnXr1pEjRw5WrVpFw4YNo14hGn766Sd+++037O3tzb7u4eFB0qRJ42RfRpv920SePXvOFzW+J80nHzN+sht/BfzF4F9HhSubIkVyFi+bzopla+nQujc/NmvEoj+mUcqpGq9fvwlX3n30ryyY9wcnj59hwe/TWL9mC0+ePCOfYx5y5srOhnXb/o1DTBBsbKyZNnsMjvnzRlrOIV9ups0aQ7dO/Tl86Bit2vzE4j+mU8KpKm/e+Icpa2lpyZgJQ+jb0w1LS0smTnEnn11pAKpU/YyHD3w4fep8vB1TQmNjY82MOeOizCBV6pR4rp7HxvXbaNuqJ980qseCxVMo4VwVX98nYcpaWloybuIQevdww9LSgklT3cmTK2jgqlLtcx48fMTpU+fi7ZgSGhsba6bPHhtlBj9+3w7rUH1wseJFmDV/AnNnLQ5X1tLSkrETh9C351AsLC2ZOHU4DrZ/Z6B2EFZM+qLFf0xnwtgZLP9jDd/98BUr18ynTPEavHr1Olz54cHjwYnjp1n0+3TWr9nKkydPg8eDbBoPQlE7MJ4yMJbGY+PpvNR40W0HAA4OufmleVd279xvWvbsmV+4cmoHMaOxwHgaD6JHVxaa8fjxYw4cOEDbtm3x9vbm9u3b/8p+06RJw0cfffSv7Cs+5c5jS/GSTnRq04eLF65w6MBRRrp50OCr2mbLf9GgJv
7+bxnUbySXL12jX69hvHz5ijr1akSwfTvWr9nKnl0H8XvuR85c2QHo0qM140ZOibfjSmjyOtizcesf5MqVI8qyFV3KcfHCFf5Yuoob128zZNBYMmXOSN58ucOVTZfuE9Kl+4TVXhtZtXI96dJ9Qvr0aQHo2rMto0dMivNjSagcHHKzeftybG2zR1m28bcNePXyNV07DeD6tVsMHzaRq1dv4FS0ULiyQRmkZdXKDXh5biBdurSmDHr0asdId2UQIq+DPRu3LSOXbdTt4NnT5zx65MujR774+j6h74AueEyYyYnjZ8KVDWkHq1ZuZJVn2HbQrWdbRg3/MK4Sjwsx6YuaNm/MkUPHGTFsIlevXGdw/1H4+b3gy6/rmC2fO48d69ZsCTUeZAOga482jNF4YKJ2YDxlYCyNx8bTeanxYtIOrK2tyZkrG8ePnTL1R48e+fLnn3+GK6t2EH0aC4yn8SD6NFloxsaNG0mVKhV169YlY8aMrFq1yvSai4sLnp6ept8PHTqEg4OD6ffffvuNSpUqUahQIRo0aIC3t7dpPYAffvgBDw8PPD09adSoEW3btqVYsWKsXr06zG3If/75J+7u7lSoUIECBQrg4uLC77///m8c/j/26JEv3zT4GR+fx2GWp06d0mz5YiWcOHTgaJhlhw8ep3hJJ7Pl7965T2Gn/GTL/ikfp/mYe/cektfBnlx2OXUVSShly5dk755DuFb9JsqyT588wyFfbkqWKoqFhQXfftcAv+cvuHH9Vriyjx8/5dWr1xQukp8iTgV59fIVT548w6VyBXwePdanVqGULV+SvbsPUr3y11GWLVehFBvWbyUgIMC0rErFL9m6eVe4siEZFHEqgJNzAV4GZ1C5SgUePfJNcJ9axady5Uuyb89BalaJOoPQGn/XgE8++RiPcTPNvm5qB05B7SAkA5cqFfB55Kt2EEpM+qKcubJz9OipMMvOn7tEiRJOZstHNB7Y2uXQeBCK2oHxlIGxNB4bT+elxotJO8idx5bAwEBuXI/6ohm1g+jTWGA8jQfRp9uQzVi3bh0VK1bE0tISFxcXvLy8aNu2LRYWFpGud+7cOUaOHMmkSZPInTs3v/32G506dWL37t0sX76cMmXK4OHhQbly5di0aRPHjx+nVatWdOnShU8++STM8xFnzJjBzp078fDwIF26dKxcuZIhQ4ZQuXJl0qdPH99vwT/i9/wFO7ftNf1uYWFBs1++Y8+ug2bLZ8qUgYsXLodZ5uPjSz7HPGbLuw0ay+QZI7G2TsqEMdN5+OARg9x6Mm7U1Lg7iA/AvNlLol3Wy3M9NVxdWLd5Cf/73/8ICAjg269b8tzMrQYBAQEMGTCaNRsXERAQSO8eQwgICKBrzzb06jY4Lg8hwZs7O/xtAhHJlSs7x46eZNzEIdRwrcztW3f5tY87hw4eC1c2ICCAQf1HsW7TYgICAunVfTABAQF079WO7l0GxeUhJHhzY9AOQuvQuQXTpsw3e+srBGUweMBo1m4MyqB396B20K1nW3p2VQahxaQv8nnkS5YsmcIs+zRrZp49fW62/NBBY5gyYxTW1kkZHzweDHbryViNB2GoHRhPGRhL47HxdF5qvJi0AwcHe/z8XjBt5mjKVSjJ3Tv3GTFsIlu37A5XVu0g+jQWGE/jQfTpysL33L9/n2PHjlGlShUAqlWrxu3btzl69GgUa8Ldu3exsLDg008/JVu2bHTq1IlRo0YREBBA2rRBl6B+/PHHpluNLSwsaN26Nfb29qbXQ+TLlw83NzecnJzInj07rVq14t27d9y4cSNuD/hf0H9IdwoVyY/7kPFmX0+eIhlv374Ls+zPt39G+IUo69dswdG2NI52ZRg5zIPceWyxz5OLzRt2MGLsAI6d3YHHtOGRfqGKhPVJ2k/ImDEDPbsOonrlr/lj6SomTnY3XTr9vtkzF5EnVykcbEuxYN4fVHQpx2Pfp1y9coNZ88Zz/MwOBrv1+pePImH76KMUdOrcko
cPfPi6wc/s23uY5V5zyZo1s9nys2YsxD5HCXLnLMH8ub9TyaU8vr5PuHrlOnPmT+DUuV0Mde/9Lx/Fh6F8hVJk+TQ
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"f, ax = plt.subplots(figsize=(15, 15))\n",
"g = sns.heatmap(year_percent_pivot, annot=True, fmt='.1f', linewidths=(.5), ax=ax, cbar=False)\n",
"for t in ax.texts: t.set_text(t.get_text() + \" %\")\n",
"g.set(xlabel=\"\", ylabel=\"\")\n",
"for i in range(year_percent_pivot.shape[1]+1):\n",
" ax.axvline(i, color='white', lw=10)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"id": "e7b754ea",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 97,
"id": "48f2898f",
"metadata": {},
"outputs": [],
"source": [
"# Institutional collab"
]
},
{
"cell_type": "code",
"execution_count": 98,
"id": "3a9538e1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": " UT (Unique WOS ID) Country Institution_harm \n15207 WOS:000389385100008 China Natl Univ Def Technol \\\n31500 WOS:000474277900004 China Xian Shiyou Univ \n139850 WOS:000867238100001 Italy Univ Trento \n83586 WOS:000365372900001 Netherlands Delft Univ Technol \n1377 WOS:000577327400001 Belgium Flanders Make \n... ... ... ... \n55889 WOS:000661354600002 China Fudan Univ \n7735 WOS:000337842700006 China Natl Univ Def Technol \n99512 WOS:000453778900009 United Kingdom Univ London \n31184 WOS:000472596200056 China Guangdong Univ Technol \n149165 WOS:000549676600001 Germany German Res Ctr Environm Hlth \n\n Country_Type Eurovoc_Class ISO3 \n15207 China China CHN \\\n31500 China China CHN \n139850 EU Southern Europe ITA \n83586 EU Western Europe NLD \n1377 EU Western Europe BEL \n... ... ... ... \n55889 China China CHN \n7735 China China CHN \n99512 Non-EU associate Western Europe GBR \n31184 China China CHN \n149165 EU Western Europe DEU \n\n Institution_harm_label \n15207 Natl Univ Def Technol (CHN) \n31500 Xian Shiyou Univ (CHN) \n139850 Univ Trento (ITA) \n83586 Delft Univ Technol (NLD) \n1377 Flanders Make (BEL) \n... ... \n55889 Fudan Univ (CHN) \n7735 Natl Univ Def Technol (CHN) \n99512 Univ London (GBR) \n31184 Guangdong Univ Technol (CHN) \n149165 German Res Ctr Environm Hlth (DEU) \n\n[100 rows x 7 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>UT (Unique WOS ID)</th>\n <th>Country</th>\n <th>Institution_harm</th>\n <th>Country_Type</th>\n <th>Eurovoc_Class</th>\n <th>ISO3</th>\n <th>Institution_harm_label</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>15207</th>\n <td>WOS:000389385100008</td>\n <td>China</td>\n <td>Natl Univ Def Technol</td>\n <td>China</td>\n <td>China</td>\n <td>CHN</td>\n <td>Natl Univ Def Technol (CHN)</td>\n </tr>\n <tr>\n <th>31500</th>\n <td>WOS:000474277900004</td>\n <td>China</td>\n <td>Xian Shiyou Univ</td>\n <td>China</td>\n <td>China</td>\n <td>CHN</td>\n <td>Xian Shiyou Univ (CHN)</td>\n </tr>\n <tr>\n <th>139850</th>\n <td>WOS:000867238100001</td>\n <td>Italy</td>\n <td>Univ Trento</td>\n <td>EU</td>\n <td>Southern Europe</td>\n <td>ITA</td>\n <td>Univ Trento (ITA)</td>\n </tr>\n <tr>\n <th>83586</th>\n <td>WOS:000365372900001</td>\n <td>Netherlands</td>\n <td>Delft Univ Technol</td>\n <td>EU</td>\n <td>Western Europe</td>\n <td>NLD</td>\n <td>Delft Univ Technol (NLD)</td>\n </tr>\n <tr>\n <th>1377</th>\n <td>WOS:000577327400001</td>\n <td>Belgium</td>\n <td>Flanders Make</td>\n <td>EU</td>\n <td>Western Europe</td>\n <td>BEL</td>\n <td>Flanders Make (BEL)</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>55889</th>\n <td>WOS:000661354600002</td>\n <td>China</td>\n <td>Fudan Univ</td>\n <td>China</td>\n <td>China</td>\n <td>CHN</td>\n <td>Fudan Univ (CHN)</td>\n </tr>\n <tr>\n <th>7735</th>\n <td>WOS:000337842700006</td>\n <td>China</td>\n <td>Natl Univ Def Technol</td>\n <td>China</td>\n <td>China</td>\n <td>CHN</td>\n <td>Natl Univ Def 
Technol (CHN)</td>\n </tr>\n <tr>\n <th>99512</th>\n <td>WOS:000453778900009</td>\n <td>United Kingdom</td>\n <td>Univ London</td>\n <td>Non-EU associate</td>\n <td>Western Europe</td>\n <td>GBR</td>\n <td>Univ London (GBR)</td>\n </tr>\n <tr>\n <th>31184</th>\n <td>WOS:000472596200056</td>\n <td>China</td>\n <td>Guangdong Univ Technol</td>\n <td>China</td>\n <td>China</td>\n <td>CHN</td>\n <td>Guangdong Univ Technol (CHN)</td>\n </tr>\n <tr>\n <th>149165</th>\n <td>WOS:000549676600001</td>\n <td>Germany</td>\n <td>German Res Ctr Environm Hlth</td>\n <td>EU</td>\n <td>Western Europe</td>\n <td>DEU</td>\n <td>German Res Ctr Environm Hlth (DEU)</td>\n </tr>\n </tbody>\n</table>\n<p>100 rows × 7 columns</p>\n</div>"
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wos_univ_locations = wos_univ.merge(wos_country_types, on=\"Country\")\n",
"wos_univ_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]].drop_duplicates()\n",
"wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n",
"wos_univ_collabs[\"Institution_harm_label\"] = wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n",
"wos_univ_collabs.sample(100)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"id": "6bb0e68d",
"metadata": {},
"outputs": [],
"source": [
"color_discrete_map= {'China': '#EF553B',\n",
" 'EU': '#636EFA',\n",
" 'Non-EU associate': '#00CC96'}"
]
},
{
"cell_type": "code",
"execution_count": 180,
"id": "df8701eb",
"metadata": {},
"outputs": [],
"source": [
"TOPN = 25\n",
"\n",
"\n",
"wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n",
"wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n",
"\n",
"wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"EU\"]\n",
"\n",
"data_eu = (wos_univ_eu.groupby([\"Country\",\"Institution_harm_label\",\"Country_Type\"], as_index=False)[record_col].nunique()\n",
" .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by=\"Country_Type\")\n",
"\n",
"data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm_label\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n",
" .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n",
"\n",
"data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n",
" .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n",
"\n",
"\n",
"for data,c_scope, y_lab, col_by, pat in zip([data_eu,data_eu_strict,data_ch],\n",
" [\"European countries in scope\",\"EU-28 only\",\"China\"],\n",
" [\"Institution_harm_label\",\"Institution_harm_label\",\"Institution_harm\"],\n",
" [\"Country\",\"Eurovoc_Class\",\"Country_Type\"],\n",
" [\"Country_Type\",None,None]):\n",
" fig = px.bar(data, x=record_col, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,pattern_shape=pat,\n",
" labels={\n",
" record_col: 'Number of co-publications',\n",
" \"Institution_harm\": \"Institution\",\n",
" \"Institution_harm_label\": \"Institution\",\n",
" \"Country_Type\":\"Country type\",\n",
" \"Eurovoc_Class\":\"Region\"\n",
" },\n",
" title=f\"Most visible institutions (top {TOPN} within {c_scope})\", template='plotly')\n",
" fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n",
" width=1000, height=1000,)\n",
" fig.update_traces(hovertemplate='%{x:d}')\n",
" fig.add_shape(\n",
" # Rectangle with reference to the plot\n",
" type=\"rect\",\n",
" xref=\"paper\",\n",
" yref=\"paper\",\n",
" x0=0,\n",
" y0=0,\n",
" x1=1.0,\n",
" y1=1.0,\n",
" line=dict(\n",
" color=\"black\",\n",
" width=0.5,\n",
" )\n",
" )\n",
" fig.update_yaxes(\n",
" showgrid=True,\n",
" ticks=\"outside\")\n",
" fig.update_xaxes(\n",
" showgrid=True,\n",
" ticks=\"outside\")\n",
" # fig.show(config= dict(displayModeBar = False))\n",
" fig.write_html(f\"plot_html/overall_inst_collab_bar_{c_scope}.html\",config= dict(displayModeBar = False, responsive = True))"
]
},
{
"cell_type": "code",
"execution_count": 101,
"id": "31a0769d",
"metadata": {},
"outputs": [],
"source": [
"wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n",
"wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n",
"\n",
"wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"id": "606e1af0",
"metadata": {},
"outputs": [],
"source": [
"fig = px.parallel_categories(wos_univ_dipol[[\"Country_eu\",\"Domain_English\",\"Country_ch\"]])"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "ea0951e9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "Index(['Country', 'Institution_harm', 'Country_Type', 'UT (Unique WOS ID)'], dtype='object')"
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_ch.columns"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "dd4210b3",
"metadata": {},
"outputs": [],
"source": [
"subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n",
" (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
"\n",
"fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Domain_English\",\"Country_ch\"]])\n",
"# fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 177,
"id": "2c5d1d94",
"metadata": {},
"outputs": [],
"source": [
"subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n",
" (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
"\n",
"fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n",
"# fig.show()\n",
"sub_df =wos_univ_dipol[subfilter]\n",
"\n",
"inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n",
" values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
"\n",
"eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n",
"ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n",
"\n",
"inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n",
"\n",
"mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n",
"data = np.where(mask,inst_co_occur,inst_co_occur)\n",
"\n",
"fig = px.imshow(data,\n",
" labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n",
" x=list(inst_co_occur.columns),\n",
" y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within Europe)\"\n",
" )\n",
"fig.update_layout(title_x=0.5,\n",
" width=1000, height=1000,\n",
" xaxis_showgrid=False,\n",
" yaxis_showgrid=False,\n",
" yaxis_autorange='reversed',\n",
" template='plotly_white',\n",
" coloraxis_colorbar=dict(\n",
" thicknessmode=\"pixels\", thickness=25,\n",
" ticks=\"outside\", ticksuffix=\" \",\n",
" dtick=20,outlinewidth=1,\n",
" ))\n",
"fig.update_xaxes(tickangle= -45)\n",
"fig.update_yaxes(\n",
" ticks=\"outside\")\n",
"fig.update_xaxes(\n",
" ticks=\"outside\")\n",
"\n",
"fig.write_html(f\"plot_html/overall_inst_collab_europe.html\",config= dict(displayModeBar = False, responsive = True))"
]
},
{
"cell_type": "code",
"execution_count": 176,
"id": "7bd7d149",
"metadata": {},
"outputs": [],
"source": [
"subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu_strict[\"Institution_harm_label\"]))&\n",
" (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n",
"\n",
"fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n",
"# fig.show()\n",
"sub_df =wos_univ_dipol[subfilter]\n",
"\n",
"inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n",
" values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n",
"\n",
"eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n",
"ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n",
"\n",
"inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n",
"\n",
"mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n",
"data = np.where(mask,inst_co_occur,inst_co_occur)\n",
"fig = px.imshow(data,\n",
" labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n",
" x=list(inst_co_occur.columns),\n",
" y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within EU-28)\"\n",
" )\n",
"fig.update_layout(title_x=0.5,\n",
" width=1000, height=1000,\n",
" xaxis_showgrid=False,\n",
" yaxis_showgrid=False,\n",
" yaxis_autorange='reversed',\n",
" template='plotly_white',\n",
" coloraxis_colorbar=dict(\n",
" thicknessmode=\"pixels\", thickness=25,\n",
" ticks=\"outside\", ticksuffix=\" \",\n",
" dtick=20,outlinewidth=1,\n",
" ))\n",
"fig.update_xaxes(tickangle= -45)\n",
"fig.update_yaxes(\n",
" ticks=\"outside\")\n",
"fig.update_xaxes(\n",
" ticks=\"outside\")\n",
"\n",
"# fig.show(config= dict(displayModeBar = False))\n",
"fig.write_html(f\"plot_html/overall_inst_collab_eu28.html\",config= dict(displayModeBar = False, responsive = True))"
]
},
{
"cell_type": "markdown",
"source": [
"# Drilldown to field"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 171,
"outputs": [],
"source": [
"group = ['Publication Year',\"Domain_English\",'Field_English']\n",
"# data = wos.groupby(['Publication Year',\"Domain_English\",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n",
"\n",
"\n",
"data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()\n",
" .fillna(0)\n",
" .stack()\n",
" .reset_index()\n",
" .rename(columns={0:record_col}))\n",
"\n",
"data = data.merge(wos[[\"Domain_English\",'Field_English']].drop_duplicates(),on=\"Field_English\")\n",
"\n",
"data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='Field_English'),\n",
" on='Field_English', suffixes=[None,\"_relative_growth\"])\n",
"data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n",
"\n",
"data = data.sort_values(by =[\"Field_English\",\"Publication Year\"], ascending=[True,True])\n",
"data[record_col+\"_cumsum\"] = (data.groupby('Field_English',as_index=False)[record_col].cumsum())"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 172,
"outputs": [
{
"data": {
"text/plain": " Publication Year Field_English UT (Unique WOS ID) \n0 2011 Agriculture, Fisheries & Forestry 9.0 \\\n1 2012 Agriculture, Fisheries & Forestry 18.0 \n2 2013 Agriculture, Fisheries & Forestry 15.0 \n3 2014 Agriculture, Fisheries & Forestry 26.0 \n4 2015 Agriculture, Fisheries & Forestry 12.0 \n.. ... ... ... \n255 2018 Social Sciences 25.0 \n257 2019 Social Sciences 37.0 \n259 2020 Social Sciences 57.0 \n261 2021 Social Sciences 65.0 \n263 2022 Social Sciences 60.0 \n\n Domain_English Publication Year_relative_growth \n0 Applied Sciences 2011 \\\n1 Applied Sciences 2011 \n2 Applied Sciences 2011 \n3 Applied Sciences 2011 \n4 Applied Sciences 2011 \n.. ... ... \n255 Applied Sciences 2011 \n257 Applied Sciences 2011 \n259 Applied Sciences 2011 \n261 Applied Sciences 2011 \n263 Applied Sciences 2011 \n\n UT (Unique WOS ID)_relative_growth Domain_English_relative_growth \n0 0.000000 Applied Sciences \\\n1 1.000000 Applied Sciences \n2 0.666667 Applied Sciences \n3 1.888889 Applied Sciences \n4 0.333333 Applied Sciences \n.. ... ... \n255 1.272727 Applied Sciences \n257 2.363636 Applied Sciences \n259 4.181818 Applied Sciences \n261 4.909091 Applied Sciences \n263 4.454545 Applied Sciences \n\n UT (Unique WOS ID)_cumsum \n0 9.0 \n1 27.0 \n2 42.0 \n3 68.0 \n4 80.0 \n.. ... \n255 216.0 \n257 290.0 \n259 404.0 \n261 534.0 \n263 654.0 \n\n[84 rows x 8 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Publication Year</th>\n <th>Field_English</th>\n <th>UT (Unique WOS ID)</th>\n <th>Domain_English</th>\n <th>Publication Year_relative_growth</th>\n <th>UT (Unique WOS ID)_relative_growth</th>\n <th>Domain_English_relative_growth</th>\n <th>UT (Unique WOS ID)_cumsum</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>2011</td>\n <td>Agriculture, Fisheries &amp; Forestry</td>\n <td>9.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>0.000000</td>\n <td>Applied Sciences</td>\n <td>9.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2012</td>\n <td>Agriculture, Fisheries &amp; Forestry</td>\n <td>18.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>1.000000</td>\n <td>Applied Sciences</td>\n <td>27.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>2013</td>\n <td>Agriculture, Fisheries &amp; Forestry</td>\n <td>15.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>0.666667</td>\n <td>Applied Sciences</td>\n <td>42.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>2014</td>\n <td>Agriculture, Fisheries &amp; Forestry</td>\n <td>26.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>1.888889</td>\n <td>Applied Sciences</td>\n <td>68.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>2015</td>\n <td>Agriculture, Fisheries &amp; Forestry</td>\n <td>12.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>0.333333</td>\n <td>Applied Sciences</td>\n <td>80.0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>255</th>\n <td>2018</td>\n <td>Social Sciences</td>\n <td>25.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n 
<td>1.272727</td>\n <td>Applied Sciences</td>\n <td>216.0</td>\n </tr>\n <tr>\n <th>257</th>\n <td>2019</td>\n <td>Social Sciences</td>\n <td>37.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>2.363636</td>\n <td>Applied Sciences</td>\n <td>290.0</td>\n </tr>\n <tr>\n <th>259</th>\n <td>2020</td>\n <td>Social Sciences</td>\n <td>57.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>4.181818</td>\n <td>Applied Sciences</td>\n <td>404.0</td>\n </tr>\n <tr>\n <th>261</th>\n <td>2021</td>\n <td>Social Sciences</td>\n <td>65.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>4.909091</td>\n <td>Applied Sciences</td>\n <td>534.0</td>\n </tr>\n <tr>\n <th>263</th>\n <td>2022</td>\n <td>Social Sciences</td>\n <td>60.0</td>\n <td>Applied Sciences</td>\n <td>2011</td>\n <td>4.454545</td>\n <td>Applied Sciences</td>\n <td>654.0</td>\n </tr>\n </tbody>\n</table>\n<p>84 rows × 8 columns</p>\n</div>"
},
"execution_count": 172,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[data[\"Domain_English\"]==\"Applied Sciences\"]"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 168,
"outputs": [
{
"data": {
"text/plain": " Field_English UT (Unique WOS ID)\n5 Information & Communication Technologies 15648\n4 Engineering 9232\n3 Enabling & Strategic Technologies 3940\n0 Agriculture, Fisheries & Forestry 612\n1 Built Environment & Design 537\n2 Economics & Business 15\n6 Social Sciences 1",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Field_English</th>\n <th>UT (Unique WOS ID)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>5</th>\n <td>Information &amp; Communication Technologies</td>\n <td>15648</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Engineering</td>\n <td>9232</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Enabling &amp; Strategic Technologies</td>\n <td>3940</td>\n </tr>\n <tr>\n <th>0</th>\n <td>Agriculture, Fisheries &amp; Forestry</td>\n <td>612</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Built Environment &amp; Design</td>\n <td>537</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Economics &amp; Business</td>\n <td>15</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Social Sciences</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 168,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wos[wos[\"Domain_English\"]==\"Applied Sciences\"].groupby(\"Field_English\", as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 174,
"outputs": [],
"source": [
"import os\n",
"\n",
"\n",
"def _frame_and_grid(fig):\n",
"    \"\"\"Outline the plot area and switch on grid lines and outside ticks.\n",
"\n",
"    Adds a thin black rectangle spanning the whole paper-referenced plot area\n",
"    and updates both axes. The figure is modified in place and returned so\n",
"    the call can be nested.\n",
"    \"\"\"\n",
"    fig.add_shape(\n",
"        type=\"rect\",\n",
"        xref=\"paper\",\n",
"        yref=\"paper\",\n",
"        x0=0,\n",
"        y0=0,\n",
"        x1=1.0,\n",
"        y1=1.0,\n",
"        line=dict(color=\"black\", width=0.5),\n",
"    )\n",
"    fig.update_yaxes(showgrid=True, ticks=\"outside\")\n",
"    fig.update_xaxes(showgrid=True, ticks=\"outside\")\n",
"    return fig\n",
"\n",
"\n",
"# write_html does not create missing folders, so make sure the target exists.\n",
"os.makedirs(\"plot_html\", exist_ok=True)\n",
"\n",
"# Left in the namespace as before; nothing in this cell fills it.\n",
"data_complete = pd.DataFrame()\n",
"\n",
"# One 2x2 dashboard per domain, written to plot_html/<domain>_distr&trends.html.\n",
"for cat in sorted(data[\"Domain_English\"].unique()):\n",
"\n",
"    # Panel (1,1): unique record_col values per field within this domain.\n",
"    bar_data = wos[wos[\"Domain_English\"]==cat].groupby(\"Field_English\", as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)\n",
"\n",
"    fig = px.bar(\n",
"        bar_data.sort_values(by=\"Field_English\"),\n",
"        x=record_col,\n",
"        y=\"Field_English\",\n",
"        color=\"Field_English\",\n",
"        barmode='relative',\n",
"        labels={\n",
"            record_col: 'Number of co-publications',\n",
"        },\n",
"        title=\"Distribution of Domains\",\n",
"        template='plotly',\n",
"    )\n",
"    fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family=\"Montserrat\")\n",
"    fig.update_traces(hovertemplate='%{x:d}')\n",
"    fig.update_layout(yaxis={'categoryorder':'total ascending'})\n",
"    dom_distr = go.Figure(_frame_and_grid(fig))\n",
"\n",
"    # Yearly rows for this domain, ordered once and reused by the three trend panels.\n",
"    sub_data = data[data[\"Domain_English\"]==cat]\n",
"    trend_data = sub_data.sort_values(ascending=[True,True], by=[\"Publication Year\",\"Field_English\"])\n",
"\n",
"    # Panel (2,1): yearly output per field.\n",
"    fig = px.line(\n",
"        trend_data,\n",
"        y=record_col,\n",
"        x=\"Publication Year\",\n",
"        color=\"Field_English\",\n",
"        markers=True,\n",
"        labels={\n",
"            record_col: 'Number of co-publications',\n",
"            group[-1]: \"Domain\",\n",
"        },\n",
"        title=\"Yearly output of co-publications\",\n",
"        template='plotly',\n",
"    )\n",
"    fig.update_traces(hovertemplate='%{y:d}')\n",
"    fig.update_layout(hovermode='x unified')\n",
"    year_output_by_domain = go.Figure(_frame_and_grid(fig))\n",
"\n",
"    # Panel (2,2): relative growth per field. The column holds fractions\n",
"    # (1.0 is rendered as 100%), so the d3 percent format is used for both the\n",
"    # axis ticks and the hover text; it scales by 100 and appends the % sign.\n",
"    fig = px.line(\n",
"        trend_data,\n",
"        y=record_col+\"_relative_growth\",\n",
"        x=\"Publication Year\",\n",
"        color=\"Field_English\",\n",
"        markers=True,\n",
"        labels={\n",
"            record_col+\"_relative_growth\": 'Rel. growth<br>in co-publications (%)',\n",
"            group[-1]: \"Domain\",\n",
"        },\n",
"        title=\"Relative growth in the output of co-publications\",\n",
"        template='plotly',\n",
"    )\n",
"    fig.update_layout(hovermode='x unified',yaxis_tickformat='.0%',font_family=\"Montserrat\")\n",
"    fig.update_traces(hovertemplate='%{y:.0%}')\n",
"    rel_output_by_domain = go.Figure(_frame_and_grid(fig))\n",
"\n",
"    # Panel (1,2): cumulative output per field as a stacked area chart.\n",
"    fig = px.area(\n",
"        trend_data,\n",
"        y=record_col+\"_cumsum\",\n",
"        x=\"Publication Year\",\n",
"        color=\"Field_English\",\n",
"        line_group=\"Field_English\",\n",
"        labels={\n",
"            record_col+\"_cumsum\": 'Cumulative number of co-publications',\n",
"        },\n",
"        title=\"Cumulative number of co-publications\",\n",
"        template='plotly',\n",
"    )\n",
"    fig.update_traces(hovertemplate='%{y:d}')\n",
"    fig.update_layout(hovermode='x unified')\n",
"    cumsum_by_domain = go.Figure(_frame_and_grid(fig))\n",
"\n",
"    figsuper = make_subplots(\n",
"        rows=2,\n",
"        cols=2,\n",
"        subplot_titles=[\n",
"            \"Distribution of domains\",\n",
"            \"Cumulative sum of co-publications\",\n",
"            \"Co-publications per year\",\n",
"            \"Relative growth of co-publications\",\n",
"        ],\n",
"    )\n",
"\n",
"    # Only traces are copied into the grid; the layout of the single figures is\n",
"    # not carried over. Legend entries are kept for the cumulative panel only,\n",
"    # so each field is listed once.\n",
"    panels = [\n",
"        (dom_distr, 1, 1, False),\n",
"        (cumsum_by_domain, 1, 2, True),\n",
"        (year_output_by_domain, 2, 1, False),\n",
"        (rel_output_by_domain, 2, 2, False),\n",
"    ]\n",
"    for panel_fig, panel_row, panel_col, keep_legend in panels:\n",
"        for trace in list(panel_fig.select_traces()):\n",
"            if not keep_legend:\n",
"                trace.showlegend = False\n",
"            figsuper.add_trace(trace, row=panel_row, col=panel_col)\n",
"\n",
"    figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n",
"    figsuper.update_yaxes(\n",
"        showgrid=True, showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"        ticks=\"outside\")\n",
"    figsuper.update_xaxes(\n",
"        showgrid=True, showline=True, linewidth=1, linecolor='black', mirror=True,\n",
"        ticks=\"outside\")\n",
"    figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n",
"    # yaxis4 is the y axis of the relative-growth panel (row 2, col 2).\n",
"    figsuper['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n",
"    figsuper.update_layout(title_text=f\"{cat}\")\n",
"\n",
"    figsuper.write_html(f\"plot_html/{cat}_distr&trends.html\",config= dict(displayModeBar = False, responsive = True))"
],
"metadata": {
"collapsed": false
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}