{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "40038234", "metadata": {}, "outputs": [ { "data": { "text/html": " \n " }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "\n", "import pandas as pd\n", "import janitor\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from matplotlib.ticker import MaxNLocator\n", "import math\n", "import plotly.express as px\n", "import plotly.graph_objects as go\n", "import plotly.offline as pyo\n", "from plotly.subplots import make_subplots\n", "import plotly.graph_objects as go\n", "pyo.init_notebook_mode()\n", "\n", "import plotly.io as pio\n", "pio.renderers.default = \"plotly_mimetype+notebook\"\n", "\n", "import country_converter as coco\n", "cc = coco.CountryConverter()\n", "\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 3, "outputs": [], "source": [ "os.makedirs('plot_html',exist_ok=True)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 4, "outputs": [ { "data": { "text/plain": "'https://plotly.com/~radvanyimome/5/'" }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import chart_studio.plotly as py\n", "import plotly.graph_objects as go\n", "\n", "trace0 = go.Scatter(\n", " x=[1, 2, 3, 4],\n", " y=[10, 15, 13, 17]\n", ")\n", "trace1 = go.Scatter(\n", " x=[1, 2, 3, 4],\n", " y=[16, 5, 11, 9]\n", ")\n", "data = [trace0, trace1]\n", "\n", "py.plot(data, filename = 'basic-line', auto_open=True)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 5, "id": "ea3629f5", "metadata": {}, "outputs": [], "source": [ "# Seaborn palette\n", "# sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)\n", "# sns.palplot(sns.color_palette())" ] }, { "cell_type": "code", "execution_count": 6, "id": "fb7baf32", "metadata": {}, "outputs": [], "source": [ "outdir=\"wos_processed_data\"\n", "\n", "wos = pd.read_excel(f\"../{outdir}/wos_processed.xlsx\")\n", "wos_univ = pd.read_excel(f\"../{outdir}/wos_institution_locations_harmonized.xlsx\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "4dd8e081", "metadata": {}, "outputs": [], "source": [ "def eurovoc_classer(x):\n", " eurovoc_classification = {\"Eastern Europe\":[\"Bulgaria\",\"Czech Republic\",\"Croatia\",\"Hungary\",\"Poland\",\"Romania\",\"Slovakia\",\"Slovenia\"],\n", " \"Northern Europe\":[\"Denmark\",\"Estonia\",\"Finland\",\"Latvia\",\"Lithuania\",\"Sweden\",\"Norway\",\"Iceland\"],\n", " \"Southern Europe\":[\"Cyprus\",\"Greece\",\"Italy\",\"Portugal\",\"Spain\",\"Malta\"],\n", " \"Western Europe\":[\"Austria\",\"Belgium\",\"France\",\"Germany\",\"Luxembourg\",\"Netherlands\",\"Switzerland\",\"United Kingdom\",\"Ireland\"]}\n", " if x == 'China':\n", " return x\n", " for k in eurovoc_classification.keys():\n", " if x in eurovoc_classification[k]:\n", " return k" ] }, { "cell_type": "code", "execution_count": 8, "id": "eb933d66", "metadata": {}, "outputs": [], "source": [ "wos_country = pd.read_excel(f\"../{outdir}/wos_countries.xlsx\")\n", "wos_country_types = pd.read_excel(f\"../{outdir}/wos_country_types.xlsx\")" ] }, { "cell_type": "code", "execution_count": 9, "id": "cd0b0efa", "metadata": {}, "outputs": [ { "data": { "text/plain": " Country Country_Type Eurovoc_Class\n0 Belgium EU Western Europe\n1 China China China\n2 Luxembourg EU Western Europe\n3 Netherlands EU Western Europe\n4 Norway Non-EU associate Northern Europe\n5 United Kingdom Non-EU associate Western Europe\n6 France EU Western Europe\n7 Sweden EU Northern Europe\n8 Italy EU Southern Europe\n9 Denmark EU Northern Europe\n10 Germany EU Western Europe\n11 Slovenia EU Eastern Europe\n12 Estonia EU Northern Europe\n13 Finland EU Northern Europe\n14 Bulgaria EU Eastern Europe\n15 Slovakia EU Eastern Europe\n16 Spain EU Southern Europe\n17 Poland EU Eastern Europe\n18 Czech Republic EU Eastern Europe\n19 Greece EU Southern Europe\n20 Malta EU Southern Europe\n21 Austria EU Western Europe\n22 Switzerland Non-EU associate Western Europe\n23 Ireland EU Western Europe\n24 Portugal EU Southern Europe\n25 Romania EU Eastern Europe\n26 Hungary EU Eastern Europe\n27 Cyprus EU Southern Europe\n28 Croatia EU Eastern Europe\n29 Lithuania EU Northern Europe\n30 Latvia EU Northern Europe", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CountryCountry_TypeEurovoc_Class
0BelgiumEUWestern Europe
1ChinaChinaChina
2LuxembourgEUWestern Europe
3NetherlandsEUWestern Europe
4NorwayNon-EU associateNorthern Europe
5United KingdomNon-EU associateWestern Europe
6FranceEUWestern Europe
7SwedenEUNorthern Europe
8ItalyEUSouthern Europe
9DenmarkEUNorthern Europe
10GermanyEUWestern Europe
11SloveniaEUEastern Europe
12EstoniaEUNorthern Europe
13FinlandEUNorthern Europe
14BulgariaEUEastern Europe
15SlovakiaEUEastern Europe
16SpainEUSouthern Europe
17PolandEUEastern Europe
18Czech RepublicEUEastern Europe
19GreeceEUSouthern Europe
20MaltaEUSouthern Europe
21AustriaEUWestern Europe
22SwitzerlandNon-EU associateWestern Europe
23IrelandEUWestern Europe
24PortugalEUSouthern Europe
25RomaniaEUEastern Europe
26HungaryEUEastern Europe
27CyprusEUSouthern Europe
28CroatiaEUEastern Europe
29LithuaniaEUNorthern Europe
30LatviaEUNorthern Europe
\n
" }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wos_country_types[\"Eurovoc_Class\"] = wos_country_types[\"Country\"].map(eurovoc_classer)\n", "wos_country_types" ] }, { "cell_type": "code", "execution_count": 10, "id": "1e737dbf", "metadata": {}, "outputs": [], "source": [ "record_col = \"UT (Unique WOS ID)\"" ] }, { "cell_type": "markdown", "id": "b1aa7f2d", "metadata": {}, "source": [ "# Analysis by METRIX classification" ] }, { "cell_type": "markdown", "id": "a97f1cbb", "metadata": {}, "source": [ "## Distribution of topics via the METRIX classification" ] }, { "cell_type": "code", "execution_count": 11, "id": "f39cb21d", "metadata": {}, "outputs": [], "source": [ "def replace_nth(s, sub=\" \", repl=\"
\", n=2):\n", " chunks = s.split(sub)\n", " size = len(chunks)\n", " rows = size // n + (0 if size % n == 0 else 1)\n", " return (repl.join([\n", " sub.join([chunks[i * n + j] for j in range(n if (i + 1) * n < size else size - i * n)])\n", " for i in range(rows)\n", " ])).replace(\"
&\",\" &
\")\n", "\n", "\n", "groups = ['Domain_English',\"Field_English\",'SubField_English']\n", "data = wos.groupby(groups, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)\n", "data[\"percent\"] = data[record_col]/data[record_col].sum()*100\n", "\n", "data[groups] = data[groups].applymap(replace_nth)\n", "# for c in [\"Domain_English\",\"Field_English\",\"SubField_English\"]:\n", "# data[c] = data[c]+\"
(\"+(pd.DataFrame(data[c],columns=[c]).merge(data.groupby(c,as_index=False)[record_col].sum(), on=c)[record_col]).astype(str)+\")\"\n", "# data" ] }, { "cell_type": "code", "execution_count": 12, "id": "2c9d6d5a", "metadata": {}, "outputs": [], "source": [ "fig = px.sunburst(data, path=groups, values=record_col,\n", " color='Domain_English',title=\"Distribution of topics
(METRIX taxonomy)\", template='plotly')\n", "# fig.update_traces(hovertemplate='%{label}
%{value:.2f}%')\n", "fig.update_traces(textinfo=\"label+value+percent root\")\n", "fig.update_traces(hovertemplate='%{id}
%{value}')\n", "metrix_distr = go.Figure(fig)\n", "# metrix_distr.show()" ] }, { "cell_type": "code", "execution_count": 13, "outputs": [], "source": [ "# metrix_distr.show(config= dict(displayModeBar = False))\n", "data = (wos.groupby(['Publication Year'])[record_col].nunique(dropna=False)\n", " .reset_index()\n", " .rename(columns={0:record_col}))\n", "data[record_col+\"_relative_growth\"] = data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True)[record_col][0]\n", "data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n", "\n", "data = data.sort_values(by =[\"Publication Year\"], ascending=[True])\n", "data[record_col+\"_cumsum\"] = (data[record_col].cumsum())\n", "\n", "year_output = px.line(data,x=\"Publication Year\", y=record_col, markers=True)\n", "year_output.update_traces(hovertemplate='Year:%{x:d}
Number of co-publications:%{y:d}')\n", "\n", "year_rel_output = px.line(data,x=\"Publication Year\", y=record_col+\"_relative_growth\", markers=True)\n", "year_rel_output.update_traces(hovertemplate='Year:%{x:d}
Rel.growth in co-publications:%{y:.0%}')\n", "\n", "year_rel_cumsum = px.area(data,x=\"Publication Year\", y=record_col+\"_cumsum\")\n", "year_rel_cumsum.update_traces(hovertemplate='Year:%{x:d}
Cumulative number co-publications:%{y:d}')\n", "\n", "\n", "figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Distribution of topics\",\n", " \"Co-publications per year\",\"Relative growth of co-publications\",\n", " \"Cumulative sum of co-publications\",],\n", " specs=[\n", " [{\"type\": \"domain\", \"rowspan\":3}, {\"type\": \"xy\"}],\n", " [None,{\"type\": \"xy\"}],\n", " [None, {\"type\": \"xy\"}]\n", " ])\n", "\n", "\n", "for trace in list(metrix_distr.select_traces()):\n", " # trace.barmode\n", " figsuper.add_trace(trace,\n", " row=[1,2,3], col=1\n", " )\n", "\n", "for trace in list(year_output.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=2\n", " )\n", "\n", "for trace in list(year_rel_output.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=2, col=2\n", " )\n", "\n", "for trace in list(year_rel_cumsum.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=3, col=2\n", " )\n", "\n", "# figsuper.update_layout(hovermode='x unified')\n", "figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n", "figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", "figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", "figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", "figsuper['layout']['yaxis2'].update(zerolinecolor='grey',tickformat=\".0%\")\n", "# figsuper.layout.annotations[0].update(x=0.1)\n", "# figsuper.layout.annotations[2].update(x=0.105)\n", "# figsuper.layout.annotations[1].update(x=0.7)\n", "# figsuper.layout.annotations[3].update(x=0.7)\n", "\n", "# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", "figsuper.write_html(f\"plot_html/Overall_distr&trends.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", "# py.plot(figsuper, filename = 'ZSI_ReConnect_overall_distr&trend', auto_open=True,config= dict(displayModeBar = False, responsive = True))" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 14, "outputs": [], "source": [ "# data\n" ], "metadata": { "collapsed": false } }, { "cell_type": "markdown", "id": "66fca444", "metadata": {}, "source": [ "## Domains, distribution, yearly trends" ] }, { "cell_type": "code", "execution_count": 15, "id": "14e82a73", "metadata": {}, "outputs": [], "source": [ "group = 'Domain_English'\n", "data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)\n", "\n", "fig = px.bar(data.sort_values(by=group), x=record_col, y=group, color=group,barmode='relative',\n", " labels={\n", " record_col: 'Number of co-publications',\n", " group: \"\",\n", " },\n", " title=\"Distribution of Domains\", template='plotly')\n", "fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family=\"Montserrat\")\n", "fig.update_traces(hovertemplate='%{x:d}')\n", "fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", "fig.update_layout(yaxis={'categoryorder':'total ascending'})\n", "fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "dom_distr = go.Figure(fig)\n", "# dom_distr.show(config= dict(displayModeBar = False, responsive = True))" ] }, { "cell_type": "code", "execution_count": 16, "id": "8cbe20ab", "metadata": {}, "outputs": [], "source": [ "group = ['Publication Year','Domain_English']\n", "data = (wos.groupby(['Publication Year','Domain_English'])[record_col].nunique(dropna=False).unstack()\n", " .fillna(0)\n", " .stack()\n", " .reset_index()\n", " .rename(columns={0:record_col}))\n", "data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='Domain_English'),\n", " on='Domain_English', suffixes=[None,\"_relative_growth\"])\n", "data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n", "\n", "data = data.sort_values(by =[\"Domain_English\",\"Publication Year\"], ascending=[True,True])\n", "data[record_col+\"_cumsum\"] = (data.groupby('Domain_English',as_index=False)[record_col].cumsum())\n", "\n", "# data" ] }, { "cell_type": "code", "execution_count": 17, "id": "05d0922a", "metadata": {}, "outputs": [], "source": [ "fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col,x=group[0], color=group[-1], markers=True, labels={\n", " record_col: 'Number of co-publications',\n", " group[-1]: \"Domain\",\n", " },\n", " title=\"Yearly output of co-publications\", template='plotly')\n", "fig.update_traces(hovertemplate='%{y:d}')\n", "fig.update_layout(hovermode='x unified')\n", "fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", "fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "\n", "year_output_by_domain = go.Figure(fig)\n", "\n", "fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+\"_relative_growth\",x=group[0], color=group[-1], markers=True, labels={\n", " record_col+\"_relative_growth\": 'Rel. growth
in co-publications (%)',\n", " group[-1]: \"Domain\",\n", " },\n", " title=\"Relative growth in the output of co-publications\", template='plotly')\n", "# fig.update_traces(hovertemplate='%{y:.2f}%')\n", "\n", "fig.update_layout(hovermode='x unified',yaxis_tickformat='.0f%',font_family=\"Montserrat\")\n", "fig.update_traces(hovertemplate='%{y:.0f}00%')\n", "fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", "fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "# fig['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')\n", "# fig.update_yaxes(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')\n", "\n", "rel_output_by_domain = go.Figure(fig)\n", "\n", "\n", "fig = px.area(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+\"_cumsum\",x=group[0], color=group[-1],line_group=group[-1],\n", " labels={\n", " record_col+\"_cumsum\": 'Cumulative number of co-publications',\n", " group[-1]: \"Domain\",\n", " },\n", " title=\"Cumulative number of co-publications\", template='plotly')\n", "fig.update_traces(hovertemplate='%{y:d}')\n", "fig.update_layout(hovermode='x unified')\n", "fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", "fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "\n", "cumsum_by_domain = go.Figure(fig)\n", "# cumsum_by_domain.show(config= dict(displayModeBar = False))" ] }, { "cell_type": "code", "execution_count": 18, "id": "3a07c24d", "metadata": {}, "outputs": [], "source": [ "from plotly.subplots import make_subplots\n", "import plotly.graph_objects as go\n", "\n", "# dom_distr\n", "# year_output_by_domain\n", "# rel_output_by_domain\n", "# cumsum_by_domain\n", "\n", "figsuper = make_subplots(rows=2, cols=2, subplot_titles=[\"Distribution of domains\",\"Cumulative sum of co-publications\",\n", " \"Co-publications per year\",\"Relative growth of co-publications\"])\n", "\n", "\n", "for trace in list(dom_distr.select_traces()):\n", " trace.showlegend=False\n", " # trace.barmode\n", " figsuper.add_trace(trace,\n", " row=1, col=1\n", " )\n", "\n", "for trace in list(cumsum_by_domain.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=2\n", " )\n", "\n", "for trace in list(year_output_by_domain.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=1\n", " )\n", "\n", "for trace in list(rel_output_by_domain.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=2\n", " )\n", "\n", "# figsuper.update_layout(hovermode='x unified')\n", "figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n", "figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", "figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", "figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", "figsuper['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n", "# figsuper.layout.annotations[0].update(x=0.1)\n", "# figsuper.layout.annotations[2].update(x=0.105)\n", "# figsuper.layout.annotations[1].update(x=0.7)\n", "# figsuper.layout.annotations[3].update(x=0.7)\n", "\n", "# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", "figsuper.write_html(f\"plot_html/Domains_distr&trends.html\",config= dict(displayModeBar = False, responsive = True))" ] }, { "cell_type": "code", "execution_count": 19, "outputs": [], "source": [ "# figsuper['layout']" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 20, "id": "329b6889", "metadata": {}, "outputs": [ { "data": { "text/plain": "Publication Year 2011 2012 2013 2014 2015 2016 2017 2018 \nDomain_English \nApplied Sciences 490 593 738 1031 1201 1535 1920 2808 \\\nArts & Humanities 0 0 0 4 1 3 7 4 \nEconomic & Social Sciences 20 22 29 28 34 40 84 105 \nHealth Sciences 116 120 155 184 216 243 321 403 \nMultidisciplinary 15 21 43 52 57 64 75 76 \nNatural Sciences 181 223 298 318 380 437 568 753 \n\nPublication Year 2019 2020 2021 2022 \nDomain_English \nApplied Sciences 3729 4446 5295 6199 \nArts & Humanities 11 11 16 13 \nEconomic & Social Sciences 160 211 252 375 \nHealth Sciences 611 755 1035 1182 \nMultidisciplinary 83 97 115 149 \nNatural Sciences 999 1232 1403 1665 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Publication Year201120122013201420152016201720182019202020212022
Domain_English
Applied Sciences490593738103112011535192028083729444652956199
Arts & Humanities0004137411111613
Economic & Social Sciences20222928344084105160211252375
Health Sciences11612015518421624332140361175510351182
Multidisciplinary15214352576475768397115149
Natural Sciences181223298318380437568753999123214031665
\n
" }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pivot_data = pd.pivot_table(data, values=record_col, index=['Domain_English'],\n", "\n", " columns=['Publication Year'], fill_value=0)\n", "pivot_data" ] }, { "cell_type": "code", "execution_count": 21, "id": "100f3002", "metadata": {}, "outputs": [], "source": [ "# f, ax = plt.subplots(figsize=(9, 6))\n", "# g = sns.heatmap(pivot_data, annot=True, fmt=\"d\", linewidths=.5, ax=ax)\n", "# g.set(xlabel=\"\", ylabel=\"\")" ] }, { "cell_type": "code", "execution_count": 22, "id": "a8d24046", "metadata": {}, "outputs": [ { "data": { "text/plain": "Publication Year 2011 2012 2013 2014 \nDomain_English \nApplied Sciences 59.610706 60.572012 58.432304 63.760049 \\\nArts & Humanities 0.000000 0.000000 0.000000 0.247372 \nEconomic & Social Sciences 2.433090 2.247191 2.296120 1.731602 \nHealth Sciences 14.111922 12.257406 12.272367 11.379097 \nMultidisciplinary 1.824818 2.145046 3.404592 3.215832 \nNatural Sciences 22.019465 22.778345 23.594616 19.666048 \n\nPublication Year 2015 2016 2017 2018 \nDomain_English \nApplied Sciences 63.578613 66.106804 64.537815 67.678959 \\\nArts & Humanities 0.052938 0.129199 0.235294 0.096409 \nEconomic & Social Sciences 1.799894 1.722653 2.823529 2.530730 \nHealth Sciences 11.434621 10.465116 10.789916 9.713184 \nMultidisciplinary 3.017470 2.756245 2.521008 1.831767 \nNatural Sciences 20.116464 18.819983 19.092437 18.148952 \n\nPublication Year 2019 2020 2021 2022 \nDomain_English \nApplied Sciences 66.672626 65.847156 65.241498 64.687467 \nArts & Humanities 0.196674 0.162915 0.197141 0.135657 \nEconomic & Social Sciences 2.860719 3.125000 3.104978 3.913180 \nHealth Sciences 10.924370 11.181872 12.752587 12.334342 \nMultidisciplinary 1.483998 1.436611 1.416954 1.554837 \nNatural Sciences 17.861613 18.246445 17.286841 17.374517 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Publication Year201120122013201420152016201720182019202020212022
Domain_English
Applied Sciences59.61070660.57201258.43230463.76004963.57861366.10680464.53781567.67895966.67262665.84715665.24149864.687467
Arts & Humanities0.0000000.0000000.0000000.2473720.0529380.1291990.2352940.0964090.1966740.1629150.1971410.135657
Economic & Social Sciences2.4330902.2471912.2961201.7316021.7998941.7226532.8235292.5307302.8607193.1250003.1049783.913180
Health Sciences14.11192212.25740612.27236711.37909711.43462110.46511610.7899169.71318410.92437011.18187212.75258712.334342
Multidisciplinary1.8248182.1450463.4045923.2158323.0174702.7562452.5210081.8317671.4839981.4366111.4169541.554837
Natural Sciences22.01946522.77834523.59461619.66604820.11646418.81998319.09243718.14895217.86161318.24644517.28684117.374517
\n
" }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "percent_pivot = pd.crosstab(data['Domain_English'], data['Publication Year'], values=data[record_col], aggfunc=np.sum, normalize='columns')*100\n", "percent_pivot" ] }, { "cell_type": "code", "execution_count": 23, "id": "3bda79fb", "metadata": {}, "outputs": [], "source": [ " # f, ax = plt.subplots(figsize=(15, 6))\n", "# # g = sns.heatmap(percent_pivot, annot=True, fmt='.2f', linewidths=.5, ax=ax, cbar=False)\n", "# # for t in ax.texts: t.set_text(t.get_text() + \" %\")\n", "# g.set(xlabel=\"\", ylabel=\"\")" ] }, { "cell_type": "code", "execution_count": 24, "id": "01024cc0", "metadata": {}, "outputs": [], "source": [ "# percent_pivot.T.plot(kind='bar',\n", "# stacked=True,\n", "# figsize=(10, 6))" ] }, { "cell_type": "code", "execution_count": 25, "id": "4caa215d", "metadata": {}, "outputs": [], "source": [ "# percent_pivot.T.plot(kind='bar',\n", "# stacked=True,\n", "# figsize=(15, 8))\n", "#\n", "# plt.legend(loc=\"lower left\", ncol=2)\n", "# # plt.ylabel(\"Release Year\")\n", "# # plt.xlabel(\"Proportion\")\n", "#\n", "#\n", "# for n, x in enumerate([*pivot_data.T.index.values]):\n", "# for (proportion, count, y_loc) in zip(percent_pivot.T.loc[x],\n", "# pivot_data.T.loc[x],\n", "# percent_pivot.T.loc[x].cumsum()):\n", "#\n", "# plt.text(y=(y_loc - proportion) + (proportion / 2),\n", "# x=n - 0.11,\n", "# s=f'{count}',# ({np.round(proportion, 1)}%)',\n", "# color=\"black\",\n", "# fontsize=8,\n", "# fontweight=\"bold\")\n", "#\n", "# plt.show()" ] }, { "cell_type": "markdown", "id": "dcae04bd", "metadata": {}, "source": [ "## Field" ] }, { "cell_type": "code", "execution_count": 26, "id": "d3807072", "metadata": {}, "outputs": [], "source": [ "# group = ['Publication Year',\"Domain_English\",'Field_English']\n", "# # data = wos.groupby(['Publication Year',\"Domain_English\",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n", "#\n", "#\n", "# data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()\n", "# .fillna(0)\n", "# .stack()\n", "# .reset_index()\n", "# .rename(columns={0:record_col}))\n", "#\n", "# data = data.merge(wos[[\"Domain_English\",'Field_English']].drop_duplicates(),on=\"Field_English\")\n", "#\n", "# data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='Field_English'),\n", "# on='Field_English', suffixes=[None,\"_relative_growth\"])\n", "# data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]*100\n", "#\n", "# data = data.sort_values(by =[\"Field_English\",\"Publication Year\"], ascending=[True,True])\n", "# data[record_col+\"_cumsum\"] = (data.groupby('Domain_English',as_index=False)[record_col].cumsum())" ] }, { "cell_type": "code", "execution_count": 27, "id": "756513b5", "metadata": {}, "outputs": [], "source": [ "# data_complete = pd.DataFrame()\n", "#\n", "# for cat in sorted(data[group[-2]].unique()):\n", "# #data segment\n", "# sub_data = data[data[group[-2]]==cat]\n", "# sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n", "# ,group[-1],fill_value=0)\n", "# data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n", "\n", "\n", " # seaborn version plot\n", " # g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),\n", " # y=record_col,x=group[0], hue=group[-1], marker=\"o\")\n", " # g.set(xticks=list(range(2012,2022+1,2)))\n", " # g.legend(title=None)\n", " # g.set_title(cat)\n", " # g.yaxis.set_major_locator(MaxNLocator(integer=True))\n", " # plt.show()" ] }, { "cell_type": "code", "execution_count": 28, "id": "d09c080a", "metadata": {}, "outputs": [], "source": [ "# data_complete = pd.DataFrame()\n", "#\n", "# # Creating subplot axes\n", "# fig, axes = plt.subplots(nrows=3,ncols=2,figsize=(15, 15))\n", "#\n", "# for cat,ax in zip(sorted(data[group[-2]].unique()),axes.flatten()):\n", "# #data segment\n", "# sub_data = data[data[group[-2]]==cat]\n", "# sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n", "# ,group[-1],fill_value=0)\n", "# data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n", "# #plot\n", "# g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),\n", "# y=record_col,x=group[0], hue=group[-1], marker=\"o\", ax=ax)\n", "# g.set(xticks=list(range(2012,2022+1,2)))\n", "# g.legend(title=None)\n", "# g.set_title(cat)\n", "# g.set_xlabel(None)\n", "# g.set_ylabel(None)\n", "# g.yaxis.set_major_locator(MaxNLocator(integer=True))\n", "# fig.suptitle(\"Number of co-publications in domains and respective fields\", y=0.92)\n", "# plt.show()" ] }, { "cell_type": "markdown", "id": "09a6de71", "metadata": {}, "source": [ "## SubField" ] }, { "cell_type": "code", "execution_count": 29, "id": "0397eb85", "metadata": {}, "outputs": [], "source": [ "group = ['Publication Year',\"Domain_English\",'Field_English',\"SubField_English\"]\n", "data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n", "# data" ] }, { "cell_type": "code", "execution_count": 30, "id": "846596cf", "metadata": {}, "outputs": [], "source": [ "for cat in sorted(data[group[-2]].unique()):\n", " sub_data = data[data[group[-2]]==cat]\n", " sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}\n", " ,group[-1],fill_value=0)\n", " # g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0],\n", " # hue=group[-1], marker=\"o\", errorbar=None)\n", " # g.set(xticks=list(range(2012,2022+1,2)))\n", " # g.legend(title=None,bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncols=math.ceil(len(g.legend_.texts)/12))\n", " # g.set_title(f'Number or co-publications in {cat}')\n", " # g.set_ylabel(None)\n", " # plt.show()" ] }, { "cell_type": "code", "execution_count": 31, "id": "27c90aaf", "metadata": {}, "outputs": [], "source": [ "from matplotlib.ticker import FuncFormatter\n", "import math\n", "def orderOfMagnitude(number):\n", " return math.floor(math.log(number, 10))\n", "\n", "def roundToNearest(number):\n", " order = orderOfMagnitude(number)\n", " # if order!=0:\n", " # order+=1\n", " near = math.ceil(number/10**order)*10**order\n", " return near" ] }, { "cell_type": "markdown", "id": "91d2cc8a", "metadata": {}, "source": [ "## Country contributions" ] }, { "cell_type": "code", "execution_count": 32, "id": "b3adb06a", "metadata": {}, "outputs": [], "source": [ "wos_univ_locations = wos_univ.merge(wos_country_types, on=\"Country\")\n", "wos_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\"]].drop_duplicates()\n", "\n", "collab_desc = wos_collabs[wos_collabs[\"Country\"]!=\"China\"][\"Country\"].value_counts().reset_index()\n", "collab_desc[\"percent_of_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].nunique()#*100\n", "collab_desc[\"percent_contrib_in_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].size#*100\n", "collab_desc = collab_desc.merge(wos_country_types, on=\"Country\")\n", "# collab_desc\n", "\n", "c_dict = {\"count\":\"Number of co-publications\",\n", " \"percent_of_copubs\":\"Percent of co-publications\",\n", " \"percent_contrib_in_copubs\":\"Contribution to co-publications\"}\n", "\n", "color_discrete_map= {'China': '#EF553B',\n", " 'EU': '#636EFA',\n", " 'Non-EU associate': '#00CC96'}\n", "\n", "fig_dict = dict()\n", "# Creating subplot axes\n", "# fig, axes = plt.subplots(ncols=3,figsize=(15, 15))\n", "# for c,ax in zip(c_dict.keys(),axes.flatten()):\n", "for c in c_dict.keys():\n", " data = collab_desc[[\"Country\",c,\"Country_Type\"]]\n", " # plt.figure(figsize=(9,12))\n", " col_by=\"Country_Type\"\n", " y_lab=\"Country\"\n", " # g = sns.barplot(data, x=c, y=\"Country\", hue=\"Country_Type\", dodge=False)\n", " fig = px.bar(data, x=c, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,\n", " labels=dict({\n", " record_col: 'Number of co-publications',\n", " \"Institution_harm\": \"Institution\",\n", " \"Institution_harm_label\": \"Institution\",\n", " \"Country_Type\":\"Country type\",\n", " \"Eurovoc_Class\":\"Region\"\n", " },**c_dict),\n", " title=c_dict[c], template='plotly')\n", " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",\n", " yaxis={'categoryorder':'total ascending'},\n", " width=1000, height=1000,)\n", " if \"percent\" in c:\n", " fig.update_traces(hovertemplate='%{y}
%{x}')\n", " fig.update_xaxes(tickformat=\".1%\")\n", " else:\n", " fig.update_traces(hovertemplate='%{y}
%{x:d}')\n", " fig_dict[c] = go.Figure(fig)\n", " # fig.show(config= dict(displayModeBar = False, responsive = True))\n", " # g.set_xlim(0,roundToNearest(data[c].max()))\n", " # g.set_ylabel(None)\n", " # g.set_xlabel(c_dict.get(c))\n", " # g.set_title(c_dict.get(c))\n", " # g.legend(title=None, loc=\"right\")\n", " # for i in g.containers:\n", " # g.bar_label(i,fontsize=10, fmt='%.1f%%' if 'percent' in c else '%.0f')\n", " # if 'percent' in c:\n", " # g.xaxis.set_major_locator(MaxNLocator(integer=True))\n", " # vals = g.get_xticks()\n", " # g.set_xticklabels([str(int(val))+'%' for val in vals])\n", " # plt.show()\n", "figsuper = make_subplots(rows=1, cols=3, subplot_titles =list(c_dict.values()))\n", "for i,f in enumerate(fig_dict.keys()):\n", " sfig = fig_dict[f]\n", " for trace in list(sfig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=1, col=i+1)\n", "\n", "figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative',yaxis2={'categoryorder':'total ascending'},yaxis3={'categoryorder':'total ascending'})\n", "figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", "figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", "figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", "# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", "figsuper.write_html(f\"plot_html/europe_contribution_bar.html\",config= dict(displayModeBar = False, responsive = True))" ] }, { "cell_type": "code", "execution_count": 33, "id": "140395ac", "metadata": {}, "outputs": [], "source": [ "# wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n", "# wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n", "# EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique', normalize='all').fillna(0)\n", "#\n", "# # Generate a mask for the upper triangle\n", "# mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n", "#\n", "# # Set up the matplotlib figure\n", "# f, ax = plt.subplots(figsize=(11, 9))\n", "#\n", "# # Draw the heatmap with the mask and correct aspect ratio\n", "# g = sns.heatmap(EU_co_occur, mask=mask,\n", "# square=True, linewidths=.5)\n", "#\n", "# g.set_ylabel(None)\n", "# g.set_xlabel(None)" ] }, { "cell_type": "code", "execution_count": 34, "id": "c959287e", "metadata": {}, "outputs": [], "source": [ "wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n", "wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n", "EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", "\n", "eu_list = wos_collabs_EU.groupby(['Country_x'])[record_col].count().sort_values(ascending=False).index\n", "# pre_fig = sns.clustermap(EU_co_occur)\n", "# re_index = [i.get_text() for i in pre_fig.ax_heatmap.yaxis.get_majorticklabels()]\n", "# re_column = [i.get_text() for i in pre_fig.ax_heatmap.xaxis.get_majorticklabels()]\n", "\n", "EU_co_occur = EU_co_occur.reindex(index = eu_list, columns=eu_list)\n", "\n", "# Generate a mask for the upper triangle\n", "mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n", "data = np.where(mask,None,EU_co_occur)\n", "\n", "fig = px.imshow(data,\n", " labels=dict(x=\"Country\", y=\"Country\", color=\"Co-publication with China\"),\n", " x=list(EU_co_occur.columns),\n", " y=list(EU_co_occur.index), title=\"Intraeuropean patterns
Co-occurences of countries in chinese co-publications\"\n", " )\n", "fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed', template='plotly_white')\n", "# fig.update_traces(hovertemplate='%{y}
%{x}
Co-publications: %{hovertext}')\n", "fig.update_xaxes(tickangle= -90)\n", "fig.update_yaxes(\n", " ticks=\"outside\")\n", "fig.update_xaxes(\n", " ticks=\"outside\")\n", "# fig.show(config= dict(displayModeBar = False,responsive=True))\n", "fig.write_html(f\"plot_html/intraeurope_collabs.html\",config= dict(displayModeBar = False, responsive = True))\n" ] }, { "cell_type": "code", "execution_count": 35, "id": "df1f03ea", "metadata": {}, "outputs": [], "source": [ "# collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n", "# collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n", "# data = collab_year.groupby([\"Publication Year\",'Country_Type'],as_index=False)[record_col].nunique()\n", "#\n", "#\n", "# g=sns.lineplot(data,y=record_col,x=\"Publication Year\", hue=\"Country_Type\", marker=\"o\")\n", "# g.set(xticks=list(range(2012,2022+1,2)))\n", "# g.legend(title=None)\n", "# g.set_xlabel(None)\n", "# g.set_ylabel(None)\n", "# g.set_title(\"Yearly output of co-publications with China\")" ] }, { "cell_type": "markdown", "id": "122d0260", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 36, "id": "f19501a9", "metadata": {}, "outputs": [], "source": [ "collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n", "collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n", "\n", "data = (collab_year.groupby(['Publication Year',\"Country\"])[record_col]\n", " .nunique(dropna=False).unstack()\n", " .fillna(0)\n", " .stack()\n", " .reset_index()\n", " .rename(columns={0:record_col}))\n", "data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset=\"Country\"),\n", " on=[\"Country\"], suffixes=[None,\"_relative_growth\"])\n", "data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]*100\n", "data = data.sort_values(by =[\"Country\",\"Publication Year\"], ascending=[True,True])\n", "data[record_col+\"_cumsum\"] = (data.groupby('Country',as_index=False)[record_col].cumsum())\n", "data = data.merge(wos_country_types, on='Country')\n", "# data\n", "\n", "data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n", "fig = px.choropleth(data[data[\"Publication Year\"] == 2022], locations=\"ISO3\", color=record_col+\"_cumsum\", hover_name=\"Country\",\n", " scope=\"europe\", template='plotly',\n", " range_color=[data[record_col+\"_cumsum\"].min(),data[record_col+\"_cumsum\"].max()],hover_data=[\"Eurovoc_Class\"])\n", "# original: '%{hovertext}

ISO3=%{location}
Eurovoc_Class=%{customdata[0]}
UT (Unique WOS ID)_cumsum=%{z}'\n", "\n", "fig.update_traces(hovertemplate='%{hovertext}'\n", " '
Region: %{customdata[0]}
'\n", " 'Co-pubications: %{z:d}')\n", "\n", "cumsum_country = go.Figure(fig)" ] }, { "cell_type": "code", "execution_count": 37, "id": "ae3cb8e1", "metadata": {}, "outputs": [], "source": [ "# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col,x='Publication Year', color=\"Eurovoc_Class\",line_group=\"Country\", markers=True,\n", "# labels={\n", "# record_col: 'Number of co-publications',\n", "# \"Eurovoc_Class\": \"Region\"\n", "# },\n", "# title=\"Yearly output of co-publications\", template='plotly',hover_name= \"Country\")\n", "# fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", "# # fig.update_layout(hovermode='x unified')\n", "# fig.add_shape(\n", "# # Rectangle with reference to the plot\n", "# type=\"rect\",\n", "# xref=\"paper\",\n", "# yref=\"paper\",\n", "# x0=0,\n", "# y0=0,\n", "# x1=1.0,\n", "# y1=1.0,\n", "# line=dict(\n", "# color=\"black\",\n", "# width=0.5,\n", "# )\n", "# )\n", "# fig.update_yaxes(\n", "# showgrid=True,\n", "# ticks=\"outside\")\n", "# fig.update_xaxes(\n", "# showgrid=True,\n", "# ticks=\"outside\")\n", "# fig.show(config= dict(displayModeBar = False))" ] }, { "cell_type": "code", "execution_count": 38, "id": "dd72ad3f", "metadata": {}, "outputs": [], "source": [ "# fig.data[0].hovertemplate" ] }, { "cell_type": "code", "execution_count": 39, "id": "600d7459", "metadata": {}, "outputs": [], "source": [ "# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", "# y=record_col+\"_relative_growth\",\n", "# x='Publication Year',\n", "# color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n", "# labels={\n", "# record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n", "# },\n", "# title=\"Relative growth of co-publications
(baseline: 2011)\", template='plotly',hover_name= \"Country\")\n", "# fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}%')\n", "# fig.add_shape(\n", "# # Rectangle with reference to the plot\n", "# type=\"rect\",\n", "# xref=\"paper\",\n", "# yref=\"paper\",\n", "# x0=0,\n", "# y0=0,\n", "# x1=1.0,\n", "# y1=1.0,\n", "# line=dict(\n", "# color=\"black\",\n", "# width=0.5,\n", "# )\n", "# )\n", "# fig.update_yaxes(\n", "# showgrid=True,\n", "# ticks=\"outside\")\n", "# fig.update_xaxes(\n", "# showgrid=True,\n", "# ticks=\"outside\")\n", "# fig.show(config= dict(displayModeBar = False))" ] }, { "cell_type": "code", "execution_count": 40, "id": "0ee76d32", "metadata": {}, "outputs": [], "source": [ "from plotly.subplots import make_subplots\n", "import plotly.graph_objects as go\n", "\n", "figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Number of publications (2022)\",\"Cumulative number of co-publications\",\n", " \"Yearly output of co-publications\",\"Relative growth of co-publications\"],\n", " specs=[\n", " [{\"type\": \"geo\", \"rowspan\":3}, {\"type\": \"xy\"}],\n", " [None,{\"type\": \"xy\"}],\n", " [None, {\"type\": \"xy\"}]\n", " ])\n", "\n", "for trace in list(cumsum_country.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=1\n", " )\n", "\n", "fig = px.area(data.sort_values(ascending=True, by='Publication Year'), y=record_col+\"_cumsum\",\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",\n", " line_group=\"Country\",\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Cumulative number of co-publications\",\n", " hover_name= \"Country\")\n", "fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", "\n", "for trace in list(fig.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=2\n", " )\n", "\n", "\n", "fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", " y=record_col,\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",\n", " line_group=\"Country\",\n", " markers=True,\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Yearly output of co-publications\",hover_name= \"Country\")\n", "fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", "\n", "for trace in list(fig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=2\n", " )\n", "\n", "fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", " y=record_col+\"_relative_growth\",\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n", " labels={\n", " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Relative growth of co-publications\", template='plotly',hover_name= \"Country\")\n", "fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}%')\n", "fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", "\n", "for trace in list(fig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=3, col=2\n", " )\n", "\n", "figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", "figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", "figsuper.update_layout({'template':\"plotly\"})\n", "figsuper.layout[\"geo\"][\"scope\"] = 'europe'\n", "figsuper.update_coloraxes(colorbar=dict(lenmode='fraction',len=0.55, orientation=\"v\",yanchor='top', title=\"Co-publications\",\n", " ticks=\"outside\", ticksuffix=\" \",outlinewidth=0.5))\n", "# figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", "figsuper.write_html(f\"plot_html/country_trends_overall.html\",config= dict(displayModeBar = False, responsive = True))" ] }, { "cell_type": "code", "execution_count": 41, "id": "e4c50e14", "metadata": {}, "outputs": [ { "data": { "text/plain": "Publication Year 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 \nCountry \nAustria 22 24 26 39 50 57 72 89 138 137 \\\nBelgium 34 38 40 65 71 81 90 133 179 213 \nBulgaria 4 5 8 9 7 19 21 18 10 25 \nCroatia 1 2 6 8 10 7 10 19 27 29 \nCyprus 2 1 5 5 5 5 8 7 15 28 \nCzech Republic 13 15 16 21 20 36 37 56 64 81 \nDenmark 35 33 40 59 68 74 101 195 234 245 \nEstonia 3 3 7 10 12 10 15 15 16 38 \nFinland 31 35 44 82 100 125 126 198 241 256 \nFrance 117 130 174 231 269 325 348 491 648 691 \nGermany 123 172 192 273 310 365 456 604 801 907 \nGreece 15 18 19 32 35 50 47 81 114 122 \nHungary 11 11 21 16 20 38 34 47 61 61 \nIreland 13 16 22 31 27 45 66 72 84 116 \nItaly 51 70 84 116 178 187 247 325 441 571 \nLatvia 0 0 1 0 1 8 10 15 10 9 \nLithuania 1 2 10 4 4 13 12 23 38 36 \nLuxembourg 2 3 3 1 8 9 13 15 18 22 \nMalta 1 0 0 0 1 1 0 0 6 2 \nNetherlands 72 64 77 103 139 166 220 297 408 470 \nNorway 30 42 60 76 67 88 104 134 222 253 \nPoland 17 31 37 57 73 82 98 110 138 181 \nPortugal 16 23 35 41 45 58 79 119 136 147 \nRomania 7 15 13 16 25 26 37 57 64 55 \nSlovakia 9 6 6 10 12 22 18 27 27 34 \nSlovenia 7 7 10 12 17 27 22 47 54 31 \nSpain 50 49 69 112 138 185 232 273 356 386 \nSweden 34 50 59 83 113 170 233 232 385 359 \nSwitzerland 37 50 54 74 74 95 155 195 233 263 \nUnited Kingdom 363 417 531 660 781 979 1350 1837 2430 3108 \n\nPublication Year 2021 2022 \nCountry \nAustria 185 205 \nBelgium 242 292 \nBulgaria 32 19 \nCroatia 33 35 \nCyprus 36 43 \nCzech Republic 93 123 \nDenmark 293 343 \nEstonia 45 39 \nFinland 289 380 \nFrance 807 858 \nGermany 1210 1386 \nGreece 139 181 \nHungary 83 90 \nIreland 167 187 \nItaly 641 811 \nLatvia 13 18 \nLithuania 38 38 \nLuxembourg 35 51 \nMalta 7 10 \nNetherlands 529 655 \nNorway 304 311 \nPoland 276 353 \nPortugal 204 212 \nRomania 48 62 \nSlovakia 36 45 \nSlovenia 48 40 \nSpain 473 640 \nSweden 428 510 \nSwitzerland 349 447 \nUnited Kingdom 3718 4245 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Publication Year201120122013201420152016201720182019202020212022
Country
Austria2224263950577289138137185205
Belgium34384065718190133179213242292
Bulgaria4589719211810253219
Croatia1268107101927293335
Cyprus2155558715283643
Czech Republic1315162120363756648193123
Denmark353340596874101195234245293343
Estonia337101210151516384539
Finland31354482100125126198241256289380
France117130174231269325348491648691807858
Germany12317219227331036545660480190712101386
Greece1518193235504781114122139181
Hungary111121162038344761618390
Ireland131622312745667284116167187
Italy517084116178187247325441571641811
Latvia00101810151091318
Lithuania12104413122338363838
Luxembourg233189131518223551
Malta1000110062710
Netherlands726477103139166220297408470529655
Norway304260766788104134222253304311
Poland17313757738298110138181276353
Portugal16233541455879119136147204212
Romania71513162526375764554862
Slovakia966101222182727343645
Slovenia7710121727224754314840
Spain504969112138185232273356386473640
Sweden34505983113170233232385359428510
Switzerland375054747495155195233263349447
United Kingdom363417531660781979135018372430310837184245
\n
" }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "year_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "year_pivot" ] }, { "cell_type": "code", "execution_count": 42, "id": "e4e82db7", "metadata": {}, "outputs": [ { "data": { "text/plain": "
", "image/png": "\n" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "f, ax = plt.subplots(figsize=(15, 15))\n", "g = sns.heatmap(year_pivot, annot=True, fmt=\"d\", linewidths=.5, ax=ax)\n", "g.set(xlabel=\"\", ylabel=\"\")\n", "for i in range(year_pivot.shape[0]+1):\n", " ax.axhline(i, color='white', lw=10)" ] }, { "cell_type": "code", "execution_count": 43, "id": "78bb0b4e", "metadata": {}, "outputs": [ { "data": { "text/plain": "Publication Year 2011 2012 2013 2014 2015 \nCountry \nAustria 1.962533 1.801802 1.557819 1.736420 1.865672 \\\nBelgium 3.033006 2.852853 2.396645 2.894034 2.649254 \nBulgaria 0.356824 0.375375 0.479329 0.400712 0.261194 \nCroatia 0.089206 0.150150 0.359497 0.356189 0.373134 \nCyprus 0.178412 0.075075 0.299581 0.222618 0.186567 \nCzech Republic 1.159679 1.126126 0.958658 0.934996 0.746269 \nDenmark 3.122212 2.477477 2.396645 2.626892 2.537313 \nEstonia 0.267618 0.225225 0.419413 0.445236 0.447761 \nFinland 2.765388 2.627628 2.636309 3.650935 3.731343 \nFrance 10.437110 9.759760 10.425404 10.284951 10.037313 \nGermany 10.972346 12.912913 11.503895 12.154942 11.567164 \nGreece 1.338091 1.351351 1.138406 1.424755 1.305970 \nHungary 0.981267 0.825826 1.258238 0.712378 0.746269 \nIreland 1.159679 1.201201 1.318155 1.380232 1.007463 \nItaly 4.549509 5.255255 5.032954 5.164737 6.641791 \nLatvia 0.000000 0.000000 0.059916 0.000000 0.037313 \nLithuania 0.089206 0.150150 0.599161 0.178094 0.149254 \nLuxembourg 0.178412 0.225225 0.179748 0.044524 0.298507 \nMalta 0.089206 0.000000 0.000000 0.000000 0.037313 \nNetherlands 6.422837 4.804805 4.613541 4.585931 5.186567 \nNorway 2.676182 3.153153 3.594967 3.383793 2.500000 \nPoland 1.516503 2.327327 2.216896 2.537845 2.723881 \nPortugal 1.427297 1.726727 2.097064 1.825467 1.679104 \nRomania 0.624442 1.126126 0.778910 0.712378 0.932836 \nSlovakia 0.802855 0.450450 0.359497 0.445236 0.447761 \nSlovenia 0.624442 0.525526 0.599161 0.534283 0.634328 \nSpain 4.460303 3.678679 4.134212 4.986643 5.149254 \nSweden 3.033006 3.753754 3.535051 3.695459 4.216418 \nSwitzerland 3.300624 3.753754 3.235470 3.294746 2.761194 \nUnited Kingdom 32.381802 31.306306 31.815458 29.385574 29.141791 \n\nPublication Year 2016 2017 2018 2019 2020 \nCountry \nAustria 1.699970 1.689744 1.552958 1.816267 1.543488 \\\nBelgium 2.415747 2.112180 2.320712 2.355883 2.399730 \nBulgaria 0.566657 0.492842 0.314081 0.131614 0.281658 \nCroatia 0.208768 0.234687 0.331530 0.355357 0.326724 \nCyprus 0.149120 0.187749 0.122143 0.197420 0.315457 \nCzech Republic 1.073665 0.868341 0.977142 0.842327 0.912573 \nDenmark 2.206979 2.370336 3.402548 3.079758 2.760252 \nEstonia 0.298240 0.352030 0.261734 0.210582 0.428121 \nFinland 3.728005 2.957052 3.454894 3.171887 2.884182 \nFrance 9.692812 8.167097 8.567440 8.528560 7.785038 \nGermany 10.885774 10.701713 10.539173 10.542248 10.218567 \nGreece 1.491202 1.103027 1.413366 1.500395 1.374493 \nHungary 1.133313 0.797935 0.820101 0.802843 0.687247 \nIreland 1.342082 1.548932 1.256325 1.105554 1.306895 \nItaly 5.577095 5.796761 5.670913 5.804159 6.433078 \nLatvia 0.238592 0.234687 0.261734 0.131614 0.101397 \nLithuania 0.387712 0.281624 0.401326 0.500132 0.405588 \nLuxembourg 0.268416 0.305093 0.261734 0.236904 0.247859 \nMalta 0.029824 0.000000 0.000000 0.078968 0.022533 \nNetherlands 4.950790 5.163107 5.182342 5.369834 5.295178 \nNorway 2.624515 2.440742 2.338161 2.921822 2.850383 \nPoland 2.445571 2.299930 1.919386 1.816267 2.039207 \nPortugal 1.729794 1.854025 2.076426 1.789945 1.656151 \nRomania 0.775425 0.868341 0.994591 0.842327 0.619648 \nSlovakia 0.656129 0.422436 0.471122 0.355357 0.383055 \nSlovenia 0.805249 0.516311 0.820101 0.710713 0.349256 \nSpain 5.517447 5.444731 4.763567 4.685444 4.348806 \nSweden 5.070086 5.468200 4.048159 5.067123 4.044615 \nSwitzerland 2.833284 3.637644 3.402548 3.066596 2.963046 \nUnited Kingdom 29.197733 31.682704 32.053743 31.982101 35.015773 \n\nPublication Year 2021 2022 \nCountry \nAustria 1.712804 1.623248 \nBelgium 2.240533 2.312139 \nBulgaria 0.296269 0.150447 \nCroatia 0.305527 0.277140 \nCyprus 0.333302 0.340486 \nCzech Republic 0.861031 0.973949 \nDenmark 2.712712 2.715971 \nEstonia 0.416628 0.308813 \nFinland 2.675678 3.008948 \nFrance 7.471530 6.793887 \nGermany 11.202666 10.974741 \nGreece 1.286918 1.433209 \nHungary 0.768447 0.712645 \nIreland 1.546153 1.480719 \nItaly 5.934636 6.421728 \nLatvia 0.120359 0.142529 \nLithuania 0.351819 0.300895 \nLuxembourg 0.324044 0.403832 \nMalta 0.064809 0.079183 \nNetherlands 4.897695 5.186476 \nNorway 2.814554 2.462586 \nPoland 2.555319 2.795154 \nPortugal 1.888714 1.678676 \nRomania 0.444403 0.490934 \nSlovakia 0.333302 0.356323 \nSlovenia 0.444403 0.316731 \nSpain 4.379224 5.067701 \nSweden 3.962596 4.038324 \nSwitzerland 3.231182 3.539473 \nUnited Kingdom 34.422739 33.613113 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Publication Year201120122013201420152016201720182019202020212022
Country
Austria1.9625331.8018021.5578191.7364201.8656721.6999701.6897441.5529581.8162671.5434881.7128041.623248
Belgium3.0330062.8528532.3966452.8940342.6492542.4157472.1121802.3207122.3558832.3997302.2405332.312139
Bulgaria0.3568240.3753750.4793290.4007120.2611940.5666570.4928420.3140810.1316140.2816580.2962690.150447
Croatia0.0892060.1501500.3594970.3561890.3731340.2087680.2346870.3315300.3553570.3267240.3055270.277140
Cyprus0.1784120.0750750.2995810.2226180.1865670.1491200.1877490.1221430.1974200.3154570.3333020.340486
Czech Republic1.1596791.1261260.9586580.9349960.7462691.0736650.8683410.9771420.8423270.9125730.8610310.973949
Denmark3.1222122.4774772.3966452.6268922.5373132.2069792.3703363.4025483.0797582.7602522.7127122.715971
Estonia0.2676180.2252250.4194130.4452360.4477610.2982400.3520300.2617340.2105820.4281210.4166280.308813
Finland2.7653882.6276282.6363093.6509353.7313433.7280052.9570523.4548943.1718872.8841822.6756783.008948
France10.4371109.75976010.42540410.28495110.0373139.6928128.1670978.5674408.5285607.7850387.4715306.793887
Germany10.97234612.91291311.50389512.15494211.56716410.88577410.70171310.53917310.54224810.21856711.20266610.974741
Greece1.3380911.3513511.1384061.4247551.3059701.4912021.1030271.4133661.5003951.3744931.2869181.433209
Hungary0.9812670.8258261.2582380.7123780.7462691.1333130.7979350.8201010.8028430.6872470.7684470.712645
Ireland1.1596791.2012011.3181551.3802321.0074631.3420821.5489321.2563251.1055541.3068951.5461531.480719
Italy4.5495095.2552555.0329545.1647376.6417915.5770955.7967615.6709135.8041596.4330785.9346366.421728
Latvia0.0000000.0000000.0599160.0000000.0373130.2385920.2346870.2617340.1316140.1013970.1203590.142529
Lithuania0.0892060.1501500.5991610.1780940.1492540.3877120.2816240.4013260.5001320.4055880.3518190.300895
Luxembourg0.1784120.2252250.1797480.0445240.2985070.2684160.3050930.2617340.2369040.2478590.3240440.403832
Malta0.0892060.0000000.0000000.0000000.0373130.0298240.0000000.0000000.0789680.0225330.0648090.079183
Netherlands6.4228374.8048054.6135414.5859315.1865674.9507905.1631075.1823425.3698345.2951784.8976955.186476
Norway2.6761823.1531533.5949673.3837932.5000002.6245152.4407422.3381612.9218222.8503832.8145542.462586
Poland1.5165032.3273272.2168962.5378452.7238812.4455712.2999301.9193861.8162672.0392072.5553192.795154
Portugal1.4272971.7267272.0970641.8254671.6791041.7297941.8540252.0764261.7899451.6561511.8887141.678676
Romania0.6244421.1261260.7789100.7123780.9328360.7754250.8683410.9945910.8423270.6196480.4444030.490934
Slovakia0.8028550.4504500.3594970.4452360.4477610.6561290.4224360.4711220.3553570.3830550.3333020.356323
Slovenia0.6244420.5255260.5991610.5342830.6343280.8052490.5163110.8201010.7107130.3492560.4444030.316731
Spain4.4603033.6786794.1342124.9866435.1492545.5174475.4447314.7635674.6854444.3488064.3792245.067701
Sweden3.0330063.7537543.5350513.6954594.2164185.0700865.4682004.0481595.0671234.0446153.9625964.038324
Switzerland3.3006243.7537543.2354703.2947462.7611942.8332843.6376443.4025483.0665962.9630463.2311823.539473
United Kingdom32.38180231.30630631.81545829.38557429.14179129.19773331.68270432.05374331.98210135.01577334.42273933.613113
\n
" }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "year_percent_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique', normalize='columns').fillna(0)*100\n", "year_percent_pivot" ] }, { "cell_type": "code", "execution_count": 44, "id": "42dc8be7", "metadata": {}, "outputs": [ { "data": { "text/plain": "
", "image/png": "\n" }, "metadata": {}, "output_type": "display_data" } ], "source": [ "f, ax = plt.subplots(figsize=(15, 15))\n", "g = sns.heatmap(year_percent_pivot, annot=True, fmt='.1f', linewidths=(.5), ax=ax, cbar=False)\n", "for t in ax.texts: t.set_text(t.get_text() + \" %\")\n", "g.set(xlabel=\"\", ylabel=\"\")\n", "for i in range(year_percent_pivot.shape[1]+1):\n", " ax.axvline(i, color='white', lw=10)" ] }, { "cell_type": "code", "execution_count": 44, "id": "e7b754ea", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 45, "id": "48f2898f", "metadata": {}, "outputs": [], "source": [ "# Institutional collab" ] }, { "cell_type": "code", "execution_count": 45, "id": "3a9538e1", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 46, "id": "6bb0e68d", "metadata": {}, "outputs": [], "source": [ "color_discrete_map= {'China': '#EF553B',\n", " 'EU': '#636EFA',\n", " 'Non-EU associate': '#00CC96'}" ] }, { "cell_type": "code", "execution_count": 47, "id": "df8701eb", "metadata": {}, "outputs": [], "source": [ "TOPN = 25\n", "\n", "\n", "wos_univ_locations = wos_univ.merge(wos_country_types, on=\"Country\")\n", "wos_univ_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]].drop_duplicates()\n", "wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n", "wos_univ_collabs[\"Institution_harm_label\"] = wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n", "\n", "\n", "wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", "wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", "\n", "wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"EU\"]\n", "\n", "data_eu = (wos_univ_eu.groupby([\"Country\",\"Institution_harm_label\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by=\"Country_Type\")\n", "\n", "data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm_label\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", "\n", "data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", "\n", "\n", "for data,c_scope, y_lab, col_by, pat in zip([data_eu,data_eu_strict,data_ch],\n", " [\"European countries in scope\",\"EU-28 only\",\"China\"],\n", " [\"Institution_harm_label\",\"Institution_harm_label\",\"Institution_harm\"],\n", " [\"Country\",\"Eurovoc_Class\",\"Country_Type\"],\n", " [\"Country_Type\",None,None]):\n", " fig = px.bar(data, x=record_col, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,pattern_shape=pat,\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Institution_harm\": \"Institution\",\n", " \"Institution_harm_label\": \"Institution\",\n", " \"Country_Type\":\"Country type\",\n", " \"Eurovoc_Class\":\"Region\"\n", " },\n", " title=f\"Most visible institutions (top {TOPN} within {c_scope})\", template='plotly')\n", " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n", " width=1000, height=1000,)\n", " fig.update_traces(hovertemplate='%{x:d}')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " # fig.show(config= dict(displayModeBar = False))\n", " fig.write_html(f\"plot_html/overall_inst_collab_bar_{c_scope}.html\",config= dict(displayModeBar = False, responsive = True))" ] }, { "cell_type": "code", "execution_count": 48, "id": "31a0769d", "metadata": {}, "outputs": [], "source": [ "wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", "wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", "\n", "wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)" ] }, { "cell_type": "code", "execution_count": 49, "id": "606e1af0", "metadata": {}, "outputs": [], "source": [ "fig = px.parallel_categories(wos_univ_dipol[[\"Country_eu\",\"Domain_English\",\"Country_ch\"]])" ] }, { "cell_type": "code", "execution_count": 50, "id": "ea0951e9", "metadata": {}, "outputs": [ { "data": { "text/plain": "Index(['Country', 'Institution_harm', 'Country_Type', 'UT (Unique WOS ID)'], dtype='object')" }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_ch.columns" ] }, { "cell_type": "code", "execution_count": 51, "id": "dd4210b3", "metadata": {}, "outputs": [], "source": [ "subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n", " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", "\n", "fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Domain_English\",\"Country_ch\"]])\n", "# fig.show()" ] }, { "cell_type": "code", "execution_count": 52, "id": "2c5d1d94", "metadata": {}, "outputs": [], "source": [ "subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n", " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", "\n", "fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", "# fig.show()\n", "sub_df =wos_univ_dipol[subfilter]\n", "\n", "inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n", " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", "eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n", "ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", "\n", "inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", "\n", "mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", "data = np.where(mask,inst_co_occur,inst_co_occur)\n", "\n", "fig = px.imshow(data,\n", " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n", " x=list(inst_co_occur.columns),\n", " y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within Europe)\"\n", " )\n", "fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed',\n", " template='plotly_white',\n", " coloraxis_colorbar=dict(\n", " thicknessmode=\"pixels\", thickness=25,\n", " ticks=\"outside\", ticksuffix=\" \",\n", " dtick=20,outlinewidth=1,\n", " ))\n", "fig.update_xaxes(tickangle= -45)\n", "fig.update_yaxes(\n", " ticks=\"outside\")\n", "fig.update_xaxes(\n", " ticks=\"outside\")\n", "\n", "fig.write_html(f\"plot_html/overall_inst_collab_europe.html\",config= dict(displayModeBar = False, responsive = True))" ] }, { "cell_type": "code", "execution_count": 53, "id": "7bd7d149", "metadata": {}, "outputs": [], "source": [ "subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu_strict[\"Institution_harm_label\"]))&\n", " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", "\n", "fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", "# fig.show()\n", "sub_df =wos_univ_dipol[subfilter]\n", "\n", "inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n", " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", "eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n", "ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", "\n", "inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", "\n", "mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", "data = np.where(mask,inst_co_occur,inst_co_occur)\n", "fig = px.imshow(data,\n", " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n", " x=list(inst_co_occur.columns),\n", " y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within EU-28)\"\n", " )\n", "fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed',\n", " template='plotly_white',\n", " coloraxis_colorbar=dict(\n", " thicknessmode=\"pixels\", thickness=25,\n", " ticks=\"outside\", ticksuffix=\" \",\n", " dtick=20,outlinewidth=1,\n", " ))\n", "fig.update_xaxes(tickangle= -45)\n", "fig.update_yaxes(\n", " ticks=\"outside\")\n", "fig.update_xaxes(\n", " ticks=\"outside\")\n", "\n", "# fig.show(config= dict(displayModeBar = False))\n", "fig.write_html(f\"plot_html/overall_inst_collab_eu28.html\",config= dict(displayModeBar = False, responsive = True))" ] }, { "cell_type": "markdown", "source": [ "# Drilldown to field" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 54, "outputs": [], "source": [ "group = ['Publication Year',\"Domain_English\",'Field_English']\n", "# data = wos.groupby(['Publication Year',\"Domain_English\",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n", "\n", "\n", "data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()\n", " .fillna(0)\n", " .stack()\n", " .reset_index()\n", " .rename(columns={0:record_col}))\n", "\n", "data = data.merge(wos[[\"Domain_English\",'Field_English']].drop_duplicates(),on=\"Field_English\")\n", "\n", "data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='Field_English'),\n", " on='Field_English', suffixes=[None,\"_relative_growth\"])\n", "data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n", "\n", "data = data.sort_values(by =[\"Field_English\",\"Publication Year\"], ascending=[True,True])\n", "data[record_col+\"_cumsum\"] = (data.groupby('Field_English',as_index=False)[record_col].cumsum())" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 55, "outputs": [ { "data": { "text/plain": " Publication Year Field_English UT (Unique WOS ID) \n0 2011 Agriculture, Fisheries & Forestry 9.0 \\\n1 2012 Agriculture, Fisheries & Forestry 18.0 \n2 2013 Agriculture, Fisheries & Forestry 15.0 \n3 2014 Agriculture, Fisheries & Forestry 26.0 \n4 2015 Agriculture, Fisheries & Forestry 12.0 \n.. ... ... ... \n255 2018 Social Sciences 25.0 \n257 2019 Social Sciences 37.0 \n259 2020 Social Sciences 57.0 \n261 2021 Social Sciences 65.0 \n263 2022 Social Sciences 60.0 \n\n Domain_English Publication Year_relative_growth \n0 Applied Sciences 2011 \\\n1 Applied Sciences 2011 \n2 Applied Sciences 2011 \n3 Applied Sciences 2011 \n4 Applied Sciences 2011 \n.. ... ... \n255 Applied Sciences 2011 \n257 Applied Sciences 2011 \n259 Applied Sciences 2011 \n261 Applied Sciences 2011 \n263 Applied Sciences 2011 \n\n UT (Unique WOS ID)_relative_growth Domain_English_relative_growth \n0 0.000000 Applied Sciences \\\n1 1.000000 Applied Sciences \n2 0.666667 Applied Sciences \n3 1.888889 Applied Sciences \n4 0.333333 Applied Sciences \n.. ... ... \n255 1.272727 Applied Sciences \n257 2.363636 Applied Sciences \n259 4.181818 Applied Sciences \n261 4.909091 Applied Sciences \n263 4.454545 Applied Sciences \n\n UT (Unique WOS ID)_cumsum \n0 9.0 \n1 27.0 \n2 42.0 \n3 68.0 \n4 80.0 \n.. ... \n255 216.0 \n257 290.0 \n259 404.0 \n261 534.0 \n263 654.0 \n\n[84 rows x 8 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Publication YearField_EnglishUT (Unique WOS ID)Domain_EnglishPublication Year_relative_growthUT (Unique WOS ID)_relative_growthDomain_English_relative_growthUT (Unique WOS ID)_cumsum
02011Agriculture, Fisheries & Forestry9.0Applied Sciences20110.000000Applied Sciences9.0
12012Agriculture, Fisheries & Forestry18.0Applied Sciences20111.000000Applied Sciences27.0
22013Agriculture, Fisheries & Forestry15.0Applied Sciences20110.666667Applied Sciences42.0
32014Agriculture, Fisheries & Forestry26.0Applied Sciences20111.888889Applied Sciences68.0
42015Agriculture, Fisheries & Forestry12.0Applied Sciences20110.333333Applied Sciences80.0
...........................
2552018Social Sciences25.0Applied Sciences20111.272727Applied Sciences216.0
2572019Social Sciences37.0Applied Sciences20112.363636Applied Sciences290.0
2592020Social Sciences57.0Applied Sciences20114.181818Applied Sciences404.0
2612021Social Sciences65.0Applied Sciences20114.909091Applied Sciences534.0
2632022Social Sciences60.0Applied Sciences20114.454545Applied Sciences654.0
\n

84 rows × 8 columns

\n
" }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[data[\"Domain_English\"]==\"Applied Sciences\"]" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 56, "outputs": [ { "data": { "text/plain": " Field_English UT (Unique WOS ID)\n5 Information & Communication Technologies 15648\n4 Engineering 9232\n3 Enabling & Strategic Technologies 3940\n0 Agriculture, Fisheries & Forestry 612\n1 Built Environment & Design 537\n2 Economics & Business 15\n6 Social Sciences 1", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Field_EnglishUT (Unique WOS ID)
5Information & Communication Technologies15648
4Engineering9232
3Enabling & Strategic Technologies3940
0Agriculture, Fisheries & Forestry612
1Built Environment & Design537
2Economics & Business15
6Social Sciences1
\n
" }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wos[wos[\"Domain_English\"]==\"Applied Sciences\"].groupby(\"Field_English\", as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 59, "outputs": [], "source": [ "group = ['Publication Year',\"Domain_English\",'Field_English']\n", "# data = wos.groupby(['Publication Year',\"Domain_English\",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n", "data_complete = pd.DataFrame()\n", "\n", "for cat in sorted(wos[\"Domain_English\"].unique()):\n", "\n", " os.makedirs(rf'plot_html/{cat}',exist_ok=True)\n", " id_subset = wos[wos[\"Domain_English\"]==cat][record_col].unique()\n", "\n", " data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()\n", " .fillna(0)\n", " .stack()\n", " .reset_index()\n", " .rename(columns={0:record_col}))\n", "\n", " data = data.merge(wos[[\"Domain_English\",'Field_English']].drop_duplicates(),on=\"Field_English\")\n", "\n", " data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='Field_English'),\n", " on='Field_English', suffixes=[None,\"_relative_growth\"])\n", " data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n", "\n", " data = data.sort_values(by =[\"Field_English\",\"Publication Year\"], ascending=[True,True])\n", " data[record_col+\"_cumsum\"] = (data.groupby('Field_English',as_index=False)[record_col].cumsum())\n", "\n", "\n", "\n", " bar_data = wos[wos[\"Domain_English\"]==cat].groupby(\"Field_English\", as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)\n", "\n", " fig = px.bar(bar_data.sort_values(by=\"Field_English\"), x=record_col, y=\"Field_English\", color=\"Field_English\",barmode='relative',\n", " labels={\n", " record_col: 'Number of co-publications',\n", " },\n", " title=\"Distribution of Domains\", template='plotly')\n", " fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family=\"Montserrat\")\n", " fig.update_traces(hovertemplate='%{x:d}')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_layout(yaxis={'categoryorder':'total ascending'})\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " dom_distr = go.Figure(fig)\n", "\n", "\n", " #data segment\n", " sub_data = data[data[\"Domain_English\"]==cat]\n", " # data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n", " fig = px.line(sub_data.sort_values(ascending=[True,True], by=[\"Publication Year\",\"Field_English\"]),y=record_col,x=\"Publication Year\", color=\"Field_English\", markers=True,\n", " labels={\n", " record_col: 'Number of co-publications',\n", " group[-1]: \"Domain\",\n", " },\n", " title=\"Yearly output of co-publications\", template='plotly')\n", " fig.update_traces(hovertemplate='%{y:d}')\n", " fig.update_layout(hovermode='x unified')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "\n", " year_output_by_domain = go.Figure(fig)\n", "\n", " fig = px.line(sub_data.sort_values(ascending=[True,True], by=[\"Publication Year\",\"Field_English\"]), y=record_col+\"_relative_growth\",x=\"Publication Year\", color=\"Field_English\",\n", " markers=True,labels={\n", " record_col+\"_relative_growth\": 'Rel. growth
in co-publications (%)',\n", " group[-1]: \"Domain\",\n", " },\n", " title=\"Relative growth in the output of co-publications\", template='plotly')\n", " # fig.update_traces(hovertemplate='%{y:.2f}%')\n", "\n", " fig.update_layout(hovermode='x unified',yaxis_tickformat='.0f%',font_family=\"Montserrat\")\n", " fig.update_traces(hovertemplate='%{y:.0f}00%')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " # fig['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')\n", " # fig.update_yaxes(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')\n", "\n", " rel_output_by_domain = go.Figure(fig)\n", "\n", " fig = px.area(sub_data.sort_values(ascending=[True,True], by=[\"Publication Year\",\"Field_English\"]),y=record_col+\"_cumsum\",x=\"Publication Year\", color=\"Field_English\",line_group=\"Field_English\",\n", " labels={\n", " record_col+\"_cumsum\": 'Cumulative number of co-publications',\n", " },\n", " title=\"Cumulative number of co-publications\", template='plotly')\n", " fig.update_traces(hovertemplate='%{y:d}')\n", " fig.update_layout(hovermode='x unified')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "\n", " cumsum_by_domain = go.Figure(fig)\n", " # cumsum_by_domain.show(config= dict(displayModeBar = False))\n", "\n", " # dom_distr\n", " # year_output_by_domain\n", " # rel_output_by_domain\n", " # cumsum_by_domain\n", "\n", " figsuper = make_subplots(rows=2, cols=2, subplot_titles=[\"Distribution of domains\",\"Cumulative sum of co-publications\",\n", " \"Co-publications per year\",\"Relative growth of co-publications\"])\n", "\n", "\n", " for trace in list(dom_distr.select_traces()):\n", " trace.showlegend=False\n", " # trace.barmode\n", " figsuper.add_trace(trace,\n", " row=1, col=1\n", " )\n", "\n", " for trace in list(cumsum_by_domain.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=2\n", " )\n", "\n", " for trace in list(year_output_by_domain.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=1\n", " )\n", "\n", " for trace in list(rel_output_by_domain.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=2\n", " )\n", "\n", " # figsuper.update_layout(hovermode='x unified')\n", " figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n", " figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", " figsuper['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n", " # figsuper.layout.annotations[0].update(x=0.1)\n", " # figsuper.layout.annotations[2].update(x=0.105)\n", " # figsuper.layout.annotations[1].update(x=0.7)\n", " # figsuper.layout.annotations[3].update(x=0.7)\n", " figsuper.update_layout(title_text=f\"{cat}\")\n", "\n", " # figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", " figsuper.write_html(f\"plot_html/{cat}/{cat}_distr&trends.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", " # country contributions\n", " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n", " wos_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\"]].drop_duplicates()\n", "\n", " collab_desc = wos_collabs[wos_collabs[\"Country\"]!=\"China\"][\"Country\"].value_counts().reset_index()\n", " collab_desc[\"percent_of_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].nunique()#*100\n", " collab_desc[\"percent_contrib_in_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].size#*100\n", " collab_desc = collab_desc.merge(wos_country_types, on=\"Country\")\n", " # collab_desc\n", "\n", " c_dict = {\"count\":\"Number of co-publications\",\n", " \"percent_of_copubs\":\"Percent of co-publications\",\n", " \"percent_contrib_in_copubs\":\"Contribution to co-publications\"}\n", "\n", " color_discrete_map= {'China': '#EF553B',\n", " 'EU': '#636EFA',\n", " 'Non-EU associate': '#00CC96'}\n", "\n", " fig_dict = dict()\n", " for c in c_dict.keys():\n", " data = collab_desc[[\"Country\",c,\"Country_Type\"]]\n", " # plt.figure(figsize=(9,12))\n", " col_by=\"Country_Type\"\n", " y_lab=\"Country\"\n", " fig = px.bar(data, x=c, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,\n", " labels=dict({\n", " record_col: 'Number of co-publications',\n", " \"Institution_harm\": \"Institution\",\n", " \"Institution_harm_label\": \"Institution\",\n", " \"Country_Type\":\"Country type\",\n", " \"Eurovoc_Class\":\"Region\"\n", " },**c_dict),\n", " title=c_dict[c], template='plotly')\n", " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",\n", " yaxis={'categoryorder':'total ascending'},\n", " width=1000, height=1000,)\n", " if \"percent\" in c:\n", " fig.update_traces(hovertemplate='%{y}
%{x}')\n", " fig.update_xaxes(tickformat=\".1%\")\n", " else:\n", " fig.update_traces(hovertemplate='%{y}
%{x:d}')\n", " fig_dict[c] = go.Figure(fig)\n", "\n", " figsuper = make_subplots(rows=1, cols=3, subplot_titles =list(c_dict.values()))\n", " for i,f in enumerate(fig_dict.keys()):\n", " sfig = fig_dict[f]\n", " for trace in list(sfig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=1, col=i+1)\n", "\n", " figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative',yaxis2={'categoryorder':'total ascending'},yaxis3={'categoryorder':'total ascending'})\n", " figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", " # figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", " figsuper.write_html(f\"plot_html/{cat}/{cat}_europe_contribution_bar.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", " # intraeurope collabs\n", " wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n", " wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n", " EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", "\n", " eu_list = wos_collabs_EU.groupby(['Country_x'])[record_col].count().sort_values(ascending=False).index\n", "\n", " EU_co_occur = EU_co_occur.reindex(index = eu_list, columns=eu_list)\n", "\n", " # Generate a mask for the upper triangle\n", " mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n", " data = np.where(mask,None,EU_co_occur)\n", "\n", " fig = px.imshow(data,\n", " labels=dict(x=\"Country\", y=\"Country\", color=\"Co-publication with China\"),\n", " x=list(EU_co_occur.columns),\n", " y=list(EU_co_occur.index), title=\"Intraeuropean patterns
Co-occurences of countries in chinese co-publications\"\n", " )\n", " fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed', template='plotly_white')\n", " # fig.update_traces(hovertemplate='%{y}
%{x}
Co-publications: %{hovertext}')\n", " fig.update_xaxes(tickangle= -90)\n", " fig.update_yaxes(\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " ticks=\"outside\")\n", " # fig.show(config= dict(displayModeBar = False,responsive=True))\n", " fig.write_html(f\"plot_html/{cat}/{cat}_intraeurope_collabs.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", " # country trends\n", " collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n", " collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n", "\n", " data = (collab_year.groupby(['Publication Year',\"Country\"])[record_col]\n", " .nunique(dropna=False).unstack()\n", " .fillna(0)\n", " .stack()\n", " .reset_index()\n", " .rename(columns={0:record_col}))\n", " data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset=\"Country\"),\n", " on=[\"Country\"], suffixes=[None,\"_relative_growth\"])\n", " data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]*100\n", " data = data.sort_values(by =[\"Country\",\"Publication Year\"], ascending=[True,True])\n", " data[record_col+\"_cumsum\"] = (data.groupby('Country',as_index=False)[record_col].cumsum())\n", " data = data.merge(wos_country_types, on='Country')\n", " # data\n", "\n", " data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n", " fig = px.choropleth(data[data[\"Publication Year\"] == 2022], locations=\"ISO3\", color=record_col+\"_cumsum\", hover_name=\"Country\",\n", " scope=\"europe\", template='plotly',\n", " range_color=[data[record_col+\"_cumsum\"].min(),data[record_col+\"_cumsum\"].max()],hover_data=[\"Eurovoc_Class\"])\n", " # original: '%{hovertext}

ISO3=%{location}
Eurovoc_Class=%{customdata[0]}
UT (Unique WOS ID)_cumsum=%{z}'\n", "\n", " fig.update_traces(hovertemplate='%{hovertext}'\n", " '
Region: %{customdata[0]}
'\n", " 'Co-pubications: %{z:d}')\n", "\n", " cumsum_country = go.Figure(fig)\n", "\n", " figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Number of publications (2022)\",\"Cumulative number of co-publications\",\n", " \"Yearly output of co-publications\",\"Relative growth of co-publications\"],\n", " specs=[\n", " [{\"type\": \"geo\", \"rowspan\":3}, {\"type\": \"xy\"}],\n", " [None,{\"type\": \"xy\"}],\n", " [None, {\"type\": \"xy\"}]\n", " ])\n", "\n", " for trace in list(cumsum_country.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=1\n", " )\n", "\n", " fig = px.area(data.sort_values(ascending=True, by='Publication Year'), y=record_col+\"_cumsum\",\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",\n", " line_group=\"Country\",\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Cumulative number of co-publications\",\n", " hover_name= \"Country\")\n", " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", "\n", " for trace in list(fig.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=2\n", " )\n", "\n", "\n", " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", " y=record_col,\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",\n", " line_group=\"Country\",\n", " markers=True,\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Yearly output of co-publications\",hover_name= \"Country\")\n", " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", "\n", " for trace in list(fig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=2\n", " )\n", "\n", " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", " y=record_col+\"_relative_growth\",\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n", " labels={\n", " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Relative growth of co-publications\", template='plotly',hover_name= \"Country\")\n", " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}%')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", "\n", " for trace in list(fig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=3, col=2\n", " )\n", "\n", " figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_layout({'template':\"plotly\"})\n", " figsuper.layout[\"geo\"][\"scope\"] = 'europe'\n", " figsuper.update_coloraxes(colorbar=dict(lenmode='fraction',len=0.55, orientation=\"v\",yanchor='top', title=\"Co-publications\",\n", " ticks=\"outside\", ticksuffix=\" \",outlinewidth=0.5))\n", " # figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", " figsuper.write_html(f\"plot_html/{cat}/{cat}_country_trends_overall.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", " TOPN = 25\n", " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n", " wos_univ_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]].drop_duplicates()\n", " wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n", " wos_univ_collabs[\"Institution_harm_label\"] = wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n", "\n", "\n", " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", "\n", " wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"EU\"]\n", "\n", " data_eu = (wos_univ_eu.groupby([\"Country\",\"Institution_harm_label\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by=\"Country_Type\")\n", "\n", " data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm_label\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", "\n", " data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", "\n", "\n", " for data,c_scope, y_lab, col_by, pat in zip([data_eu,data_eu_strict,data_ch],\n", " [\"European countries in scope\",\"EU-28 only\",\"China\"],\n", " [\"Institution_harm_label\",\"Institution_harm_label\",\"Institution_harm\"],\n", " [\"Country\",\"Eurovoc_Class\",\"Country_Type\"],\n", " [\"Country_Type\",None,None]):\n", " fig = px.bar(data, x=record_col, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,pattern_shape=pat,\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Institution_harm\": \"Institution\",\n", " \"Institution_harm_label\": \"Institution\",\n", " \"Country_Type\":\"Country type\",\n", " \"Eurovoc_Class\":\"Region\"\n", " },\n", " title=f\"Most visible institutions (top {TOPN} within {c_scope})\", template='plotly')\n", " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n", " width=1000, height=1000,)\n", " fig.update_traces(hovertemplate='%{x:d}')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " # fig.show(config= dict(displayModeBar = False))\n", " fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_bar_{c_scope}.html\",config= dict(displayModeBar = False, responsive = True))\n", " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", "\n", " wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)\n", "\n", " subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n", " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", "\n", " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", " # fig.show()\n", " sub_df = wos_univ_dipol[subfilter]\n", "\n", " inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n", " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", " eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n", " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", "\n", " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", "\n", " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", " data = np.where(mask,inst_co_occur,inst_co_occur)\n", "\n", " fig = px.imshow(data,\n", " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n", " x=list(inst_co_occur.columns),\n", " y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within Europe)\"\n", " )\n", " fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed',\n", " template='plotly_white',\n", " coloraxis_colorbar=dict(\n", " thicknessmode=\"pixels\", thickness=25,\n", " ticks=\"outside\", ticksuffix=\" \",\n", " dtick=20,outlinewidth=1,\n", " ))\n", " fig.update_xaxes(tickangle= -45)\n", " fig.update_yaxes(\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " ticks=\"outside\")\n", "\n", " fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_europe.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", "\n", " subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu_strict[\"Institution_harm_label\"]))&\n", " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", "\n", " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", " # fig.show()\n", " sub_df =wos_univ_dipol[subfilter]\n", "\n", " inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n", " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", " eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n", " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", "\n", " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", "\n", " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", " data = np.where(mask,inst_co_occur,inst_co_occur)\n", " fig = px.imshow(data,\n", " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n", " x=list(inst_co_occur.columns),\n", " y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within EU-28)\"\n", " )\n", " fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed',\n", " template='plotly_white',\n", " coloraxis_colorbar=dict(\n", " thicknessmode=\"pixels\", thickness=25,\n", " ticks=\"outside\", ticksuffix=\" \",\n", " dtick=20,outlinewidth=1,\n", " ))\n", " fig.update_xaxes(tickangle= -45)\n", " fig.update_yaxes(\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " ticks=\"outside\")\n", "\n", " # fig.show(config= dict(displayModeBar = False))\n", " fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_eu28.html\",config= dict(displayModeBar = False, responsive = True))" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# Drill down to subfield" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 63, "outputs": [], "source": [ "group = ['Publication Year',\"Domain_English\",'Field_English']\n", "# data = wos.groupby(['Publication Year',\"Domain_English\",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])\n", "data_complete = pd.DataFrame()\n", "\n", "colt=[\"Domain_English\",'Field_English','SubField_English']\n", "\n", "for c in colt:\n", " wos[c] = wos[c].str.strip()\n", "\n", "for cat in sorted(wos[\"Domain_English\"].unique()):\n", " os.makedirs(rf'plot_html/{cat}',exist_ok=True)\n", " wos_sub = wos[wos[\"Domain_English\"]==cat]\n", "\n", " for cat2 in sorted(wos_sub[\"Field_English\"].unique()):\n", " os.makedirs(rf'plot_html/{cat}/{cat2}',exist_ok=True)\n", "\n", " id_subset = wos[((wos[\"Domain_English\"]==cat)&\n", " (wos[\"Field_English\"]==cat2))][record_col].unique()\n", "\n", " data = (wos[wos[record_col].isin(id_subset)]\n", " .groupby(['Publication Year','SubField_English'],)[record_col].nunique(dropna=False).unstack()\n", " .fillna(0)\n", " .stack()\n", " .reset_index()\n", " .rename(columns={0:record_col}))\n", "\n", " data = data.merge(wos_sub[[\"Field_English\",'SubField_English']]\n", " .drop_duplicates(),on=\"SubField_English\")\n", "\n", " data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='SubField_English'),\n", " on='SubField_English', suffixes=[None,\"_relative_growth\"])\n", " data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n", "\n", " data = data.sort_values(by =[\"SubField_English\",\"Publication Year\"], ascending=[True,True])\n", " data[record_col+\"_cumsum\"] = (data.groupby('SubField_English',as_index=False)[record_col].cumsum())\n", "\n", "\n", "\n", " bar_data = (wos[((wos[\"Domain_English\"]==cat)&\n", " (wos[\"Field_English\"]==cat2))]\n", " .groupby(\"SubField_English\", as_index=False)[record_col]\n", " .nunique()\n", " .sort_values(ascending=False, by=record_col))\n", "\n", " fig = px.bar(bar_data.sort_values(by=\"SubField_English\"),\n", " x=record_col, y=\"SubField_English\", color=\"SubField_English\",barmode='relative',\n", " labels={\n", " record_col: 'Number of co-publications',\n", " },\n", " title=\"Distribution of Domains\", template='plotly')\n", " fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family=\"Montserrat\")\n", " fig.update_traces(hovertemplate='%{x:d}')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_layout(yaxis={'categoryorder':'total ascending'})\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " dom_distr = go.Figure(fig)\n", "\n", "\n", " #data segment\n", " sub_data = data[data[\"Field_English\"]==cat2]\n", " # data_complete = pd.concat([data_complete,sub_data], ignore_index=True)\n", " fig = px.line(sub_data.sort_values(ascending=[True,True], by=[\"Publication Year\",\"SubField_English\"]),y=record_col,x=\"Publication Year\", color=\"SubField_English\", markers=True,\n", " labels={\n", " record_col: 'Number of co-publications',\n", " group[-1]: \"Domain\",\n", " },\n", " title=\"Yearly output of co-publications\", template='plotly')\n", " fig.update_traces(hovertemplate='%{y:d}')\n", " fig.update_layout(hovermode='x unified')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "\n", " year_output_by_domain = go.Figure(fig)\n", "\n", " fig = px.line(sub_data.sort_values(ascending=[True,True], by=[\"Publication Year\",\"SubField_English\"]), y=record_col+\"_relative_growth\",x=\"Publication Year\", color=\"SubField_English\",\n", " markers=True,labels={\n", " record_col+\"_relative_growth\": 'Rel. growth
in co-publications (%)',\n", " group[-1]: \"Domain\",\n", " },\n", " title=\"Relative growth in the output of co-publications\", template='plotly')\n", " # fig.update_traces(hovertemplate='%{y:.2f}%')\n", "\n", " fig.update_layout(hovermode='x unified',yaxis_tickformat='.0f%',font_family=\"Montserrat\")\n", " fig.update_traces(hovertemplate='%{y:.0f}00%')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " # fig['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')\n", " # fig.update_yaxes(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')\n", "\n", " rel_output_by_domain = go.Figure(fig)\n", "\n", " fig = px.area(sub_data.sort_values(ascending=[True,True], by=[\"Publication Year\",\"SubField_English\"]),y=record_col+\"_cumsum\",x=\"Publication Year\", color=\"SubField_English\",line_group=\"SubField_English\",\n", " labels={\n", " record_col+\"_cumsum\": 'Cumulative number of co-publications',\n", " },\n", " title=\"Cumulative number of co-publications\", template='plotly')\n", " fig.update_traces(hovertemplate='%{y:d}')\n", " fig.update_layout(hovermode='x unified')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", "\n", " cumsum_by_domain = go.Figure(fig)\n", " # cumsum_by_domain.show(config= dict(displayModeBar = False))\n", "\n", " # dom_distr\n", " # year_output_by_domain\n", " # rel_output_by_domain\n", " # cumsum_by_domain\n", "\n", " figsuper = make_subplots(rows=2, cols=2, subplot_titles=[\"Distribution of domains\",\"Cumulative sum of co-publications\",\n", " \"Co-publications per year\",\"Relative growth of co-publications\"])\n", "\n", "\n", " for trace in list(dom_distr.select_traces()):\n", " trace.showlegend=False\n", " # trace.barmode\n", " figsuper.add_trace(trace,\n", " row=1, col=1\n", " )\n", "\n", " for trace in list(cumsum_by_domain.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=2\n", " )\n", "\n", " for trace in list(year_output_by_domain.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=1\n", " )\n", "\n", " for trace in list(rel_output_by_domain.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=2\n", " )\n", "\n", " # figsuper.update_layout(hovermode='x unified')\n", " figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')\n", " figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", " figsuper['layout']['yaxis4'].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n", " # figsuper.layout.annotations[0].update(x=0.1)\n", " # figsuper.layout.annotations[2].update(x=0.105)\n", " # figsuper.layout.annotations[1].update(x=0.7)\n", " # figsuper.layout.annotations[3].update(x=0.7)\n", " figsuper.update_layout(title_text=f\"{cat}: {cat2}\")\n", "\n", " # figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", " figsuper.write_html(f\"plot_html/{cat}/{cat2}/{cat2}_distr&trends.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", " # country contributions\n", " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n", " wos_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\"]].drop_duplicates()\n", "\n", " collab_desc = wos_collabs[wos_collabs[\"Country\"]!=\"China\"][\"Country\"].value_counts().reset_index()\n", " collab_desc[\"percent_of_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].nunique()#*100\n", " collab_desc[\"percent_contrib_in_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].size#*100\n", " collab_desc = collab_desc.merge(wos_country_types, on=\"Country\")\n", " # collab_desc\n", "\n", " c_dict = {\"count\":\"Number of co-publications\",\n", " \"percent_of_copubs\":\"Percent of co-publications\",\n", " \"percent_contrib_in_copubs\":\"Contribution to co-publications\"}\n", "\n", " color_discrete_map= {'China': '#EF553B',\n", " 'EU': '#636EFA',\n", " 'Non-EU associate': '#00CC96'}\n", "\n", " fig_dict = dict()\n", " for c in c_dict.keys():\n", " data = collab_desc[[\"Country\",c,\"Country_Type\"]]\n", " # plt.figure(figsize=(9,12))\n", " col_by=\"Country_Type\"\n", " y_lab=\"Country\"\n", " fig = px.bar(data, x=c, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,\n", " labels=dict({\n", " record_col: 'Number of co-publications',\n", " \"Institution_harm\": \"Institution\",\n", " \"Institution_harm_label\": \"Institution\",\n", " \"Country_Type\":\"Country type\",\n", " \"Eurovoc_Class\":\"Region\"\n", " },**c_dict),\n", " title=c_dict[c], template='plotly')\n", " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",\n", " yaxis={'categoryorder':'total ascending'},\n", " width=1000, height=1000,)\n", " if \"percent\" in c:\n", " fig.update_traces(hovertemplate='%{y}
%{x}')\n", " fig.update_xaxes(tickformat=\".1%\")\n", " else:\n", " fig.update_traces(hovertemplate='%{y}
%{x:d}')\n", " fig_dict[c] = go.Figure(fig)\n", "\n", " figsuper = make_subplots(rows=1, cols=3, subplot_titles =list(c_dict.values()))\n", " for i,f in enumerate(fig_dict.keys()):\n", " sfig = fig_dict[f]\n", " for trace in list(sfig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=1, col=i+1)\n", "\n", " figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative',yaxis2={'categoryorder':'total ascending'},yaxis3={'categoryorder':'total ascending'})\n", " figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", " # figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", " figsuper.write_html(f\"plot_html/{cat}/{cat2}/{cat2}_europe_contribution_bar.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", " # intraeurope collabs\n", " wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n", " wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n", " EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", "\n", " eu_list = wos_collabs_EU.groupby(['Country_x'])[record_col].count().sort_values(ascending=False).index\n", "\n", " EU_co_occur = EU_co_occur.reindex(index = eu_list, columns=eu_list)\n", "\n", " # Generate a mask for the upper triangle\n", " mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n", " data = np.where(mask,None,EU_co_occur)\n", "\n", " fig = px.imshow(data,\n", " labels=dict(x=\"Country\", y=\"Country\", color=\"Co-publication with China\"),\n", " x=list(EU_co_occur.columns),\n", " y=list(EU_co_occur.index), title=\"Intraeuropean patterns
Co-occurences of countries in chinese co-publications\"\n", " )\n", " fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed', template='plotly_white')\n", " # fig.update_traces(hovertemplate='%{y}
%{x}
Co-publications: %{hovertext}')\n", " fig.update_xaxes(tickangle= -90)\n", " fig.update_yaxes(\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " ticks=\"outside\")\n", " # fig.show(config= dict(displayModeBar = False,responsive=True))\n", " fig.write_html(f\"plot_html/{cat}/{cat2}/{cat2}_intraeurope_collabs.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", " # country trends\n", " collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n", " collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n", "\n", " data = (collab_year.groupby(['Publication Year',\"Country\"])[record_col]\n", " .nunique(dropna=False).unstack()\n", " .fillna(0)\n", " .stack()\n", " .reset_index()\n", " .rename(columns={0:record_col}))\n", " data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset=\"Country\"),\n", " on=[\"Country\"], suffixes=[None,\"_relative_growth\"])\n", " data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]*100\n", " data = data.sort_values(by =[\"Country\",\"Publication Year\"], ascending=[True,True])\n", " data[record_col+\"_cumsum\"] = (data.groupby('Country',as_index=False)[record_col].cumsum())\n", " data = data.merge(wos_country_types, on='Country')\n", " # data\n", "\n", " data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n", " fig = px.choropleth(data[data[\"Publication Year\"] == 2022], locations=\"ISO3\", color=record_col+\"_cumsum\", hover_name=\"Country\",\n", " scope=\"europe\", template='plotly',\n", " range_color=[data[record_col+\"_cumsum\"].min(),data[record_col+\"_cumsum\"].max()],hover_data=[\"Eurovoc_Class\"])\n", " # original: '%{hovertext}

ISO3=%{location}
Eurovoc_Class=%{customdata[0]}
UT (Unique WOS ID)_cumsum=%{z}'\n", "\n", " fig.update_traces(hovertemplate='%{hovertext}'\n", " '
Region: %{customdata[0]}
'\n", " 'Co-pubications: %{z:d}')\n", "\n", " cumsum_country = go.Figure(fig)\n", "\n", " figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Number of publications (2022)\",\"Cumulative number of co-publications\",\n", " \"Yearly output of co-publications\",\"Relative growth of co-publications\"],\n", " specs=[\n", " [{\"type\": \"geo\", \"rowspan\":3}, {\"type\": \"xy\"}],\n", " [None,{\"type\": \"xy\"}],\n", " [None, {\"type\": \"xy\"}]\n", " ])\n", "\n", " for trace in list(cumsum_country.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=1\n", " )\n", "\n", " fig = px.area(data.sort_values(ascending=True, by='Publication Year'), y=record_col+\"_cumsum\",\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",\n", " line_group=\"Country\",\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Cumulative number of co-publications\",\n", " hover_name= \"Country\")\n", " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", "\n", " for trace in list(fig.select_traces()):\n", " figsuper.add_trace(trace,\n", " row=1, col=2\n", " )\n", "\n", "\n", " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", " y=record_col,\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",\n", " line_group=\"Country\",\n", " markers=True,\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Yearly output of co-publications\",hover_name= \"Country\")\n", " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", "\n", " for trace in list(fig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=2, col=2\n", " )\n", "\n", " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", " y=record_col+\"_relative_growth\",\n", " x='Publication Year',\n", " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n", " labels={\n", " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n", " },\n", " title=\"Relative growth of co-publications\", template='plotly',hover_name= \"Country\")\n", " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}%')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", "\n", " for trace in list(fig.select_traces()):\n", " trace.showlegend=False\n", " figsuper.add_trace(trace,\n", " row=3, col=2\n", " )\n", "\n", " figsuper.update_yaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_xaxes(\n", " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", " ticks=\"outside\")\n", " figsuper.update_layout({'template':\"plotly\"})\n", " figsuper.layout[\"geo\"][\"scope\"] = 'europe'\n", " figsuper.update_coloraxes(colorbar=dict(lenmode='fraction',len=0.55, orientation=\"v\",yanchor='top', title=\"Co-publications\",\n", " ticks=\"outside\", ticksuffix=\" \",outlinewidth=0.5))\n", " # figsuper.show(config= dict(displayModeBar = False, responsive = True))\n", " figsuper.write_html(f\"plot_html/{cat}/{cat2}/{cat2}_country_trends_overall.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", " TOPN = 25\n", " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n", " wos_univ_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]].drop_duplicates()\n", " wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n", " wos_univ_collabs[\"Institution_harm_label\"] = wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n", "\n", "\n", " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", "\n", " wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"EU\"]\n", "\n", " data_eu = (wos_univ_eu.groupby([\"Country\",\"Institution_harm_label\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by=\"Country_Type\")\n", "\n", " data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm_label\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", "\n", " data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", "\n", "\n", " for data,c_scope, y_lab, col_by, pat in zip([data_eu,data_eu_strict,data_ch],\n", " [\"European countries in scope\",\"EU-28 only\",\"China\"],\n", " [\"Institution_harm_label\",\"Institution_harm_label\",\"Institution_harm\"],\n", " [\"Country\",\"Eurovoc_Class\",\"Country_Type\"],\n", " [\"Country_Type\",None,None]):\n", " fig = px.bar(data, x=record_col, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,pattern_shape=pat,\n", " labels={\n", " record_col: 'Number of co-publications',\n", " \"Institution_harm\": \"Institution\",\n", " \"Institution_harm_label\": \"Institution\",\n", " \"Country_Type\":\"Country type\",\n", " \"Eurovoc_Class\":\"Region\"\n", " },\n", " title=f\"Most visible institutions (top {TOPN} within {c_scope})\", template='plotly')\n", " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n", " width=1000, height=1000,)\n", " fig.update_traces(hovertemplate='%{x:d}')\n", " fig.add_shape(\n", " # Rectangle with reference to the plot\n", " type=\"rect\",\n", " xref=\"paper\",\n", " yref=\"paper\",\n", " x0=0,\n", " y0=0,\n", " x1=1.0,\n", " y1=1.0,\n", " line=dict(\n", " color=\"black\",\n", " width=0.5,\n", " )\n", " )\n", " fig.update_yaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " showgrid=True,\n", " ticks=\"outside\")\n", " # fig.show(config= dict(displayModeBar = False))\n", " fig.write_html(f\"plot_html/{cat}/{cat2}/{cat2}_overall_inst_collab_bar_{c_scope}.html\",config= dict(displayModeBar = False, responsive = True))\n", " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", "\n", " wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)\n", "\n", " subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n", " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", "\n", " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", " # fig.show()\n", " sub_df = wos_univ_dipol[subfilter]\n", "\n", " inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n", " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", " eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n", " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", "\n", " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", "\n", " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", " data = np.where(mask,inst_co_occur,inst_co_occur)\n", "\n", " fig = px.imshow(data,\n", " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n", " x=list(inst_co_occur.columns),\n", " y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within Europe)\"\n", " )\n", " fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed',\n", " template='plotly_white',\n", " coloraxis_colorbar=dict(\n", " thicknessmode=\"pixels\", thickness=25,\n", " ticks=\"outside\", ticksuffix=\" \",\n", " dtick=20,outlinewidth=1,\n", " ))\n", " fig.update_xaxes(tickangle= -45)\n", " fig.update_yaxes(\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " ticks=\"outside\")\n", "\n", " fig.write_html(f\"plot_html/{cat}/{cat2}/{cat2}_overall_inst_collab_europe.html\",config= dict(displayModeBar = False, responsive = True))\n", "\n", "\n", "\n", " subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu_strict[\"Institution_harm_label\"]))&\n", " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", "\n", " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", " # fig.show()\n", " sub_df =wos_univ_dipol[subfilter]\n", "\n", " inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n", " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", "\n", " eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n", " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", "\n", " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", "\n", " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", " data = np.where(mask,inst_co_occur,inst_co_occur)\n", " fig = px.imshow(data,\n", " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n", " x=list(inst_co_occur.columns),\n", " y=list(inst_co_occur.index), title=f\"Most visible institutions (top {TOPN} within EU-28)\"\n", " )\n", " fig.update_layout(title_x=0.5,\n", " width=1000, height=1000,\n", " xaxis_showgrid=False,\n", " yaxis_showgrid=False,\n", " yaxis_autorange='reversed',\n", " template='plotly_white',\n", " coloraxis_colorbar=dict(\n", " thicknessmode=\"pixels\", thickness=25,\n", " ticks=\"outside\", ticksuffix=\" \",\n", " dtick=20,outlinewidth=1,\n", " ))\n", " fig.update_xaxes(tickangle= -45)\n", " fig.update_yaxes(\n", " ticks=\"outside\")\n", " fig.update_xaxes(\n", " ticks=\"outside\")\n", "\n", " # fig.show(config= dict(displayModeBar = False))\n", " fig.write_html(f\"plot_html/{cat}/{cat2}/{cat2}_overall_inst_collab_eu28.html\",config= dict(displayModeBar = False, responsive = True))" ], "metadata": { "collapsed": false } } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" } }, "nbformat": 4, "nbformat_minor": 5 }