diff --git a/WOS/wos_analysis/wos_analyses.ipynb b/WOS/wos_analysis/wos_analyses.ipynb index 6ee3ade..f9cd4dd 100644 --- a/WOS/wos_analysis/wos_analyses.ipynb +++ b/WOS/wos_analysis/wos_analyses.ipynb @@ -238094,7 +238094,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -239236,7 +239236,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -240285,7 +240285,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -243263,7 +243263,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -246137,7 +246137,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -248389,7 +248389,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -250526,7 +250526,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -250662,7 +250662,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_20376\\1606125869.py:24: SettingWithCopyWarning:\n", + "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_13036\\1606125869.py:24: SettingWithCopyWarning:\n", "\n", "\n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", @@ -250670,7 +250670,7 @@ "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "\n", - "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_20376\\1606125869.py:24: SettingWithCopyWarning:\n", + "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_13036\\1606125869.py:24: SettingWithCopyWarning:\n", "\n", "\n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", @@ -250678,7 +250678,7 @@ "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "\n", - "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_20376\\1606125869.py:24: SettingWithCopyWarning:\n", + "C:\\Users\\radvanyi\\AppData\\Local\\Temp\\ipykernel_13036\\1606125869.py:24: SettingWithCopyWarning:\n", "\n", "\n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", @@ -251983,7 +251983,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -254032,7 +254032,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -256045,7 +256045,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -262809,7 +262809,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -269623,7 +269623,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -273697,7 +273697,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -278067,7 +278067,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -282558,7 +282558,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -287072,7 +287072,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -288332,7 +288332,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -289368,7 +289368,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -290332,7 +290332,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -292114,7 +292114,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -293847,7 +293847,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -295576,7 +295576,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -297312,7 +297312,7 @@ "plotlyServerURL": "https://plotly.com" } }, - "text/html": "
" + "text/html": "
" }, "metadata": {}, "output_type": "display_data" @@ -299127,7 +299127,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 76, "outputs": [], "source": [ "%%capture\n", @@ -299630,7 +299630,7 @@ " data = np.where(mask,inst_co_occur,inst_co_occur)\n", "\n", " fig = px.imshow(data,\n", - " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n", + " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),text_auto=True,\n", " x=list(inst_co_occur.columns),\n", " y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes within Europe ({t})\"\n", " )\n", @@ -299683,7 +299683,7 @@ " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", " data = np.where(mask,inst_co_occur,inst_co_occur)\n", " fig = px.imshow(data,\n", - " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),\n", + " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),text_auto=True,\n", " x=list(inst_co_occur.columns),\n", " y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes within EU-28 ({t})\"\n", " )\n", @@ -299721,6 +299721,862 @@ "metadata": { "collapsed": false } + }, + { + "cell_type": "code", + "execution_count": 78, + "outputs": [], + "source": [ + "%%capture\n", + "# Trending fields\n", + "# Build environment & design\n", + "\n", + "# Adding trending subfields\n", + "# Applied sciences\n", + "trending_topics = [\"Distributed Computing\", \"Nanoscience & Nanotechnology\", \"Building & Construction\"]\n", + "\n", + "# Natural Sciences -> Analytical chemistry\n", + "\n", + "trending_topics = [\"Distributed Computing\", \"Nanoscience & Nanotechnology\", \"Building & Construction\",\"Analytical Chemistry\"]\n", + "for t in trending_topics:\n", + " os.makedirs(rf'plot_html/PPT_plots/trending_topics/{t}',exist_ok=True)\n", + "\n", + " if t == \"Analytical Chemistry\":\n", + " subset = \"Natural Sciences\"\n", + " else:\n", + " subset = \"Applied 
Sciences\"\n", + "\n", + "\n", + " id_subset = wos[((wos[\"Domain_English\"]==subset)&\n", + " (wos[\"SubField_English\"]==t))][record_col].unique()\n", + "\n", + " data = (wos[wos[record_col].isin(id_subset)]\n", + " .groupby(['Publication Year','SubField_English'],)[record_col].nunique(dropna=False).unstack()\n", + " .fillna(0)\n", + " .stack()\n", + " .reset_index()\n", + " .rename(columns={0:record_col}))\n", + " print(data)\n", + "\n", + " data = data.merge(wos[wos[record_col].isin(id_subset)][[\"Domain_English\",'SubField_English']].drop_duplicates(),on=\"SubField_English\")\n", + "\n", + " data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset='SubField_English'),\n", + " on='SubField_English', suffixes=[None,\"_relative_growth\"])\n", + " data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n", + "\n", + " data = data.sort_values(by =[\"SubField_English\",\"Publication Year\"], ascending=[True,True])\n", + " data[record_col+\"_cumsum\"] = (data.groupby('SubField_English',as_index=False)[record_col].cumsum())\n", + "\n", + " # country contributions\n", + " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n", + " wos_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\"]].drop_duplicates()\n", + "\n", + " collab_desc = wos_collabs[wos_collabs[\"Country\"]!=\"China\"][\"Country\"].value_counts().reset_index()\n", + " collab_desc[\"percent_of_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].nunique()#*100\n", + " collab_desc[\"percent_contrib_in_copubs\"] = collab_desc[\"count\"]/wos_collabs[record_col].size#*100\n", + " collab_desc = collab_desc.merge(wos_country_types, on=\"Country\")\n", + " # collab_desc\n", + "\n", + " c_dict = {\"count\":\"Number of co-publications\",\n", + " 
\"percent_of_copubs\":\"Percent of co-publications\",\n", + " \"percent_contrib_in_copubs\":\"Contribution to co-publications\"}\n", + "\n", + " color_discrete_map= {'China': '#EF553B',\n", + " 'EU': '#636EFA',\n", + " 'Non-EU associate': '#00CC96'}\n", + "\n", + " fig_dict = dict()\n", + " for c in c_dict.keys():\n", + " data = collab_desc[[\"Country\",c,\"Country_Type\"]]\n", + " data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n", + " col_by=\"Country_Type\"\n", + " y_lab=\"ISO3\"\n", + " fig = px.bar(data, x=c, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,text_auto=True,\n", + " labels=dict({\n", + " record_col: 'Number of co-publications',\n", + " \"Institution_harm\": \"Institution\",\n", + " \"Institution_harm_label\": \"Institution\",\n", + " \"Country_Type\":\"Country type\",\n", + " \"Eurovoc_Class\":\"Region\"\n", + " },**c_dict),\n", + " title=c_dict[c], template='plotly')\n", + " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",\n", + " yaxis={'categoryorder':'total ascending'},\n", + " width=1000, height=1000,)\n", + " if \"percent\" in c:\n", + " fig.update_traces(hovertemplate='%{y}
%{x}')\n", + " fig.update_xaxes(tickformat=\".1%\")\n", + " else:\n", + " fig.update_traces(hovertemplate='%{y}
%{x:d}')\n", + " fig_dict[c] = go.Figure(fig)\n", + "\n", + " figsuper = make_subplots(rows=1, cols=3, subplot_titles =list(c_dict.values()))\n", + " for i,f in enumerate(fig_dict.keys()):\n", + " sfig = fig_dict[f]\n", + " for trace in list(sfig.select_traces()):\n", + " trace.showlegend=False\n", + " figsuper.add_trace(trace,\n", + " row=1, col=i+1)\n", + "\n", + " figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative',yaxis2={'categoryorder':'total ascending'},yaxis3={'categoryorder':'total ascending'})\n", + " figsuper.update_yaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " figsuper.update_xaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " figsuper.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", + " figsuper['layout']['xaxis1'].update(tickformat=\".0f\")\n", + " figsuper['layout']['xaxis2'].update(tickformat=\".1%\")\n", + " figsuper['layout']['xaxis3'].update(tickformat=\".1%\")\n", + " figsuper['layout'][\"font\"][\"size\"]=12\n", + " for a in figsuper['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 14\n", + " figsuper[\"layout\"][\"yaxis\"][\"tickfont\"][\"size\"] = 10\n", + " figsuper[\"layout\"][\"yaxis2\"][\"tickfont\"][\"size\"] = 10\n", + " figsuper[\"layout\"][\"yaxis3\"][\"tickfont\"][\"size\"] = 10\n", + "\n", + " figsuper.update_layout(uniformtext_minsize=10)\n", + " figsuper.update_layout(title=f\"Contribution of european countries ({t})\")\n", + " # figsuper.write_html(f\"plot_html/{cat}/{cat}_europe_contribution_bar.html\",config= dict(displayModeBar = False, responsive = True))\n", + "\n", + " figsuper_ppt = go.Figure(figsuper)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + " figsuper_ppt.show()\n", + " 
figsuper_ppt.update_yaxes(tickfont=dict(size=18))\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_europe_contribution_bar.png\",height=900,width=1600,scale = 4)\n", + "\n", + "\n", + " # intraeurope collabs\n", + " wos_collabs_EU = wos_univ_locations[~wos_univ_locations[\"Country_Type\"].isin([\"Other\",\"China\"])][[record_col,\"Country\"]].drop_duplicates()\n", + " wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)\n", + " EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)\n", + "\n", + "\n", + " eu_list = wos_collabs_EU.groupby(['Country_x'])[record_col].count().sort_values(ascending=False).index\n", + "\n", + " EU_co_occur = EU_co_occur.reindex(index = eu_list, columns=eu_list)\n", + "\n", + " # Generate a mask for the upper triangle\n", + " mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))\n", + " data = np.where(mask,None,EU_co_occur)\n", + "\n", + " fig = px.imshow(data,\n", + " labels=dict(x=\"Country\", y=\"Country\", color=\"Co-publication with China\"),\n", + " x=list(EU_co_occur.columns),\n", + " y=list(EU_co_occur.index), title=f\"Intraeuropean patterns: Co-occurences of countries in chinese co-publications ({t})\"\n", + " )\n", + " fig.update_layout(\n", + " width=1000, height=1000,\n", + " xaxis_showgrid=False,\n", + " yaxis_showgrid=False,\n", + " yaxis_autorange='reversed', template='plotly_white',font_family=\"Montserrat\",)\n", + " # fig.update_traces(hovertemplate='%{y}
%{x}
Co-publications: %{hovertext}')\n", + " fig.update_xaxes(tickangle= -90)\n", + " fig.update_yaxes(\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " ticks=\"outside\")\n", + " # fig.write_html(f\"plot_html/{cat}/{cat}_intraeurope_collabs.html\",config= dict(displayModeBar = False, responsive = True))\n", + " figsuper_ppt = go.Figure(fig)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_intraeurope_collabs.png\",height=900,width=1600,scale = 4)\n", + "\n", + "\n", + " # country trends\n", + " collab_year = wos_collabs[wos_collabs[\"Country\"]!=\"China\"].copy()\n", + " collab_year = collab_year.merge(wos_country_types, on=\"Country\").merge(wos[[record_col,\"Publication Year\"]],on=record_col).drop_duplicates()\n", + "\n", + " data = (collab_year.groupby(['Publication Year',\"Country\"])[record_col]\n", + " .nunique(dropna=False).unstack()\n", + " .fillna(0)\n", + " .stack()\n", + " .reset_index()\n", + " .rename(columns={0:record_col}))\n", + " data = data.merge(data[data[record_col]>0].sort_values(by=[\"Publication Year\"], ascending=True).drop_duplicates(subset=\"Country\"),\n", + " on=[\"Country\"], suffixes=[None,\"_relative_growth\"])\n", + " data[record_col+\"_relative_growth\"] = (data[record_col]-data[record_col+\"_relative_growth\"])/data[record_col+\"_relative_growth\"]\n", + " data = data.sort_values(by =[\"Country\",\"Publication Year\"], ascending=[True,True])\n", + " data[record_col+\"_cumsum\"] = (data.groupby('Country',as_index=False)[record_col].cumsum())\n", + " data = data.merge(wos_country_types, on='Country')\n", + "\n", + " yearsum = collab_year.groupby(\"Publication 
Year\")[record_col].nunique().reset_index().rename(columns={record_col:\"year_unique\"})\n", + " data = data.merge(yearsum, on=\"Publication Year\")\n", + " data[\"pub_output_percent\"] = data[record_col]/data[\"year_unique\"]\n", + " data[\"ISO3\"] = cc.pandas_convert(series=data[\"Country\"], to='ISO3')\n", + "\n", + "\n", + " fig = px.choropleth(data[data[\"Publication Year\"] == 2022], locations=\"ISO3\", color=record_col+\"_cumsum\", hover_name=\"Country\",\n", + " scope=\"europe\", template='plotly',\n", + " range_color=[data[record_col+\"_cumsum\"].min(),data[record_col+\"_cumsum\"].max()],hover_data=[\"Eurovoc_Class\"])\n", + "\n", + " fig.update_traces(hovertemplate='%{hovertext}'\n", + " '
Region: %{customdata[0]}
'\n", + " 'Co-pubications: %{z:d}')\n", + "\n", + " cumsum_country = go.Figure(fig)\n", + "\n", + " figsuper = make_subplots(rows=3, cols=2, subplot_titles=[\"Number of publications (2022)\",\"Cumulative number of co-publications\",\n", + " \"Yearly output of co-publications\",\"Relative growth of co-publications\"],\n", + " specs=[\n", + " [{\"type\": \"geo\", \"rowspan\":3}, {\"type\": \"xy\"}],\n", + " [None,{\"type\": \"xy\"}],\n", + " [None, {\"type\": \"xy\"}]\n", + " ])\n", + "\n", + " for trace in list(cumsum_country.select_traces()):\n", + " figsuper.add_trace(trace,\n", + " row=1, col=1\n", + " )\n", + "\n", + " fig = px.area(data.sort_values(ascending=True, by='Publication Year'), y=record_col+\"_cumsum\",\n", + " x='Publication Year',\n", + " color=\"Eurovoc_Class\",\n", + " line_group=\"Country\",\n", + " labels={\n", + " record_col: 'Number of co-publications',\n", + " \"Eurovoc_Class\": \"Region\"\n", + " },\n", + " title=\"Cumulative number of co-publications\",\n", + " hover_name= \"Country\")\n", + " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", + "\n", + " for trace in list(fig.select_traces()):\n", + " figsuper.add_trace(trace,\n", + " row=1, col=2\n", + " )\n", + "\n", + "\n", + " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", + " y=record_col,\n", + " x='Publication Year',\n", + " color=\"Eurovoc_Class\",\n", + " line_group=\"Country\",\n", + " markers=True,\n", + " labels={\n", + " record_col: 'Number of co-publications',\n", + " \"Eurovoc_Class\": \"Region\"\n", + " },\n", + " title=\"Yearly output of co-publications\",hover_name= \"Country\")\n", + " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", + "\n", + " for trace in list(fig.select_traces()):\n", + " trace.showlegend=False\n", + " figsuper.add_trace(trace,\n", + " row=2, col=2\n", + " )\n", + "\n", + " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", + " y=record_col+\"_relative_growth\",\n", + " x='Publication Year',\n", + " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,\n", + " labels={\n", + " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n", + " },\n", + " title=\"Relative growth of co-publications\", template='plotly',hover_name= \"Country\")\n", + " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}%')\n", + " fig.add_shape(\n", + " # Rectangle with reference to the plot\n", + " type=\"rect\",\n", + " xref=\"paper\",\n", + " yref=\"paper\",\n", + " x0=0,\n", + " y0=0,\n", + " x1=1.0,\n", + " y1=1.0,\n", + " line=dict(\n", + " color=\"black\",\n", + " width=0.5,\n", + " )\n", + " )\n", + "\n", + " for trace in list(fig.select_traces()):\n", + " trace.showlegend=False\n", + " figsuper.add_trace(trace,\n", + " row=3, col=2\n", + " )\n", + "\n", + " figsuper.update_yaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " figsuper.update_xaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " figsuper.update_layout({'template':\"plotly\"})\n", + " figsuper.update_layout(font_family=\"Montserrat\")\n", + " figsuper.layout[\"geo\"][\"scope\"] = 'europe'\n", + " figsuper.update_coloraxes(colorbar=dict(lenmode='fraction',len=0.55, orientation=\"v\",yanchor='top', title=\"Co-publications\",\n", + " ticks=\"outside\", ticksuffix=\" \",outlinewidth=0.5))\n", + " for i in[\"xaxis\",\"xaxis2\",\"xaxis3\"]:\n", + " figsuper['layout'][f'{i}'][\"range\"] = [2010.8,2022.2]\n", + " # figsuper.write_html(f\"plot_html/{cat}/{cat}_country_trends_overall.html\",config= dict(displayModeBar = False, responsive = True))\n", + "\n", + " figsuper_ppt = go.Figure(figsuper)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + " figsuper_ppt.show()\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=18))\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_country_trends_overall.png\",height=900,width=1600,scale = 4)\n", + "\n", + " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", + " y=record_col,\n", + " x='Publication Year',\n", + " color=\"Eurovoc_Class\",\n", + " 
line_group=\"Country\",facet_col=\"Country\",facet_col_wrap=6,category_orders={\"Country\": sorted(data[\"Country\"].unique())},\n", + " markers=True,\n", + " labels={\n", + " record_col: 'Number of co-publications',\n", + " \"Eurovoc_Class\": \"Region\"\n", + " },\n", + " title=f\"Yearly output of co-publications ({t})\",hover_name= \"Country\")\n", + " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Co-publications: %{y}')\n", + " fig.update_yaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " fig.update_layout({'template':\"plotly\"})\n", + " fig.update_layout(font_family=\"Montserrat\")\n", + " fig.update_yaxes(title='')\n", + " fig.update_xaxes(title='')\n", + " fig.for_each_annotation(lambda a: a.update(text=a.text.split(\"=\")[-1]))\n", + " fig.show(config= dict(displayModeBar = False, responsive = True))\n", + " figsuper_ppt = go.Figure(fig)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n", + " figsuper_ppt.write_image(f\"plot_html//PPT_plots/trending_topics/{t}/{t}_country_year_trends.png\",height=900,width=1600,scale = 4)\n", + "\n", + "\n", + " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", + " y=record_col+\"_relative_growth\",\n", + " x='Publication Year',\n", + " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,facet_col=\"Country\",facet_col_wrap=6,category_orders={\"Country\": sorted(data[\"Country\"].unique())},\n", + " labels={\n", + " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n", + " },\n", + " title=f\"Relative growth of co-publication output ({t})\", template='plotly',hover_name= \"Country\")\n", + " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}')\n", + "\n", + "\n", + " fig.update_yaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " # for candidate in fig[\"layout\"].keys():\n", + " # if \"yaxis\" in candidate:\n", + " # fig[\"layout\"][candidate].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n", + " fig.update_layout({'template':\"plotly\"})\n", + " fig.update_layout(font_family=\"Montserrat\")\n", + " fig.update_yaxes(title='',zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n", + " fig.update_xaxes(title='')\n", + " fig.for_each_annotation(lambda a: a.update(text=a.text.split(\"=\")[-1]))\n", + " figsuper_ppt = go.Figure(fig)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(title='',zeroline=True, zerolinewidth=2, zerolinecolor='grey',tickformat=\".0%\")\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_country_year_rel_trends.png\",height=900,width=1600,scale = 4)\n", + "\n", + " fig = px.line(data.sort_values(ascending=True, by='Publication Year'),\n", + " y=\"pub_output_percent\",\n", + " x='Publication Year',\n", + " color=\"Eurovoc_Class\",line_group=\"Country\",markers=True,facet_col=\"Country\",facet_col_wrap=6,category_orders={\"Country\": sorted(data[\"Country\"].unique())},\n", + " labels={\n", + " record_col+\"_relative_growth\": 'Relative growth of co-publications (%)',\"Eurovoc_Class\": \"Region\"\n", + " },\n", + " title=f\"Relative changes in co-publication focus of China ({t})\", 
template='plotly',hover_name= \"Country\")\n", + " fig.update_traces(hovertemplate='%{hovertext}
%{x}
Relative growth: %{y}')\n", + "\n", + "\n", + " fig.update_yaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,\n", + " ticks=\"outside\")\n", + " # for candidate in fig[\"layout\"].keys():\n", + " # if \"yaxis\" in candidate:\n", + " # fig[\"layout\"][candidate].update(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n", + " fig.update_layout({'template':\"plotly\"})\n", + " fig.update_layout(font_family=\"Montserrat\")\n", + " fig.update_yaxes(title='',zeroline=True, zerolinewidth=0.5, zerolinecolor='grey',tickformat=\".0%\")\n", + " fig.update_xaxes(title='')\n", + " fig.for_each_annotation(lambda a: a.update(text=a.text.split(\"=\")[-1]))\n", + " figsuper_ppt = go.Figure(fig)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(title='',zeroline=True, zerolinewidth=2, zerolinecolor='grey',tickformat=\".0%\")\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_country_year_rel_focus_trend.png\",height=900,width=1600,scale = 4)\n", + "\n", + "\n", + "\n", + " TOPN = 15\n", + " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n", + " wos_univ_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]].drop_duplicates()\n", + " wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n", + " wos_univ_collabs[\"Institution_harm_label\"] = 
wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n", + "\n", + "\n", + " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", + " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", + "\n", + " wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"EU\"]\n", + "\n", + " data_eu = (wos_univ_eu.groupby([\"Country\",\"Institution_harm_label\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", + " .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by=\"Country_Type\")\n", + "\n", + " data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm_label\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n", + " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", + "\n", + " data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", + " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", + "\n", + "\n", + " for data,c_scope, y_lab, col_by, pat in zip([data_eu,data_eu_strict,data_ch],\n", + " [\"Europe\",\"EU-28 only\",\"China\"],\n", + " [\"Institution_harm_label\",\"Institution_harm_label\",\"Institution_harm\"],\n", + " [\"Country\",\"Eurovoc_Class\",\"Country_Type\"],\n", + " [\"Country_Type\",None,None]):\n", + " fig = px.bar(data, x=record_col, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,pattern_shape=pat,text_auto=True,\n", + " labels={\n", + " record_col: 'Number of co-publications',\n", + " \"Institution_harm\": \"Institution\",\n", + " \"Institution_harm_label\": \"Institution\",\n", + " \"Country_Type\":\"Country type\",\n", + " \"Eurovoc_Class\":\"Region\"\n", + " },\n", + " title=f\"Top {TOPN} institutes within {c_scope}
({t})\", template='plotly')\n", + " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n", + " width=1000, height=1000,)\n", + " fig.update_traces(hovertemplate='%{x:d}')\n", + " fig.add_shape(\n", + " # Rectangle with reference to the plot\n", + " type=\"rect\",\n", + " xref=\"paper\",\n", + " yref=\"paper\",\n", + " x0=0,\n", + " y0=0,\n", + " x1=1.0,\n", + " y1=1.0,\n", + " line=dict(\n", + " color=\"black\",\n", + " width=0.5,\n", + " )\n", + " )\n", + " fig.update_yaxes(\n", + " showgrid=True,\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " showgrid=True,\n", + " ticks=\"outside\")\n", + " # fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_bar_{c_scope}.html\",config= dict(displayModeBar = False, responsive = True))\n", + " # fig.write_image(f\"plot_html/overall_inst_collab_bar_{c_scope}.svg\",height=800,width=1600)\n", + " fig.write_image(f\"plot_html/overall_inst_collab_bar_{c_scope}.png\",height=800,width=1600)\n", + " figsuper_ppt = go.Figure(fig)\n", + " figsuper_ppt.update_traces(textposition='inside')\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_overall_inst_collab_bar_{c_scope}.png\",height=900,width=1000,scale = 4)\n", + "\n", + " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", + " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", + "\n", + " wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)\n", + "\n", + " subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu[\"Institution_harm_label\"]))&\n", + " 
(wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", + "\n", + " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", + " # fig.show()\n", + " sub_df = wos_univ_dipol[subfilter]\n", + "\n", + " inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n", + " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", + "\n", + " eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n", + " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", + "\n", + " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", + "\n", + " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", + " data = np.where(mask,inst_co_occur,inst_co_occur)\n", + "\n", + " fig = px.imshow(data,\n", + " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),text_auto=True,\n", + " x=list(inst_co_occur.columns),\n", + " y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes within Europe ({t})\"\n", + " )\n", + " fig.update_layout(\n", + " width=1000, height=1000,\n", + " xaxis_showgrid=False,\n", + " yaxis_showgrid=False,\n", + " yaxis_autorange='reversed',\n", + " template='plotly_white',font_family=\"Montserrat\",\n", + " coloraxis_colorbar=dict(\n", + " thicknessmode=\"pixels\", thickness=25,\n", + " ticks=\"outside\", ticksuffix=\" \",\n", + " dtick=20,outlinewidth=1,\n", + " ))\n", + " fig.update_xaxes(tickangle= -45)\n", + " fig.update_yaxes(\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " ticks=\"outside\")\n", + "\n", + " # fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_europe.html\",config= dict(displayModeBar = False, responsive = True))\n", + " figsuper_ppt = go.Figure(fig)\n", + "\n", + " 
figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_overall_inst_collab_europe.png\",height=900,width=1600,scale = 4)\n", + "\n", + "\n", + "\n", + " subfilter = ((wos_univ_dipol[\"Institution_harm_label_eu\"].isin(data_eu_strict[\"Institution_harm_label\"]))&\n", + " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", + "\n", + " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", + " # fig.show()\n", + " sub_df =wos_univ_dipol[subfilter]\n", + "\n", + " inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],\n", + " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", + "\n", + " eu_list = sub_df.groupby(['Institution_harm_label_eu'])[record_col].count().sort_values(ascending=False).index\n", + " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", + "\n", + " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", + "\n", + " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", + " data = np.where(mask,inst_co_occur,inst_co_occur)\n", + " fig = px.imshow(data,\n", + " labels=dict(x=\"Institute (CH)\", y=\"Institute (EU)\", color=\"Co-publication\"),text_auto=True,\n", + " x=list(inst_co_occur.columns),\n", + " y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes within EU-28 ({t})\"\n", + " )\n", + " fig.update_layout(\n", + " width=1000, height=1000,\n", + " xaxis_showgrid=False,\n", + " yaxis_showgrid=False,\n", + " yaxis_autorange='reversed',\n", + " 
template='plotly_white',font_family=\"Montserrat\",\n", + " coloraxis_colorbar=dict(\n", + " thicknessmode=\"pixels\", thickness=25,\n", + " ticks=\"outside\", ticksuffix=\" \",\n", + " dtick=20,outlinewidth=1,\n", + " ))\n", + " fig.update_xaxes(tickangle= -45)\n", + " fig.update_yaxes(\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " ticks=\"outside\")\n", + "\n", + " # fig.show(config= dict(displayModeBar = False))\n", + " # fig.write_html(f\"plot_html/{cat}/{cat}_overall_inst_collab_eu28.html\",config= dict(displayModeBar = False, responsive = True))\n", + "\n", + " figsuper_ppt = go.Figure(fig)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/trending_topics/{t}/{t}_overall_inst_collab_eu28.png\",height=900,width=1600,scale = 4)\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 97, + "outputs": [], + "source": [ + "# Adding emphasized countries\n", + "os.makedirs(f\"plot_html/PPT_plots/highlight_countries\", exist_ok=True)\n", + "# General: Croatia, Cyprus, Luxembourg\n", + "for c in [\"Croatia\",\"Cyprus\",\"Luxembourg\"]:\n", + " os.makedirs(f\"plot_html/PPT_plots/highlight_countries/general_{c}\", exist_ok=True)\n", + "\n", + " t = \"General\"\n", + "\n", + " #sunburst distribution\n", + " groups = ['Domain_English',\"Field_English\",'SubField_English']\n", + "\n", + " id_subset = wos_country[wos_country[\"Country\"]==c][record_col].unique()\n", + "\n", + " data = (wos[wos[record_col].isin(id_subset)]\n", + " .groupby(groups, as_index=False)[record_col]\n", + " .nunique()\n", + " .sort_values(ascending=False, by=record_col))\n", + " data[\"percent\"] = 
data[record_col]/data[record_col].sum()*100\n", + " data[groups] = data[groups].applymap(replace_nth)\n", + " fig = px.sunburst(data, path=groups, values=record_col,\n", + " color='Domain_English', template='plotly')\n", + " fig.update_traces(textinfo=\"label+value+percent root\")\n", + " fig.update_traces(hovertemplate='%{id}
%{value}')\n", + " metrix_distr = go.Figure(fig)\n", + " metrix_distr.update_layout({'template':\"plotly\",\"font_family\":\"Montserrat\"})\n", + " figsuper_ppt = go.Figure(metrix_distr)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/highlight_countries/general_{c}/sunburst.png\",height=900,width=900,scale = 4)\n", + "\n", + " TOPN = 10\n", + " wos_univ_locations = wos_univ[wos_univ[record_col].isin(id_subset)].merge(wos_country_types, on=\"Country\")\n", + " wos_univ_collabs = wos_univ_locations[wos_univ_locations[\"Country_Type\"]!=\"Other\"][[record_col,\"Country\",\"Institution_harm\",\"Country_Type\",\"Eurovoc_Class\"]].drop_duplicates()\n", + " wos_univ_collabs[\"ISO3\"] = cc.pandas_convert(series=wos_univ_collabs[\"Country\"], to='ISO3')\n", + " wos_univ_collabs[\"Institution_harm_label\"] = wos_univ_collabs[\"Institution_harm\"] + \" (\"+wos_univ_collabs[\"ISO3\"]+ \")\"\n", + "\n", + "\n", + " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]==\"China\"]\n", + " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country_Type\"]!=\"China\"]\n", + "\n", + " wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs[\"Country\"]==c]\n", + "\n", + " # data_eu = (wos_univ_eu.groupby([\"Country\",\"Institution_harm_label\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", + " # .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by=\"Country_Type\")\n", + "\n", + " data_eu_strict = (wos_univ_eu_strict.groupby([\"Country\",\"Institution_harm\",\"Eurovoc_Class\"], as_index=False)[record_col].nunique()\n", + " .sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", + "\n", + " data_ch = (wos_univ_ch.groupby([\"Country\",\"Institution_harm\",\"Country_Type\"], as_index=False)[record_col].nunique()\n", + " 
.sort_values(by=record_col,ascending=False).head(TOPN).copy())\n", + "\n", + "\n", + " for data,c_scope, y_lab, in zip(\n", + " [data_eu_strict,data_ch],\n", + " [c,\"China\"],\n", + " [\"Institution_harm\",\"Institution_harm\"]):\n", + " fig = px.bar(data, x=record_col, y=y_lab, color_discrete_map=color_discrete_map,text_auto=True,\n", + " labels={\n", + " record_col: 'Number of co-publications',\n", + " \"Institution_harm\": \"Institution\",\n", + " \"Institution_harm_label\": \"Institution\",\n", + " \"Country_Type\":\"Country type\",\n", + " \"Eurovoc_Class\":\"Region\"\n", + " },\n", + " title=f\"Top {TOPN} institutes ({c_scope})\", template='plotly')\n", + " fig.update_layout(xaxis_tickformat='d',font_family=\"Montserrat\",yaxis={'categoryorder':'total ascending'},\n", + " width=1000, height=1000,)\n", + " fig.update_traces(hovertemplate='%{x:d}')\n", + " fig.add_shape(\n", + " # Rectangle with reference to the plot\n", + " type=\"rect\",\n", + " xref=\"paper\",\n", + " yref=\"paper\",\n", + " x0=0,\n", + " y0=0,\n", + " x1=1.0,\n", + " y1=1.0,\n", + " line=dict(\n", + " color=\"black\",\n", + " width=0.5,\n", + " )\n", + " )\n", + " fig.update_yaxes(\n", + " showgrid=True,\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " showgrid=True,\n", + " ticks=\"outside\")\n", + " figsuper_ppt = go.Figure(fig)\n", + " figsuper_ppt.update_traces(textposition='inside')\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/highlight_countries/general_{c}/inst_bar_{c_scope}.png\",height=900,width=900,scale = 4)\n", + "\n", + " wos_univ_ch = wos_univ_collabs[wos_univ_collabs[\"Country\"]==\"China\"]\n", + " wos_univ_eu = wos_univ_collabs[wos_univ_collabs[\"Country\"]==c]\n", + "\n", + " wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', 
'_ch')).merge(wos[[record_col,\"Domain_English\",\"Field_English\",\"SubField_English\"]], on =record_col)\n", + "\n", + " subfilter = ((wos_univ_dipol[\"Institution_harm_eu\"].isin(data_eu_strict[\"Institution_harm\"]))&\n", + " (wos_univ_dipol[\"Institution_harm_ch\"].isin(data_ch[\"Institution_harm\"])))\n", + "\n", + " fig = px.parallel_categories(wos_univ_dipol[subfilter][[\"Country_eu\",\"Institution_harm_eu\",\"Domain_English\",\"Institution_harm_ch\"]])\n", + " # fig.show()\n", + " sub_df = wos_univ_dipol[subfilter]\n", + "\n", + " inst_co_occur = pd.crosstab(sub_df['Institution_harm_eu'], sub_df['Institution_harm_ch'],\n", + " values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)\n", + "\n", + " eu_list = sub_df.groupby(['Institution_harm_eu'])[record_col].count().sort_values(ascending=False).index\n", + " ch_list = sub_df.groupby(['Institution_harm_ch'])[record_col].count().sort_values(ascending=False).index\n", + "\n", + " inst_co_occur = inst_co_occur.reindex(index = eu_list, columns=ch_list)\n", + "\n", + " mask = np.triu(np.ones_like(inst_co_occur, dtype=bool))\n", + " data = np.where(mask,inst_co_occur,inst_co_occur)\n", + "\n", + " fig = px.imshow(data,\n", + " labels=dict(x=\"Institute (CH)\", y=f\"Institute ({c})\", color=\"Co-publication\"),text_auto=True,\n", + " x=list(inst_co_occur.columns),\n", + " y=list(inst_co_occur.index), title=f\"Top {TOPN} institutes ({t})\"\n", + " )\n", + " fig.update_layout(\n", + " width=1000, height=1000,\n", + " xaxis_showgrid=False,\n", + " yaxis_showgrid=False,\n", + " yaxis_autorange='reversed',\n", + " template='plotly_white',font_family=\"Montserrat\",\n", + " coloraxis_colorbar=dict(\n", + " thicknessmode=\"pixels\", thickness=25,\n", + " ticks=\"outside\", ticksuffix=\" \",\n", + " dtick=20,outlinewidth=1,\n", + " ))\n", + " fig.update_xaxes(tickangle= -45)\n", + " fig.update_traces(showlegend=False)\n", + " fig.update_traces(showscale=False)\n", + " 
fig.update_layout(coloraxis_showscale=False)\n", + " fig.update_yaxes(\n", + " ticks=\"outside\")\n", + " fig.update_xaxes(\n", + " ticks=\"outside\")\n", + "\n", + " figsuper_ppt = go.Figure(fig)\n", + "\n", + " figsuper_ppt['layout'][\"font\"][\"size\"]=22\n", + " for a in figsuper_ppt['layout'][\"annotations\"]:\n", + " a[\"font\"][\"size\"] = 22\n", + "\n", + " s=16\n", + " figsuper_ppt.update_yaxes(tickfont=dict(size=s))\n", + " figsuper_ppt.update_xaxes(tickfont=dict(size=s),tickangle=45)\n", + " figsuper_ppt.write_image(f\"plot_html/PPT_plots/highlight_countries/general_{c}/inst_collab.png\",height=900,width=900,scale = 4)\n", + "\n", + "# Applied Sciences: Hungary, Poland\n", + "\n", + "# Natural Sciences: Ireland\n", + "\n", + "# Health Sciences: Austria, Czech Republic, Ireland, Poland, Portugal\n", + "\n", + "# Economic & Social Sciences: France" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 91, + "outputs": [ + { + "data": { + "text/plain": " Country Institution_harm Eurovoc_Class UT (Unique WOS ID)\n47 Croatia Univ Zagreb Eastern Europe 70\n19 Croatia Inst Rudjer Boskovic Eastern Europe 59\n45 Croatia Univ Split Eastern Europe 58\n42 Croatia Univ North Eastern Europe 16\n33 Croatia Tech Univ Split Eastern Europe 12", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CountryInstitution_harmEurovoc_ClassUT (Unique WOS ID)
47CroatiaUniv ZagrebEastern Europe70
19CroatiaInst Rudjer BoskovicEastern Europe59
45CroatiaUniv SplitEastern Europe58
42CroatiaUniv NorthEastern Europe16
33CroatiaTech Univ SplitEastern Europe12
\n
" + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_eu_strict.head()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# Adding emphasized institutes\n", + "\n", + "# General:\n", + "# Polish Academy of Sciences\n", + "\n", + "# University of Groningen/Politecnico Milano – prominent co-publisher but not necessarily with the largest Chinese partners\n", + "#\n", + "# Aalto University – Xidian University\n", + "#\n", + "# Technical University of Munich – Tongji University\n", + "#\n", + "# Aalborg University – University of Electronic Science & Technology\n", + "\n", + "\n", + "# Natural Sciences: Charles Univ of Prague\n", + "\n", + "# Health Sciences: Karolinska Institute\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "def print_hello_world():\n", + "    print(\"Hello, World!\")  # just a general hello world print" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "class Test: pass" + ], + "metadata": { + "collapsed": false + } } ], "metadata": {