import pandas as pd
import janitor
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MaxNLocator
import math
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
pyo.init_notebook_mode()
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"
import country_converter as coco
cc = coco.CountryConverter()
%matplotlib inline
# Seaborn palette
# sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)
# sns.palplot(sns.color_palette())
# Directory holding the pre-processed Web of Science exports produced upstream.
outdir="wos_processed_data"
# Publication-level records (presumably one row per paper/classification — confirm upstream).
wos = pd.read_excel(f"../{outdir}/wos_processed.xlsx")
# Harmonized institution -> country assignments per publication.
wos_univ = pd.read_excel(f"../{outdir}/wos_institution_locations_harmonized.xlsx")
def eurovoc_classer(x):
    """Map a country name to its EuroVoc European region.

    "China" is passed through unchanged; any country not present in the
    classification yields None (so pandas .map() produces NaN for it).
    """
    regions = {"Eastern Europe":["Bulgaria","Czech Republic","Croatia","Hungary","Poland","Romania","Slovakia","Slovenia"],
               "Northern Europe":["Denmark","Estonia","Finland","Latvia","Lithuania","Sweden","Norway","Iceland"],
               "Southern Europe":["Cyprus","Greece","Italy","Portugal","Spain","Malta"],
               "Western Europe":["Austria","Belgium","France","Germany","Luxembourg","Netherlands","Switzerland","United Kingdom","Ireland"]}
    if x == 'China':
        return x
    for region, members in regions.items():
        if x in members:
            return region
    return None
# Country-level lookup tables produced upstream.
wos_country = pd.read_excel(f"../{outdir}/wos_countries.xlsx")
wos_country_types = pd.read_excel(f"../{outdir}/wos_country_types.xlsx")
# Attach the EuroVoc region (or the literal "China"); countries outside the
# classification become NaN.
wos_country_types["Eurovoc_Class"] = wos_country_types["Country"].map(eurovoc_classer)
wos_country_types
Country | Country_Type | Eurovoc_Class | |
---|---|---|---|
0 | Belgium | EU | Western Europe |
1 | China | China | China |
2 | Luxembourg | EU | Western Europe |
3 | Netherlands | EU | Western Europe |
4 | Norway | Non-EU associate | Northern Europe |
5 | United Kingdom | Non-EU associate | Western Europe |
6 | France | EU | Western Europe |
7 | Sweden | EU | Northern Europe |
8 | Italy | EU | Southern Europe |
9 | Denmark | EU | Northern Europe |
10 | Germany | EU | Western Europe |
11 | Slovenia | EU | Eastern Europe |
12 | Estonia | EU | Northern Europe |
13 | Finland | EU | Northern Europe |
14 | Bulgaria | EU | Eastern Europe |
15 | Slovakia | EU | Eastern Europe |
16 | Spain | EU | Southern Europe |
17 | Poland | EU | Eastern Europe |
18 | Czech Republic | EU | Eastern Europe |
19 | Greece | EU | Southern Europe |
20 | Malta | EU | Southern Europe |
21 | Austria | EU | Western Europe |
22 | Switzerland | Non-EU associate | Western Europe |
23 | Ireland | EU | Western Europe |
24 | Portugal | EU | Southern Europe |
25 | Romania | EU | Eastern Europe |
26 | Hungary | EU | Eastern Europe |
27 | Cyprus | EU | Southern Europe |
28 | Croatia | EU | Eastern Europe |
29 | Lithuania | EU | Northern Europe |
30 | Latvia | EU | Northern Europe |
# Column uniquely identifying a WoS record; used for de-duplicated
# publication counts throughout the notebook.
record_col = "UT (Unique WOS ID)"
def replace_nth(s, sub=" ", repl="<br>", n=2):
    """Replace every *n*-th occurrence of *sub* in *s* with *repl*.

    Splits *s* on *sub*, regroups the chunks *n* at a time (rejoined with
    *sub* inside a group) and joins the groups with *repl*.  Used to wrap
    long plot labels onto multiple lines.  An ampersand that would start a
    new line is pulled back to the end of the previous one
    ("...<br>& x" -> "... &<br> x") so "&" never leads a line.

    Parameters:
        s: the string to re-wrap.
        sub: separator to split on (default: space).
        repl: separator inserted between groups (default: "<br>").
        n: group size, i.e. every n-th *sub* becomes *repl*.
    """
    chunks = s.split(sub)
    groups = [sub.join(chunks[i:i + n]) for i in range(0, len(chunks), n)]
    # Fix: the ampersand clean-up previously hard-coded "<br>" and " "
    # instead of using the repl/sub parameters; identical for the defaults.
    return repl.join(groups).replace(repl + "&", sub + "&" + repl)
# --- Sunburst: distribution of topics over the METRIX hierarchy ------------
groups = ['Domain_English',"Field_English",'SubField_English']
# Unique-publication count per (domain, field, subfield) leaf, largest first.
data = wos.groupby(groups, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
data["percent"] = data[record_col]/data[record_col].sum()*100
# data[groups] = data[groups].applymap(replace_nth)
# Append the aggregate publication count at each hierarchy level to its
# label, e.g. "Optics<br>(134)".
for c in ["Domain_English","Field_English","SubField_English"]:
    data[c] = data[c]+"<br>("+(pd.DataFrame(data[c],columns=[c]).merge(data.groupby(c,as_index=False)[record_col].sum(), on=c)[record_col]).astype(str)+")"
# data
fig = px.sunburst(data, path=groups, values="percent",
                  color='Domain_English',title="Distribution of topics<br>(METRIX classification)", template='plotly')
fig.update_traces(hovertemplate='%{label}<br>%{value:.2f}%')
metrix_distr = go.Figure(fig)
metrix_distr.show(config= dict(displayModeBar = False))
# --- Bar chart: number of unique co-publications per METRIX domain ---------
group = 'Domain_English'
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
fig = px.bar(data.sort_values(by=group), x=record_col, y=group, color=group,barmode='relative',
             labels={
                 record_col: 'Number of co-publications',
                 group: "",
             },
             title="Distribution of Domains", template='plotly')
fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family="Montserrat")
fig.update_traces(hovertemplate='%{x:d}')
# Thin black frame around the plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
# Longest bar on top.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
dom_distr = go.Figure(fig)
dom_distr.show(config= dict(displayModeBar = False, responsive = True))
# --- Yearly counts per domain, plus relative-growth and cumulative series --
group = ['Publication Year','Domain_English']
# The unstack/fillna/stack round-trip inserts explicit zero rows for
# (year, domain) combinations that have no publications.
data = (wos.groupby(['Publication Year','Domain_English'])[record_col].nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# Baseline for relative growth: each domain's earliest year with a
# non-zero count (drop_duplicates keeps the first year after sorting).
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset='Domain_English'),
                  on='Domain_English', suffixes=[None,"_relative_growth"])
# Percent growth relative to that baseline year.
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data = data.sort_values(by =["Domain_English","Publication Year"], ascending=[True,True])
# Running total per domain (depends on the year ordering established above).
data[record_col+"_cumsum"] = (data.groupby('Domain_English',as_index=False)[record_col].cumsum())
# data
# --- Line chart: yearly output of co-publications per domain ---------------
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col,x=group[0], color=group[-1], markers=True, labels={
                  record_col: 'Number of co-publications',
                  group[-1]: "Domain",
              },
              title="Yearly output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:d}')
fig.update_layout(hovermode='x unified')
# Thin black frame around the plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
# Keep a handle for reuse in the composite figure below.
year_output_by_domain = go.Figure(fig)
year_output_by_domain.show(config= dict(displayModeBar = False))
# --- Line chart: growth relative to each domain's baseline year, in % ------
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+"_relative_growth",x=group[0], color=group[-1], markers=True, labels={
                  record_col+"_relative_growth": 'Rel. growth<br>in co-publications (%)',
                  group[-1]: "Domain",
              },
              title="Relative growth in the output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:.2f}%')
fig.update_layout(hovermode='x unified',yaxis_tickformat='d',font_family="Montserrat")
# Thin black frame around the plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
# Emphasize the 0% baseline.
fig.update_yaxes(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')
rel_output_by_domain = go.Figure(fig)
rel_output_by_domain.show(config= dict(displayModeBar = False))
# --- Stacked area chart: cumulative co-publication count per domain --------
fig = px.area(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+"_cumsum",x=group[0], color=group[-1],line_group=group[-1],
              labels={
                  record_col+"_cumsum": 'Cumulative number of co-publications',
                  group[-1]: "Domain",
              },
              title="Cumulative number of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:d}')
fig.update_layout(hovermode='x unified')
# Thin black frame around the plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
cumsum_by_domain = go.Figure(fig)
cumsum_by_domain.show(config= dict(displayModeBar = False))
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# dom_distr
# year_output_by_domain
# rel_output_by_domain
# cumsum_by_domain
# --- 2x2 composite of the four domain-level figures built above ------------
figsuper = make_subplots(rows=2, cols=2, subplot_titles=["a","b",
                                                         "c","d"])
# Panel a: bar chart; legends suppressed on all panels except the area
# chart so the shared domain legend appears only once.
for trace in list(dom_distr.select_traces()):
    trace.showlegend=False
    # trace.barmode
    figsuper.add_trace(trace,
                       row=1, col=1
                       )
# Panel b: cumulative area chart (keeps its legend).
for trace in list(cumsum_by_domain.select_traces()):
    figsuper.add_trace(trace,
                       row=1, col=2
                       )
# Panel c: yearly output lines.
for trace in list(year_output_by_domain.select_traces()):
    trace.showlegend=False
    figsuper.add_trace(trace,
                       row=2, col=1
                       )
# Panel d: relative growth lines.
for trace in list(rel_output_by_domain.select_traces()):
    trace.showlegend=False
    figsuper.add_trace(trace,
                       row=2, col=2
                       )
# figsuper.update_layout(hovermode='x unified')
figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')
# Mirrored, framed axes on all panels.
figsuper.update_yaxes(
    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="outside")
figsuper.update_xaxes(
    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="outside")
figsuper.update_layout({'template':"plotly"})
figsuper.show(config= dict(displayModeBar = False, responsive = True))
# Wide year-by-domain count table (feeds the commented-out heatmap below).
pivot_data = pd.pivot_table(data, values=record_col, index=['Domain_English'],
                            columns=['Publication Year'], fill_value=0)
pivot_data
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Domain_English | ||||||||||||
Applied Sciences | 490 | 593 | 738 | 1031 | 1201 | 1535 | 1920 | 2808 | 3729 | 4446 | 5295 | 6199 |
Arts & Humanities | 0 | 0 | 0 | 4 | 1 | 3 | 7 | 4 | 11 | 11 | 16 | 13 |
Economic & Social Sciences | 20 | 22 | 29 | 28 | 34 | 40 | 84 | 105 | 160 | 211 | 252 | 375 |
Health Sciences | 116 | 120 | 155 | 184 | 216 | 243 | 321 | 403 | 611 | 755 | 1035 | 1182 |
Multidisciplinary | 15 | 21 | 43 | 52 | 57 | 64 | 75 | 76 | 83 | 97 | 115 | 149 |
Natural Sciences | 181 | 223 | 298 | 318 | 380 | 437 | 568 | 753 | 999 | 1232 | 1403 | 1665 |
# f, ax = plt.subplots(figsize=(9, 6))
# g = sns.heatmap(pivot_data, annot=True, fmt="d", linewidths=.5, ax=ax)
# g.set(xlabel="", ylabel="")
import numpy as np
# Share of each domain within each publication year (column-normalized, %).
# aggfunc="sum": the string alias is the supported spelling — passing the
# numpy callable (np.sum) is deprecated in modern pandas.
percent_pivot = pd.crosstab(data['Domain_English'], data['Publication Year'], values=data[record_col], aggfunc="sum", normalize='columns')*100
percent_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Domain_English | ||||||||||||
Applied Sciences | 59.610706 | 60.572012 | 58.432304 | 63.760049 | 63.578613 | 66.106804 | 64.537815 | 67.678959 | 66.672626 | 65.847156 | 65.241498 | 64.687467 |
Arts & Humanities | 0.000000 | 0.000000 | 0.000000 | 0.247372 | 0.052938 | 0.129199 | 0.235294 | 0.096409 | 0.196674 | 0.162915 | 0.197141 | 0.135657 |
Economic & Social Sciences | 2.433090 | 2.247191 | 2.296120 | 1.731602 | 1.799894 | 1.722653 | 2.823529 | 2.530730 | 2.860719 | 3.125000 | 3.104978 | 3.913180 |
Health Sciences | 14.111922 | 12.257406 | 12.272367 | 11.379097 | 11.434621 | 10.465116 | 10.789916 | 9.713184 | 10.924370 | 11.181872 | 12.752587 | 12.334342 |
Multidisciplinary | 1.824818 | 2.145046 | 3.404592 | 3.215832 | 3.017470 | 2.756245 | 2.521008 | 1.831767 | 1.483998 | 1.436611 | 1.416954 | 1.554837 |
Natural Sciences | 22.019465 | 22.778345 | 23.594616 | 19.666048 | 20.116464 | 18.819983 | 19.092437 | 18.148952 | 17.861613 | 18.246445 | 17.286841 | 17.374517 |
# f, ax = plt.subplots(figsize=(15, 6))
# # g = sns.heatmap(percent_pivot, annot=True, fmt='.2f', linewidths=.5, ax=ax, cbar=False)
# # for t in ax.texts: t.set_text(t.get_text() + " %")
# g.set(xlabel="", ylabel="")
# percent_pivot.T.plot(kind='bar',
# stacked=True,
# figsize=(10, 6))
# percent_pivot.T.plot(kind='bar',
# stacked=True,
# figsize=(15, 8))
#
# plt.legend(loc="lower left", ncol=2)
# # plt.ylabel("Release Year")
# # plt.xlabel("Proportion")
#
#
# for n, x in enumerate([*pivot_data.T.index.values]):
# for (proportion, count, y_loc) in zip(percent_pivot.T.loc[x],
# pivot_data.T.loc[x],
# percent_pivot.T.loc[x].cumsum()):
#
# plt.text(y=(y_loc - proportion) + (proportion / 2),
# x=n - 0.11,
# s=f'{count}',# ({np.round(proportion, 1)}%)',
# color="black",
# fontsize=8,
# fontweight="bold")
#
# plt.show()
# --- Field-level yearly series (fields nested within domains) --------------
group = ['Publication Year',"Domain_English",'Field_English']
# data = wos.groupby(['Publication Year',"Domain_English",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
# Zero-filled year x field counts (same unstack/stack trick as above).
data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# Re-attach each field's parent domain.
data = data.merge(wos[["Domain_English",'Field_English']].drop_duplicates(),on="Field_English")
# Baseline year per field: first year with non-zero output.
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset='Field_English'),
                  on='Field_English', suffixes=[None,"_relative_growth"])
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data = data.sort_values(by =["Field_English","Publication Year"], ascending=[True,True])
# NOTE(review): the cumulative sum groups by Domain_English although the
# frame is sorted per field — the domain/country variants of this recipe
# group by the sorted key, so 'Field_English' was presumably intended here.
# Confirm before relying on the _cumsum column in this section.
data[record_col+"_cumsum"] = (data.groupby('Domain_English',as_index=False)[record_col].cumsum())
# Zero-fill missing (year, field) combinations per domain (pyjanitor .complete).
# NOTE(review): data_complete is rebuilt from scratch just below, so this
# loop's accumulated result is discarded.
data_complete = pd.DataFrame()
for cat in sorted(data[group[-2]].unique()):
    #data segment
    sub_data = data[data[group[-2]]==cat]
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    data_complete = pd.concat([data_complete,sub_data], ignore_index=True)
    # seaborn version plot
    # g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
    #                y=record_col,x=group[0], hue=group[-1], marker="o")
    # g.set(xticks=list(range(2012,2022+1,2)))
    # g.legend(title=None)
    # g.set_title(cat)
    # g.yaxis.set_major_locator(MaxNLocator(integer=True))
    # plt.show()
# --- Small multiples: one panel per domain, one line per field -------------
data_complete = pd.DataFrame()
# Creating subplot axes
fig, axes = plt.subplots(nrows=3,ncols=2,figsize=(15, 15))
for cat,ax in zip(sorted(data[group[-2]].unique()),axes.flatten()):
    #data segment
    sub_data = data[data[group[-2]]==cat]
    # Insert zero rows for missing (year, field) combinations (pyjanitor).
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    data_complete = pd.concat([data_complete,sub_data], ignore_index=True)
    #plot
    g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                   y=record_col,x=group[0], hue=group[-1], marker="o", ax=ax)
    g.set(xticks=list(range(2012,2022+1,2)))
    g.legend(title=None)
    g.set_title(cat)
    g.set_xlabel(None)
    g.set_ylabel(None)
    # Counts are integers; suppress fractional ticks.
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
fig.suptitle("Number of co-publications in domains and respective fields", y=0.92)
plt.show()
# Subfield-level yearly counts (zero-filling happens per plot below).
group = ['Publication Year',"Domain_English",'Field_English',"SubField_English"]
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
data
Publication Year | Domain_English | Field_English | SubField_English | UT (Unique WOS ID) | |
---|---|---|---|---|---|
1598 | 2022 | Natural Sciences | Physics & Astronomy | Optics | 134 |
1597 | 2022 | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics | 65 |
1596 | 2022 | Natural Sciences | Physics & Astronomy | Mathematical Physics | 10 |
1595 | 2022 | Natural Sciences | Physics & Astronomy | General Physics | 31 |
1594 | 2022 | Natural Sciences | Physics & Astronomy | Fluids & Plasmas | 79 |
... | ... | ... | ... | ... | ... |
4 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Forestry | 1 |
3 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Food Science | 1 |
2 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Fisheries | 2 |
1 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Dairy & Animal Science | 2 |
0 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Agronomy & Agriculture | 3 |
1599 rows × 5 columns
# One figure per field: subfield lines over time.
for cat in sorted(data[group[-2]].unique()):
    sub_data = data[data[group[-2]]==cat]
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0],
                   hue=group[-1], marker="o", errorbar=None)
    g.set(xticks=list(range(2012,2022+1,2)))
    # Legend outside the axes, wrapped into columns of at most 12 entries.
    g.legend(title=None,bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncols=math.ceil(len(g.legend_.texts)/12))
    # NOTE(review): "Number or" looks like a typo for "Number of" in the title.
    g.set_title(f'Number or co-publications in {cat}')
    g.set_ylabel(None)
    plt.show()
from matplotlib.ticker import FuncFormatter
import math
def orderOfMagnitude(number):
    """Return the order of magnitude of *number*, i.e. floor(log10(number)).

    Raises ValueError for number <= 0.
    """
    # Fix: math.log(number, 10) is not exact for powers of ten
    # (math.log(1000, 10) == 2.999..., which floors to 2 instead of 3);
    # math.log10 is exact for them.
    return math.floor(math.log10(number))

def roundToNearest(number):
    """Round *number* up to its leading digit's magnitude.

    E.g. 42 -> 50, 101 -> 200, 7 -> 7.  Used to pick a "round" upper
    axis limit for the bar charts below.
    """
    order = orderOfMagnitude(number)
    near = math.ceil(number/10**order)*10**order
    return near
# Institution-publication rows enriched with country type and EuroVoc region.
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
# Eyeball a random sample of the merge result.
wos_univ_locations.sample(100)
UT (Unique WOS ID) | Institution | Country | Institution_harm | merge_iter | Country_Type | Eurovoc_Class | |
---|---|---|---|---|---|---|---|
32326 | WOS:000480328900026 | Chinese Univ Hong Kong | China | Chinese Univ Hong Kong | 0 | China | China |
168425 | WOS:000721883100002 | Natl Tech Univ Athens | Greece | Natl Tech Univ Athens | 0 | EU | Southern Europe |
114303 | WOS:000779000900001 | Cardiff Sch Technol | United Kingdom | Cardiff Sch Technol | 0 | Non-EU associate | Western Europe |
96915 | WOS:000418781800008 | Univ London | United Kingdom | Univ London | 0 | Non-EU associate | Western Europe |
37409 | WOS:000514081100008 | Changjiang Waterway Planning Design & Res Inst | China | Changjiang Waterway Planning Design & Res Inst | 0 | China | China |
... | ... | ... | ... | ... | ... | ... | ... |
56377 | WOS:000664002600088 | Univ Sci & Technol Beijing | China | Univ Sci & Technol Beijing | 0 | China | China |
37559 | WOS:000515393800112 | Southwest Minzu Univ | China | Southwest Minzu Univ | 0 | China | China |
50960 | WOS:000616422300002 | Xinxiang Med Univ | China | Xinxiang Med Univ | 0 | China | China |
13885 | WOS:000381268400001 | Shandong Univ | China | Shandong Univ | 0 | China | China |
124265 | WOS:000575964500001 | CNRS R 7225 | France | 7225 | 0 | EU | Western Europe |
100 rows × 7 columns
# --- Country-level collaboration summary (China's European partners) -------
# Unique (publication, country) pairs, ignoring "Other" countries.
wos_collabs = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country"]].drop_duplicates()
# Co-publication appearances per non-Chinese partner country.
# NOTE(review): relies on value_counts().reset_index() yielding a "count"
# column, i.e. pandas >= 2.0 naming — confirm the pinned pandas version.
collab_desc = wos_collabs[wos_collabs["Country"]!="China"]["Country"].value_counts().reset_index()
# Share of distinct co-publications the country participates in.
collab_desc["percent_of_copubs"] = collab_desc["count"]/wos_collabs[record_col].nunique()*100
# Share of all (publication, country) participations.
collab_desc["percent_contrib_in_copubs"] = collab_desc["count"]/wos_collabs[record_col].size*100
collab_desc = collab_desc.merge(wos_country_types, on="Country")
collab_desc
# Metric column -> human-readable chart title/label.
c_dict = {"count":"Number of co-publications",
          "percent_of_copubs":"Percent of co-publications",
          "percent_contrib_in_copubs":"Contribution to co-publications"}
# Creating subplot axes
# fig, axes = plt.subplots(ncols=3,figsize=(15, 15))
# for c,ax in zip(c_dict.keys(),axes.flatten()):
# One horizontal bar chart per metric.
for c in c_dict.keys():
    data = collab_desc[["Country",c,"Country_Type"]]
    plt.figure(figsize=(9,12))
    g = sns.barplot(data, x=c, y="Country", hue="Country_Type", dodge=False)
    # Extend the axis to a "round" upper limit so bar labels fit.
    g.set_xlim(0,roundToNearest(data[c].max()))
    g.set_ylabel(None)
    g.set_xlabel(c_dict.get(c))
    g.set_title(c_dict.get(c))
    g.legend(title=None, loc="right")
    for i in g.containers:
        g.bar_label(i,fontsize=10, fmt='%.1f%%' if 'percent' in c else '%.0f')
    if 'percent' in c:
        # Integer percent ticks, suffixed with '%'.
        g.xaxis.set_major_locator(MaxNLocator(integer=True))
        vals = g.get_xticks()
        g.set_xticklabels([str(int(val))+'%' for val in vals])
    plt.show()
# wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
# wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
# EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique', normalize='all').fillna(0)
#
# # Generate a mask for the upper triangle
# mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
#
# # Set up the matplotlib figure
# f, ax = plt.subplots(figsize=(11, 9))
#
# # Draw the heatmap with the mask and correct aspect ratio
# g = sns.heatmap(EU_co_occur, mask=mask,
# square=True, linewidths=.5)
#
# g.set_ylabel(None)
# g.set_xlabel(None)
%%capture
# --- Intra-European co-occurrence matrix of partner countries --------------
# Unique (publication, country) pairs for European partners only.
wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
# Self-join on the publication ID: one row per country pair per publication.
wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)
# Throwaway clustermap used only to obtain a clustered row/column order
# (its rendering is suppressed by %%capture above).
pre_fig = sns.clustermap(EU_co_occur)
re_index = [i.get_text() for i in pre_fig.ax_heatmap.yaxis.get_majorticklabels()]
re_column = [i.get_text() for i in pre_fig.ax_heatmap.xaxis.get_majorticklabels()]
EU_co_occur = EU_co_occur.reindex(index = re_index, columns=re_column)
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
# Blank the redundant upper triangle (the matrix is symmetric).
data = np.where(mask,None,EU_co_occur)
EU_co_occur.columns
fig = px.imshow(data,
                labels=dict(x="Country", y="Country", color="Co-publication with China"),
                x=list(EU_co_occur.columns),
                y=list(EU_co_occur.index), title="Intraeuropean patterns<br>Co-occurences of countries in chinese co-publications"
                )
fig.update_layout(title_x=0.5,
                  width=1000, height=1000,
                  xaxis_showgrid=False,
                  yaxis_showgrid=False,
                  yaxis_autorange='reversed', template='plotly_white')
# fig.update_traces(hovertemplate='<b>%{y}</b><br>%{x}<br>Co-publications: %{hovertext}')
fig.update_xaxes(tickangle= -90)
fig.update_yaxes(
    ticks="outside")
fig.update_xaxes(
    ticks="outside")
fig.show(config= dict(displayModeBar = False,responsive=True))
# --- Yearly output by partner country type (EU / Non-EU associate) ---------
collab_year = wos_collabs[wos_collabs["Country"]!="China"].copy()
# Attach country type and publication year to each (publication, country) pair.
collab_year = collab_year.merge(wos_country_types, on="Country").merge(wos[[record_col,"Publication Year"]],on=record_col).drop_duplicates()
data = collab_year.groupby(["Publication Year",'Country_Type'],as_index=False)[record_col].nunique()
g=sns.lineplot(data,y=record_col,x="Publication Year", hue="Country_Type", marker="o")
g.set(xticks=list(range(2012,2022+1,2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications with China")
Text(0.5, 1.0, 'Yearly output of co-publications with China')
# --- Country-level yearly series with growth and cumulative columns --------
# Zero-filled year x country counts (same unstack/stack trick as above).
data = (collab_year.groupby(['Publication Year',"Country"])[record_col]
        .nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# Baseline: each country's first year with a non-zero count.
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset="Country"),
                  on=["Country"], suffixes=[None,"_relative_growth"])
# Percent growth relative to that baseline year.
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data = data.sort_values(by =["Country","Publication Year"], ascending=[True,True])
# Running total per country (depends on the year ordering above).
data[record_col+"_cumsum"] = (data.groupby('Country',as_index=False)[record_col].cumsum())
data = data.merge(wos_country_types, on='Country')
data
Publication Year | Country | UT (Unique WOS ID) | Publication Year_relative_growth | UT (Unique WOS ID)_relative_growth | UT (Unique WOS ID)_cumsum | Country_Type | Eurovoc_Class | |
---|---|---|---|---|---|---|---|---|
0 | 2011 | Austria | 22.0 | 2011 | 0.000000 | 22.0 | EU | Western Europe |
1 | 2012 | Austria | 24.0 | 2011 | 9.090909 | 46.0 | EU | Western Europe |
2 | 2013 | Austria | 26.0 | 2011 | 18.181818 | 72.0 | EU | Western Europe |
3 | 2014 | Austria | 39.0 | 2011 | 77.272727 | 111.0 | EU | Western Europe |
4 | 2015 | Austria | 50.0 | 2011 | 127.272727 | 161.0 | EU | Western Europe |
... | ... | ... | ... | ... | ... | ... | ... | ... |
355 | 2018 | United Kingdom | 1837.0 | 2011 | 406.060606 | 6918.0 | Non-EU associate | Western Europe |
356 | 2019 | United Kingdom | 2430.0 | 2011 | 569.421488 | 9348.0 | Non-EU associate | Western Europe |
357 | 2020 | United Kingdom | 3108.0 | 2011 | 756.198347 | 12456.0 | Non-EU associate | Western Europe |
358 | 2021 | United Kingdom | 3718.0 | 2011 | 924.242424 | 16174.0 | Non-EU associate | Western Europe |
359 | 2022 | United Kingdom | 4245.0 | 2011 | 1069.421488 | 20419.0 | Non-EU associate | Western Europe |
360 rows × 8 columns
# data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
# fig = px.choropleth(data, locations="ISO3", color=record_col, hover_name="Country",
#                     animation_frame='Publication Year', scope="europe", template='plotly', range_color=[data[record_col].min(),data[record_col].max()])
# fig.show()
# Animated European choropleth of relative growth per country and year.
# ISO3 codes are required by px.choropleth's default locationmode.
data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
fig = px.choropleth(data, locations="ISO3", color=record_col+"_relative_growth", hover_name="Country",
                    animation_frame='Publication Year', scope="europe", template='plotly',
                    range_color=[data[record_col+"_relative_growth"].min(),data[record_col+"_relative_growth"].max()])
fig.show()
# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col,x='Publication Year', color="Eurovoc_Class",line_group="Country", markers=True,
# labels={
# record_col: 'Number of co-publications',
# "Eurovoc_Class": "Region"
# },
# title="Yearly output of co-publications", template='plotly',hover_name= "Country")
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
# # fig.update_layout(hovermode='x unified')
# fig.add_shape(
# # Rectangle with reference to the plot
# type="rect",
# xref="paper",
# yref="paper",
# x0=0,
# y0=0,
# x1=1.0,
# y1=1.0,
# line=dict(
# color="black",
# width=0.5,
# )
# )
# fig.update_yaxes(
# showgrid=True,
# ticks="outside")
# fig.update_xaxes(
# showgrid=True,
# ticks="outside")
# fig.show(config= dict(displayModeBar = False))
# fig.data[0].hovertemplate
# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
# y=record_col+"_relative_growth",
# x='Publication Year',
# color="Eurovoc_Class",line_group="Country",markers=True,
# labels={
# record_col+"_relative_growth": 'Relative growth of co-publications (%)',"Eurovoc_Class": "Region"
# },
# title="Relative growth of co-publications<br>(baseline: 2011)", template='plotly',hover_name= "Country")
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')
# fig.add_shape(
# # Rectangle with reference to the plot
# type="rect",
# xref="paper",
# yref="paper",
# x0=0,
# y0=0,
# x1=1.0,
# y1=1.0,
# line=dict(
# color="black",
# width=0.5,
# )
# )
# fig.update_yaxes(
# showgrid=True,
# ticks="outside")
# fig.update_xaxes(
# showgrid=True,
# ticks="outside")
# fig.show(config= dict(displayModeBar = False))
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# --- 2x2 composite of country-level figures (one line per country, colored
# by EuroVoc region).
# NOTE(review): panel (1,1) is titled "placeholder" and never receives
# traces in the visible code — presumably filled elsewhere or unfinished.
figsuper = make_subplots(rows=2, cols=2, subplot_titles=["placeholder","Cumulative number of co-publications",
                                                         "Yearly output of co-publications","Relative growth of co-publications<br>(baseline: 2011)"])
# Panel b: cumulative area chart (keeps its legend; the other panels'
# legends are suppressed so each region appears only once).
fig = px.area(data.sort_values(ascending=True, by='Publication Year'), y=record_col+"_cumsum",
              x='Publication Year',
              color="Eurovoc_Class",
              line_group="Country",
              labels={
                  record_col: 'Number of co-publications',
                  "Eurovoc_Class": "Region"
              },
              title="Cumulative number of co-publications",hover_name= "Country")
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
for trace in list(fig.select_traces()):
    figsuper.add_trace(trace,
                       row=1, col=2
                       )
# Panel c: yearly output lines.
fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
              y=record_col,
              x='Publication Year',
              color="Eurovoc_Class",
              line_group="Country",
              markers=True,
              labels={
                  record_col: 'Number of co-publications',
                  "Eurovoc_Class": "Region"
              },
              title="Yearly output of co-publications",hover_name= "Country")
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
for trace in list(fig.select_traces()):
    trace.showlegend=False
    figsuper.add_trace(trace,
                       row=2, col=1
                       )
# Panel d: relative growth lines.
fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
              y=record_col+"_relative_growth",
              x='Publication Year',
              color="Eurovoc_Class",line_group="Country",markers=True,
              labels={
                  record_col+"_relative_growth": 'Relative growth of co-publications (%)',"Eurovoc_Class": "Region"
              },
              title="Relative growth of co-publications<br>(baseline: 2011)", template='plotly',hover_name= "Country")
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')
# Thin black frame around this panel's plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
for trace in list(fig.select_traces()):
    trace.showlegend=False
    figsuper.add_trace(trace,
                       row=2, col=2
                       )
figsuper.update_yaxes(
showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
ticks="outside")
figsuper.update_xaxes(
showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
ticks="outside")
figsuper.update_layout({'template':"plotly"})
figsuper.show(config= dict(displayModeBar = False))
# Country x publication-year matrix of distinct co-publication counts
# (number of unique record IDs per cell); missing cells become 0.
year_pivot = (
    pd.crosstab(collab_year['Country'],
                collab_year['Publication Year'],
                values=collab_year[record_col],
                aggfunc='nunique')
    .fillna(0)
    .astype(int)
)
year_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | ||||||||||||
Austria | 22 | 24 | 26 | 39 | 50 | 57 | 72 | 89 | 138 | 137 | 185 | 205 |
Belgium | 34 | 38 | 40 | 65 | 71 | 81 | 90 | 133 | 179 | 213 | 242 | 292 |
Bulgaria | 4 | 5 | 8 | 9 | 7 | 19 | 21 | 18 | 10 | 25 | 32 | 19 |
Croatia | 1 | 2 | 6 | 8 | 10 | 7 | 10 | 19 | 27 | 29 | 33 | 35 |
Cyprus | 2 | 1 | 5 | 5 | 5 | 5 | 8 | 7 | 15 | 28 | 36 | 43 |
Czech Republic | 13 | 15 | 16 | 21 | 20 | 36 | 37 | 56 | 64 | 81 | 93 | 123 |
Denmark | 35 | 33 | 40 | 59 | 68 | 74 | 101 | 195 | 234 | 245 | 293 | 343 |
Estonia | 3 | 3 | 7 | 10 | 12 | 10 | 15 | 15 | 16 | 38 | 45 | 39 |
Finland | 31 | 35 | 44 | 82 | 100 | 125 | 126 | 198 | 241 | 256 | 289 | 380 |
France | 117 | 130 | 174 | 231 | 269 | 325 | 348 | 491 | 648 | 691 | 807 | 858 |
Germany | 123 | 172 | 192 | 273 | 310 | 365 | 456 | 604 | 801 | 907 | 1210 | 1386 |
Greece | 15 | 18 | 19 | 32 | 35 | 50 | 47 | 81 | 114 | 122 | 139 | 181 |
Hungary | 11 | 11 | 21 | 16 | 20 | 38 | 34 | 47 | 61 | 61 | 83 | 90 |
Ireland | 13 | 16 | 22 | 31 | 27 | 45 | 66 | 72 | 84 | 116 | 167 | 187 |
Italy | 51 | 70 | 84 | 116 | 178 | 187 | 247 | 325 | 441 | 571 | 641 | 811 |
Latvia | 0 | 0 | 1 | 0 | 1 | 8 | 10 | 15 | 10 | 9 | 13 | 18 |
Lithuania | 1 | 2 | 10 | 4 | 4 | 13 | 12 | 23 | 38 | 36 | 38 | 38 |
Luxembourg | 2 | 3 | 3 | 1 | 8 | 9 | 13 | 15 | 18 | 22 | 35 | 51 |
Malta | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 6 | 2 | 7 | 10 |
Netherlands | 72 | 64 | 77 | 103 | 139 | 166 | 220 | 297 | 408 | 470 | 529 | 655 |
Norway | 30 | 42 | 60 | 76 | 67 | 88 | 104 | 134 | 222 | 253 | 304 | 311 |
Poland | 17 | 31 | 37 | 57 | 73 | 82 | 98 | 110 | 138 | 181 | 276 | 353 |
Portugal | 16 | 23 | 35 | 41 | 45 | 58 | 79 | 119 | 136 | 147 | 204 | 212 |
Romania | 7 | 15 | 13 | 16 | 25 | 26 | 37 | 57 | 64 | 55 | 48 | 62 |
Slovakia | 9 | 6 | 6 | 10 | 12 | 22 | 18 | 27 | 27 | 34 | 36 | 45 |
Slovenia | 7 | 7 | 10 | 12 | 17 | 27 | 22 | 47 | 54 | 31 | 48 | 40 |
Spain | 50 | 49 | 69 | 112 | 138 | 185 | 232 | 273 | 356 | 386 | 473 | 640 |
Sweden | 34 | 50 | 59 | 83 | 113 | 170 | 233 | 232 | 385 | 359 | 428 | 510 |
Switzerland | 37 | 50 | 54 | 74 | 74 | 95 | 155 | 195 | 233 | 263 | 349 | 447 |
United Kingdom | 363 | 417 | 531 | 660 | 781 | 979 | 1350 | 1837 | 2430 | 3108 | 3718 | 4245 |
# Annotated heatmap of the absolute counts; thick white horizontal rules
# visually separate the country rows.
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_pivot, annot=True, fmt="d", linewidths=0.5, ax=ax)
g.set(xlabel="", ylabel="")
for boundary in range(len(year_pivot) + 1):
    ax.axhline(boundary, color='white', lw=10)
# Same matrix as above but column-normalised, so each year column sums to
# 100 (percentage share of each country within that year).
year_percent_pivot = (
    pd.crosstab(collab_year['Country'],
                collab_year['Publication Year'],
                values=collab_year[record_col],
                aggfunc='nunique',
                normalize='columns')
    .fillna(0)
    .mul(100)
)
year_percent_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | ||||||||||||
Austria | 1.962533 | 1.801802 | 1.557819 | 1.736420 | 1.865672 | 1.699970 | 1.689744 | 1.552958 | 1.816267 | 1.543488 | 1.712804 | 1.623248 |
Belgium | 3.033006 | 2.852853 | 2.396645 | 2.894034 | 2.649254 | 2.415747 | 2.112180 | 2.320712 | 2.355883 | 2.399730 | 2.240533 | 2.312139 |
Bulgaria | 0.356824 | 0.375375 | 0.479329 | 0.400712 | 0.261194 | 0.566657 | 0.492842 | 0.314081 | 0.131614 | 0.281658 | 0.296269 | 0.150447 |
Croatia | 0.089206 | 0.150150 | 0.359497 | 0.356189 | 0.373134 | 0.208768 | 0.234687 | 0.331530 | 0.355357 | 0.326724 | 0.305527 | 0.277140 |
Cyprus | 0.178412 | 0.075075 | 0.299581 | 0.222618 | 0.186567 | 0.149120 | 0.187749 | 0.122143 | 0.197420 | 0.315457 | 0.333302 | 0.340486 |
Czech Republic | 1.159679 | 1.126126 | 0.958658 | 0.934996 | 0.746269 | 1.073665 | 0.868341 | 0.977142 | 0.842327 | 0.912573 | 0.861031 | 0.973949 |
Denmark | 3.122212 | 2.477477 | 2.396645 | 2.626892 | 2.537313 | 2.206979 | 2.370336 | 3.402548 | 3.079758 | 2.760252 | 2.712712 | 2.715971 |
Estonia | 0.267618 | 0.225225 | 0.419413 | 0.445236 | 0.447761 | 0.298240 | 0.352030 | 0.261734 | 0.210582 | 0.428121 | 0.416628 | 0.308813 |
Finland | 2.765388 | 2.627628 | 2.636309 | 3.650935 | 3.731343 | 3.728005 | 2.957052 | 3.454894 | 3.171887 | 2.884182 | 2.675678 | 3.008948 |
France | 10.437110 | 9.759760 | 10.425404 | 10.284951 | 10.037313 | 9.692812 | 8.167097 | 8.567440 | 8.528560 | 7.785038 | 7.471530 | 6.793887 |
Germany | 10.972346 | 12.912913 | 11.503895 | 12.154942 | 11.567164 | 10.885774 | 10.701713 | 10.539173 | 10.542248 | 10.218567 | 11.202666 | 10.974741 |
Greece | 1.338091 | 1.351351 | 1.138406 | 1.424755 | 1.305970 | 1.491202 | 1.103027 | 1.413366 | 1.500395 | 1.374493 | 1.286918 | 1.433209 |
Hungary | 0.981267 | 0.825826 | 1.258238 | 0.712378 | 0.746269 | 1.133313 | 0.797935 | 0.820101 | 0.802843 | 0.687247 | 0.768447 | 0.712645 |
Ireland | 1.159679 | 1.201201 | 1.318155 | 1.380232 | 1.007463 | 1.342082 | 1.548932 | 1.256325 | 1.105554 | 1.306895 | 1.546153 | 1.480719 |
Italy | 4.549509 | 5.255255 | 5.032954 | 5.164737 | 6.641791 | 5.577095 | 5.796761 | 5.670913 | 5.804159 | 6.433078 | 5.934636 | 6.421728 |
Latvia | 0.000000 | 0.000000 | 0.059916 | 0.000000 | 0.037313 | 0.238592 | 0.234687 | 0.261734 | 0.131614 | 0.101397 | 0.120359 | 0.142529 |
Lithuania | 0.089206 | 0.150150 | 0.599161 | 0.178094 | 0.149254 | 0.387712 | 0.281624 | 0.401326 | 0.500132 | 0.405588 | 0.351819 | 0.300895 |
Luxembourg | 0.178412 | 0.225225 | 0.179748 | 0.044524 | 0.298507 | 0.268416 | 0.305093 | 0.261734 | 0.236904 | 0.247859 | 0.324044 | 0.403832 |
Malta | 0.089206 | 0.000000 | 0.000000 | 0.000000 | 0.037313 | 0.029824 | 0.000000 | 0.000000 | 0.078968 | 0.022533 | 0.064809 | 0.079183 |
Netherlands | 6.422837 | 4.804805 | 4.613541 | 4.585931 | 5.186567 | 4.950790 | 5.163107 | 5.182342 | 5.369834 | 5.295178 | 4.897695 | 5.186476 |
Norway | 2.676182 | 3.153153 | 3.594967 | 3.383793 | 2.500000 | 2.624515 | 2.440742 | 2.338161 | 2.921822 | 2.850383 | 2.814554 | 2.462586 |
Poland | 1.516503 | 2.327327 | 2.216896 | 2.537845 | 2.723881 | 2.445571 | 2.299930 | 1.919386 | 1.816267 | 2.039207 | 2.555319 | 2.795154 |
Portugal | 1.427297 | 1.726727 | 2.097064 | 1.825467 | 1.679104 | 1.729794 | 1.854025 | 2.076426 | 1.789945 | 1.656151 | 1.888714 | 1.678676 |
Romania | 0.624442 | 1.126126 | 0.778910 | 0.712378 | 0.932836 | 0.775425 | 0.868341 | 0.994591 | 0.842327 | 0.619648 | 0.444403 | 0.490934 |
Slovakia | 0.802855 | 0.450450 | 0.359497 | 0.445236 | 0.447761 | 0.656129 | 0.422436 | 0.471122 | 0.355357 | 0.383055 | 0.333302 | 0.356323 |
Slovenia | 0.624442 | 0.525526 | 0.599161 | 0.534283 | 0.634328 | 0.805249 | 0.516311 | 0.820101 | 0.710713 | 0.349256 | 0.444403 | 0.316731 |
Spain | 4.460303 | 3.678679 | 4.134212 | 4.986643 | 5.149254 | 5.517447 | 5.444731 | 4.763567 | 4.685444 | 4.348806 | 4.379224 | 5.067701 |
Sweden | 3.033006 | 3.753754 | 3.535051 | 3.695459 | 4.216418 | 5.070086 | 5.468200 | 4.048159 | 5.067123 | 4.044615 | 3.962596 | 4.038324 |
Switzerland | 3.300624 | 3.753754 | 3.235470 | 3.294746 | 2.761194 | 2.833284 | 3.637644 | 3.402548 | 3.066596 | 2.963046 | 3.231182 | 3.539473 |
United Kingdom | 32.381802 | 31.306306 | 31.815458 | 29.385574 | 29.141791 | 29.197733 | 31.682704 | 32.053743 | 31.982101 | 35.015773 | 34.422739 | 33.613113 |
# Percentage heatmap; thick white vertical rules separate the year columns.
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_percent_pivot, annot=True, fmt='.1f', linewidths=0.5,
                ax=ax, cbar=False)
# Append a percent sign to every annotation cell.
for label in ax.texts:
    label.set_text(f"{label.get_text()} %")
g.set(xlabel="", ylabel="")
for boundary in range(year_percent_pivot.shape[1] + 1):
    ax.axvline(boundary, color='white', lw=10)
# Institutional collab
# Per-institution records for countries in scope (EU, associates, China);
# "Other" countries are dropped.
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
in_scope = wos_univ_locations["Country_Type"] != "Other"
wos_univ_collabs = (
    wos_univ_locations.loc[in_scope,
                           [record_col, "Country", "Institution_harm",
                            "Country_Type", "Eurovoc_Class"]]
    .drop_duplicates()
)
wos_univ_collabs["ISO3"] = cc.pandas_convert(series=wos_univ_collabs["Country"], to='ISO3')
# Suffix the ISO3 code so same-named institutions in different countries stay distinct.
wos_univ_collabs["Institution_harm_label"] = (
    wos_univ_collabs["Institution_harm"] + " (" + wos_univ_collabs["ISO3"] + ")"
)
wos_univ_collabs.sample(100)
UT (Unique WOS ID) | Country | Institution_harm | Country_Type | Eurovoc_Class | ISO3 | Institution_harm_label | |
---|---|---|---|---|---|---|---|
62496 | WOS:000713807500098 | China | Tsinghua Univ | China | China | CHN | Tsinghua Univ (CHN) |
125594 | WOS:000694719000013 | France | Univ Paris | EU | Western Europe | FRA | Univ Paris (FRA) |
169955 | WOS:000766762800026 | Austria | Inst Adv Res Artificial Intelligence | EU | Western Europe | AUT | Inst Adv Res Artificial Intelligence (AUT) |
48357 | WOS:000596356000001 | China | Beijing Inst Technol | China | China | CHN | Beijing Inst Technol (CHN) |
148599 | WOS:000517228300013 | Germany | Univ Wurzburg | EU | Western Europe | DEU | Univ Wurzburg (DEU) |
... | ... | ... | ... | ... | ... | ... | ... |
92516 | WOS:000348141800002 | United Kingdom | Heriot Watt Univ | Non-EU associate | Western Europe | GBR | Heriot Watt Univ (GBR) |
153610 | WOS:000838053000004 | Germany | Cluster Excellence Hearing4all | EU | Western Europe | DEU | Cluster Excellence Hearing4all (DEU) |
92564 | WOS:000349389800004 | United Kingdom | European Ctr Medium Range Weather Forecasts | Non-EU associate | Western Europe | GBR | European Ctr Medium Range Weather Forecasts (GBR) |
56459 | WOS:000665034700031 | China | Nanjing Univ Sci & Technol | China | China | CHN | Nanjing Univ Sci & Technol (CHN) |
54749 | WOS:000651626700012 | China | Ningbo Univ | China | China | CHN | Ningbo Univ (CHN) |
100 rows × 7 columns
# Fixed colours per country type so the three charts are comparable.
color_discrete_map = {
    'China': '#EF553B',
    'EU': '#636EFA',
    'Non-EU associate': '#00CC96',
}
TOPN = 25

wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"] != "China"]
wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "EU"]


def _top_institutions(frame, group_cols):
    # Distinct-publication count per institution, keeping only the TOPN largest.
    counts = frame.groupby(group_cols, as_index=False)[record_col].nunique()
    return counts.sort_values(by=record_col, ascending=False).head(TOPN).copy()


data_eu = _top_institutions(
    wos_univ_eu, ["Country", "Institution_harm_label", "Country_Type"]
).sort_values(by="Country_Type")
data_eu_strict = _top_institutions(
    wos_univ_eu_strict, ["Country", "Institution_harm_label", "Eurovoc_Class"]
)
data_ch = _top_institutions(
    wos_univ_ch, ["Country", "Institution_harm", "Country_Type"]
)

# (frame, scope label, y column, colour-by column, pattern column or None)
panel_specs = [
    (data_eu, "European countries in scope", "Institution_harm_label", "Country", "Country_Type"),
    (data_eu_strict, "EU-28 only", "Institution_harm_label", "Eurovoc_Class", None),
    (data_ch, "China", "Institution_harm", "Country_Type", None),
]
# NOTE(review): the loop variable deliberately keeps the name `data` so the
# post-loop module state matches the original notebook cell.
for data, c_scope, y_lab, col_by, pat in panel_specs:
    fig = px.bar(
        data, x=record_col, y=y_lab, color=col_by,
        color_discrete_map=color_discrete_map, pattern_shape=pat,
        labels={
            record_col: 'Number of co-publications',
            "Institution_harm": "Institution",
            "Institution_harm_label": "Institution",
            "Country_Type": "Country type",
            "Eurovoc_Class": "Region",
        },
        title=f"Most visible institutions (top {TOPN} within {c_scope})",
        template='plotly',
    )
    fig.update_layout(xaxis_tickformat='d', font_family="Montserrat",
                      yaxis={'categoryorder': 'total ascending'},
                      width=1000, height=1000)
    fig.update_traces(hovertemplate='%{x:d}')
    # Thin black frame around the plotting area.
    fig.add_shape(type="rect", xref="paper", yref="paper",
                  x0=0, y0=0, x1=1.0, y1=1.0,
                  line=dict(color="black", width=0.5))
    fig.update_yaxes(showgrid=True, ticks="outside")
    fig.update_xaxes(showgrid=True, ticks="outside")
    fig.show(config=dict(displayModeBar=False))
# Sanity check on name harmonisation: list every raw institution spelling
# that collapses into the harmonised name "Chinese Acad Sci", with its
# distinct-publication count.
wos_univ_test = (
    wos_univ_locations[wos_univ_locations["Country_Type"] != "Other"]
    [[record_col, "Country", "Institution", "Institution_harm", "Country_Type"]]
    .drop_duplicates()
)
www = wos_univ_test.groupby(["Institution", "Institution_harm"],
                            as_index=False)[record_col].nunique()
www[www["Institution_harm"] == "Chinese Acad Sci"]
Institution | Institution_harm | UT (Unique WOS ID) | |
---|---|---|---|
16 | Chinese Acad Sci | Chinese Acad Sci | 1 |
3149 | Chinese Acad Sci | Chinese Acad Sci | 4614 |
3153 | Chinese Acad Sci AIRCAS | Chinese Acad Sci | 2 |
3155 | Chinese Acad Sci CAREERI CAS | Chinese Acad Sci | 1 |
3157 | Chinese Acad Sci CASIA | Chinese Acad Sci | 8 |
3159 | Chinese Acad Sci GUCAS | Chinese Acad Sci | 2 |
3160 | Chinese Acad Sci IAP | Chinese Acad Sci | 1 |
3161 | Chinese Acad Sci IECAS | Chinese Acad Sci | 2 |
3162 | Chinese Acad Sci IME CAS | Chinese Acad Sci | 1 |
3163 | Chinese Acad Sci IMECAS | Chinese Acad Sci | 1 |
3164 | Chinese Acad Sci ITP CAS | Chinese Acad Sci | 1 |
3166 | Chinese Acad Sci NAOC | Chinese Acad Sci | 1 |
3167 | Chinese Acad Sci NAOC CAS | Chinese Acad Sci | 2 |
13501 | RCEES Chinese Acad Sci | Chinese Acad Sci | 1 |
19499 | ZIAT Chinese Acad Sci | Chinese Acad Sci | 1 |
# Rebuild the China / non-China splits and pair every non-Chinese institution
# with every Chinese institution that appears on the same publication, then
# attach the publication's subject classification.
wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"] != "China"]
wos_univ_dipol = (
    wos_univ_eu
    .merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch'))
    .merge(wos[[record_col, "Domain_English", "Field_English", "SubField_English"]],
           on=record_col)
)
wos_univ_dipol.sample(100)
UT (Unique WOS ID) | Country_eu | Institution_harm_eu | Country_Type_eu | Eurovoc_Class_eu | ISO3_eu | Institution_harm_label_eu | Country_ch | Institution_harm_ch | Country_Type_ch | Eurovoc_Class_ch | ISO3_ch | Institution_harm_label_ch | Domain_English | Field_English | SubField_English | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
151306 | WOS:000387273800018 | Slovakia | Reg Author Publ Hlth | EU | Eastern Europe | SVK | Reg Author Publ Hlth (SVK) | China | Chinese Univ Hong Kong | China | China | CHN | Chinese Univ Hong Kong (CHN) | Health Sciences | Clinical Medicine | Environmental & Occupational Health |
288046 | WOS:000447568300005 | France | Sorbonne Univ | EU | Western Europe | FRA | Sorbonne Univ (FRA) | China | Xiamen Univ | China | China | CHN | Xiamen Univ (CHN) | Applied Sciences | Information & Communication Technologies | Networking & Telecommunications |
86767 | WOS:000552035900004 | Italy | Terrasystem Srl | EU | Southern Europe | ITA | Terrasystem Srl (ITA) | China | Nanjing Univ Informat Sci & Technol | China | China | CHN | Nanjing Univ Informat Sci & Technol (CHN) | Multidisciplinary | Multidisciplinary | Multidisciplinary |
74398 | WOS:000494411700001 | Germany | Georg August Univ Gottingen | EU | Western Europe | DEU | Georg August Univ Gottingen (DEU) | China | China Three Gorges Univ | China | China | CHN | China Three Gorges Univ (CHN) | Economic & Social Sciences | Social Sciences | Information & Library Sciences |
266078 | WOS:000639495800005 | United Kingdom | Univ Lincoln | Non-EU associate | Western Europe | GBR | Univ Lincoln (GBR) | China | Minist Educ | China | China | CHN | Minist Educ (CHN) | Applied Sciences | Information & Communication Technologies | Information Systems |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
38553 | WOS:000418982000012 | Spain | Reg Univ Carlos Haya | EU | Southern Europe | ESP | Reg Univ Carlos Haya (ESP) | China | Zhengzhou Univ | China | China | CHN | Zhengzhou Univ (CHN) | Health Sciences | Clinical Medicine | Oncology & Carcinogenesis |
19393 | WOS:000406549900063 | Italy | Azienda Ospedaliero Univ | EU | Southern Europe | ITA | Azienda Ospedaliero Univ (ITA) | China | Shanxi Prov Peoples Hosp | China | China | CHN | Shanxi Prov Peoples Hosp (CHN) | Health Sciences | Clinical Medicine | Anesthesiology |
77625 | WOS:000494411700001 | Germany | Martin Luther Univ Halle Wittenberg | EU | Western Europe | DEU | Martin Luther Univ Halle Wittenberg (DEU) | China | Nankai Univ | China | China | CHN | Nankai Univ (CHN) | Economic & Social Sciences | Social Sciences | Information & Library Sciences |
138669 | WOS:000355671300001 | France | Aix Marseille Univ | EU | Western Europe | FRA | Aix Marseille Univ (FRA) | China | Chinese Acad Sci | China | China | CHN | Chinese Acad Sci (CHN) | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics |
136858 | WOS:000347046200017 | Slovenia | Univ Ljubljana | EU | Eastern Europe | SVN | Univ Ljubljana (SVN) | China | Shanghai Jiao Tong Univ | China | China | CHN | Shanghai Jiao Tong Univ (CHN) | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics |
100 rows × 16 columns
# Parallel-categories flow: EU-side country -> science domain -> Chinese partner country.
dipol_cols = ["Country_eu", "Domain_English", "Country_ch"]
fig = px.parallel_categories(wos_univ_dipol[dipol_cols])
fig.show()