import pandas as pd
import janitor
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MaxNLocator
import math
import plotly.express as px
import country_converter as coco
cc = coco.CountryConverter()
%matplotlib inline
# Global seaborn styling applied to the matplotlib/seaborn figures drawn below.
sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)
# Draw the active colour palette as a swatch strip for visual reference.
sns.palplot(sns.color_palette())
# Name of the folder (one level above the working directory) holding the Excel inputs.
outdir="wos_processed_data"
# Record-level table: the cells below group it by the record-id column,
# 'Publication Year' and the Domain/Field/SubField classification columns.
wos = pd.read_excel(f"../{outdir}/wos_processed.xlsx")
# Institution-level table: merged with the country-type table on "Country" further down.
wos_univ = pd.read_excel(f"../{outdir}/wos_institution_locations_harmonized.xlsx")
# Eurovoc sub-regions of Europe and the countries assigned to each of them.
_EUROVOC_REGIONS = {
    "Eastern Europe": ("Bulgaria", "Czech Republic", "Croatia", "Hungary",
                       "Poland", "Romania", "Slovakia", "Slovenia"),
    "Northern Europe": ("Denmark", "Estonia", "Finland", "Latvia",
                        "Lithuania", "Sweden", "Norway", "Iceland"),
    "Southern Europe": ("Cyprus", "Greece", "Italy", "Portugal", "Spain", "Malta"),
    "Western Europe": ("Austria", "Belgium", "France", "Germany", "Luxembourg",
                       "Netherlands", "Switzerland", "United Kingdom", "Ireland"),
}

# Reverse lookup (country -> region), built once at definition time instead of
# rebuilding and scanning the region table for every row passed through
# ``Series.map``.
_COUNTRY_TO_EUROVOC = {
    country: region
    for region, countries in _EUROVOC_REGIONS.items()
    for country in countries
}
# China is not a Eurovoc region; it is kept as its own class.
_COUNTRY_TO_EUROVOC["China"] = "China"


def eurovoc_classer(x):
    """Return the Eurovoc region label for a country name.

    Parameters
    ----------
    x : str
        Country name spelled exactly as in the region table
        (e.g. ``"Czech Republic"``, ``"United Kingdom"``).

    Returns
    -------
    str or None
        ``"China"`` for China, one of ``"Eastern Europe"``,
        ``"Northern Europe"``, ``"Southern Europe"`` or ``"Western Europe"``
        for a listed European country, and ``None`` for anything else
        (including missing values such as NaN).
    """
    return _COUNTRY_TO_EUROVOC.get(x)
# Country table read from the processed-data folder.
wos_country = pd.read_excel(f"../{outdir}/wos_countries.xlsx")
# Country -> Country_Type lookup table; it gains a Eurovoc region column on the next line.
wos_country_types = pd.read_excel(f"../{outdir}/wos_country_types.xlsx")
# Label every country with its region via eurovoc_classer (None when the country is not listed there).
wos_country_types["Eurovoc_Class"] = wos_country_types["Country"].map(eurovoc_classer)
wos_country_types
Country | Country_Type | Eurovoc_Class | |
---|---|---|---|
0 | Belgium | EU | Western Europe |
1 | China | China | China |
2 | Luxembourg | EU | Western Europe |
3 | Netherlands | EU | Western Europe |
4 | Norway | Non-EU associate | Northern Europe |
5 | United Kingdom | Non-EU associate | Western Europe |
6 | France | EU | Western Europe |
7 | Sweden | EU | Northern Europe |
8 | Italy | EU | Southern Europe |
9 | Denmark | EU | Northern Europe |
10 | Germany | EU | Western Europe |
11 | Slovenia | EU | Eastern Europe |
12 | Estonia | EU | Northern Europe |
13 | Finland | EU | Northern Europe |
14 | Bulgaria | EU | Eastern Europe |
15 | Slovakia | EU | Eastern Europe |
16 | Spain | EU | Southern Europe |
17 | Poland | EU | Eastern Europe |
18 | Czech Republic | EU | Eastern Europe |
19 | Greece | EU | Southern Europe |
20 | Malta | EU | Southern Europe |
21 | Austria | EU | Western Europe |
22 | Switzerland | Non-EU associate | Western Europe |
23 | Ireland | EU | Western Europe |
24 | Portugal | EU | Southern Europe |
25 | Romania | EU | Eastern Europe |
26 | Hungary | EU | Eastern Europe |
27 | Cyprus | EU | Southern Europe |
28 | Croatia | EU | Eastern Europe |
29 | Lithuania | EU | Northern Europe |
30 | Latvia | EU | Northern Europe |
# len(wos),len(wos_univ_locations)
# wos_addresses = pd.read_excel(f"/{outdir}/wos_addresses.xlsx")
# wos_affiliations = pd.read_excel(f"/{outdir}/wos_affiliations.xlsx")
# wos_author_locations = pd.read_excel(f"/{outdir}/wos_author_locations.xlsx")
# wos_univ_locations = pd.read_excel(f"/{outdir}/wos_univ_locations.xlsx")
record_col = "UT (Unique WOS ID)"
# def nth_repl_all(s, sub="", repl="<br>", nth=2):
# find = s.find(sub)
# # loop until we find no match
# i = 1
# while find != -1:
# # if i is equal to nth we found nth matches so replace
# if i == nth:
# s = s[:find]+repl+s[find + len(sub):]
# i = 0
# # find + len(sub) + 1 means we start after the last match
# find = s.find(sub, find + len(sub) + 1)
# i += 1
# return s.replace("<br>&","&<br")
def replace_nth(s, sub=" ", repl="<br>", n=2):
    """Replace every ``n``-th occurrence of ``sub`` in ``s`` with ``repl``.

    The string is split on ``sub``, the pieces are re-joined with ``sub`` in
    groups of ``n`` and the groups are joined with ``repl``. With the
    defaults this inserts an HTML line break after every second word.

    An ampersand that would open a new group is moved to the end of the
    previous one (``"...{repl}&"`` becomes ``"... &{repl}"``), so a break
    never starts with ``&``.

    Parameters
    ----------
    s : str
        Text to re-wrap.
    sub : str, default " "
        Separator to split on; must be non-empty (``str.split`` rejects ``""``).
    repl : str, default "<br>"
        Replacement inserted in place of every ``n``-th separator.
    n : int, default 2
        Number of pieces kept together in each group; must be >= 1.

    Returns
    -------
    str
        The re-wrapped text.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")
    chunks = s.split(sub)
    # Slicing clamps at the end of the list, so the last group may be shorter.
    rows = [sub.join(chunks[start:start + n]) for start in range(0, len(chunks), n)]
    wrapped = repl.join(rows)
    if repl:
        # Use the actual replacement token rather than a hard-coded "<br>" so
        # the ampersand fix-up also works for a caller-supplied ``repl``.
        wrapped = wrapped.replace(f"{repl}&", f" &{repl}")
    return wrapped
# Hierarchy levels of the classification, outermost level first.
groups = ['Domain_English', "Field_English", 'SubField_English']
# Distinct records per (domain, field, subfield) combination, largest first.
data = (wos.groupby(groups, as_index=False)[record_col]
        .nunique()
        .sort_values(by=record_col, ascending=False))
# Share of each combination in the sum of all combination counts.
data["percent"] = data[record_col] / data[record_col].sum() * 100
# data[groups] = data[groups].applymap(replace_nth)
# Append "<br>(total)" to every label, where total is the summed count of all
# rows that share the label at that level.
# transform("sum") returns the totals on data's own index, so each total is
# added to its own row. Building the totals through a merge yields a fresh
# 0..n-1 index, and because data keeps its shuffled post-sort index the
# index-aligned string addition then attaches totals to the wrong labels.
for c in groups:
    level_totals = data.groupby(c)[record_col].transform("sum")
    data[c] = data[c] + "<br>(" + level_totals.astype(str) + ")"
data
Domain_English | Field_English | SubField_English | UT (Unique WOS ID) | percent | |
---|---|---|---|---|---|
37 | Applied Sciences<br>(29985) | Information & Communication Technologies<br>(1... | Artificial Intelligence & Image Processing<br>... | 7915 | 17.184108 |
44 | Applied Sciences<br>(29985) | Information & Communication Technologies<br>(2... | Networking & Telecommunications<br>(303) | 5360 | 11.636995 |
32 | Applied Sciences<br>(29985) | Engineering<br>(3940) | Geological & Geomatics Engineering<br>(436) | 2576 | 5.592705 |
33 | Applied Sciences<br>(29985) | Engineering<br>(1226) | Industrial Engineering & Automation<br>(425) | 2316 | 5.028224 |
15 | Applied Sciences<br>(29985) | Enabling & Strategic Technologies<br>(9232) | Energy<br>(598) | 1965 | 4.266175 |
... | ... | ... | ... | ... | ... |
11 | Applied Sciences<br>(29985) | Economics & Business <br>(9232) | Business & Management<br>(792) | 1 | 0.002171 |
46 | Applied Sciences<br>(29985) | Social Sciences<br>(2032) | Anthropology<br>(285) | 1 | 0.002171 |
54 | Arts & Humanities<br>(8457) | Philosophy & Theology<br>(3385) | Philosophy<br>(208) | 1 | 0.002171 |
52 | Arts & Humanities<br>(8457) | Historical Studies<br>(3385) | History of Social Sciences<br>(211) | 1 | 0.002171 |
129 | Health Sciences<br>(5341) | Psychology & Cognitive Sciences<br>(1067) | General Psychology & Cognitive Sciences<br>(19) | 1 | 0.002171 |
175 rows × 5 columns
# Sunburst over the levels listed in `groups`; wedge size is the "percent" column
# and colour is taken from the domain label.
fig = px.sunburst(data, path=groups, values="percent",
color='Domain_English',title="Distribution of topics<br>(METRIX classification)", template='plotly')
# Hover text: the wedge label followed by its value with two decimals and a percent sign.
fig.update_traces(hovertemplate='%{label}<br>%{value:.2f}%')
# Render without the plotly mode bar.
fig.show(config= dict(displayModeBar = False))
# Number of distinct records per domain, sorted from largest to smallest.
group = 'Domain_English'
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
data
Domain_English | UT (Unique WOS ID) | |
---|---|---|
0 | Applied Sciences | 29985 |
5 | Natural Sciences | 8457 |
3 | Health Sciences | 5341 |
2 | Economic & Social Sciences | 1360 |
4 | Multidisciplinary | 847 |
1 | Arts & Humanities | 70 |
# Horizontal bar chart: one bar per domain, bar length = number of distinct records.
g = sns.barplot(data, x=record_col, y=group)
g.set(
    xlim=(0, 35000),
    ylabel=None,
    xlabel="Number of co-publications",
    title="Distribution of Domains",
)
# Write the value of every bar next to it.
for bars in g.containers:
    g.bar_label(bars, fontsize=10)
fig = px.bar(data, x=record_col, y=group, color=group,
labels={
record_col: 'Number of co-publications',
group: "",
},
title="Distribution of Domains", template='plotly')
fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family="Montserrat")
fig.update_traces(hovertemplate='%{x:d}')
fig.add_shape(
# Rectangle with reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1.0,
y1=1.0,
line=dict(
color="black",
width=0.5,
)
)
fig.update_yaxes(
showgrid=True,
ticks="outside")
fig.update_xaxes(
showgrid=True,
ticks="outside")
fig.show(config= dict(displayModeBar = False))
# # define a function to divide each row's 'Count' by the value of the first year
# def divide_by_first_year(group):
# group['relative_growth'] = group[record_col] / group.loc[group['Publication Year'] == group['Publication Year'].min(), record_col].values[0]
# return group
#
#
#
# data = (wos.groupby(group)[record_col].nunique()
# .unstack(fill_value=0).stack()
# .reset_index()
# .rename(columns={0:record_col})
# .sort_values(ascending=False, by=group+[record_col]))
#
# # group by 'Topic'
# grouped = data.groupby('Domain_English')
# # apply the function to each group
# data = grouped.apply(divide_by_first_year).reset_index(drop=True)
# data['relative_growth'] = data['relative_growth']*100
# Yearly record counts per domain plus their growth relative to a per-domain baseline.
group = ['Publication Year','Domain_English']
# Distinct records per (year, domain). unstack -> fillna(0) -> stack turns
# year/domain combinations without any record into explicit zero rows;
# after reset_index the value column is named 0, hence the rename.
data = (wos.groupby(['Publication Year','Domain_English'])[record_col].nunique(dropna=False).unstack()
.fillna(0)
.stack()
.reset_index()
.rename(columns={0:record_col}))
# Baseline per domain = its earliest year with a non-zero count (sort by year,
# keep the first row per domain). Merging it back adds the baseline year and
# baseline count as "*_relative_growth" columns on every row of that domain.
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset='Domain_English'),
on='Domain_English', suffixes=[None,"_relative_growth"])
# Overwrite the baseline count with the percentage change against it.
# Zero-count years that precede a domain's baseline therefore come out as -100.
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data
Publication Year | Domain_English | UT (Unique WOS ID) | Publication Year_relative_growth | UT (Unique WOS ID)_relative_growth | |
---|---|---|---|---|---|
0 | 2011 | Applied Sciences | 490.0 | 2011 | 0.000000 |
1 | 2012 | Applied Sciences | 593.0 | 2011 | 21.020408 |
2 | 2013 | Applied Sciences | 738.0 | 2011 | 50.612245 |
3 | 2014 | Applied Sciences | 1031.0 | 2011 | 110.408163 |
4 | 2015 | Applied Sciences | 1201.0 | 2011 | 145.102041 |
... | ... | ... | ... | ... | ... |
67 | 2018 | Natural Sciences | 753.0 | 2011 | 316.022099 |
68 | 2019 | Natural Sciences | 999.0 | 2011 | 451.933702 |
69 | 2020 | Natural Sciences | 1232.0 | 2011 | 580.662983 |
70 | 2021 | Natural Sciences | 1403.0 | 2011 | 675.138122 |
71 | 2022 | Natural Sciences | 1665.0 | 2011 | 819.889503 |
72 rows × 5 columns
g=sns.lineplot(data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0], hue=group[-1], marker="o")
g.set(xticks=list(range(2012,2022+1,2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications")
Text(0.5, 1.0, 'Yearly output of co-publications')
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col,x=group[0], color=group[-1], markers=True, labels={
record_col: 'Number of co-publications',
group[-1]: "Domain",
},
title="Yearly output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:d}')
fig.update_layout(hovermode='x unified')
fig.add_shape(
# Rectangle with reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1.0,
y1=1.0,
line=dict(
color="black",
width=0.5,
)
)
fig.update_yaxes(
showgrid=True,
ticks="outside")
fig.update_xaxes(
showgrid=True,
ticks="outside")
fig.show(config= dict(displayModeBar = False))
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+"_relative_growth",x=group[0], color=group[-1], markers=True, labels={
record_col+"_relative_growth": 'Rel. growth<br>in co-publications (%)',
group[-1]: "Domain",
},
title="Relative growth in the output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:.2f}%')
fig.update_layout(hovermode='x unified',yaxis_tickformat='d',font_family="Montserrat")
fig.add_shape(
# Rectangle with reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1.0,
y1=1.0,
line=dict(
color="black",
width=0.5,
)
)
fig.update_yaxes(
showgrid=True,
ticks="outside")
fig.update_xaxes(
showgrid=True,
ticks="outside")
fig.update_yaxes(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')
fig.show(config= dict(displayModeBar = False))
pivot_data = pd.pivot_table(data, values=record_col, index=['Domain_English'],
columns=['Publication Year'], fill_value=0)
pivot_data
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Domain_English | ||||||||||||
Applied Sciences | 490 | 593 | 738 | 1031 | 1201 | 1535 | 1920 | 2808 | 3729 | 4446 | 5295 | 6199 |
Arts & Humanities | 0 | 0 | 0 | 4 | 1 | 3 | 7 | 4 | 11 | 11 | 16 | 13 |
Economic & Social Sciences | 20 | 22 | 29 | 28 | 34 | 40 | 84 | 105 | 160 | 211 | 252 | 375 |
Health Sciences | 116 | 120 | 155 | 184 | 216 | 243 | 321 | 403 | 611 | 755 | 1035 | 1182 |
Multidisciplinary | 15 | 21 | 43 | 52 | 57 | 64 | 75 | 76 | 83 | 97 | 115 | 149 |
Natural Sciences | 181 | 223 | 298 | 318 | 380 | 437 | 568 | 753 | 999 | 1232 | 1403 | 1665 |
f, ax = plt.subplots(figsize=(9, 6))
g = sns.heatmap(pivot_data, annot=True, fmt="d", linewidths=.5, ax=ax)
g.set(xlabel="", ylabel="")
[Text(0.5, 33.249999999999986, ''), Text(79.74999999999999, 0.5, '')]
import numpy as np  # still needed by later cells (np.triu, np.ones_like, np.where)
# Share (in %) of every domain within each publication year:
# normalize='columns' makes each year column sum to 1 before scaling by 100.
# The aggregation is named by string: passing the NumPy callable (np.sum) to
# pandas aggregations is deprecated in recent pandas and emits a FutureWarning.
percent_pivot = pd.crosstab(
    data['Domain_English'],
    data['Publication Year'],
    values=data[record_col],
    aggfunc="sum",
    normalize='columns',
) * 100
percent_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Domain_English | ||||||||||||
Applied Sciences | 59.610706 | 60.572012 | 58.432304 | 63.760049 | 63.578613 | 66.106804 | 64.537815 | 67.678959 | 66.672626 | 65.847156 | 65.241498 | 64.687467 |
Arts & Humanities | 0.000000 | 0.000000 | 0.000000 | 0.247372 | 0.052938 | 0.129199 | 0.235294 | 0.096409 | 0.196674 | 0.162915 | 0.197141 | 0.135657 |
Economic & Social Sciences | 2.433090 | 2.247191 | 2.296120 | 1.731602 | 1.799894 | 1.722653 | 2.823529 | 2.530730 | 2.860719 | 3.125000 | 3.104978 | 3.913180 |
Health Sciences | 14.111922 | 12.257406 | 12.272367 | 11.379097 | 11.434621 | 10.465116 | 10.789916 | 9.713184 | 10.924370 | 11.181872 | 12.752587 | 12.334342 |
Multidisciplinary | 1.824818 | 2.145046 | 3.404592 | 3.215832 | 3.017470 | 2.756245 | 2.521008 | 1.831767 | 1.483998 | 1.436611 | 1.416954 | 1.554837 |
Natural Sciences | 22.019465 | 22.778345 | 23.594616 | 19.666048 | 20.116464 | 18.819983 | 19.092437 | 18.148952 | 17.861613 | 18.246445 | 17.286841 | 17.374517 |
f, ax = plt.subplots(figsize=(15, 6))
g = sns.heatmap(percent_pivot, annot=True, fmt='.2f', linewidths=.5, ax=ax, cbar=False)
for t in ax.texts: t.set_text(t.get_text() + " %")
g.set(xlabel="", ylabel="")
[Text(0.5, 33.249999999999986, ''), Text(154.75, 0.5, '')]
# percent_pivot.T.plot(kind='bar',
# stacked=True,
# figsize=(10, 6))
# percent_pivot.T.plot(kind='bar',
# stacked=True,
# figsize=(15, 8))
#
# plt.legend(loc="lower left", ncol=2)
# # plt.ylabel("Release Year")
# # plt.xlabel("Proportion")
#
#
# for n, x in enumerate([*pivot_data.T.index.values]):
# for (proportion, count, y_loc) in zip(percent_pivot.T.loc[x],
# pivot_data.T.loc[x],
# percent_pivot.T.loc[x].cumsum()):
#
# plt.text(y=(y_loc - proportion) + (proportion / 2),
# x=n - 0.11,
# s=f'{count}',# ({np.round(proportion, 1)}%)',
# color="black",
# fontsize=8,
# fontweight="bold")
#
# plt.show()
group = ['Publication Year',"Domain_English",'Field_English']
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
data
Publication Year | Domain_English | Field_English | UT (Unique WOS ID) | |
---|---|---|---|---|
233 | 2022 | Natural Sciences | Physics & Astronomy | 596 |
232 | 2022 | Natural Sciences | Mathematics & Statistics | 228 |
231 | 2022 | Natural Sciences | Earth & Environmental Sciences | 409 |
230 | 2022 | Natural Sciences | Chemistry | 251 |
229 | 2022 | Natural Sciences | Biology | 181 |
... | ... | ... | ... | ... |
4 | 2011 | Applied Sciences | Information & Communication Technologies | 256 |
3 | 2011 | Applied Sciences | Engineering | 166 |
2 | 2011 | Applied Sciences | Enabling & Strategic Technologies | 53 |
1 | 2011 | Applied Sciences | Built Environment & Design | 6 |
0 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | 9 |
234 rows × 4 columns
len(data[group[-2]].unique())
6
# One figure per domain (group[-2]) with one line per field (group[-1]).
# The full span of years is the same for every domain, so compute it once.
year_range = range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)
# Completed per-domain frames are collected here and concatenated once at the
# end, instead of re-copying a growing DataFrame on every iteration.
completed_parts = []
for cat in sorted(data[group[-2]].unique()):
    # data segment: rows of this domain, padded so that every field has a row
    # for every year (missing combinations are filled with 0)
    sub_data = data[data[group[-2]] == cat]
    sub_data = sub_data.complete({group[0]: year_range}, group[-1], fill_value=0)
    completed_parts.append(sub_data)
    # plot
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                     y=record_col, x=group[0], hue=group[-1], marker="o")
    g.set(xticks=list(range(2012, 2022 + 1, 2)))
    g.legend(title=None)
    g.set_title(cat)
    # Counts are whole numbers, so keep the y ticks on integers.
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
    plt.show()
# pd.concat refuses an empty list, so fall back to an empty frame in that case.
data_complete = (pd.concat(completed_parts, ignore_index=True)
                 if completed_parts else pd.DataFrame())
# Grid of panels: one panel per domain (group[-2]), one line per field (group[-1]).
# The full span of years is the same for every domain, so compute it once.
year_range = range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)
# Completed per-domain frames are collected here and concatenated once at the
# end, instead of re-copying a growing DataFrame on every iteration.
completed_parts = []
# Creating subplot axes
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))
# zip stops at the shorter input: at most six domains (3 x 2 panels) are drawn.
for cat, ax in zip(sorted(data[group[-2]].unique()), axes.flatten()):
    # data segment: rows of this domain, padded so that every field has a row
    # for every year (missing combinations are filled with 0)
    sub_data = data[data[group[-2]] == cat]
    sub_data = sub_data.complete({group[0]: year_range}, group[-1], fill_value=0)
    completed_parts.append(sub_data)
    # plot
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                     y=record_col, x=group[0], hue=group[-1], marker="o", ax=ax)
    g.set(xticks=list(range(2012, 2022 + 1, 2)))
    g.legend(title=None)
    g.set_title(cat)
    g.set_xlabel(None)
    g.set_ylabel(None)
    # Counts are whole numbers, so keep the y ticks on integers.
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
# pd.concat refuses an empty list, so fall back to an empty frame in that case.
data_complete = (pd.concat(completed_parts, ignore_index=True)
                 if completed_parts else pd.DataFrame())
fig.suptitle("Number of co-publications in domains and respective fields", y=0.92)
plt.show()
group = ['Publication Year',"Domain_English",'Field_English',"SubField_English"]
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
data
Publication Year | Domain_English | Field_English | SubField_English | UT (Unique WOS ID) | |
---|---|---|---|---|---|
1598 | 2022 | Natural Sciences | Physics & Astronomy | Optics | 134 |
1597 | 2022 | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics | 65 |
1596 | 2022 | Natural Sciences | Physics & Astronomy | Mathematical Physics | 10 |
1595 | 2022 | Natural Sciences | Physics & Astronomy | General Physics | 31 |
1594 | 2022 | Natural Sciences | Physics & Astronomy | Fluids & Plasmas | 79 |
... | ... | ... | ... | ... | ... |
4 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Forestry | 1 |
3 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Food Science | 1 |
2 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Fisheries | 2 |
1 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Dairy & Animal Science | 2 |
0 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Agronomy & Agriculture | 3 |
1599 rows × 5 columns
# One figure per field (group[-2]) with one line per subfield (group[-1]).
# The full span of years is the same for every field, so compute it once.
year_range = range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)
for cat in sorted(data[group[-2]].unique()):
    # Rows of this field, padded so that every subfield has a row for every
    # year (missing combinations are filled with 0).
    sub_data = data[data[group[-2]] == cat]
    sub_data = sub_data.complete({group[0]: year_range}, group[-1], fill_value=0)
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                     y=record_col, x=group[0],
                     hue=group[-1], marker="o", errorbar=None)
    g.set(xticks=list(range(2012, 2022 + 1, 2)))
    # Legend sits outside the axes on the right; one extra column is added for
    # every 12 entries of the automatically created legend.
    g.legend(title=None, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,
             ncols=math.ceil(len(g.legend_.texts) / 12))
    # Title typo fixed: "Number or" -> "Number of".
    g.set_title(f'Number of co-publications in {cat}')
    g.set_ylabel(None)
    plt.show()
from matplotlib.ticker import FuncFormatter
import math
def orderOfMagnitude(number):
    """Return the base-10 order of magnitude of a positive number.

    This is the exponent ``k`` such that ``10**k <= number < 10**(k + 1)``,
    e.g. ``999 -> 2``, ``1000 -> 3``, ``0.05 -> -2``.

    ``math.log10`` is used instead of ``math.log(number, 10)``: the two-argument
    form divides two natural logarithms and is inexact at powers of ten
    (``math.log(1000, 10)`` evaluates to ``2.9999999999999996``, which floors
    to 2 instead of 3).

    Raises
    ------
    ValueError
        If ``number`` is zero or negative (math domain error).
    """
    return math.floor(math.log10(number))
def roundToNearest(number):
    """Round ``number`` up to a multiple of its own power of ten.

    The step is ``10 ** orderOfMagnitude(number)``, so ``4245`` becomes
    ``5000`` and ``23.4`` becomes ``30``. Despite the name, the value is
    always rounded up (``math.ceil``), never down.
    """
    step = 10 ** orderOfMagnitude(number)
    return math.ceil(number / step) * step
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
wos_univ_locations.sample(100)
UT (Unique WOS ID) | Institution | Country | Institution_harm | merge_iter | Country_Type | |
---|---|---|---|---|---|---|
41191 | WOS:000538161600016 | Anhui Univ | China | Anhui Univ | 0 | China |
175692 | WOS:000709411500003 | Univ Porto | Portugal | Univ Porto | 0 | EU |
75198 | WOS:000831217100027 | Zhejiang Univ | China | Zhejiang Univ | 0 | China |
48614 | WOS:000597938400003 | Shanghai Jiao Tong Univ | China | Shanghai Jiao Tong Univ | 0 | China |
133670 | WOS:000411824101159 | Univ Pisa | Italy | Univ Pisa | 0 | EU |
... | ... | ... | ... | ... | ... | ... |
2892 | WOS:000293708200019 | Natl Univ Def Technol | China | Natl Univ Def Technol | 0 | China |
125259 | WOS:000663324800010 | INRAE | France | INRAE | 0 | EU |
55780 | WOS:000659952900011 | Huazhong Univ Sci & Technol | China | Huazhong Univ Sci & Technol | 0 | China |
138600 | WOS:000744399000001 | Brignone Clin | Italy | Brignone Clin | 0 | EU |
31040 | WOS:000471758500010 | Chinese Acad Sci | China | Chinese Acad Sci | 0 | China |
100 rows × 6 columns
# One row per distinct (record, country) pair, leaving out countries typed "Other".
wos_collabs = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country"]].drop_duplicates()
# Number of records per non-China country.
# NOTE(review): the "count" column name used below is what value_counts().reset_index()
# produces in pandas >= 2.0 - confirm the pinned pandas version.
collab_desc = wos_collabs[wos_collabs["Country"]!="China"]["Country"].value_counts().reset_index()
# Share of all distinct records in wos_collabs that involve the country.
collab_desc["percent_of_copubs"] = collab_desc["count"]/wos_collabs[record_col].nunique()*100
# Share of all (record, country) rows in wos_collabs that belong to the country.
# NOTE(review): the denominator still contains the China rows - confirm this is intended.
collab_desc["percent_contrib_in_copubs"] = collab_desc["count"]/wos_collabs[record_col].size*100
# Attach the country-type columns for colouring/grouping in the plots.
collab_desc = collab_desc.merge(wos_country_types, on="Country")
collab_desc
# Metric column in collab_desc -> text used as both axis label and title.
c_dict = {"count": "Number of co-publications",
          "percent_of_copubs": "Percent of co-publications",
          "percent_contrib_in_copubs": "Contribution to co-publications"}
# One tall horizontal bar chart per metric, one bar per country,
# coloured by country type.
for c, label in c_dict.items():
    is_percent = 'percent' in c
    data = collab_desc[["Country", c, "Country_Type"]]
    plt.figure(figsize=(9, 12))
    g = sns.barplot(data, x=c, y="Country", hue="Country_Type", dodge=False)
    # Upper x limit: the largest value rounded up to a "nice" number.
    g.set_xlim(0, roundToNearest(data[c].max()))
    g.set_ylabel(None)
    g.set_xlabel(label)
    g.set_title(label)
    g.legend(title=None, loc="right")
    for bars in g.containers:
        g.bar_label(bars, fontsize=10, fmt='%.1f%%' if is_percent else '%.0f')
    if is_percent:
        g.xaxis.set_major_locator(MaxNLocator(integer=True))
        # Format ticks through a formatter instead of set_xticklabels():
        # fixed label strings on a non-fixed locator trigger matplotlib's
        # "FixedFormatter should only be used together with FixedLocator"
        # warning and can end up on the wrong ticks when the axis is redrawn.
        g.xaxis.set_major_formatter(FuncFormatter(lambda val, _pos: f"{int(val)}%"))
    plt.show()
C:\Users\radvanyi\AppData\Local\Temp\ipykernel_30956\556627507.py:29: UserWarning: FixedFormatter should only be used together with FixedLocator
C:\Users\radvanyi\AppData\Local\Temp\ipykernel_30956\556627507.py:29: UserWarning: FixedFormatter should only be used together with FixedLocator
# Distinct (record, country) pairs, leaving out the "Other" and "China" country types.
wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
# Self-merge on the record id: every ordered pair of countries sharing a record,
# including each country paired with itself (columns Country_x / Country_y).
wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
# Distinct records per country pair; normalize='all' rescales the table by its
# grand total, so cells are shares rather than raw counts. Pairs that never occur become 0.
EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique', normalize='all').fillna(0)
# Generate a mask for the upper triangle
# (np.triu keeps the diagonal, so the country-with-itself cells are hidden as well)
mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
# Draw the heatmap with the mask and correct aspect ratio
g = sns.heatmap(EU_co_occur, mask=mask,
square=True, linewidths=.5)
# Drop the automatic axis labels.
g.set_ylabel(None)
g.set_xlabel(None)
Text(0.5, 71.74999999999994, '')
wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
wos_collabs_EU
EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
data = np.where(mask,None,EU_co_occur)
EU_co_occur.columns
Index(['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'United Kingdom'], dtype='object', name='Country_y')
fig = px.imshow(data,
labels=dict(x="Country (x)", y="Country (y)", color="Co-publication"),
x=list(EU_co_occur.columns),
y=list(EU_co_occur.index), title="Intraeuropean patterns"
)
fig.update_layout(title_x=0.5,
width=1000, height=1000,
xaxis_showgrid=False,
yaxis_showgrid=False,
yaxis_autorange='reversed', template='plotly_white')
fig.update_xaxes(tickangle= -90)
fig.update_yaxes(
ticks="outside")
fig.update_xaxes(
ticks="outside")
fig.show(config= dict(displayModeBar = False))
collab_year = wos_collabs[wos_collabs["Country"]!="China"].copy()
collab_year = collab_year.merge(wos_country_types, on="Country").merge(wos[[record_col,"Publication Year"]],on=record_col).drop_duplicates()
data = collab_year.groupby(["Publication Year",'Country_Type'],as_index=False)[record_col].nunique()
g=sns.lineplot(data,y=record_col,x="Publication Year", hue="Country_Type", marker="o")
g.set(xticks=list(range(2012,2022+1,2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications with China")
Text(0.5, 1.0, 'Yearly output of co-publications with China')
data = (collab_year.groupby(['Publication Year',"Country"])[record_col]
.nunique(dropna=False).unstack()
.fillna(0)
.stack()
.reset_index()
.rename(columns={0:record_col}))
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset="Country"),
on=["Country"], suffixes=[None,"_relative_growth"])
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data
Publication Year | Country | UT (Unique WOS ID) | Publication Year_relative_growth | UT (Unique WOS ID)_relative_growth | |
---|---|---|---|---|---|
0 | 2011 | Austria | 22.0 | 2011 | 0.000000 |
1 | 2012 | Austria | 24.0 | 2011 | 9.090909 |
2 | 2013 | Austria | 26.0 | 2011 | 18.181818 |
3 | 2014 | Austria | 39.0 | 2011 | 77.272727 |
4 | 2015 | Austria | 50.0 | 2011 | 127.272727 |
... | ... | ... | ... | ... | ... |
355 | 2018 | United Kingdom | 1837.0 | 2011 | 406.060606 |
356 | 2019 | United Kingdom | 2430.0 | 2011 | 569.421488 |
357 | 2020 | United Kingdom | 3108.0 | 2011 | 756.198347 |
358 | 2021 | United Kingdom | 3718.0 | 2011 | 924.242424 |
359 | 2022 | United Kingdom | 4245.0 | 2011 | 1069.421488 |
360 rows × 5 columns
data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
fig = px.choropleth(data, locations="ISO3", color=record_col, hover_name="Country",
animation_frame='Publication Year', scope="europe", template='plotly', range_color=[data[record_col].min(),data[record_col].max()])
fig.show()
data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
fig = px.choropleth(data, locations="ISO3", color=record_col+"_relative_growth", hover_name="Country",
animation_frame='Publication Year', scope="europe", template='plotly',
range_color=[data[record_col+"_relative_growth"].min(),data[record_col+"_relative_growth"].max()])
fig.show()
data = data.merge(wos_country_types, on='Country')
data
Publication Year | Country | UT (Unique WOS ID) | Publication Year_relative_growth | UT (Unique WOS ID)_relative_growth | Country_Type | Eurovoc_Class | |
---|---|---|---|---|---|---|---|
0 | 2011 | Austria | 22.0 | 2011 | 0.000000 | EU | Western Europe |
1 | 2012 | Austria | 24.0 | 2011 | 9.090909 | EU | Western Europe |
2 | 2013 | Austria | 26.0 | 2011 | 18.181818 | EU | Western Europe |
3 | 2014 | Austria | 39.0 | 2011 | 77.272727 | EU | Western Europe |
4 | 2015 | Austria | 50.0 | 2011 | 127.272727 | EU | Western Europe |
... | ... | ... | ... | ... | ... | ... | ... |
355 | 2018 | United Kingdom | 1837.0 | 2011 | 406.060606 | Non-EU associate | Western Europe |
356 | 2019 | United Kingdom | 2430.0 | 2011 | 569.421488 | Non-EU associate | Western Europe |
357 | 2020 | United Kingdom | 3108.0 | 2011 | 756.198347 | Non-EU associate | Western Europe |
358 | 2021 | United Kingdom | 3718.0 | 2011 | 924.242424 | Non-EU associate | Western Europe |
359 | 2022 | United Kingdom | 4245.0 | 2011 | 1069.421488 | Non-EU associate | Western Europe |
360 rows × 7 columns
# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col,x='Publication Year', color="Eurovoc_Class",line_group="Country", markers=True,
# labels={
# record_col: 'Number of co-publications',
# "Eurovoc_Class": "Region"
# },
# title="Yearly output of co-publications", template='plotly',hover_name= "Country")
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
# # fig.update_layout(hovermode='x unified')
# fig.add_shape(
# # Rectangle with reference to the plot
# type="rect",
# xref="paper",
# yref="paper",
# x0=0,
# y0=0,
# x1=1.0,
# y1=1.0,
# line=dict(
# color="black",
# width=0.5,
# )
# )
# fig.update_yaxes(
# showgrid=True,
# ticks="outside")
# fig.update_xaxes(
# showgrid=True,
# ticks="outside")
# fig.show(config= dict(displayModeBar = False))
# fig.data[0].hovertemplate
'<b>%{hovertext}</b><br><br>Eurovoc_Class=Western Europe<br>Country=Austria<br>Publication Year=%{x}<br>Number of co-publications=%{y}<extra></extra>'
# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
# y=record_col+"_relative_growth",
# x='Publication Year',
# color="Eurovoc_Class",line_group="Country",markers=True,
# labels={
# record_col+"_relative_growth": 'Relative growth of co-publications (%)',"Eurovoc_Class": "Region"
# },
# title="Relative growth of co-publications<br>(baseline: 2011)", template='plotly',hover_name= "Country")
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')
# fig.add_shape(
# # Rectangle with reference to the plot
# type="rect",
# xref="paper",
# yref="paper",
# x0=0,
# y0=0,
# x1=1.0,
# y1=1.0,
# line=dict(
# color="black",
# width=0.5,
# )
# )
# fig.update_yaxes(
# showgrid=True,
# ticks="outside")
# fig.update_xaxes(
# showgrid=True,
# ticks="outside")
# fig.show(config= dict(displayModeBar = False))
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Combined figure: absolute yearly output (left) next to growth relative to
# the 2011 baseline (right); one line per country, coloured by Eurovoc class.
figsuper = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=[
        "Yearly output of co-publications",
        "Relative growth of co-publications<br>(baseline: 2011)",
    ],
)

# Sort once; both panels plot the same rows in chronological order.
data_by_year = data.sort_values(ascending=True, by='Publication Year')

# Only the *traces* of the intermediate plotly-express figures are copied
# into `figsuper`. Figure-level settings on those intermediates (title,
# template, axis labels, border shape) never reach the rendered output, so
# they are not set here; the frame is drawn on `figsuper` itself below.

# Left panel: number of co-publications per year.
fig = px.line(
    data_by_year,
    y=record_col,
    x='Publication Year',
    color="Eurovoc_Class",
    line_group="Country",
    markers=True,
    hover_name="Country",
)
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
for trace in fig.select_traces():
    figsuper.add_trace(trace, row=1, col=1)

# Right panel: relative growth of co-publications.
fig = px.line(
    data_by_year,
    y=record_col + "_relative_growth",
    x='Publication Year',
    color="Eurovoc_Class",
    line_group="Country",
    markers=True,
    hover_name="Country",
)
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')
for trace in fig.select_traces():
    # Hide these traces from the legend so each class is not listed twice
    # (the left-panel traces keep their legend entries).
    trace.showlegend = False
    figsuper.add_trace(trace, row=1, col=2)

# Frame every panel with a mirrored black axis line and outside ticks.
figsuper.update_yaxes(
    showgrid=True,
    showline=True,
    linewidth=1,
    linecolor='black',
    mirror=True,
    ticks="outside",
)
figsuper.update_xaxes(
    showgrid=True,
    showline=True,
    linewidth=1,
    linecolor='black',
    mirror=True,
    ticks="outside",
)
figsuper.update_layout({'template': "plotly"})
figsuper.show(config=dict(displayModeBar=False))
# Country x publication-year table holding the number of distinct record
# identifiers per cell; combinations with no records become 0.
year_pivot = (
    pd.crosstab(
        index=collab_year['Country'],
        columns=collab_year['Publication Year'],
        values=collab_year[record_col],
        aggfunc='nunique',
    )
    .fillna(0)
    .astype(int)
)
year_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | ||||||||||||
Austria | 22 | 24 | 26 | 39 | 50 | 57 | 72 | 89 | 138 | 137 | 185 | 205 |
Belgium | 34 | 38 | 40 | 65 | 71 | 81 | 90 | 133 | 179 | 213 | 242 | 292 |
Bulgaria | 4 | 5 | 8 | 9 | 7 | 19 | 21 | 18 | 10 | 25 | 32 | 19 |
Croatia | 1 | 2 | 6 | 8 | 10 | 7 | 10 | 19 | 27 | 29 | 33 | 35 |
Cyprus | 2 | 1 | 5 | 5 | 5 | 5 | 8 | 7 | 15 | 28 | 36 | 43 |
Czech Republic | 13 | 15 | 16 | 21 | 20 | 36 | 37 | 56 | 64 | 81 | 93 | 123 |
Denmark | 35 | 33 | 40 | 59 | 68 | 74 | 101 | 195 | 234 | 245 | 293 | 343 |
Estonia | 3 | 3 | 7 | 10 | 12 | 10 | 15 | 15 | 16 | 38 | 45 | 39 |
Finland | 31 | 35 | 44 | 82 | 100 | 125 | 126 | 198 | 241 | 256 | 289 | 380 |
France | 117 | 130 | 174 | 231 | 269 | 325 | 348 | 491 | 648 | 691 | 807 | 858 |
Germany | 123 | 172 | 192 | 273 | 310 | 365 | 456 | 604 | 801 | 907 | 1210 | 1386 |
Greece | 15 | 18 | 19 | 32 | 35 | 50 | 47 | 81 | 114 | 122 | 139 | 181 |
Hungary | 11 | 11 | 21 | 16 | 20 | 38 | 34 | 47 | 61 | 61 | 83 | 90 |
Ireland | 13 | 16 | 22 | 31 | 27 | 45 | 66 | 72 | 84 | 116 | 167 | 187 |
Italy | 51 | 70 | 84 | 116 | 178 | 187 | 247 | 325 | 441 | 571 | 641 | 811 |
Latvia | 0 | 0 | 1 | 0 | 1 | 8 | 10 | 15 | 10 | 9 | 13 | 18 |
Lithuania | 1 | 2 | 10 | 4 | 4 | 13 | 12 | 23 | 38 | 36 | 38 | 38 |
Luxembourg | 2 | 3 | 3 | 1 | 8 | 9 | 13 | 15 | 18 | 22 | 35 | 51 |
Malta | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 6 | 2 | 7 | 10 |
Netherlands | 72 | 64 | 77 | 103 | 139 | 166 | 220 | 297 | 408 | 470 | 529 | 655 |
Norway | 30 | 42 | 60 | 76 | 67 | 88 | 104 | 134 | 222 | 253 | 304 | 311 |
Poland | 17 | 31 | 37 | 57 | 73 | 82 | 98 | 110 | 138 | 181 | 276 | 353 |
Portugal | 16 | 23 | 35 | 41 | 45 | 58 | 79 | 119 | 136 | 147 | 204 | 212 |
Romania | 7 | 15 | 13 | 16 | 25 | 26 | 37 | 57 | 64 | 55 | 48 | 62 |
Slovakia | 9 | 6 | 6 | 10 | 12 | 22 | 18 | 27 | 27 | 34 | 36 | 45 |
Slovenia | 7 | 7 | 10 | 12 | 17 | 27 | 22 | 47 | 54 | 31 | 48 | 40 |
Spain | 50 | 49 | 69 | 112 | 138 | 185 | 232 | 273 | 356 | 386 | 473 | 640 |
Sweden | 34 | 50 | 59 | 83 | 113 | 170 | 233 | 232 | 385 | 359 | 428 | 510 |
Switzerland | 37 | 50 | 54 | 74 | 74 | 95 | 155 | 195 | 233 | 263 | 349 | 447 |
United Kingdom | 363 | 417 | 531 | 660 | 781 | 979 | 1350 | 1837 | 2430 | 3108 | 3718 | 4245 |
# Annotated heatmap of the yearly counts, one row per country.
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(
    year_pivot,
    annot=True,
    fmt="d",
    linewidths=.5,
    ax=ax,
)
g.set(xlabel="", ylabel="")
# Thick white horizontal rules on every row boundary separate the countries.
for row_edge in range(len(year_pivot.index) + 1):
    ax.axhline(row_edge, color='white', lw=10)
# Same country x year cross-tabulation, normalised per column (year) and
# scaled to percentages.
year_percent_pivot = (
    pd.crosstab(
        index=collab_year['Country'],
        columns=collab_year['Publication Year'],
        values=collab_year[record_col],
        aggfunc='nunique',
        normalize='columns',
    )
    .fillna(0)
    .mul(100)
)
year_percent_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | ||||||||||||
Austria | 1.962533 | 1.801802 | 1.557819 | 1.736420 | 1.865672 | 1.699970 | 1.689744 | 1.552958 | 1.816267 | 1.543488 | 1.712804 | 1.623248 |
Belgium | 3.033006 | 2.852853 | 2.396645 | 2.894034 | 2.649254 | 2.415747 | 2.112180 | 2.320712 | 2.355883 | 2.399730 | 2.240533 | 2.312139 |
Bulgaria | 0.356824 | 0.375375 | 0.479329 | 0.400712 | 0.261194 | 0.566657 | 0.492842 | 0.314081 | 0.131614 | 0.281658 | 0.296269 | 0.150447 |
Croatia | 0.089206 | 0.150150 | 0.359497 | 0.356189 | 0.373134 | 0.208768 | 0.234687 | 0.331530 | 0.355357 | 0.326724 | 0.305527 | 0.277140 |
Cyprus | 0.178412 | 0.075075 | 0.299581 | 0.222618 | 0.186567 | 0.149120 | 0.187749 | 0.122143 | 0.197420 | 0.315457 | 0.333302 | 0.340486 |
Czech Republic | 1.159679 | 1.126126 | 0.958658 | 0.934996 | 0.746269 | 1.073665 | 0.868341 | 0.977142 | 0.842327 | 0.912573 | 0.861031 | 0.973949 |
Denmark | 3.122212 | 2.477477 | 2.396645 | 2.626892 | 2.537313 | 2.206979 | 2.370336 | 3.402548 | 3.079758 | 2.760252 | 2.712712 | 2.715971 |
Estonia | 0.267618 | 0.225225 | 0.419413 | 0.445236 | 0.447761 | 0.298240 | 0.352030 | 0.261734 | 0.210582 | 0.428121 | 0.416628 | 0.308813 |
Finland | 2.765388 | 2.627628 | 2.636309 | 3.650935 | 3.731343 | 3.728005 | 2.957052 | 3.454894 | 3.171887 | 2.884182 | 2.675678 | 3.008948 |
France | 10.437110 | 9.759760 | 10.425404 | 10.284951 | 10.037313 | 9.692812 | 8.167097 | 8.567440 | 8.528560 | 7.785038 | 7.471530 | 6.793887 |
Germany | 10.972346 | 12.912913 | 11.503895 | 12.154942 | 11.567164 | 10.885774 | 10.701713 | 10.539173 | 10.542248 | 10.218567 | 11.202666 | 10.974741 |
Greece | 1.338091 | 1.351351 | 1.138406 | 1.424755 | 1.305970 | 1.491202 | 1.103027 | 1.413366 | 1.500395 | 1.374493 | 1.286918 | 1.433209 |
Hungary | 0.981267 | 0.825826 | 1.258238 | 0.712378 | 0.746269 | 1.133313 | 0.797935 | 0.820101 | 0.802843 | 0.687247 | 0.768447 | 0.712645 |
Ireland | 1.159679 | 1.201201 | 1.318155 | 1.380232 | 1.007463 | 1.342082 | 1.548932 | 1.256325 | 1.105554 | 1.306895 | 1.546153 | 1.480719 |
Italy | 4.549509 | 5.255255 | 5.032954 | 5.164737 | 6.641791 | 5.577095 | 5.796761 | 5.670913 | 5.804159 | 6.433078 | 5.934636 | 6.421728 |
Latvia | 0.000000 | 0.000000 | 0.059916 | 0.000000 | 0.037313 | 0.238592 | 0.234687 | 0.261734 | 0.131614 | 0.101397 | 0.120359 | 0.142529 |
Lithuania | 0.089206 | 0.150150 | 0.599161 | 0.178094 | 0.149254 | 0.387712 | 0.281624 | 0.401326 | 0.500132 | 0.405588 | 0.351819 | 0.300895 |
Luxembourg | 0.178412 | 0.225225 | 0.179748 | 0.044524 | 0.298507 | 0.268416 | 0.305093 | 0.261734 | 0.236904 | 0.247859 | 0.324044 | 0.403832 |
Malta | 0.089206 | 0.000000 | 0.000000 | 0.000000 | 0.037313 | 0.029824 | 0.000000 | 0.000000 | 0.078968 | 0.022533 | 0.064809 | 0.079183 |
Netherlands | 6.422837 | 4.804805 | 4.613541 | 4.585931 | 5.186567 | 4.950790 | 5.163107 | 5.182342 | 5.369834 | 5.295178 | 4.897695 | 5.186476 |
Norway | 2.676182 | 3.153153 | 3.594967 | 3.383793 | 2.500000 | 2.624515 | 2.440742 | 2.338161 | 2.921822 | 2.850383 | 2.814554 | 2.462586 |
Poland | 1.516503 | 2.327327 | 2.216896 | 2.537845 | 2.723881 | 2.445571 | 2.299930 | 1.919386 | 1.816267 | 2.039207 | 2.555319 | 2.795154 |
Portugal | 1.427297 | 1.726727 | 2.097064 | 1.825467 | 1.679104 | 1.729794 | 1.854025 | 2.076426 | 1.789945 | 1.656151 | 1.888714 | 1.678676 |
Romania | 0.624442 | 1.126126 | 0.778910 | 0.712378 | 0.932836 | 0.775425 | 0.868341 | 0.994591 | 0.842327 | 0.619648 | 0.444403 | 0.490934 |
Slovakia | 0.802855 | 0.450450 | 0.359497 | 0.445236 | 0.447761 | 0.656129 | 0.422436 | 0.471122 | 0.355357 | 0.383055 | 0.333302 | 0.356323 |
Slovenia | 0.624442 | 0.525526 | 0.599161 | 0.534283 | 0.634328 | 0.805249 | 0.516311 | 0.820101 | 0.710713 | 0.349256 | 0.444403 | 0.316731 |
Spain | 4.460303 | 3.678679 | 4.134212 | 4.986643 | 5.149254 | 5.517447 | 5.444731 | 4.763567 | 4.685444 | 4.348806 | 4.379224 | 5.067701 |
Sweden | 3.033006 | 3.753754 | 3.535051 | 3.695459 | 4.216418 | 5.070086 | 5.468200 | 4.048159 | 5.067123 | 4.044615 | 3.962596 | 4.038324 |
Switzerland | 3.300624 | 3.753754 | 3.235470 | 3.294746 | 2.761194 | 2.833284 | 3.637644 | 3.402548 | 3.066596 | 2.963046 | 3.231182 | 3.539473 |
United Kingdom | 32.381802 | 31.306306 | 31.815458 | 29.385574 | 29.141791 | 29.197733 | 31.682704 | 32.053743 | 31.982101 | 35.015773 | 34.422739 | 33.613113 |
# Annotated heatmap of the per-year percentage shares (no colour bar).
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(
    year_percent_pivot,
    annot=True,
    fmt='.1f',
    linewidths=.5,
    ax=ax,
    cbar=False,
)
# Append a percent sign to every cell annotation.
for cell_text in ax.texts:
    cell_text.set_text(f"{cell_text.get_text()} %")
g.set(xlabel="", ylabel="")
# Thick white vertical rules on every column boundary separate the years.
for col_edge in range(len(year_percent_pivot.columns) + 1):
    ax.axvline(col_edge, color='white', lw=10)
# Institutional collab
# Attach country type / Eurovoc class to every institution row.
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")

# Keep one row per (record, country, harmonised institution), dropping
# countries whose Country_Type is "Other".
wos_univ_collabs = (
    wos_univ_locations
    .loc[
        wos_univ_locations["Country_Type"] != "Other",
        [record_col, "Country", "Institution_harm", "Country_Type", "Eurovoc_Class"],
    ]
    .drop_duplicates()
)

# Build a display label "<institution> (<ISO3 country code>)".
wos_univ_collabs["ISO3"] = cc.pandas_convert(
    series=wos_univ_collabs["Country"],
    to='ISO3',
)
wos_univ_collabs["Institution_harm_label"] = (
    wos_univ_collabs["Institution_harm"]
    + " ("
    + wos_univ_collabs["ISO3"]
    + ")"
)
wos_univ_collabs.sample(100)
UT (Unique WOS ID) | Country | Institution_harm | Country_Type | Eurovoc_Class | ISO3 | Institution_harm_label | |
---|---|---|---|---|---|---|---|
86064 | WOS:000640648500012 | Netherlands | Eindhoven Univ Technol | EU | Western Europe | NLD | Eindhoven Univ Technol (NLD) |
115079 | WOS:000798227800073 | United Kingdom | Univ Leeds | Non-EU associate | Western Europe | GBR | Univ Leeds (GBR) |
97887 | WOS:000431633800004 | United Kingdom | Francis Crick Inst | Non-EU associate | Western Europe | GBR | Francis Crick Inst (GBR) |
147070 | WOS:000460118200077 | Germany | Johannes Gutenberg Univ Mainz | EU | Western Europe | DEU | Johannes Gutenberg Univ Mainz (DEU) |
93724 | WOS:000371153900007 | United Kingdom | Royal Marsden Fdn Trust | Non-EU associate | Western Europe | GBR | Royal Marsden Fdn Trust (GBR) |
... | ... | ... | ... | ... | ... | ... | ... |
75906 | WOS:000838382400015 | China | China Aerosp Sci & Ind Corp | China | China | CHN | China Aerosp Sci & Ind Corp (CHN) |
153124 | WOS:000802927600001 | Germany | Rhein Westfal Aachen | EU | Western Europe | DEU | Rhein Westfal Aachen (DEU) |
82197 | WOS:000911585800012 | China | Chinese Acad Med Sci | China | China | CHN | Chinese Acad Med Sci (CHN) |
150080 | WOS:000605979600009 | Germany | Univ Med Ctr Goettingen | EU | Western Europe | DEU | Univ Med Ctr Goettingen (DEU) |
112004 | WOS:000717881300001 | United Kingdom | Imperial Coll London | Non-EU associate | Western Europe | GBR | Imperial Coll London (GBR) |
100 rows × 7 columns
# Fixed colours for the Country_Type values.
color_discrete_map = {
    'China': '#EF553B',
    'EU': '#636EFA',
    'Non-EU associate': '#00CC96',
}
TOPN = 25

# Split the institution rows into Chinese, non-Chinese and strictly-EU subsets.
is_china = wos_univ_collabs["Country_Type"] == "China"
wos_univ_ch = wos_univ_collabs[is_china]
wos_univ_eu = wos_univ_collabs[~is_china]
wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "EU"]

# Top-N institutions by number of distinct records in each subset.
data_eu = (
    wos_univ_eu
    .groupby(["Country", "Institution_harm_label", "Country_Type"], as_index=False)[record_col]
    .nunique()
    .sort_values(by=record_col, ascending=False)
    .head(TOPN)
    .copy()
    .sort_values(by="Country_Type")
)
data_eu_strict = (
    wos_univ_eu_strict
    .groupby(["Country", "Institution_harm_label", "Eurovoc_Class"], as_index=False)[record_col]
    .nunique()
    .sort_values(by=record_col, ascending=False)
    .head(TOPN)
    .copy()
)
data_ch = (
    wos_univ_ch
    .groupby(["Country", "Institution_harm", "Country_Type"], as_index=False)[record_col]
    .nunique()
    .sort_values(by=record_col, ascending=False)
    .head(TOPN)
    .copy()
)

# One entry per chart:
# (table, scope text for the title, y column, colour column, pattern column).
chart_specs = [
    (data_eu, "European countries in scope", "Institution_harm_label", "Country", "Country_Type"),
    (data_eu_strict, "EU-28 only", "Institution_harm_label", "Eurovoc_Class", None),
    (data_ch, "China", "Institution_harm", "Country_Type", None),
]

# Display names shared by all three charts.
axis_labels = {
    record_col: 'Number of co-publications',
    "Institution_harm": "Institution",
    "Institution_harm_label": "Institution",
    "Country_Type": "Country type",
    "Eurovoc_Class": "Region",
}

# Thin black rectangle spanning the whole plot area (paper coordinates).
plot_border = dict(
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    ),
)

for data, c_scope, y_lab, col_by, pat in chart_specs:
    fig = px.bar(
        data,
        x=record_col,
        y=y_lab,
        color=col_by,
        color_discrete_map=color_discrete_map,
        pattern_shape=pat,
        labels=axis_labels,
        title=f"Most visible institutions (top {TOPN} within {c_scope})",
        template='plotly',
    )
    fig.update_layout(
        xaxis_tickformat='d',
        font_family="Montserrat",
        yaxis={'categoryorder': 'total ascending'},
        width=1000,
        height=1000,
    )
    fig.update_traces(hovertemplate='%{x:d}')
    fig.add_shape(**plot_border)
    fig.update_yaxes(showgrid=True, ticks="outside")
    fig.update_xaxes(showgrid=True, ticks="outside")
    fig.show(config=dict(displayModeBar=False))
# Sanity check of the harmonisation: list the raw institution names that
# were mapped onto "Chinese Acad Sci", with their distinct-record counts.
wos_univ_test = (
    wos_univ_locations
    .loc[
        wos_univ_locations["Country_Type"] != "Other",
        [record_col, "Country", "Institution", "Institution_harm", "Country_Type"],
    ]
    .drop_duplicates()
)
www = (
    wos_univ_test
    .groupby(["Institution", "Institution_harm"], as_index=False)[record_col]
    .nunique()
)
www.loc[www["Institution_harm"].eq("Chinese Acad Sci")]
Institution | Institution_harm | UT (Unique WOS ID) | |
---|---|---|---|
16 | Chinese Acad Sci | Chinese Acad Sci | 1 |
3149 | Chinese Acad Sci | Chinese Acad Sci | 4614 |
3153 | Chinese Acad Sci AIRCAS | Chinese Acad Sci | 2 |
3155 | Chinese Acad Sci CAREERI CAS | Chinese Acad Sci | 1 |
3157 | Chinese Acad Sci CASIA | Chinese Acad Sci | 8 |
3159 | Chinese Acad Sci GUCAS | Chinese Acad Sci | 2 |
3160 | Chinese Acad Sci IAP | Chinese Acad Sci | 1 |
3161 | Chinese Acad Sci IECAS | Chinese Acad Sci | 2 |
3162 | Chinese Acad Sci IME CAS | Chinese Acad Sci | 1 |
3163 | Chinese Acad Sci IMECAS | Chinese Acad Sci | 1 |
3164 | Chinese Acad Sci ITP CAS | Chinese Acad Sci | 1 |
3166 | Chinese Acad Sci NAOC | Chinese Acad Sci | 1 |
3167 | Chinese Acad Sci NAOC CAS | Chinese Acad Sci | 2 |
13501 | RCEES Chinese Acad Sci | Chinese Acad Sci | 1 |
19499 | ZIAT Chinese Acad Sci | Chinese Acad Sci | 1 |
# Split institution rows into the Chinese side and the non-Chinese side.
is_china = wos_univ_collabs["Country_Type"] == "China"
wos_univ_ch = wos_univ_collabs[is_china]
wos_univ_eu = wos_univ_collabs[~is_china]

# Pair every non-Chinese institution with every Chinese institution on the
# same record, then attach the record's domain / field / subfield.
wos_univ_dipol = (
    wos_univ_eu
    .merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch'))
    .merge(
        wos[[record_col, "Domain_English", "Field_English", "SubField_English"]],
        on=record_col,
    )
)
wos_univ_dipol.sample(100)
UT (Unique WOS ID) | Country_eu | Institution_harm_eu | Country_Type_eu | Eurovoc_Class_eu | ISO3_eu | Institution_harm_label_eu | Country_ch | Institution_harm_ch | Country_Type_ch | Eurovoc_Class_ch | ISO3_ch | Institution_harm_label_ch | Domain_English | Field_English | SubField_English | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
329695 | WOS:000702508000001 | Greece | Sch Pedag & Technol Educ | EU | Southern Europe | GRC | Sch Pedag & Technol Educ (GRC) | China | Huanghuai Univ | China | China | CHN | Huanghuai Univ (CHN) | Applied Sciences | Engineering | Geological & Geomatics Engineering |
103762 | WOS:000696019400001 | France | Inst Phys 2 Infinis Lyon | EU | Western Europe | FRA | Inst Phys 2 Infinis Lyon (FRA) | China | Fudan Univ | China | China | CHN | Fudan Univ (CHN) | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics |
237699 | WOS:000353892300005 | France | SUBATECH | EU | Western Europe | FRA | SUBATECH (FRA) | China | Shanghai Inst Appl Phys | China | China | CHN | Shanghai Inst Appl Phys (CHN) | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics |
123610 | WOS:000571262000008 | Luxembourg | Luxembourg Ctr Syst Biomed | EU | Western Europe | LUX | Luxembourg Ctr Syst Biomed (LUX) | China | Wuhan Univ Sci & Technol | China | China | CHN | Wuhan Univ Sci & Technol (CHN) | Applied Sciences | Information & Communication Technologies | Artificial Intelligence & Image Processing |
77119 | WOS:000494411700001 | Germany | Tech Univ Berlin | EU | Western Europe | DEU | Tech Univ Berlin (DEU) | China | Dalian Univ Technol | China | China | CHN | Dalian Univ Technol (CHN) | Economic & Social Sciences | Social Sciences | Information & Library Sciences |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
317783 | WOS:000747086300011 | Germany | Tech Univ Dresden | EU | Western Europe | DEU | Tech Univ Dresden (DEU) | China | East China Univ Sci & Technol | China | China | CHN | East China Univ Sci & Technol (CHN) | Health Sciences | Biomedical Research | Developmental Biology |
118549 | WOS:000830403500012 | Italy | Univ Firenze | EU | Southern Europe | ITA | Univ Firenze (ITA) | China | Peking Univ | China | China | CHN | Peking Univ (CHN) | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics |
2175 | WOS:000345858500052 | Italy | Univ Genoa | EU | Southern Europe | ITA | Univ Genoa (ITA) | China | Peking Univ | China | China | CHN | Peking Univ (CHN) | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics |
312657 | WOS:000467489901126 | Germany | Tech Univ Munich | EU | Western Europe | DEU | Tech Univ Munich (DEU) | China | Fudan Univ | China | China | CHN | Fudan Univ (CHN) | Health Sciences | Clinical Medicine | Nuclear Medicine & Medical Imaging |
269060 | WOS:000679252300003 | United Kingdom | De Montfort Univ | Non-EU associate | Western Europe | GBR | De Montfort Univ (GBR) | China | Beijing Univ Technol | China | China | CHN | Beijing Univ Technol (CHN) | Applied Sciences | Information & Communication Technologies | Artificial Intelligence & Image Processing |
100 rows × 16 columns
# Parallel-categories diagram over the columns
# Country_eu -> Domain_English -> Country_ch.
fig = px.parallel_categories(
    wos_univ_dipol.loc[:, ["Country_eu", "Domain_English", "Country_ch"]]
)
fig.show()