import pandas as pd
import janitor
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MaxNLocator
import math
import plotly.express as px
%matplotlib inline
sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)
sns.palplot(sns.color_palette())
outdir="wos_processed_data"
wos = pd.read_excel(f"../{outdir}/wos_processed.xlsx")
wos_univ = pd.read_excel(f"../{outdir}/wos_institution_locations_harmonized.xlsx")
wos_country = pd.read_excel(f"../{outdir}/wos_countries.xlsx")
wos_country_types = pd.read_excel(f"../{outdir}/wos_country_types.xlsx")
wos_country_types
 | Country | Country_Type |
---|---|---|
0 | Belgium | EU |
1 | China | China |
2 | Luxembourg | EU |
3 | Netherlands | EU |
4 | Norway | Non-EU associate |
5 | United Kingdom | Non-EU associate |
6 | France | EU |
7 | Sweden | EU |
8 | Italy | EU |
9 | Denmark | EU |
10 | Germany | EU |
11 | Slovenia | EU |
12 | Estonia | EU |
13 | Finland | EU |
14 | Bulgaria | EU |
15 | Slovakia | EU |
16 | Spain | EU |
17 | Poland | EU |
18 | Czech Republic | EU |
19 | Greece | EU |
20 | Malta | EU |
21 | Austria | EU |
22 | Switzerland | Non-EU associate |
23 | Ireland | EU |
24 | Portugal | EU |
25 | Romania | EU |
26 | Hungary | EU |
27 | Cyprus | EU |
28 | Croatia | EU |
29 | Lithuania | EU |
30 | Latvia | EU |
# len(wos),len(wos_univ_locations)
# wos_addresses = pd.read_excel(f"/{outdir}/wos_addresses.xlsx")
# wos_affiliations = pd.read_excel(f"/{outdir}/wos_affiliations.xlsx")
# wos_author_locations = pd.read_excel(f"/{outdir}/wos_author_locations.xlsx")
# wos_univ_locations = pd.read_excel(f"/{outdir}/wos_univ_locations.xlsx")
record_col = "UT (Unique WOS ID)"
# def nth_repl_all(s, sub="", repl="<br>", nth=2):
# find = s.find(sub)
# # loop until we find no match
# i = 1
# while find != -1:
# # if i is equal to nth we found nth matches so replace
# if i == nth:
# s = s[:find]+repl+s[find + len(sub):]
# i = 0
# # find + len(sub) + 1 means we start after the last match
# find = s.find(sub, find + len(sub) + 1)
# i += 1
# return s.replace("<br>&","&<br")
def replace_nth(s, sub=" ", repl="<br>", n=2):
    """Replace every n-th occurrence of `sub` in `s` with `repl`;
    used to wrap long category labels for the plotly charts below."""
    chunks = s.split(sub)
    size = len(chunks)
    # number of groups of n chunks; the last group may be shorter
    rows = size // n + (0 if size % n == 0 else 1)
    # re-join chunks within each group with `sub` and the groups with `repl`,
    # then move a dangling "&" back onto the end of the preceding line
    return (repl.join([
        sub.join([chunks[i * n + j] for j in range(n if (i + 1) * n < size else size - i * n)])
        for i in range(rows)
    ])).replace("<br>&", " &<br>")
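# Quick illustration (output follows from the definition above): every 2nd space
# becomes a line break, with a trailing "&" kept on the first line
replace_nth("Information & Communication Technologies")
# -> 'Information &<br>Communication Technologies'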
groups = ['Domain_English',"Field_English",'SubField_English']
data = wos.groupby(groups, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
data["percent"] = data[record_col]/data[record_col].sum()
data[groups] = data[groups].applymap(replace_nth)
data
 | Domain_English | Field_English | SubField_English | UT (Unique WOS ID) | percent |
---|---|---|---|---|---|
37 | Applied Sciences | Information &<br>Communication Technologies | Artificial Intelligence &<br> Image<br>Processing | 7915 | 0.171841 |
44 | Applied Sciences | Information &<br>Communication Technologies | Networking &<br>Telecommunications | 5360 | 0.116370 |
32 | Applied Sciences | Engineering | Geological &<br>Geomatics Engineering | 2576 | 0.055927 |
33 | Applied Sciences | Engineering | Industrial Engineering &<br> Automation | 2316 | 0.050282 |
15 | Applied Sciences | Enabling &<br>Strategic Technologies | Energy | 1965 | 0.042662 |
... | ... | ... | ... | ... | ... |
11 | Applied Sciences | Economics &<br>Business | Business &<br>Management | 1 | 0.000022 |
46 | Applied Sciences | Social Sciences | Anthropology | 1 | 0.000022 |
54 | Arts &<br>Humanities | Philosophy &<br>Theology | Philosophy | 1 | 0.000022 |
52 | Arts &<br>Humanities | Historical Studies | History of<br>Social Sciences | 1 | 0.000022 |
129 | Health Sciences | Psychology &<br>Cognitive Sciences | General Psychology &<br> Cognitive<br>Sciences | 1 | 0.000022 |
175 rows × 5 columns
fig = px.sunburst(data, path=groups, values=record_col,
color='Domain_English',title="Distribution of topics<br>(METRIX classification)", template='plotly')
fig.update_traces(hovertemplate='%{id}<br>%{value:d}')
fig.show(config= dict(displayModeBar = False))
group = 'Domain_English'
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
data
 | Domain_English | UT (Unique WOS ID) |
---|---|---|
0 | Applied Sciences | 29985 |
5 | Natural Sciences | 8457 |
3 | Health Sciences | 5341 |
2 | Economic & Social Sciences | 1360 |
4 | Multidisciplinary | 847 |
1 | Arts & Humanities | 70 |
g = sns.barplot(data, x=record_col, y=group)
g.set_xlim(0,35000)
g.set_ylabel(None)
g.set_xlabel("Number of co-publications")
g.set_title("Distribution of Domains")
for i in g.containers:
    g.bar_label(i, fontsize=10)
fig = px.bar(data, x=record_col, y=group, color=group,
labels={
record_col: 'Number of co-publications',
group: "",
},
title="Distribution of Domains", template='plotly')
fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family="Montserrat")
fig.update_traces(hovertemplate='%{x:d}')
fig.add_shape(
# Rectangle with reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1.0,
y1=1.0,
line=dict(
color="black",
width=0.5,
)
)
fig.update_yaxes(
showgrid=True,
ticks="outside")
fig.update_xaxes(
showgrid=True,
ticks="outside")
fig.show(config= dict(displayModeBar = False))
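# The frame-plus-outside-ticks styling above recurs for every plotly figure below.
# A small helper could wrap it (hypothetical refactoring; the cells that follow
# keep the original inline version):
def style_plotly(fig):
    # thin black frame around the plotting area
    fig.add_shape(type="rect", xref="paper", yref="paper",
                  x0=0, y0=0, x1=1.0, y1=1.0,
                  line=dict(color="black", width=0.5))
    # grid lines and outside ticks on both axes
    fig.update_yaxes(showgrid=True, ticks="outside")
    fig.update_xaxes(showgrid=True, ticks="outside")
    return fig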
# # define a function to divide each row's 'Count' by the value of the first year
# def divide_by_first_year(group):
# group['relative_growth'] = group[record_col] / group.loc[group['Publication Year'] == group['Publication Year'].min(), record_col].values[0]
# return group
#
#
#
# data = (wos.groupby(group)[record_col].nunique()
# .unstack(fill_value=0).stack()
# .reset_index()
# .rename(columns={0:record_col})
# .sort_values(ascending=False, by=group+[record_col]))
#
# # group by 'Topic'
# grouped = data.groupby('Domain_English')
# # apply the function to each group
# data = grouped.apply(divide_by_first_year).reset_index(drop=True)
# data['relative_growth'] = data['relative_growth']*100
group = ['Publication Year','Domain_English']
# complete the year × domain grid: unstack/stack with fillna(0) materialises
# missing (year, domain) combinations as explicit zero counts
data = (wos.groupby(['Publication Year','Domain_English'])[record_col].nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# baseline per domain: the count in its first year with non-zero output
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset='Domain_English'),
                  on='Domain_English', suffixes=[None,"_relative_growth"])
# growth relative to that baseline, in percent
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data
 | Publication Year | Domain_English | UT (Unique WOS ID) | Publication Year_relative_growth | UT (Unique WOS ID)_relative_growth |
---|---|---|---|---|---|
0 | 2011 | Applied Sciences | 490.0 | 2011 | 0.000000 |
1 | 2012 | Applied Sciences | 593.0 | 2011 | 21.020408 |
2 | 2013 | Applied Sciences | 738.0 | 2011 | 50.612245 |
3 | 2014 | Applied Sciences | 1031.0 | 2011 | 110.408163 |
4 | 2015 | Applied Sciences | 1201.0 | 2011 | 145.102041 |
... | ... | ... | ... | ... | ... |
67 | 2018 | Natural Sciences | 753.0 | 2011 | 316.022099 |
68 | 2019 | Natural Sciences | 999.0 | 2011 | 451.933702 |
69 | 2020 | Natural Sciences | 1232.0 | 2011 | 580.662983 |
70 | 2021 | Natural Sciences | 1403.0 | 2011 | 675.138122 |
71 | 2022 | Natural Sciences | 1665.0 | 2011 | 819.889503 |
72 rows × 5 columns
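# Spot-check of the relative-growth formula against the last row above
# (Natural Sciences: 181 co-publications in the 2011 baseline, 1665 in 2022):
(1665 - 181) / 181 * 100  # -> 819.889...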
g=sns.lineplot(data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0], hue=group[-1], marker="o")
g.set(xticks=list(range(2012,2022+1,2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications")
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col,x=group[0], color=group[-1], markers=True, labels={
record_col: 'Number of co-publications',
group[-1]: "Domain",
},
title="Yearly output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:d}')
fig.update_layout(hovermode='x unified')
fig.add_shape(
# Rectangle with reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1.0,
y1=1.0,
line=dict(
color="black",
width=0.5,
)
)
fig.update_yaxes(
showgrid=True,
ticks="outside")
fig.update_xaxes(
showgrid=True,
ticks="outside")
fig.show(config= dict(displayModeBar = False))
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+"_relative_growth",x=group[0], color=group[-1], markers=True, labels={
record_col+"_relative_growth": 'Rel. growth<br>in co-publications (%)',
group[-1]: "Domain",
},
title="Relative growth in the output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:.2f}%')
fig.update_layout(hovermode='x unified',yaxis_tickformat='d',font_family="Montserrat")
fig.add_shape(
# Rectangle with reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1.0,
y1=1.0,
line=dict(
color="black",
width=0.5,
)
)
fig.update_yaxes(
showgrid=True,
ticks="outside")
fig.update_xaxes(
showgrid=True,
ticks="outside")
fig.show(config= dict(displayModeBar = False))
pivot_data = pd.pivot_table(data, values=record_col, index=['Domain_English'],
columns=['Publication Year'], fill_value=0)
pivot_data
Domain_English \ Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Applied Sciences | 490 | 593 | 738 | 1031 | 1201 | 1535 | 1920 | 2808 | 3729 | 4446 | 5295 | 6199 |
Arts & Humanities | 0 | 0 | 0 | 4 | 1 | 3 | 7 | 4 | 11 | 11 | 16 | 13 |
Economic & Social Sciences | 20 | 22 | 29 | 28 | 34 | 40 | 84 | 105 | 160 | 211 | 252 | 375 |
Health Sciences | 116 | 120 | 155 | 184 | 216 | 243 | 321 | 403 | 611 | 755 | 1035 | 1182 |
Multidisciplinary | 15 | 21 | 43 | 52 | 57 | 64 | 75 | 76 | 83 | 97 | 115 | 149 |
Natural Sciences | 181 | 223 | 298 | 318 | 380 | 437 | 568 | 753 | 999 | 1232 | 1403 | 1665 |
f, ax = plt.subplots(figsize=(9, 6))
g = sns.heatmap(pivot_data, annot=True, fmt="d", linewidths=.5, ax=ax)
g.set(xlabel="", ylabel="")
import numpy as np
# share of each domain within each year's output, in percent
percent_pivot = pd.crosstab(data['Domain_English'], data['Publication Year'], values=data[record_col], aggfunc='sum', normalize='columns')*100
percent_pivot
Domain_English \ Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Applied Sciences | 59.610706 | 60.572012 | 58.432304 | 63.760049 | 63.578613 | 66.106804 | 64.537815 | 67.678959 | 66.672626 | 65.847156 | 65.241498 | 64.687467 |
Arts & Humanities | 0.000000 | 0.000000 | 0.000000 | 0.247372 | 0.052938 | 0.129199 | 0.235294 | 0.096409 | 0.196674 | 0.162915 | 0.197141 | 0.135657 |
Economic & Social Sciences | 2.433090 | 2.247191 | 2.296120 | 1.731602 | 1.799894 | 1.722653 | 2.823529 | 2.530730 | 2.860719 | 3.125000 | 3.104978 | 3.913180 |
Health Sciences | 14.111922 | 12.257406 | 12.272367 | 11.379097 | 11.434621 | 10.465116 | 10.789916 | 9.713184 | 10.924370 | 11.181872 | 12.752587 | 12.334342 |
Multidisciplinary | 1.824818 | 2.145046 | 3.404592 | 3.215832 | 3.017470 | 2.756245 | 2.521008 | 1.831767 | 1.483998 | 1.436611 | 1.416954 | 1.554837 |
Natural Sciences | 22.019465 | 22.778345 | 23.594616 | 19.666048 | 20.116464 | 18.819983 | 19.092437 | 18.148952 | 17.861613 | 18.246445 | 17.286841 | 17.374517 |
f, ax = plt.subplots(figsize=(15, 6))
g = sns.heatmap(percent_pivot, annot=True, fmt='.2f', linewidths=.5, ax=ax, cbar=False)
for t in ax.texts: t.set_text(t.get_text() + " %")
g.set(xlabel="", ylabel="")
percent_pivot.T.plot(kind='bar',
stacked=True,
figsize=(10, 6))
percent_pivot.T.plot(kind='bar',
stacked=True,
figsize=(15, 8))
plt.legend(loc="lower left", ncol=2)
# plt.xlabel("Publication Year")
# plt.ylabel("Proportion (%)")
# label each stacked segment with its absolute count, placed at the segment's
# vertical midpoint (cumsum marks the segment top; subtract the segment height,
# then add half of it back)
for n, x in enumerate([*pivot_data.T.index.values]):
    for (proportion, count, y_loc) in zip(percent_pivot.T.loc[x],
                                          pivot_data.T.loc[x],
                                          percent_pivot.T.loc[x].cumsum()):
        plt.text(y=(y_loc - proportion) + (proportion / 2),
                 x=n - 0.11,
                 s=f'{count}',  # ({np.round(proportion, 1)}%)',
                 color="black",
                 fontsize=8,
                 fontweight="bold")
plt.show()
group = ['Publication Year',"Domain_English",'Field_English']
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
data
 | Publication Year | Domain_English | Field_English | UT (Unique WOS ID) |
---|---|---|---|---|
233 | 2022 | Natural Sciences | Physics & Astronomy | 596 |
232 | 2022 | Natural Sciences | Mathematics & Statistics | 228 |
231 | 2022 | Natural Sciences | Earth & Environmental Sciences | 409 |
230 | 2022 | Natural Sciences | Chemistry | 251 |
229 | 2022 | Natural Sciences | Biology | 181 |
... | ... | ... | ... | ... |
4 | 2011 | Applied Sciences | Information & Communication Technologies | 256 |
3 | 2011 | Applied Sciences | Engineering | 166 |
2 | 2011 | Applied Sciences | Enabling & Strategic Technologies | 53 |
1 | 2011 | Applied Sciences | Built Environment & Design | 6 |
0 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | 9 |
234 rows × 4 columns
len(data[group[-2]].unique())
6
data_complete = pd.DataFrame()
for cat in sorted(data[group[-2]].unique()):
    # data segment
    sub_data = data[data[group[-2]]==cat]
    # pyjanitor's complete(): expand to every year × field combination, zero-filling gaps
    sub_data = sub_data.complete({group[0]: range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)},
                                 group[-1], fill_value=0)
    data_complete = pd.concat([data_complete, sub_data], ignore_index=True)
    # plot
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                     y=record_col, x=group[0], hue=group[-1], marker="o")
    g.set(xticks=list(range(2012,2022+1,2)))
    g.legend(title=None)
    g.set_title(cat)
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
    plt.show()
data_complete = pd.DataFrame()
# Creating subplot axes
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 15))
for cat, ax in zip(sorted(data[group[-2]].unique()), axes.flatten()):
    # data segment
    sub_data = data[data[group[-2]]==cat]
    sub_data = sub_data.complete({group[0]: range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)},
                                 group[-1], fill_value=0)
    data_complete = pd.concat([data_complete, sub_data], ignore_index=True)
    # plot
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                     y=record_col, x=group[0], hue=group[-1], marker="o", ax=ax)
    g.set(xticks=list(range(2012,2022+1,2)))
    g.legend(title=None)
    g.set_title(cat)
    g.set_xlabel(None)
    g.set_ylabel(None)
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
fig.suptitle("Number of co-publications in domains and respective fields", y=0.92)
plt.show()
group = ['Publication Year',"Domain_English",'Field_English',"SubField_English"]
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
data
 | Publication Year | Domain_English | Field_English | SubField_English | UT (Unique WOS ID) |
---|---|---|---|---|---|
1598 | 2022 | Natural Sciences | Physics & Astronomy | Optics | 134 |
1597 | 2022 | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics | 65 |
1596 | 2022 | Natural Sciences | Physics & Astronomy | Mathematical Physics | 10 |
1595 | 2022 | Natural Sciences | Physics & Astronomy | General Physics | 31 |
1594 | 2022 | Natural Sciences | Physics & Astronomy | Fluids & Plasmas | 79 |
... | ... | ... | ... | ... | ... |
4 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Forestry | 1 |
3 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Food Science | 1 |
2 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Fisheries | 2 |
1 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Dairy & Animal Science | 2 |
0 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Agronomy & Agriculture | 3 |
1599 rows × 5 columns
for cat in sorted(data[group[-2]].unique()):
    sub_data = data[data[group[-2]]==cat]
    sub_data = sub_data.complete({group[0]: range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)},
                                 group[-1], fill_value=0)
    g = sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]), y=record_col, x=group[0],
                     hue=group[-1], marker="o", errorbar=None)
    g.set(xticks=list(range(2012,2022+1,2)))
    g.legend(title=None, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncols=math.ceil(len(g.legend_.texts)/12))
    g.set_title(f'Number of co-publications in {cat}')
    g.set_ylabel(None)
    plt.show()
len(sorted(data[group[-2]].unique()))
20
from matplotlib.ticker import FuncFormatter
import math
def orderOfMagnitude(number):
    """Order of magnitude of a positive number, e.g. 4245 -> 3."""
    return math.floor(math.log(number, 10))

def roundToNearest(number):
    """Round a positive number up to the next multiple of its order of
    magnitude (e.g. 4245 -> 5000); used below to pick clean axis limits."""
    order = orderOfMagnitude(number)
    # if order!=0:
    #     order+=1
    near = math.ceil(number/10**order)*10**order
    return near
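# Illustration, mirroring how the function is used for the axis limits below:
roundToNearest(4245)  # -> 5000 (order of magnitude 3, ceil to the next 1000)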
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
wos_univ_locations.sample(100)
 | UT (Unique WOS ID) | Institution | Country | Institution_harm | merge_iter | Country_Type |
---|---|---|---|---|---|---|
41191 | WOS:000538161600016 | Anhui Univ | China | Anhui Univ | 0 | China |
175692 | WOS:000709411500003 | Univ Porto | Portugal | Univ Porto | 0 | EU |
75198 | WOS:000831217100027 | Zhejiang Univ | China | Zhejiang Univ | 0 | China |
48614 | WOS:000597938400003 | Shanghai Jiao Tong Univ | China | Shanghai Jiao Tong Univ | 0 | China |
133670 | WOS:000411824101159 | Univ Pisa | Italy | Univ Pisa | 0 | EU |
... | ... | ... | ... | ... | ... | ... |
2892 | WOS:000293708200019 | Natl Univ Def Technol | China | Natl Univ Def Technol | 0 | China |
125259 | WOS:000663324800010 | INRAE | France | INRAE | 0 | EU |
55780 | WOS:000659952900011 | Huazhong Univ Sci & Technol | China | Huazhong Univ Sci & Technol | 0 | China |
138600 | WOS:000744399000001 | Brignone Clin | Italy | Brignone Clin | 0 | EU |
31040 | WOS:000471758500010 | Chinese Acad Sci | China | Chinese Acad Sci | 0 | China |
100 rows × 6 columns
wos_collabs = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country"]].drop_duplicates()
collab_desc = wos_collabs[wos_collabs["Country"]!="China"]["Country"].value_counts().reset_index()
# share of all co-publications that involve the country at least once...
collab_desc["percent_of_copubs"] = collab_desc["count"]/wos_collabs[record_col].nunique()*100
# ...versus the country's share of all (record, country) participations
collab_desc["percent_contrib_in_copubs"] = collab_desc["count"]/wos_collabs[record_col].size*100
collab_desc = collab_desc.merge(wos_country_types, on="Country")
collab_desc
c_dict = {"count":"Number of co-publications",
"percent_of_copubs":"Percent of co-publications",
"percent_contrib_in_copubs":"Contribution to co-publications"}
# Creating subplot axes
# fig, axes = plt.subplots(ncols=3,figsize=(15, 15))
# for c,ax in zip(c_dict.keys(),axes.flatten()):
for c in c_dict.keys():
    data = collab_desc[["Country",c,"Country_Type"]]
    plt.figure(figsize=(9,12))
    g = sns.barplot(data, x=c, y="Country", hue="Country_Type", dodge=False)
    g.set_xlim(0, roundToNearest(data[c].max()))
    g.set_ylabel(None)
    g.set_xlabel(c_dict.get(c))
    g.set_title(c_dict.get(c))
    g.legend(title=None, loc="right")
    for i in g.containers:
        g.bar_label(i, fontsize=10, fmt='%.1f%%' if 'percent' in c else '%.0f')
    if 'percent' in c:
        g.xaxis.set_major_locator(MaxNLocator(integer=True))
        vals = g.get_xticks()
        g.set_xticks(vals)  # pin the ticks first so set_xticklabels does not warn
        g.set_xticklabels([str(int(val))+'%' for val in vals])
    plt.show()
# EU and associate countries only (China and "Other" dropped), one row per (record, country)
wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
# self-merge on the record ID to enumerate the country pairs within each co-publication
wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
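# Minimal illustration of the self-merge pairing (toy data, not from the corpus):
toy = pd.DataFrame({record_col: ["W1", "W1"], "Country": ["France", "Italy"]})
toy.merge(toy, on=record_col)
# -> four rows: (France, France), (France, Italy), (Italy, France), (Italy, Italy);
#    the upper-triangle mask below keeps each unordered pair only once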
EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique', normalize='all').fillna(0)
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
# Draw the heatmap with the mask and correct aspect ratio
g = sns.heatmap(EU_co_occur, mask=mask,
square=True, linewidths=.5)
g.set_ylabel(None)
g.set_xlabel(None)
wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
wos_collabs_EU
EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
data = np.where(mask, None, EU_co_occur)  # None blanks the upper triangle in px.imshow
EU_co_occur.columns
Index(['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'United Kingdom'], dtype='object', name='Country_y')
fig = px.imshow(data,
labels=dict(x="Country (x)", y="Country (y)", color="Co-publication"),
x=list(EU_co_occur.columns),
y=list(EU_co_occur.index), title="Intra-European patterns"
)
fig.update_layout(title_x=0.5,
width=1000, height=1000,
xaxis_showgrid=False,
yaxis_showgrid=False,
yaxis_autorange='reversed', template='plotly_white')
fig.update_xaxes(tickangle= -90)
fig.update_yaxes(
ticks="outside")
fig.update_xaxes(
ticks="outside")
fig.show(config= dict(displayModeBar = False))
collab_year = wos_collabs[wos_collabs["Country"]!="China"].copy()
collab_year = collab_year.merge(wos_country_types, on="Country").merge(wos[[record_col,"Publication Year"]],on=record_col).drop_duplicates()
data = collab_year.groupby(["Publication Year",'Country_Type'],as_index=False)[record_col].nunique()
g=sns.lineplot(data,y=record_col,x="Publication Year", hue="Country_Type", marker="o")
g.set(xticks=list(range(2012,2022+1,2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications with China")
import country_converter as coco
cc = coco.CountryConverter()
# same pattern as for domains above: complete the year × country grid,
# then compute growth relative to each country's first non-zero year
data = (collab_year.groupby(['Publication Year',"Country"])[record_col]
        .nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset="Country"),
on=["Country"], suffixes=[None,"_relative_growth"])
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data
 | Publication Year | Country | UT (Unique WOS ID) | Publication Year_relative_growth | UT (Unique WOS ID)_relative_growth |
---|---|---|---|---|---|
0 | 2011 | Austria | 22.0 | 2011 | 0.000000 |
1 | 2012 | Austria | 24.0 | 2011 | 9.090909 |
2 | 2013 | Austria | 26.0 | 2011 | 18.181818 |
3 | 2014 | Austria | 39.0 | 2011 | 77.272727 |
4 | 2015 | Austria | 50.0 | 2011 | 127.272727 |
... | ... | ... | ... | ... | ... |
355 | 2018 | United Kingdom | 1837.0 | 2011 | 406.060606 |
356 | 2019 | United Kingdom | 2430.0 | 2011 | 569.421488 |
357 | 2020 | United Kingdom | 3108.0 | 2011 | 756.198347 |
358 | 2021 | United Kingdom | 3718.0 | 2011 | 924.242424 |
359 | 2022 | United Kingdom | 4245.0 | 2011 | 1069.421488 |
360 rows × 5 columns
data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
fig = px.choropleth(data, locations="ISO3", color=record_col, hover_name="Country",
animation_frame='Publication Year', scope="europe", template='plotly', range_color=[data[record_col].min(),data[record_col].max()])
fig.show()
data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
fig = px.choropleth(data, locations="ISO3", color=record_col+"_relative_growth", hover_name="Country",
animation_frame='Publication Year', scope="europe", template='plotly',
range_color=[data[record_col+"_relative_growth"].min(),data[record_col+"_relative_growth"].max()])
fig.show()
[data[record_col+"_relative_growth"].min(),data[record_col+"_relative_growth"].max()]
[-100.0, 3700.0]
fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col,x='Publication Year', color="Country", markers=True,
labels={
record_col: 'Number of co-publications',
},
title="Yearly output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:d}')
fig.update_layout(hovermode='x unified')
fig.add_shape(
# Rectangle with reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1.0,
y1=1.0,
line=dict(
color="black",
width=0.5,
)
)
fig.update_yaxes(
showgrid=True,
ticks="outside")
fig.update_xaxes(
showgrid=True,
ticks="outside")
fig.show(config= dict(displayModeBar = False))
fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col+"_relative_growth",x='Publication Year', color="Country", markers=True,
labels={
record_col+"_relative_growth": 'Relative growth of co-publications (%)',
},
title="Relative growth of co-publications<br>(baseline: 2011)", template='plotly')
fig.update_traces(hovertemplate='%{y:d}%')
fig.add_shape(
# Rectangle with reference to the plot
type="rect",
xref="paper",
yref="paper",
x0=0,
y0=0,
x1=1.0,
y1=1.0,
line=dict(
color="black",
width=0.5,
)
)
fig.update_yaxes(
showgrid=True,
ticks="outside")
fig.update_xaxes(
showgrid=True,
ticks="outside")
fig.show(config= dict(displayModeBar = False))
year_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique').fillna(0).astype(int)
year_pivot
Country \ Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Austria | 22 | 24 | 26 | 39 | 50 | 57 | 72 | 89 | 138 | 137 | 185 | 205 |
Belgium | 34 | 38 | 40 | 65 | 71 | 81 | 90 | 133 | 179 | 213 | 242 | 292 |
Bulgaria | 4 | 5 | 8 | 9 | 7 | 19 | 21 | 18 | 10 | 25 | 32 | 19 |
Croatia | 1 | 2 | 6 | 8 | 10 | 7 | 10 | 19 | 27 | 29 | 33 | 35 |
Cyprus | 2 | 1 | 5 | 5 | 5 | 5 | 8 | 7 | 15 | 28 | 36 | 43 |
Czech Republic | 13 | 15 | 16 | 21 | 20 | 36 | 37 | 56 | 64 | 81 | 93 | 123 |
Denmark | 35 | 33 | 40 | 59 | 68 | 74 | 101 | 195 | 234 | 245 | 293 | 343 |
Estonia | 3 | 3 | 7 | 10 | 12 | 10 | 15 | 15 | 16 | 38 | 45 | 39 |
Finland | 31 | 35 | 44 | 82 | 100 | 125 | 126 | 198 | 241 | 256 | 289 | 380 |
France | 117 | 130 | 174 | 231 | 269 | 325 | 348 | 491 | 648 | 691 | 807 | 858 |
Germany | 123 | 172 | 192 | 273 | 310 | 365 | 456 | 604 | 801 | 907 | 1210 | 1386 |
Greece | 15 | 18 | 19 | 32 | 35 | 50 | 47 | 81 | 114 | 122 | 139 | 181 |
Hungary | 11 | 11 | 21 | 16 | 20 | 38 | 34 | 47 | 61 | 61 | 83 | 90 |
Ireland | 13 | 16 | 22 | 31 | 27 | 45 | 66 | 72 | 84 | 116 | 167 | 187 |
Italy | 51 | 70 | 84 | 116 | 178 | 187 | 247 | 325 | 441 | 571 | 641 | 811 |
Latvia | 0 | 0 | 1 | 0 | 1 | 8 | 10 | 15 | 10 | 9 | 13 | 18 |
Lithuania | 1 | 2 | 10 | 4 | 4 | 13 | 12 | 23 | 38 | 36 | 38 | 38 |
Luxembourg | 2 | 3 | 3 | 1 | 8 | 9 | 13 | 15 | 18 | 22 | 35 | 51 |
Malta | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 6 | 2 | 7 | 10 |
Netherlands | 72 | 64 | 77 | 103 | 139 | 166 | 220 | 297 | 408 | 470 | 529 | 655 |
Norway | 30 | 42 | 60 | 76 | 67 | 88 | 104 | 134 | 222 | 253 | 304 | 311 |
Poland | 17 | 31 | 37 | 57 | 73 | 82 | 98 | 110 | 138 | 181 | 276 | 353 |
Portugal | 16 | 23 | 35 | 41 | 45 | 58 | 79 | 119 | 136 | 147 | 204 | 212 |
Romania | 7 | 15 | 13 | 16 | 25 | 26 | 37 | 57 | 64 | 55 | 48 | 62 |
Slovakia | 9 | 6 | 6 | 10 | 12 | 22 | 18 | 27 | 27 | 34 | 36 | 45 |
Slovenia | 7 | 7 | 10 | 12 | 17 | 27 | 22 | 47 | 54 | 31 | 48 | 40 |
Spain | 50 | 49 | 69 | 112 | 138 | 185 | 232 | 273 | 356 | 386 | 473 | 640 |
Sweden | 34 | 50 | 59 | 83 | 113 | 170 | 233 | 232 | 385 | 359 | 428 | 510 |
Switzerland | 37 | 50 | 54 | 74 | 74 | 95 | 155 | 195 | 233 | 263 | 349 | 447 |
United Kingdom | 363 | 417 | 531 | 660 | 781 | 979 | 1350 | 1837 | 2430 | 3108 | 3718 | 4245 |
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_pivot, annot=True, fmt="d", linewidths=.5, ax=ax)
g.set(xlabel="", ylabel="")
for i in range(year_pivot.shape[0]+1):
    ax.axhline(i, color='white', lw=10)
year_percent_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique', normalize='columns').fillna(0)*100
year_percent_pivot
Country \ Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Austria | 1.962533 | 1.801802 | 1.557819 | 1.736420 | 1.865672 | 1.699970 | 1.689744 | 1.552958 | 1.816267 | 1.543488 | 1.712804 | 1.623248 |
Belgium | 3.033006 | 2.852853 | 2.396645 | 2.894034 | 2.649254 | 2.415747 | 2.112180 | 2.320712 | 2.355883 | 2.399730 | 2.240533 | 2.312139 |
Bulgaria | 0.356824 | 0.375375 | 0.479329 | 0.400712 | 0.261194 | 0.566657 | 0.492842 | 0.314081 | 0.131614 | 0.281658 | 0.296269 | 0.150447 |
Croatia | 0.089206 | 0.150150 | 0.359497 | 0.356189 | 0.373134 | 0.208768 | 0.234687 | 0.331530 | 0.355357 | 0.326724 | 0.305527 | 0.277140 |
Cyprus | 0.178412 | 0.075075 | 0.299581 | 0.222618 | 0.186567 | 0.149120 | 0.187749 | 0.122143 | 0.197420 | 0.315457 | 0.333302 | 0.340486 |
Czech Republic | 1.159679 | 1.126126 | 0.958658 | 0.934996 | 0.746269 | 1.073665 | 0.868341 | 0.977142 | 0.842327 | 0.912573 | 0.861031 | 0.973949 |
Denmark | 3.122212 | 2.477477 | 2.396645 | 2.626892 | 2.537313 | 2.206979 | 2.370336 | 3.402548 | 3.079758 | 2.760252 | 2.712712 | 2.715971 |
Estonia | 0.267618 | 0.225225 | 0.419413 | 0.445236 | 0.447761 | 0.298240 | 0.352030 | 0.261734 | 0.210582 | 0.428121 | 0.416628 | 0.308813 |
Finland | 2.765388 | 2.627628 | 2.636309 | 3.650935 | 3.731343 | 3.728005 | 2.957052 | 3.454894 | 3.171887 | 2.884182 | 2.675678 | 3.008948 |
France | 10.437110 | 9.759760 | 10.425404 | 10.284951 | 10.037313 | 9.692812 | 8.167097 | 8.567440 | 8.528560 | 7.785038 | 7.471530 | 6.793887 |
Germany | 10.972346 | 12.912913 | 11.503895 | 12.154942 | 11.567164 | 10.885774 | 10.701713 | 10.539173 | 10.542248 | 10.218567 | 11.202666 | 10.974741 |
Greece | 1.338091 | 1.351351 | 1.138406 | 1.424755 | 1.305970 | 1.491202 | 1.103027 | 1.413366 | 1.500395 | 1.374493 | 1.286918 | 1.433209 |
Hungary | 0.981267 | 0.825826 | 1.258238 | 0.712378 | 0.746269 | 1.133313 | 0.797935 | 0.820101 | 0.802843 | 0.687247 | 0.768447 | 0.712645 |
Ireland | 1.159679 | 1.201201 | 1.318155 | 1.380232 | 1.007463 | 1.342082 | 1.548932 | 1.256325 | 1.105554 | 1.306895 | 1.546153 | 1.480719 |
Italy | 4.549509 | 5.255255 | 5.032954 | 5.164737 | 6.641791 | 5.577095 | 5.796761 | 5.670913 | 5.804159 | 6.433078 | 5.934636 | 6.421728 |
Latvia | 0.000000 | 0.000000 | 0.059916 | 0.000000 | 0.037313 | 0.238592 | 0.234687 | 0.261734 | 0.131614 | 0.101397 | 0.120359 | 0.142529 |
Lithuania | 0.089206 | 0.150150 | 0.599161 | 0.178094 | 0.149254 | 0.387712 | 0.281624 | 0.401326 | 0.500132 | 0.405588 | 0.351819 | 0.300895 |
Luxembourg | 0.178412 | 0.225225 | 0.179748 | 0.044524 | 0.298507 | 0.268416 | 0.305093 | 0.261734 | 0.236904 | 0.247859 | 0.324044 | 0.403832 |
Malta | 0.089206 | 0.000000 | 0.000000 | 0.000000 | 0.037313 | 0.029824 | 0.000000 | 0.000000 | 0.078968 | 0.022533 | 0.064809 | 0.079183 |
Netherlands | 6.422837 | 4.804805 | 4.613541 | 4.585931 | 5.186567 | 4.950790 | 5.163107 | 5.182342 | 5.369834 | 5.295178 | 4.897695 | 5.186476 |
Norway | 2.676182 | 3.153153 | 3.594967 | 3.383793 | 2.500000 | 2.624515 | 2.440742 | 2.338161 | 2.921822 | 2.850383 | 2.814554 | 2.462586 |
Poland | 1.516503 | 2.327327 | 2.216896 | 2.537845 | 2.723881 | 2.445571 | 2.299930 | 1.919386 | 1.816267 | 2.039207 | 2.555319 | 2.795154 |
Portugal | 1.427297 | 1.726727 | 2.097064 | 1.825467 | 1.679104 | 1.729794 | 1.854025 | 2.076426 | 1.789945 | 1.656151 | 1.888714 | 1.678676 |
Romania | 0.624442 | 1.126126 | 0.778910 | 0.712378 | 0.932836 | 0.775425 | 0.868341 | 0.994591 | 0.842327 | 0.619648 | 0.444403 | 0.490934 |
Slovakia | 0.802855 | 0.450450 | 0.359497 | 0.445236 | 0.447761 | 0.656129 | 0.422436 | 0.471122 | 0.355357 | 0.383055 | 0.333302 | 0.356323 |
Slovenia | 0.624442 | 0.525526 | 0.599161 | 0.534283 | 0.634328 | 0.805249 | 0.516311 | 0.820101 | 0.710713 | 0.349256 | 0.444403 | 0.316731 |
Spain | 4.460303 | 3.678679 | 4.134212 | 4.986643 | 5.149254 | 5.517447 | 5.444731 | 4.763567 | 4.685444 | 4.348806 | 4.379224 | 5.067701 |
Sweden | 3.033006 | 3.753754 | 3.535051 | 3.695459 | 4.216418 | 5.070086 | 5.468200 | 4.048159 | 5.067123 | 4.044615 | 3.962596 | 4.038324 |
Switzerland | 3.300624 | 3.753754 | 3.235470 | 3.294746 | 2.761194 | 2.833284 | 3.637644 | 3.402548 | 3.066596 | 2.963046 | 3.231182 | 3.539473 |
United Kingdom | 32.381802 | 31.306306 | 31.815458 | 29.385574 | 29.141791 | 29.197733 | 31.682704 | 32.053743 | 31.982101 | 35.015773 | 34.422739 | 33.613113 |
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_percent_pivot, annot=True, fmt='.1f', linewidths=(.5), ax=ax, cbar=False)
for t in ax.texts: t.set_text(t.get_text() + " %")
g.set(xlabel="", ylabel="")
for i in range(year_percent_pivot.shape[1]+1):
    ax.axvline(i, color='white', lw=10)
# Institutional collab
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
wos_univ_collabs = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country","Institution_harm","Country_Type"]].drop_duplicates()
wos_univ_collabs.sample(100)
 | UT (Unique WOS ID) | Country | Institution_harm | Country_Type |
---|---|---|---|---|
104534 | WOS:000536637200011 | United Kingdom | Univ Warwick | Non-EU associate |
120323 | WOS:000373806800006 | France | ENSAIT | EU |
41841 | WOS:000542956600003 | China | Nanjing Univ Aeronaut & Astronaut | China |
100019 | WOS:000459844300007 | United Kingdom | Univ Manchester | Non-EU associate |
174151 | WOS:000843324300007 | Ireland | Trinity Coll Dublin | EU |
... | ... | ... | ... | ... |
157638 | WOS:000863147500001 | Finland | Univ Turku | EU |
71835 | WOS:000798227800116 | China | Shanghai Jiao Tong Univ | China |
128870 | WOS:000460118200077 | Sweden | Royal Inst Technol | EU |
37822 | WOS:000517665600048 | China | Chinese Acad Sci | China |
26625 | WOS:000453750400001 | China | Hangzhou Dianzi Univ | China |
100 rows × 4 columns
TOPN = 25
wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"]=="China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"]!="China"]
wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs["Country_Type"]=="EU"]
data_eu = (wos_univ_eu.groupby(["Country","Institution_harm","Country_Type"], as_index=False)[record_col].nunique()
.sort_values(by=record_col,ascending=False).head(TOPN).copy())
data_eu_strict = (wos_univ_eu_strict.groupby(["Country","Institution_harm","Country_Type"], as_index=False)[record_col].nunique()
.sort_values(by=record_col,ascending=False).head(TOPN).copy())
data_eu_strict
data_ch = (wos_univ_ch.groupby(["Country","Institution_harm","Country_Type"], as_index=False)[record_col].nunique()
.sort_values(by=record_col,ascending=False).head(TOPN).copy())
for data in [data_eu, data_eu_strict, data_ch]:
    fig = px.bar(data, x=record_col, y="Institution_harm", color="Country_Type",
                 labels={
                     record_col: 'Number of co-publications',
                     "Institution_harm": "Institution",
                     "Country_Type": "Country type"
                 },
                 title="Most visible institutions", template='plotly')
    fig.update_layout(xaxis_tickformat='d', font_family="Montserrat", yaxis={'categoryorder':'total ascending'},
                      width=1000, height=1000)
    fig.update_traces(hovertemplate='%{x:d}')
    fig.add_shape(
        # Rectangle with reference to the plot
        type="rect",
        xref="paper",
        yref="paper",
        x0=0,
        y0=0,
        x1=1.0,
        y1=1.0,
        line=dict(
            color="black",
            width=0.5,
        )
    )
    fig.update_yaxes(
        showgrid=True,
        ticks="outside")
    fig.update_xaxes(
        showgrid=True,
        ticks="outside")
    fig.show(config=dict(displayModeBar=False))
wos_univ_test = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country","Institution","Institution_harm","Country_Type"]].drop_duplicates()
www = wos_univ_test.groupby(["Institution","Institution_harm"], as_index=False)[record_col].nunique()
www[www["Institution_harm"]=="Chinese Acad Sci"]
 | Institution | Institution_harm | UT (Unique WOS ID) |
---|---|---|---|
16 | Chinese Acad Sci | Chinese Acad Sci | 1 |
3149 | Chinese Acad Sci | Chinese Acad Sci | 4614 |
3153 | Chinese Acad Sci AIRCAS | Chinese Acad Sci | 2 |
3155 | Chinese Acad Sci CAREERI CAS | Chinese Acad Sci | 1 |
3157 | Chinese Acad Sci CASIA | Chinese Acad Sci | 8 |
3159 | Chinese Acad Sci GUCAS | Chinese Acad Sci | 2 |
3160 | Chinese Acad Sci IAP | Chinese Acad Sci | 1 |
3161 | Chinese Acad Sci IECAS | Chinese Acad Sci | 2 |
3162 | Chinese Acad Sci IME CAS | Chinese Acad Sci | 1 |
3163 | Chinese Acad Sci IMECAS | Chinese Acad Sci | 1 |
3164 | Chinese Acad Sci ITP CAS | Chinese Acad Sci | 1 |
3166 | Chinese Acad Sci NAOC | Chinese Acad Sci | 1 |
3167 | Chinese Acad Sci NAOC CAS | Chinese Acad Sci | 2 |
13501 | RCEES Chinese Acad Sci | Chinese Acad Sci | 1 |
19499 | ZIAT Chinese Acad Sci | Chinese Acad Sci | 1 |
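# Counting unique records per harmonized name directly avoids double counting
# papers that appear under several raw spellings (illustrative check):
wos_univ_test.groupby("Institution_harm")[record_col].nunique().loc["Chinese Acad Sci"]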
wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"]=="China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"]!="China"]
wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,"Domain_English","Field_English","SubField_English"]], on =record_col)
wos_univ_dipol.sample(100)
 | UT (Unique WOS ID) | Country_eu | Institution_harm_eu | Country_Type_eu | Country_ch | Institution_harm_ch | Country_Type_ch | Domain_English | Field_English | SubField_English |
---|---|---|---|---|---|---|---|---|---|---|
263074 | WOS:000597493300001 | United Kingdom | Univ Northumbria Newcastle | Non-EU associate | China | Nanchang Univ | China | Natural Sciences | Chemistry | Analytical Chemistry |
71907 | WOS:000494411700001 | Germany | Univ Wurzburg | EU | China | South China Agr Univ | China | Economic & Social Sciences | Social Sciences | Information & Library Sciences |
303069 | WOS:000569985300066 | Italy | Selex | EU | China | Wuhan Elect Informat Inst | China | Applied Sciences | Engineering | Computation Theory & Mathematics |
259937 | WOS:000557391000036 | United Kingdom | Univ Glasgow | Non-EU associate | China | Southwest Jiaotong Univ | China | Natural Sciences | Chemistry | Analytical Chemistry |
302133 | WOS:000477943300012 | Italy | Politecn Milan | EU | China | City Univ Hong Kong | China | Applied Sciences | Information & Communication Technologies | Artificial Intelligence & Image Processing |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
179087 | WOS:000460128100005 | Finland | Univ Jyvaskyla | EU | China | Capital Med Univ | China | Health Sciences | Clinical Medicine | Ophthalmology & Optometry |
333080 | WOS:000589420400001 | Ireland | Univ Coll Dublin | EU | China | Guangxi Normal Univ | China | Natural Sciences | Chemistry | Organic Chemistry |
300958 | WOS:000388876400003 | Finland | Nokias Mobile Networks Organizat | EU | China | Nokia Bell Labs | China | Applied Sciences | Information & Communication Technologies | Networking & Telecommunications |
95342 | WOS:000579154000008 | United Kingdom | Imperial Coll London | Non-EU associate | China | Wuhan Polytech Univ | China | Health Sciences | Clinical Medicine | General & Internal Medicine |
197767 | WOS:000571399800004 | Switzerland | Univ Bern | Non-EU associate | China | Shandong Univ | China | Natural Sciences | Physics & Astronomy | General Physics |
100 rows × 10 columns
fig = px.parallel_categories(wos_univ_dipol[["Country_eu","Domain_English","Country_ch"]])
fig.show()