import pandas as pd
import janitor
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MaxNLocator
import math
import plotly.express as px

import country_converter as coco
# Single converter instance; used further down via cc.pandas_convert(..., to='ISO3').
cc = coco.CountryConverter()

%matplotlib inline


# Global seaborn theme for the matplotlib-based plots, then a preview of the active palette.
sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)
sns.palplot(sns.color_palette())


# Directory name (resolved relative to the parent folder) that holds the processed spreadsheets.
outdir="wos_processed_data"

# `wos` is grouped below by record ID, publication year and the domain/field/subfield columns;
# `wos_univ` is merged below with the country-type table on "Country".
wos = pd.read_excel(f"../{outdir}/wos_processed.xlsx")
wos_univ = pd.read_excel(f"../{outdir}/wos_institution_locations_harmonized.xlsx")


def eurovoc_classer(x):
    """Return the regional label for a country name.

    "China" is passed through unchanged. A country listed under one of the
    four European regions below yields that region's name. Any other value
    yields ``None``.
    """
    if x == 'China':
        return x

    members_by_region = {
        "Eastern Europe": ["Bulgaria", "Czech Republic", "Croatia", "Hungary",
                           "Poland", "Romania", "Slovakia", "Slovenia"],
        "Northern Europe": ["Denmark", "Estonia", "Finland", "Latvia",
                            "Lithuania", "Sweden", "Norway", "Iceland"],
        "Southern Europe": ["Cyprus", "Greece", "Italy", "Portugal", "Spain", "Malta"],
        "Western Europe": ["Austria", "Belgium", "France", "Germany", "Luxembourg",
                           "Netherlands", "Switzerland", "United Kingdom", "Ireland"],
    }
    # First region (in declaration order) whose member list contains x, else None.
    return next(
        (region for region, members in members_by_region.items() if x in members),
        None,
    )


# Country-level tables; wos_country_types is merged on "Country" and filtered on "Country_Type" below.
wos_country = pd.read_excel(f"../{outdir}/wos_countries.xlsx")
wos_country_types = pd.read_excel(f"../{outdir}/wos_country_types.xlsx")


# Add the regional label from eurovoc_classer ("China", a region name, or None for unlisted countries).
wos_country_types["Eurovoc_Class"] = wos_country_types["Country"].map(eurovoc_classer)
wos_country_types


# len(wos),len(wos_univ_locations)


# wos_addresses = pd.read_excel(f"/{outdir}/wos_addresses.xlsx")

# wos_affiliations = pd.read_excel(f"/{outdir}/wos_affiliations.xlsx")

# wos_author_locations = pd.read_excel(f"/{outdir}/wos_author_locations.xlsx")

# wos_univ_locations = pd.read_excel(f"/{outdir}/wos_univ_locations.xlsx")


# Column holding the record identifier; every count below is a nunique() over this column.
record_col = "UT (Unique WOS ID)"


# def nth_repl_all(s, sub="", repl="<br>", nth=2):
#     find = s.find(sub)
#     # loop util we find no match
#     i = 1
#     while find != -1:
#         # if i  is equal to nth we found nth matches so replace
#         if i == nth:
#             s = s[:find]+repl+s[find + len(sub):]
#             i = 0
#         # find + len(sub) + 1 means we start after the last match
#         find = s.find(sub, find + len(sub) + 1)
#         i += 1
#     return s.replace("<br>&","&<br")

def replace_nth(s, sub=" ", repl="<br>", n=2):
    """Replace every ``n``-th occurrence of ``sub`` in ``s`` with ``repl``.

    The string is split on ``sub``, the pieces are regrouped ``n`` at a time
    (re-joined with ``sub``), and the groups are joined with ``repl``. When a
    break lands directly in front of an ampersand, the ampersand is moved to
    the end of the preceding group (``"<br>&"`` becomes ``" &<br>"``).

    Args:
        s: Text to re-wrap.
        sub: Separator to split on (must be non-empty, as for ``str.split``).
        repl: Text inserted in place of every ``n``-th separator.
        n: Number of pieces per group.

    Returns:
        The re-wrapped string.

    Raises:
        ZeroDivisionError: If ``n`` is 0.
        ValueError: If ``sub`` is empty.
    """
    chunks = s.split(sub)
    # Ceiling division: number of groups needed to hold all pieces.
    rows = -(-len(chunks) // n)
    joined = repl.join(
        sub.join(chunks[row * n:(row + 1) * n]) for row in range(rows)
    )
    if not repl:
        return joined
    # Use the actual break marker (not a hard-coded "<br>") for the ampersand fix-up.
    return joined.replace(f"{repl}&", f" &{repl}")


# Unique records per (domain, field, subfield), largest first, plus each row's share of the total.
groups = ['Domain_English',"Field_English",'SubField_English']
data = wos.groupby(groups, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
data["percent"] = data[record_col]/data[record_col].sum()*100

# Append "<br>(total)" to every label, where total is the summed count of all rows
# sharing that label. transform("sum") keeps data's own (sorted) index, so each total
# lines up with its row; merging instead yields a fresh RangeIndex and misaligns them.
for c in groups:
    level_totals = data.groupby(c)[record_col].transform("sum")
    data[c] = data[c] + "<br>(" + level_totals.astype(str) + ")"
data


# Sunburst over the domain > field > subfield path, sized by the "percent" column.
fig = px.sunburst(data, path=groups, values="percent",
                  color='Domain_English',title="Distribution of topics<br>(METRIX classification)", template='plotly')
# Hover text: label plus value with two decimals and a percent sign.
fig.update_traces(hovertemplate='%{label}<br>%{value:.2f}%')
# Render without the plotly mode bar.
fig.show(config= dict(displayModeBar = False))


# Unique records per domain, largest first.
group = 'Domain_English'
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
data


# Horizontal seaborn bar chart of the domain counts.
g = sns.barplot(data, x=record_col, y=group)
# NOTE: the x-axis upper bound is hard-coded.
g.set_xlim(0,35000)
g.set_ylabel(None)
g.set_xlabel("Number of co-publications")
g.set_title("Distribution of Domains")
# Write the value next to every bar.
for i in g.containers:
    g.bar_label(i,fontsize=10)


# Plotly version of the domain bar chart, one colour per domain, legend hidden.
fig = px.bar(data, x=record_col, y=group, color=group,
                              labels={
                     record_col: 'Number of co-publications',
                     group: "",
                 },
                title="Distribution of Domains", template='plotly')
fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family="Montserrat")
fig.update_traces(hovertemplate='%{x:d}')
# Thin black frame around the whole plotting area (paper coordinates 0..1).
fig.add_shape(
        # Rectangle with reference to the plot
            type="rect",
            xref="paper",
            yref="paper",
            x0=0,
            y0=0,
            x1=1.0,
            y1=1.0,
            line=dict(
                color="black",
                 width=0.5,
             )
         )
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
fig.show(config= dict(displayModeBar = False))


# # define a function to divide each row's 'Count' by the value of the first year
# def divide_by_first_year(group):
#     group['relative_growth'] = group[record_col] / group.loc[group['Publication Year'] == group['Publication Year'].min(), record_col].values[0]
#     return group
#
#
#
# data = (wos.groupby(group)[record_col].nunique()
#         .unstack(fill_value=0).stack()
#         .reset_index()
#         .rename(columns={0:record_col})
#         .sort_values(ascending=False, by=group+[record_col]))
#
# # group by 'Topic'
# grouped = data.groupby('Domain_English')
# # apply the function to each group
# data = grouped.apply(divide_by_first_year).reset_index(drop=True)
# data['relative_growth'] = data['relative_growth']*100


# Unique records per (year, domain); unstack/fillna/stack inserts 0 for missing combinations.
group = ['Publication Year','Domain_English']
data = (wos.groupby(['Publication Year','Domain_English'])[record_col].nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# Baseline per domain: the earliest year with a non-zero count (sort by year, keep first row per domain).
# Its columns are joined back onto every row with the "_relative_growth" suffix.
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset='Domain_English'),
                  on='Domain_English', suffixes=[None,"_relative_growth"])
# Overwrite the baseline count with the percentage change relative to that baseline.
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data


# Yearly counts, one line per domain (rows sorted by domain name before plotting).
g=sns.lineplot(data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0], hue=group[-1], marker="o")
# NOTE: tick positions are hard-coded to every second year from 2012 to 2022.
g.set(xticks=list(range(2012,2022+1,2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications")

# Output: Text(0.5, 1.0, 'Yearly output of co-publications')


# Plotly line chart of the yearly counts per domain, rows ordered by year and then domain.
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col,x=group[0], color=group[-1], markers=True,                             labels={
                     record_col: 'Number of co-publications',
                     group[-1]: "Domain",
                 },
                title="Yearly output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:d}')
# One shared hover box per x position.
fig.update_layout(hovermode='x unified')
# Thin black frame around the whole plotting area (paper coordinates 0..1).
fig.add_shape(
        # Rectangle with reference to the plot
            type="rect",
            xref="paper",
            yref="paper",
            x0=0,
            y0=0,
            x1=1.0,
            y1=1.0,
            line=dict(
                color="black",
                 width=0.5,
             )
         )
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
fig.show(config= dict(displayModeBar = False))


# Same layout as the yearly-count chart, but plotting the percentage change against each domain's baseline.
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+"_relative_growth",x=group[0], color=group[-1], markers=True,                             labels={
                     record_col+"_relative_growth": 'Rel. growth<br>in co-publications (%)',
                     group[-1]: "Domain",
                 },
                title="Relative growth in the output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:.2f}%')

fig.update_layout(hovermode='x unified',yaxis_tickformat='d',font_family="Montserrat")
# Thin black frame around the whole plotting area (paper coordinates 0..1).
fig.add_shape(
        # Rectangle with reference to the plot
            type="rect",
            xref="paper",
            yref="paper",
            x0=0,
            y0=0,
            x1=1.0,
            y1=1.0,
            line=dict(
                color="black",
                 width=0.5,
             )
         )
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
# Draw the zero line so growth below the baseline is easy to spot.
fig.update_yaxes(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')
fig.show(config= dict(displayModeBar = False))


# Domain x year table of the counts in `data`; missing cells become 0.
pivot_data = pd.pivot_table(data, values=record_col, index=['Domain_English'],

                       columns=['Publication Year'], fill_value=0)
pivot_data


# Annotated heatmap of the table above; fmt="d" formats the annotations as integers.
f, ax = plt.subplots(figsize=(9, 6))
g = sns.heatmap(pivot_data, annot=True, fmt="d", linewidths=.5, ax=ax)
g.set(xlabel="", ylabel="")

# Output: [Text(0.5, 33.249999999999986, ''), Text(79.74999999999999, 0.5, '')]


import numpy as np
# Share (in %) of each domain within every publication year: counts are summed per
# (domain, year) cell and normalised column-wise. The aggregation is named by string,
# as in the other crosstab calls in this file, instead of passing the numpy callable.
percent_pivot = pd.crosstab(data['Domain_English'], data['Publication Year'], values=data[record_col], aggfunc='sum', normalize='columns')*100
percent_pivot


# Annotated heatmap of the yearly domain shares, without a colour bar.
f, ax = plt.subplots(figsize=(15, 6))
g = sns.heatmap(percent_pivot, annot=True, fmt='.2f', linewidths=.5, ax=ax, cbar=False)
# Append a percent sign to every cell annotation.
for t in ax.texts: t.set_text(t.get_text() + " %")
g.set(xlabel="", ylabel="")

# Output: [Text(0.5, 33.249999999999986, ''), Text(154.75, 0.5, '')]


# percent_pivot.T.plot(kind='bar',
#                     stacked=True,
#                     figsize=(10, 6))


# percent_pivot.T.plot(kind='bar',
#                         stacked=True,
#                         figsize=(15, 8))
#
# plt.legend(loc="lower left", ncol=2)
# # plt.ylabel("Release Year")
# # plt.xlabel("Proportion")
#
#
# for n, x in enumerate([*pivot_data.T.index.values]):
#     for (proportion, count, y_loc) in zip(percent_pivot.T.loc[x],
#                                           pivot_data.T.loc[x],
#                                           percent_pivot.T.loc[x].cumsum()):
#
#         plt.text(y=(y_loc - proportion) + (proportion / 2),
#                  x=n - 0.11,
#                  s=f'{count}',# ({np.round(proportion, 1)}%)',
#                  color="black",
#                  fontsize=8,
#                  fontweight="bold")
#
# plt.show()


# Unique records per (year, domain, field), sorted descending on all grouping columns and the count.
group = ['Publication Year',"Domain_English",'Field_English']
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
data


# Number of distinct domains (group[-2] is "Domain_English").
len(data[group[-2]].unique())

# Output: 6


# Accumulates the completed per-domain frames built in the loop.
data_complete = pd.DataFrame()

# One separate figure per domain, one line per field.
for cat in sorted(data[group[-2]].unique()):
    #data segment
    sub_data = data[data[group[-2]]==cat]
    # `.complete` is provided by pyjanitor (imported as `janitor`); presumably it adds the
    # missing year x field combinations over the full year range, filled with 0 - confirm.
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    data_complete = pd.concat([data_complete,sub_data], ignore_index=True)
    #plot
    g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                   y=record_col,x=group[0], hue=group[-1], marker="o")
    # NOTE: tick positions are hard-coded to every second year from 2012 to 2022.
    g.set(xticks=list(range(2012,2022+1,2)))
    g.legend(title=None)
    g.set_title(cat)
    # Integer-only ticks on the count axis.
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
    plt.show()


# Same per-domain plots as the previous cell, drawn into one 3x2 grid of axes.
data_complete = pd.DataFrame()

# Creating subplot axes
fig, axes = plt.subplots(nrows=3,ncols=2,figsize=(15, 15))

# zip pairs each domain with one axis; it stops at the shorter of the two sequences.
for cat,ax in zip(sorted(data[group[-2]].unique()),axes.flatten()):
    #data segment
    sub_data = data[data[group[-2]]==cat]
    # `.complete` is provided by pyjanitor; presumably it fills the missing
    # year x field combinations with 0 - confirm.
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    data_complete = pd.concat([data_complete,sub_data], ignore_index=True)
    #plot
    g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                   y=record_col,x=group[0], hue=group[-1], marker="o", ax=ax)
    g.set(xticks=list(range(2012,2022+1,2)))
    g.legend(title=None)
    g.set_title(cat)
    g.set_xlabel(None)
    g.set_ylabel(None)
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
fig.suptitle("Number of co-publications in domains and respective fields", y=0.92)
plt.show()


# Unique records per (year, domain, field, subfield), sorted descending on all grouping columns and the count.
group = ['Publication Year',"Domain_English",'Field_English',"SubField_English"]
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
data


# One figure per field (group[-2]), one line per subfield (group[-1]).
for cat in sorted(data[group[-2]].unique()):
    sub_data = data[data[group[-2]]==cat]
    # `.complete` is provided by pyjanitor; presumably it fills the missing
    # year x subfield combinations with 0 - confirm.
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0],
                   hue=group[-1], marker="o", errorbar=None)
    # NOTE: tick positions are hard-coded to every second year from 2012 to 2022.
    g.set(xticks=list(range(2012,2022+1,2)))
    # Legend sits outside the axes; one extra legend column per 12 entries.
    g.legend(title=None,bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncols=math.ceil(len(g.legend_.texts)/12))
    # Title typo fixed ("Number or" -> "Number of").
    g.set_title(f'Number of co-publications in {cat}')
    g.set_ylabel(None)
    plt.show()


from  matplotlib.ticker import FuncFormatter
import math
def orderOfMagnitude(number):
    """Return the base-10 order of magnitude of a positive number.

    This is the integer ``k`` with ``10**k <= number < 10**(k + 1)``.
    ``math.log10`` is used rather than ``math.log(number, 10)`` because the
    two-argument form is inexact on powers of ten (``math.log(1000, 10)`` is
    ``2.9999999999999996``), which makes the floor come out one too low.

    Raises:
        ValueError: If ``number`` is zero or negative.
    """
    return math.floor(math.log10(number))

def roundToNearest(number):
    """Round ``number`` up to the next multiple of its own power of ten.

    The power of ten comes from ``orderOfMagnitude``; the result is the
    smallest multiple of ``10**order`` that is not below ``number``.
    """
    scale = 10**orderOfMagnitude(number)
    return math.ceil(number/scale)*scale


# Institution rows enriched with country type and region (default inner merge on "Country").
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
wos_univ_locations.sample(100)


# Distinct (record, country) pairs, excluding countries whose type is "Other".
wos_collabs = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country"]].drop_duplicates()


# Number of records per non-Chinese country.
# NOTE(review): the "count" column name depends on the installed pandas version's value_counts() naming - confirm.
collab_desc = wos_collabs[wos_collabs["Country"]!="China"]["Country"].value_counts().reset_index()
# Share relative to the number of distinct records in wos_collabs.
collab_desc["percent_of_copubs"] = collab_desc["count"]/wos_collabs[record_col].nunique()*100
# Share relative to the number of (record, country) rows in wos_collabs (China rows included).
collab_desc["percent_contrib_in_copubs"] = collab_desc["count"]/wos_collabs[record_col].size*100
collab_desc = collab_desc.merge(wos_country_types, on="Country")
collab_desc

# Metric column -> axis label / title used for its chart.
c_dict = {"count":"Number of co-publications",
          "percent_of_copubs":"Percent of co-publications",
          "percent_contrib_in_copubs":"Contribution to co-publications"}


# One horizontal bar chart per metric, bars coloured by country type.
for c, axis_label in c_dict.items():
    is_percent = 'percent' in c
    data = collab_desc[["Country",c,"Country_Type"]]
    plt.figure(figsize=(9,12))
    g = sns.barplot(data, x=c, y="Country", hue="Country_Type", dodge=False)
    # Upper x limit: the maximum rounded up to a multiple of its power of ten.
    g.set_xlim(0,roundToNearest(data[c].max()))
    g.set_ylabel(None)
    g.set_xlabel(axis_label)
    g.set_title(axis_label)
    g.legend(title=None, loc="right")
    for i in g.containers:
        g.bar_label(i,fontsize=10, fmt='%.1f%%' if is_percent else '%.0f')
    if is_percent:
        g.xaxis.set_major_locator(MaxNLocator(integer=True))
        # A formatter keeps the labels tied to the locator. Setting fixed tick labels
        # on a non-fixed locator raises matplotlib's "FixedFormatter should only be
        # used together with FixedLocator" warning.
        g.xaxis.set_major_formatter(FuncFormatter(lambda val, _pos: f"{int(val)}%"))
    plt.show()

# Output (warning printed twice by the plotting cell above):
# C:\Users\radvanyi\AppData\Local\Temp\ipykernel_30956\556627507.py:29: UserWarning:
#
# FixedFormatter should only be used together with FixedLocator
#
# C:\Users\radvanyi\AppData\Local\Temp\ipykernel_30956\556627507.py:29: UserWarning:
#
# FixedFormatter should only be used together with FixedLocator


# Distinct (record, country) pairs for countries whose type is neither "Other" nor "China".
wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
# Self-merge on the record ID: every pair of countries (Country_x, Country_y) sharing a record,
# including each country paired with itself.
wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
# Country x country table of distinct shared records, normalised over the whole table.
EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique', normalize='all').fillna(0)

# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))

# Draw the heatmap with the mask and correct aspect ratio
g = sns.heatmap(EU_co_occur, mask=mask,
            square=True, linewidths=.5)

g.set_ylabel(None)
g.set_xlabel(None)

# Output: Text(0.5, 71.74999999999994, '')


# Rebuild the country-pair table, this time keeping absolute counts (no normalisation).
wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
wos_collabs_EU
EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)


# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
# Blank out the upper triangle and the diagonal with None; the lower triangle keeps the counts.
data = np.where(mask,None,EU_co_occur)
EU_co_occur.columns

# Output:
# Index(['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic',
#        'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece',
#        'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg',
#        'Malta', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania',
#        'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland',
#        'United Kingdom'],
#       dtype='object', name='Country_y')


# Interactive heatmap of the masked country-pair counts, axes labelled with the country names.
fig = px.imshow(data,
                labels=dict(x="Country (x)", y="Country (y)", color="Co-publication"),
                x=list(EU_co_occur.columns),
                y=list(EU_co_occur.index), title="Intraeuropean patterns"
               )
fig.update_layout(title_x=0.5,
                   width=1000, height=1000,
                   xaxis_showgrid=False,
                   yaxis_showgrid=False,
                   yaxis_autorange='reversed', template='plotly_white')
# Vertical x tick labels.
fig.update_xaxes(tickangle= -90)
fig.update_yaxes(
    ticks="outside")
fig.update_xaxes(
    ticks="outside")
fig.show(config= dict(displayModeBar = False))


# Non-Chinese (record, country) pairs, enriched with country type/region and the publication year.
collab_year = wos_collabs[wos_collabs["Country"]!="China"].copy()
collab_year = collab_year.merge(wos_country_types, on="Country").merge(wos[[record_col,"Publication Year"]],on=record_col).drop_duplicates()
# Distinct records per (year, country type).
data = collab_year.groupby(["Publication Year",'Country_Type'],as_index=False)[record_col].nunique()


# Yearly counts, one line per country type.
g=sns.lineplot(data,y=record_col,x="Publication Year", hue="Country_Type", marker="o")
# NOTE: tick positions are hard-coded to every second year from 2012 to 2022.
g.set(xticks=list(range(2012,2022+1,2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications with China")

# Output: Text(0.5, 1.0, 'Yearly output of co-publications with China')


# Distinct records per (year, country); unstack/fillna/stack inserts 0 for missing combinations.
data = (collab_year.groupby(['Publication Year',"Country"])[record_col]
        .nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# Baseline per country: the earliest year with a non-zero count, joined back with the "_relative_growth" suffix.
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset="Country"),
                  on=["Country"], suffixes=[None,"_relative_growth"])
# Overwrite the baseline count with the percentage change relative to that baseline.
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data


# Animated map of Europe (one frame per year), coloured by the yearly count.
# The colour range is fixed to the overall min/max so frames are comparable.
data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
fig = px.choropleth(data, locations="ISO3", color=record_col, hover_name="Country",
                    animation_frame='Publication Year', scope="europe", template='plotly', range_color=[data[record_col].min(),data[record_col].max()])
fig.show()


# Same map, coloured by the relative growth column instead (ISO3 is recomputed, same values).
data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
fig = px.choropleth(data, locations="ISO3", color=record_col+"_relative_growth", hover_name="Country",
                    animation_frame='Publication Year', scope="europe", template='plotly',
                    range_color=[data[record_col+"_relative_growth"].min(),data[record_col+"_relative_growth"].max()])
fig.show()


# Add country type and region to the per-country yearly table ("Eurovoc_Class" is used for colouring below).
data = data.merge(wos_country_types, on='Country')
data


# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col,x='Publication Year', color="Eurovoc_Class",line_group="Country", markers=True,
#               labels={
#                      record_col: 'Number of co-publications',
#                   "Eurovoc_Class": "Region"
#                  },
#                 title="Yearly output of co-publications", template='plotly',hover_name= "Country")
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
# # fig.update_layout(hovermode='x unified')
# fig.add_shape(
#         # Rectangle with reference to the plot
#             type="rect",
#             xref="paper",
#             yref="paper",
#             x0=0,
#             y0=0,
#             x1=1.0,
#             y1=1.0,
#             line=dict(
#                 color="black",
#                  width=0.5,
#              )
#          )
# fig.update_yaxes(
#     showgrid=True,
#     ticks="outside")
# fig.update_xaxes(
#     showgrid=True,
#     ticks="outside")
# fig.show(config= dict(displayModeBar = False))


# fig.data[0].hovertemplate

# Output: '<b>%{hovertext}</b><br><br>Eurovoc_Class=Western Europe<br>Country=Austria<br>Publication Year=%{x}<br>Number of co-publications=%{y}<extra></extra>'


# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
#               y=record_col+"_relative_growth",
#               x='Publication Year',
#               color="Eurovoc_Class",line_group="Country",markers=True,
#               labels={
#                      record_col+"_relative_growth": 'Relative growth of co-publications (%)',"Eurovoc_Class": "Region"
#                  },
#                 title="Relative growth of co-publications<br>(baseline: 2011)", template='plotly',hover_name= "Country")
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')
# fig.add_shape(
#         # Rectangle with reference to the plot
#             type="rect",
#             xref="paper",
#             yref="paper",
#             x0=0,
#             y0=0,
#             x1=1.0,
#             y1=1.0,
#             line=dict(
#                 color="black",
#                  width=0.5,
#              )
#          )
# fig.update_yaxes(
#     showgrid=True,
#     ticks="outside")
# fig.update_xaxes(
#     showgrid=True,
#     ticks="outside")
# fig.show(config= dict(displayModeBar = False))


from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Two side-by-side panels: absolute yearly counts (left) and relative growth (right).
figsuper = make_subplots(rows=1, cols=2, subplot_titles=["Yearly output of co-publications","Relative growth of co-publications<br>(baseline: 2011)"])

# Left panel source: one line per country, coloured by region.
fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
              y=record_col,
              x='Publication Year',
              color="Eurovoc_Class",
              line_group="Country",
              markers=True,
              labels={
                     record_col: 'Number of co-publications',
                  "Eurovoc_Class": "Region"
                 },
                title="Yearly output of co-publications",hover_name= "Country")
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')

# Copy the traces (only the traces) into the left subplot.
for trace in list(fig.select_traces()):
    figsuper.add_trace(trace,
        row=1, col=1
    )

# Right panel source: same layout, plotting the relative growth column.
fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
              y=record_col+"_relative_growth",
              x='Publication Year',
              color="Eurovoc_Class",line_group="Country",markers=True,
              labels={
                     record_col+"_relative_growth": 'Relative growth of co-publications (%)',"Eurovoc_Class": "Region"
                 },
                title="Relative growth of co-publications<br>(baseline: 2011)", template='plotly',hover_name= "Country")
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')
# NOTE(review): this frame is added to `fig`, but only fig's traces are copied into
# `figsuper` below, so the shape does not appear in the figure that is shown.
fig.add_shape(
        # Rectangle with reference to the plot
            type="rect",
            xref="paper",
            yref="paper",
            x0=0,
            y0=0,
            x1=1.0,
            y1=1.0,
            line=dict(
                color="black",
                 width=0.5,
             )
         )

# Copy the traces into the right subplot; legend entries are hidden here to avoid duplicates.
for trace in list(fig.select_traces()):
    trace.showlegend=False
    # NOTE(review): the bare `trace` expression on the next line has no effect.
    trace
    figsuper.add_trace(trace,
        row=1, col=2
    )

# Axis lines mirrored on all four sides draw a frame around each panel.
figsuper.update_yaxes(
    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="outside")
figsuper.update_xaxes(
    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="outside")
figsuper.update_layout({'template':"plotly"})
figsuper.show(config= dict(displayModeBar = False))


# Country x year table of distinct records; missing cells become 0.
year_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique').fillna(0).astype(int)
year_pivot


f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_pivot, annot=True, fmt="d", linewidths=.5, ax=ax)
g.set(xlabel="", ylabel="")
# Thick white horizontal lines at every row boundary visually separate the country rows.
for i in range(year_pivot.shape[0]+1):
    ax.axhline(i, color='white', lw=10)


# Same country x year table, normalised per year column and expressed in percent.
year_percent_pivot = pd.crosstab(collab_year['Country'], collab_year['Publication Year'], values=collab_year[record_col], aggfunc='nunique', normalize='columns').fillna(0)*100
year_percent_pivot


f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_percent_pivot, annot=True, fmt='.1f', linewidths=(.5), ax=ax, cbar=False)
# Append a percent sign to every cell annotation.
for t in ax.texts: t.set_text(t.get_text() + " %")
g.set(xlabel="", ylabel="")
# Thick white vertical lines at every column boundary visually separate the year columns.
for i in range(year_percent_pivot.shape[1]+1):
    ax.axvline(i, color='white', lw=10)


# Institutional collab


# Institution rows with country type/region; distinct rows for countries whose type is not "Other".
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
wos_univ_collabs = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country","Institution_harm","Country_Type","Eurovoc_Class"]].drop_duplicates()
# Display label: harmonised institution name followed by the ISO3 country code in parentheses.
wos_univ_collabs["ISO3"] = cc.pandas_convert(series=wos_univ_collabs["Country"], to='ISO3')
wos_univ_collabs["Institution_harm_label"] = wos_univ_collabs["Institution_harm"] + " ("+wos_univ_collabs["ISO3"]+ ")"
wos_univ_collabs.sample(100)


# Fixed bar colours per country type, passed to px.bar below.
color_discrete_map= {'China': '#EF553B',
                                      'EU': '#636EFA',
                                      'Non-EU associate': '#00CC96'}


# Number of institutions kept in each "most visible" ranking.
TOPN = 25


# Split institution rows into Chinese and non-Chinese country types.
wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"]=="China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"]!="China"]

# Only rows whose country type is exactly "EU".
wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs["Country_Type"]=="EU"]

# Top-TOPN institutions by distinct records; this frame is re-sorted by country type afterwards.
data_eu = (wos_univ_eu.groupby(["Country","Institution_harm_label","Country_Type"], as_index=False)[record_col].nunique()
           .sort_values(by=record_col,ascending=False).head(TOPN).copy()).sort_values(by="Country_Type")

# Top-TOPN institutions among "EU" rows, carrying the region label.
data_eu_strict = (wos_univ_eu_strict.groupby(["Country","Institution_harm_label","Eurovoc_Class"], as_index=False)[record_col].nunique()
           .sort_values(by=record_col,ascending=False).head(TOPN).copy())

# Top-TOPN Chinese institutions (keyed by the plain harmonised name, without the ISO3 suffix).
data_ch = (wos_univ_ch.groupby(["Country","Institution_harm","Country_Type"], as_index=False)[record_col].nunique()
           .sort_values(by=record_col,ascending=False).head(TOPN).copy())


# One bar chart per ranking. The parallel lists supply, per chart: the frame, the scope text
# for the title, the y column, the colour column and the optional pattern column.
for data,c_scope, y_lab, col_by, pat in zip([data_eu,data_eu_strict,data_ch],
                        ["European countries in scope","EU-28 only","China"],
                        ["Institution_harm_label","Institution_harm_label","Institution_harm"],
                        ["Country","Eurovoc_Class","Country_Type"],
                                       ["Country_Type",None,None]):
    fig = px.bar(data, x=record_col, y=y_lab, color=col_by, color_discrete_map=color_discrete_map,pattern_shape=pat,
                              labels={
                     record_col: 'Number of co-publications',
                     "Institution_harm": "Institution",
                                  "Institution_harm_label": "Institution",
                                  "Country_Type":"Country type",
                                  "Eurovoc_Class":"Region"
                 },
                title=f"Most visible institutions (top {TOPN} within {c_scope})", template='plotly')
    # 'total ascending' orders the institution axis by bar total.
    fig.update_layout(xaxis_tickformat='d',font_family="Montserrat",yaxis={'categoryorder':'total ascending'},
                                         width=1000, height=1000,)
    fig.update_traces(hovertemplate='%{x:d}')
    # Thin black frame around the whole plotting area (paper coordinates 0..1).
    fig.add_shape(
            # Rectangle with reference to the plot
                type="rect",
                xref="paper",
                yref="paper",
                x0=0,
                y0=0,
                x1=1.0,
                y1=1.0,
                line=dict(
                    color="black",
                     width=0.5,
                 )
             )
    fig.update_yaxes(
        showgrid=True,
        ticks="outside")
    fig.update_xaxes(
        showgrid=True,
        ticks="outside")
    fig.show(config= dict(displayModeBar = False))


# Spot check of the name harmonisation: distinct records per (raw name, harmonised name),
# then the raw names that map to the harmonised name "Chinese Acad Sci".
wos_univ_test = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country","Institution","Institution_harm","Country_Type"]].drop_duplicates()
www = wos_univ_test.groupby(["Institution","Institution_harm"], as_index=False)[record_col].nunique()
www[www["Institution_harm"]=="Chinese Acad Sci"]


wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"]=="China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"]!="China"]

# Pair every non-Chinese institution row with every Chinese institution row on the same record
# (columns suffixed _eu / _ch), then attach the record's domain, field and subfield.
wos_univ_dipol = wos_univ_eu.merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch')).merge(wos[[record_col,"Domain_English","Field_English","SubField_English"]], on =record_col)
wos_univ_dipol.sample(100)


# Parallel-categories diagram: non-Chinese country -> domain -> Chinese-side country.
fig = px.parallel_categories(wos_univ_dipol[["Country_eu","Domain_English","Country_ch"]])
fig.show()


# Inspect the column names of the Chinese top-institution table.
data_ch.columns

# Output: Index(['Country', 'Institution_harm', 'Country_Type', 'UT (Unique WOS ID)'], dtype='object')


# Keep only pairs where both sides are in the respective top-TOPN rankings (data_eu and data_ch).
subfilter = ((wos_univ_dipol["Institution_harm_label_eu"].isin(data_eu["Institution_harm_label"]))&
             (wos_univ_dipol["Institution_harm_ch"].isin(data_ch["Institution_harm"])))

# Country-level flow for the filtered pairs.
fig = px.parallel_categories(wos_univ_dipol[subfilter][["Country_eu","Domain_English","Country_ch"]])
fig.show()


# Same filter recomputed (identical expression), now shown at institution level.
subfilter = ((wos_univ_dipol["Institution_harm_label_eu"].isin(data_eu["Institution_harm_label"]))&
             (wos_univ_dipol["Institution_harm_ch"].isin(data_ch["Institution_harm"])))

fig = px.parallel_categories(wos_univ_dipol[subfilter][["Country_eu","Institution_harm_eu","Domain_English","Institution_harm_ch"]])
fig.show()


# Pairs restricted by the current `subfilter` (top institutions on both sides).
sub_df =wos_univ_dipol[subfilter]

# Rows: non-Chinese institution labels; columns: Chinese institutions; cells: distinct shared records.
inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],
                            values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)
inst_co_occur

# The table is rectangular and not symmetric, so no triangular mask is applied: the
# full matrix is plotted. The previous np.where had identical branches and did nothing.
data = inst_co_occur.to_numpy()

# "CN" rather than "CH" for the Chinese axis: CH is Switzerland's country code and
# Switzerland is among the partner countries.
fig = px.imshow(data,
                labels=dict(x="Institute (CN)", y="Institute (EU)", color="Co-publication"),
                x=list(inst_co_occur.columns),
                y=list(inst_co_occur.index), title=f"Most visible institutions (top {TOPN} within Europe)"
               )
fig.update_layout(title_x=0.5,
                   width=1000, height=1000,
                   xaxis_showgrid=False,
                   yaxis_showgrid=False,
                   yaxis_autorange='reversed', template='plotly_white')
fig.update_xaxes(tickangle= -90)
fig.update_yaxes(
    ticks="outside")
fig.update_xaxes(
    ticks="outside")
fig.show(config= dict(displayModeBar = False))


# Same pair filter as before, but the non-Chinese side is limited to the "EU"-only ranking (data_eu_strict).
subfilter = ((wos_univ_dipol["Institution_harm_label_eu"].isin(data_eu_strict["Institution_harm_label"]))&
             (wos_univ_dipol["Institution_harm_ch"].isin(data_ch["Institution_harm"])))

fig = px.parallel_categories(wos_univ_dipol[subfilter][["Country_eu","Institution_harm_eu","Domain_English","Institution_harm_ch"]])
fig.show()


# Pairs restricted by the current `subfilter` (top institutions on both sides).
sub_df =wos_univ_dipol[subfilter]

# Rows: non-Chinese institution labels; columns: Chinese institutions; cells: distinct shared records.
inst_co_occur = pd.crosstab(sub_df['Institution_harm_label_eu'], sub_df['Institution_harm_ch'],
                            values=sub_df[record_col], aggfunc='nunique').fillna(0).astype(int)
inst_co_occur

# The table is rectangular and not symmetric, so no triangular mask is applied: the
# full matrix is plotted. The previous np.where had identical branches and did nothing.
data = inst_co_occur.to_numpy()

# The title names the EU-only scope, matching the bar chart built from data_eu_strict.
# "CN" rather than "CH" for the Chinese axis: CH is Switzerland's country code.
fig = px.imshow(data,
                labels=dict(x="Institute (CN)", y="Institute (EU)", color="Co-publication"),
                x=list(inst_co_occur.columns),
                y=list(inst_co_occur.index), title=f"Most visible institutions (top {TOPN} within EU-28 only)"
               )
fig.update_layout(title_x=0.5,
                   width=1000, height=1000,
                   xaxis_showgrid=False,
                   yaxis_showgrid=False,
                   yaxis_autorange='reversed', template='plotly_white')
fig.update_xaxes(tickangle= -90)
fig.update_yaxes(
    ticks="outside")
fig.update_xaxes(
    ticks="outside")
fig.show(config= dict(displayModeBar = False))


import dash_bio

	Domain_English	Field_English	SubField_English	UT (Unique WOS ID)	percent
37	Applied Sciences<br>(29985)	Information & Communication Technologies<br>(1...	Artificial Intelligence & Image Processing<br>...	7915	17.184108
44	Applied Sciences<br>(29985)	Information & Communication Technologies<br>(2...	Networking & Telecommunications<br>(303)	5360	11.636995
32	Applied Sciences<br>(29985)	Engineering<br>(3940)	Geological & Geomatics Engineering<br>(436)	2576	5.592705
33	Applied Sciences<br>(29985)	Engineering<br>(1226)	Industrial Engineering & Automation<br>(425)	2316	5.028224
15	Applied Sciences<br>(29985)	Enabling & Strategic Technologies<br>(9232)	Energy<br>(598)	1965	4.266175
...	...	...	...	...	...
11	Applied Sciences<br>(29985)	Economics & Business <br>(9232)	Business & Management<br>(792)	1	0.002171
46	Applied Sciences<br>(29985)	Social Sciences<br>(2032)	Anthropology<br>(285)	1	0.002171
54	Arts & Humanities<br>(8457)	Philosophy & Theology<br>(3385)	Philosophy<br>(208)	1	0.002171
52	Arts & Humanities<br>(8457)	Historical Studies<br>(3385)	History of Social Sciences<br>(211)	1	0.002171
129	Health Sciences<br>(5341)	Psychology & Cognitive Sciences<br>(1067)	General Psychology & Cognitive Sciences<br>(19)	1	0.002171

	Domain_English	UT (Unique WOS ID)
0	Applied Sciences	29985
5	Natural Sciences	8457
3	Health Sciences	5341
2	Economic & Social Sciences	1360
4	Multidisciplinary	847
1	Arts & Humanities	70

	Publication Year	Domain_English	UT (Unique WOS ID)	Publication Year_relative_growth	UT (Unique WOS ID)_relative_growth
0	2011	Applied Sciences	490.0	2011	0.000000
1	2012	Applied Sciences	593.0	2011	21.020408
2	2013	Applied Sciences	738.0	2011	50.612245
3	2014	Applied Sciences	1031.0	2011	110.408163
4	2015	Applied Sciences	1201.0	2011	145.102041
...	...	...	...	...	...
67	2018	Natural Sciences	753.0	2011	316.022099
68	2019	Natural Sciences	999.0	2011	451.933702
69	2020	Natural Sciences	1232.0	2011	580.662983
70	2021	Natural Sciences	1403.0	2011	675.138122
71	2022	Natural Sciences	1665.0	2011	819.889503

Publication Year	2011	2012	2013	2014	2015	2016	2017	2018	2019	2020	2021	2022
Domain_English
Applied Sciences	490	593	738	1031	1201	1535	1920	2808	3729	4446	5295	6199
Arts & Humanities	0	0	0	4	1	3	7	4	11	11	16	13
Economic & Social Sciences	20	22	29	28	34	40	84	105	160	211	252	375
Health Sciences	116	120	155	184	216	243	321	403	611	755	1035	1182
Multidisciplinary	15	21	43	52	57	64	75	76	83	97	115	149
Natural Sciences	181	223	298	318	380	437	568	753	999	1232	1403	1665

Publication Year	2011	2012	2013	2014	2015	2016	2017	2018	2019	2020	2021	2022
Domain_English
Applied Sciences	59.610706	60.572012	58.432304	63.760049	63.578613	66.106804	64.537815	67.678959	66.672626	65.847156	65.241498	64.687467
Arts & Humanities	0.000000	0.000000	0.000000	0.247372	0.052938	0.129199	0.235294	0.096409	0.196674	0.162915	0.197141	0.135657
Economic & Social Sciences	2.433090	2.247191	2.296120	1.731602	1.799894	1.722653	2.823529	2.530730	2.860719	3.125000	3.104978	3.913180
Health Sciences	14.111922	12.257406	12.272367	11.379097	11.434621	10.465116	10.789916	9.713184	10.924370	11.181872	12.752587	12.334342
Multidisciplinary	1.824818	2.145046	3.404592	3.215832	3.017470	2.756245	2.521008	1.831767	1.483998	1.436611	1.416954	1.554837
Natural Sciences	22.019465	22.778345	23.594616	19.666048	20.116464	18.819983	19.092437	18.148952	17.861613	18.246445	17.286841	17.374517

Output - per year, by Metrix taxonomy¶

Domains¶

Field¶

SubField¶

Collabs¶

	Country	Country_Type	Eurovoc_Class
0	Belgium	EU	Western Europe
1	China	China	China
2	Luxembourg	EU	Western Europe
3	Netherlands	EU	Western Europe
4	Norway	Non-EU associate	Northern Europe
5	United Kingdom	Non-EU associate	Western Europe
6	France	EU	Western Europe
7	Sweden	EU	Northern Europe
8	Italy	EU	Southern Europe
9	Denmark	EU	Northern Europe
10	Germany	EU	Western Europe
11	Slovenia	EU	Eastern Europe
12	Estonia	EU	Northern Europe
13	Finland	EU	Northern Europe
14	Bulgaria	EU	Eastern Europe
15	Slovakia	EU	Eastern Europe
16	Spain	EU	Southern Europe
17	Poland	EU	Eastern Europe
18	Czech Republic	EU	Eastern Europe
19	Greece	EU	Southern Europe
20	Malta	EU	Southern Europe
21	Austria	EU	Western Europe
22	Switzerland	Non-EU associate	Western Europe
23	Ireland	EU	Western Europe
24	Portugal	EU	Southern Europe
25	Romania	EU	Eastern Europe
26	Hungary	EU	Eastern Europe
27	Cyprus	EU	Southern Europe
28	Croatia	EU	Eastern Europe
29	Lithuania	EU	Northern Europe
30	Latvia	EU	Northern Europe

	Publication Year	Domain_English	Field_English	UT (Unique WOS ID)
233	2022	Natural Sciences	Physics & Astronomy	596
232	2022	Natural Sciences	Mathematics & Statistics	228
231	2022	Natural Sciences	Earth & Environmental Sciences	409
230	2022	Natural Sciences	Chemistry	251
229	2022	Natural Sciences	Biology	181
...	...	...	...	...
4	2011	Applied Sciences	Information & Communication Technologies	256
3	2011	Applied Sciences	Engineering	166
2	2011	Applied Sciences	Enabling & Strategic Technologies	53
1	2011	Applied Sciences	Built Environment & Design	6
0	2011	Applied Sciences	Agriculture, Fisheries & Forestry	9

	Publication Year	Domain_English	Field_English	SubField_English	UT (Unique WOS ID)
1598	2022	Natural Sciences	Physics & Astronomy	Optics	134
1597	2022	Natural Sciences	Physics & Astronomy	Nuclear & Particle Physics	65
1596	2022	Natural Sciences	Physics & Astronomy	Mathematical Physics	10
1595	2022	Natural Sciences	Physics & Astronomy	General Physics	31
1594	2022	Natural Sciences	Physics & Astronomy	Fluids & Plasmas	79
...	...	...	...	...	...
4	2011	Applied Sciences	Agriculture, Fisheries & Forestry	Forestry	1
3	2011	Applied Sciences	Agriculture, Fisheries & Forestry	Food Science	1
2	2011	Applied Sciences	Agriculture, Fisheries & Forestry	Fisheries	2
1	2011	Applied Sciences	Agriculture, Fisheries & Forestry	Dairy & Animal Science	2
0	2011	Applied Sciences	Agriculture, Fisheries & Forestry	Agronomy & Agriculture	3

	UT (Unique WOS ID)	Institution	Country	Institution_harm	merge_iter	Country_Type
41191	WOS:000538161600016	Anhui Univ	China	Anhui Univ	0	China
175692	WOS:000709411500003	Univ Porto	Portugal	Univ Porto	0	EU
75198	WOS:000831217100027	Zhejiang Univ	China	Zhejiang Univ	0	China
48614	WOS:000597938400003	Shanghai Jiao Tong Univ	China	Shanghai Jiao Tong Univ	0	China
133670	WOS:000411824101159	Univ Pisa	Italy	Univ Pisa	0	EU
...	...	...	...	...	...	...
2892	WOS:000293708200019	Natl Univ Def Technol	China	Natl Univ Def Technol	0	China
125259	WOS:000663324800010	INRAE	France	INRAE	0	EU
55780	WOS:000659952900011	Huazhong Univ Sci & Technol	China	Huazhong Univ Sci & Technol	0	China
138600	WOS:000744399000001	Brignone Clin	Italy	Brignone Clin	0	EU
31040	WOS:000471758500010	Chinese Acad Sci	China	Chinese Acad Sci	0	China

	Publication Year	Country	UT (Unique WOS ID)	Publication Year_relative_growth	UT (Unique WOS ID)_relative_growth
0	2011	Austria	22.0	2011	0.000000
1	2012	Austria	24.0	2011	9.090909
2	2013	Austria	26.0	2011	18.181818
3	2014	Austria	39.0	2011	77.272727
4	2015	Austria	50.0	2011	127.272727
...	...	...	...	...	...
355	2018	United Kingdom	1837.0	2011	406.060606
356	2019	United Kingdom	2430.0	2011	569.421488
357	2020	United Kingdom	3108.0	2011	756.198347
358	2021	United Kingdom	3718.0	2011	924.242424
359	2022	United Kingdom	4245.0	2011	1069.421488

Publication Year	2011	2012	2013	2014	2015	2016	2017	2018	2019	2020	2021	2022
Country
Austria	22	24	26	39	50	57	72	89	138	137	185	205
Belgium	34	38	40	65	71	81	90	133	179	213	242	292
Bulgaria	4	5	8	9	7	19	21	18	10	25	32	19
Croatia	1	2	6	8	10	7	10	19	27	29	33	35
Cyprus	2	1	5	5	5	5	8	7	15	28	36	43
Czech Republic	13	15	16	21	20	36	37	56	64	81	93	123
Denmark	35	33	40	59	68	74	101	195	234	245	293	343
Estonia	3	3	7	10	12	10	15	15	16	38	45	39
Finland	31	35	44	82	100	125	126	198	241	256	289	380
France	117	130	174	231	269	325	348	491	648	691	807	858
Germany	123	172	192	273	310	365	456	604	801	907	1210	1386
Greece	15	18	19	32	35	50	47	81	114	122	139	181
Hungary	11	11	21	16	20	38	34	47	61	61	83	90
Ireland	13	16	22	31	27	45	66	72	84	116	167	187
Italy	51	70	84	116	178	187	247	325	441	571	641	811
Latvia	0	0	1	0	1	8	10	15	10	9	13	18
Lithuania	1	2	10	4	4	13	12	23	38	36	38	38
Luxembourg	2	3	3	1	8	9	13	15	18	22	35	51
Malta	1	0	0	0	1	1	0	0	6	2	7	10
Netherlands	72	64	77	103	139	166	220	297	408	470	529	655
Norway	30	42	60	76	67	88	104	134	222	253	304	311
Poland	17	31	37	57	73	82	98	110	138	181	276	353
Portugal	16	23	35	41	45	58	79	119	136	147	204	212
Romania	7	15	13	16	25	26	37	57	64	55	48	62
Slovakia	9	6	6	10	12	22	18	27	27	34	36	45
Slovenia	7	7	10	12	17	27	22	47	54	31	48	40
Spain	50	49	69	112	138	185	232	273	356	386	473	640
Sweden	34	50	59	83	113	170	233	232	385	359	428	510
Switzerland	37	50	54	74	74	95	155	195	233	263	349	447
United Kingdom	363	417	531	660	781	979	1350	1837	2430	3108	3718	4245

Publication Year	2011	2012	2013	2014	2015	2016	2017	2018	2019	2020	2021	2022
Country
Austria	1.962533	1.801802	1.557819	1.736420	1.865672	1.699970	1.689744	1.552958	1.816267	1.543488	1.712804	1.623248
Belgium	3.033006	2.852853	2.396645	2.894034	2.649254	2.415747	2.112180	2.320712	2.355883	2.399730	2.240533	2.312139
Bulgaria	0.356824	0.375375	0.479329	0.400712	0.261194	0.566657	0.492842	0.314081	0.131614	0.281658	0.296269	0.150447
Croatia	0.089206	0.150150	0.359497	0.356189	0.373134	0.208768	0.234687	0.331530	0.355357	0.326724	0.305527	0.277140
Cyprus	0.178412	0.075075	0.299581	0.222618	0.186567	0.149120	0.187749	0.122143	0.197420	0.315457	0.333302	0.340486
Czech Republic	1.159679	1.126126	0.958658	0.934996	0.746269	1.073665	0.868341	0.977142	0.842327	0.912573	0.861031	0.973949
Denmark	3.122212	2.477477	2.396645	2.626892	2.537313	2.206979	2.370336	3.402548	3.079758	2.760252	2.712712	2.715971
Estonia	0.267618	0.225225	0.419413	0.445236	0.447761	0.298240	0.352030	0.261734	0.210582	0.428121	0.416628	0.308813
Finland	2.765388	2.627628	2.636309	3.650935	3.731343	3.728005	2.957052	3.454894	3.171887	2.884182	2.675678	3.008948
France	10.437110	9.759760	10.425404	10.284951	10.037313	9.692812	8.167097	8.567440	8.528560	7.785038	7.471530	6.793887
Germany	10.972346	12.912913	11.503895	12.154942	11.567164	10.885774	10.701713	10.539173	10.542248	10.218567	11.202666	10.974741
Greece	1.338091	1.351351	1.138406	1.424755	1.305970	1.491202	1.103027	1.413366	1.500395	1.374493	1.286918	1.433209
Hungary	0.981267	0.825826	1.258238	0.712378	0.746269	1.133313	0.797935	0.820101	0.802843	0.687247	0.768447	0.712645
Ireland	1.159679	1.201201	1.318155	1.380232	1.007463	1.342082	1.548932	1.256325	1.105554	1.306895	1.546153	1.480719
Italy	4.549509	5.255255	5.032954	5.164737	6.641791	5.577095	5.796761	5.670913	5.804159	6.433078	5.934636	6.421728
Latvia	0.000000	0.000000	0.059916	0.000000	0.037313	0.238592	0.234687	0.261734	0.131614	0.101397	0.120359	0.142529
Lithuania	0.089206	0.150150	0.599161	0.178094	0.149254	0.387712	0.281624	0.401326	0.500132	0.405588	0.351819	0.300895
Luxembourg	0.178412	0.225225	0.179748	0.044524	0.298507	0.268416	0.305093	0.261734	0.236904	0.247859	0.324044	0.403832
Malta	0.089206	0.000000	0.000000	0.000000	0.037313	0.029824	0.000000	0.000000	0.078968	0.022533	0.064809	0.079183
Netherlands	6.422837	4.804805	4.613541	4.585931	5.186567	4.950790	5.163107	5.182342	5.369834	5.295178	4.897695	5.186476
Norway	2.676182	3.153153	3.594967	3.383793	2.500000	2.624515	2.440742	2.338161	2.921822	2.850383	2.814554	2.462586
Poland	1.516503	2.327327	2.216896	2.537845	2.723881	2.445571	2.299930	1.919386	1.816267	2.039207	2.555319	2.795154
Portugal	1.427297	1.726727	2.097064	1.825467	1.679104	1.729794	1.854025	2.076426	1.789945	1.656151	1.888714	1.678676
Romania	0.624442	1.126126	0.778910	0.712378	0.932836	0.775425	0.868341	0.994591	0.842327	0.619648	0.444403	0.490934
Slovakia	0.802855	0.450450	0.359497	0.445236	0.447761	0.656129	0.422436	0.471122	0.355357	0.383055	0.333302	0.356323
Slovenia	0.624442	0.525526	0.599161	0.534283	0.634328	0.805249	0.516311	0.820101	0.710713	0.349256	0.444403	0.316731
Spain	4.460303	3.678679	4.134212	4.986643	5.149254	5.517447	5.444731	4.763567	4.685444	4.348806	4.379224	5.067701
Sweden	3.033006	3.753754	3.535051	3.695459	4.216418	5.070086	5.468200	4.048159	5.067123	4.044615	3.962596	4.038324
Switzerland	3.300624	3.753754	3.235470	3.294746	2.761194	2.833284	3.637644	3.402548	3.066596	2.963046	3.231182	3.539473
United Kingdom	32.381802	31.306306	31.815458	29.385574	29.141791	29.197733	31.682704	32.053743	31.982101	35.015773	34.422739	33.613113

	UT (Unique WOS ID)	Country	Institution_harm	Country_Type	Eurovoc_Class	ISO3	Institution_harm_label
86064	WOS:000640648500012	Netherlands	Eindhoven Univ Technol	EU	Western Europe	NLD	Eindhoven Univ Technol (NLD)
115079	WOS:000798227800073	United Kingdom	Univ Leeds	Non-EU associate	Western Europe	GBR	Univ Leeds (GBR)
97887	WOS:000431633800004	United Kingdom	Francis Crick Inst	Non-EU associate	Western Europe	GBR	Francis Crick Inst (GBR)
147070	WOS:000460118200077	Germany	Johannes Gutenberg Univ Mainz	EU	Western Europe	DEU	Johannes Gutenberg Univ Mainz (DEU)
93724	WOS:000371153900007	United Kingdom	Royal Marsden Fdn Trust	Non-EU associate	Western Europe	GBR	Royal Marsden Fdn Trust (GBR)
...	...	...	...	...	...	...	...
75906	WOS:000838382400015	China	China Aerosp Sci & Ind Corp	China	China	CHN	China Aerosp Sci & Ind Corp (CHN)
153124	WOS:000802927600001	Germany	Rhein Westfal Aachen	EU	Western Europe	DEU	Rhein Westfal Aachen (DEU)
82197	WOS:000911585800012	China	Chinese Acad Med Sci	China	China	CHN	Chinese Acad Med Sci (CHN)
150080	WOS:000605979600009	Germany	Univ Med Ctr Goettingen	EU	Western Europe	DEU	Univ Med Ctr Goettingen (DEU)
112004	WOS:000717881300001	United Kingdom	Imperial Coll London	Non-EU associate	Western Europe	GBR	Imperial Coll London (GBR)

	Institution	Institution_harm	UT (Unique WOS ID)
16	Chinese Acad Sci	Chinese Acad Sci	1
3149	Chinese Acad Sci	Chinese Acad Sci	4614
3153	Chinese Acad Sci AIRCAS	Chinese Acad Sci	2
3155	Chinese Acad Sci CAREERI CAS	Chinese Acad Sci	1
3157	Chinese Acad Sci CASIA	Chinese Acad Sci	8
3159	Chinese Acad Sci GUCAS	Chinese Acad Sci	2
3160	Chinese Acad Sci IAP	Chinese Acad Sci	1
3161	Chinese Acad Sci IECAS	Chinese Acad Sci	2
3162	Chinese Acad Sci IME CAS	Chinese Acad Sci	1
3163	Chinese Acad Sci IMECAS	Chinese Acad Sci	1
3164	Chinese Acad Sci ITP CAS	Chinese Acad Sci	1
3166	Chinese Acad Sci NAOC	Chinese Acad Sci	1
3167	Chinese Acad Sci NAOC CAS	Chinese Acad Sci	2
13501	RCEES Chinese Acad Sci	Chinese Acad Sci	1
19499	ZIAT Chinese Acad Sci	Chinese Acad Sci	1

	UT (Unique WOS ID)	Country_eu	Institution_harm_eu	Country_Type_eu	Eurovoc_Class_eu	ISO3_eu	Institution_harm_label_eu	Country_ch	Institution_harm_ch	Country_Type_ch	Eurovoc_Class_ch	ISO3_ch	Institution_harm_label_ch	Domain_English	Field_English	SubField_English
329695	WOS:000702508000001	Greece	Sch Pedag & Technol Educ	EU	Southern Europe	GRC	Sch Pedag & Technol Educ (GRC)	China	Huanghuai Univ	China	China	CHN	Huanghuai Univ (CHN)	Applied Sciences	Engineering	Geological & Geomatics Engineering
103762	WOS:000696019400001	France	Inst Phys 2 Infinis Lyon	EU	Western Europe	FRA	Inst Phys 2 Infinis Lyon (FRA)	China	Fudan Univ	China	China	CHN	Fudan Univ (CHN)	Natural Sciences	Physics & Astronomy	Nuclear & Particle Physics
237699	WOS:000353892300005	France	SUBATECH	EU	Western Europe	FRA	SUBATECH (FRA)	China	Shanghai Inst Appl Phys	China	China	CHN	Shanghai Inst Appl Phys (CHN)	Natural Sciences	Physics & Astronomy	Nuclear & Particle Physics
123610	WOS:000571262000008	Luxembourg	Luxembourg Ctr Syst Biomed	EU	Western Europe	LUX	Luxembourg Ctr Syst Biomed (LUX)	China	Wuhan Univ Sci & Technol	China	China	CHN	Wuhan Univ Sci & Technol (CHN)	Applied Sciences	Information & Communication Technologies	Artificial Intelligence & Image Processing
77119	WOS:000494411700001	Germany	Tech Univ Berlin	EU	Western Europe	DEU	Tech Univ Berlin (DEU)	China	Dalian Univ Technol	China	China	CHN	Dalian Univ Technol (CHN)	Economic & Social Sciences	Social Sciences	Information & Library Sciences
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
317783	WOS:000747086300011	Germany	Tech Univ Dresden	EU	Western Europe	DEU	Tech Univ Dresden (DEU)	China	East China Univ Sci & Technol	China	China	CHN	East China Univ Sci & Technol (CHN)	Health Sciences	Biomedical Research	Developmental Biology
118549	WOS:000830403500012	Italy	Univ Firenze	EU	Southern Europe	ITA	Univ Firenze (ITA)	China	Peking Univ	China	China	CHN	Peking Univ (CHN)	Natural Sciences	Physics & Astronomy	Nuclear & Particle Physics
2175	WOS:000345858500052	Italy	Univ Genoa	EU	Southern Europe	ITA	Univ Genoa (ITA)	China	Peking Univ	China	China	CHN	Peking Univ (CHN)	Natural Sciences	Physics & Astronomy	Nuclear & Particle Physics
312657	WOS:000467489901126	Germany	Tech Univ Munich	EU	Western Europe	DEU	Tech Univ Munich (DEU)	China	Fudan Univ	China	China	CHN	Fudan Univ (CHN)	Health Sciences	Clinical Medicine	Nuclear Medicine & Medical Imaging
269060	WOS:000679252300003	United Kingdom	De Montfort Univ	Non-EU associate	Western Europe	GBR	De Montfort Univ (GBR)	China	Beijing Univ Technol	China	China	CHN	Beijing Univ Technol (CHN)	Applied Sciences	Information & Communication Technologies	Artificial Intelligence & Image Processing