import pandas as pd
import janitor
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MaxNLocator
import math
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
pyo.init_notebook_mode()
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"
import country_converter as coco
cc = coco.CountryConverter()
%matplotlib inline
# Seaborn palette
# sns.set_theme(context='notebook', style='ticks', palette='colorblind', font='sans-serif', font_scale=1, color_codes=True, rc=None)
# sns.palplot(sns.color_palette())
# Directory holding the pre-processed Web of Science exports produced upstream.
outdir="wos_processed_data"
# Publication-level records (presumably one row per paper/classification — confirm upstream).
wos = pd.read_excel(f"../{outdir}/wos_processed.xlsx")
# Harmonized institution -> country assignments per publication.
wos_univ = pd.read_excel(f"../{outdir}/wos_institution_locations_harmonized.xlsx")
def eurovoc_classer(x):
    """Map a country name to its EuroVoc European region.

    "China" is passed through unchanged; any country not present in the
    classification yields None (so pandas .map() produces NaN for it).
    """
    regions = {"Eastern Europe":["Bulgaria","Czech Republic","Croatia","Hungary","Poland","Romania","Slovakia","Slovenia"],
               "Northern Europe":["Denmark","Estonia","Finland","Latvia","Lithuania","Sweden","Norway","Iceland"],
               "Southern Europe":["Cyprus","Greece","Italy","Portugal","Spain","Malta"],
               "Western Europe":["Austria","Belgium","France","Germany","Luxembourg","Netherlands","Switzerland","United Kingdom","Ireland"]}
    if x == 'China':
        return x
    for region, members in regions.items():
        if x in members:
            return region
    return None
# Country-level lookup tables produced upstream.
wos_country = pd.read_excel(f"../{outdir}/wos_countries.xlsx")
wos_country_types = pd.read_excel(f"../{outdir}/wos_country_types.xlsx")
# Attach the EuroVoc region (or the literal "China"); countries outside the
# classification become NaN.
wos_country_types["Eurovoc_Class"] = wos_country_types["Country"].map(eurovoc_classer)
wos_country_types
Country | Country_Type | Eurovoc_Class | |
---|---|---|---|
0 | Belgium | EU | Western Europe |
1 | China | China | China |
2 | Luxembourg | EU | Western Europe |
3 | Netherlands | EU | Western Europe |
4 | Norway | Non-EU associate | Northern Europe |
5 | United Kingdom | Non-EU associate | Western Europe |
6 | France | EU | Western Europe |
7 | Sweden | EU | Northern Europe |
8 | Italy | EU | Southern Europe |
9 | Denmark | EU | Northern Europe |
10 | Germany | EU | Western Europe |
11 | Slovenia | EU | Eastern Europe |
12 | Estonia | EU | Northern Europe |
13 | Finland | EU | Northern Europe |
14 | Bulgaria | EU | Eastern Europe |
15 | Slovakia | EU | Eastern Europe |
16 | Spain | EU | Southern Europe |
17 | Poland | EU | Eastern Europe |
18 | Czech Republic | EU | Eastern Europe |
19 | Greece | EU | Southern Europe |
20 | Malta | EU | Southern Europe |
21 | Austria | EU | Western Europe |
22 | Switzerland | Non-EU associate | Western Europe |
23 | Ireland | EU | Western Europe |
24 | Portugal | EU | Southern Europe |
25 | Romania | EU | Eastern Europe |
26 | Hungary | EU | Eastern Europe |
27 | Cyprus | EU | Southern Europe |
28 | Croatia | EU | Eastern Europe |
29 | Lithuania | EU | Northern Europe |
30 | Latvia | EU | Northern Europe |
# Column uniquely identifying a WoS record; used for de-duplicated
# publication counts throughout the notebook.
record_col = "UT (Unique WOS ID)"
def replace_nth(s, sub=" ", repl="<br>", n=2):
    """Replace every *n*-th occurrence of *sub* in *s* with *repl*.

    Splits *s* on *sub*, regroups the chunks *n* at a time (rejoined with
    *sub* inside a group) and joins the groups with *repl*.  Used to wrap
    long plot labels onto multiple lines.  An ampersand that would start a
    new line is pulled back to the end of the previous one
    ("...<br>& x" -> "... &<br> x") so "&" never leads a line.

    Parameters:
        s: the string to re-wrap.
        sub: separator to split on (default: space).
        repl: separator inserted between groups (default: "<br>").
        n: group size, i.e. every n-th *sub* becomes *repl*.
    """
    chunks = s.split(sub)
    groups = [sub.join(chunks[i:i + n]) for i in range(0, len(chunks), n)]
    # Fix: the ampersand clean-up previously hard-coded "<br>" and " "
    # instead of using the repl/sub parameters; identical for the defaults.
    return repl.join(groups).replace(repl + "&", sub + "&" + repl)
# --- Sunburst: distribution of topics over the METRIX hierarchy ------------
groups = ['Domain_English',"Field_English",'SubField_English']
# Unique-publication count per (domain, field, subfield) leaf, largest first.
data = wos.groupby(groups, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
data["percent"] = data[record_col]/data[record_col].sum()*100
# data[groups] = data[groups].applymap(replace_nth)
# Append the aggregate publication count at each hierarchy level to its
# label, e.g. "Optics<br>(134)".
for c in ["Domain_English","Field_English","SubField_English"]:
    data[c] = data[c]+"<br>("+(pd.DataFrame(data[c],columns=[c]).merge(data.groupby(c,as_index=False)[record_col].sum(), on=c)[record_col]).astype(str)+")"
# data
fig = px.sunburst(data, path=groups, values="percent",
                  color='Domain_English',title="Distribution of topics<br>(METRIX classification)", template='plotly')
fig.update_traces(hovertemplate='%{label}<br>%{value:.2f}%')
metrix_distr = go.Figure(fig)
metrix_distr.show(config= dict(displayModeBar = False))
# --- Bar chart: number of unique co-publications per METRIX domain ---------
group = 'Domain_English'
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=record_col)
fig = px.bar(data.sort_values(by=group), x=record_col, y=group, color=group,barmode='relative',
             labels={
                 record_col: 'Number of co-publications',
                 group: "",
             },
             title="Distribution of Domains", template='plotly')
fig.update_layout(showlegend=False, xaxis_tickformat='d',font_family="Montserrat")
fig.update_traces(hovertemplate='%{x:d}')
# Thin black frame around the plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
# Longest bar on top.
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
dom_distr = go.Figure(fig)
dom_distr.show(config= dict(displayModeBar = False, responsive = True))
# --- Yearly counts per domain, plus relative-growth and cumulative series --
group = ['Publication Year','Domain_English']
# The unstack/fillna/stack round-trip inserts explicit zero rows for
# (year, domain) combinations that have no publications.
data = (wos.groupby(['Publication Year','Domain_English'])[record_col].nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# Baseline for relative growth: each domain's earliest year with a
# non-zero count (drop_duplicates keeps the first year after sorting).
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset='Domain_English'),
                  on='Domain_English', suffixes=[None,"_relative_growth"])
# Percent growth relative to that baseline year.
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data = data.sort_values(by =["Domain_English","Publication Year"], ascending=[True,True])
# Running total per domain (depends on the year ordering established above).
data[record_col+"_cumsum"] = (data.groupby('Domain_English',as_index=False)[record_col].cumsum())
# data
# --- Line chart: yearly output of co-publications per domain ---------------
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col,x=group[0], color=group[-1], markers=True, labels={
                  record_col: 'Number of co-publications',
                  group[-1]: "Domain",
              },
              title="Yearly output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:d}')
fig.update_layout(hovermode='x unified')
# Thin black frame around the plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
# Keep a handle for reuse in the composite figure below.
year_output_by_domain = go.Figure(fig)
year_output_by_domain.show(config= dict(displayModeBar = False))
# --- Line chart: growth relative to each domain's baseline year, in % ------
fig = px.line(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+"_relative_growth",x=group[0], color=group[-1], markers=True, labels={
                  record_col+"_relative_growth": 'Rel. growth<br>in co-publications (%)',
                  group[-1]: "Domain",
              },
              title="Relative growth in the output of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:.2f}%')
fig.update_layout(hovermode='x unified',yaxis_tickformat='d',font_family="Montserrat")
# Thin black frame around the plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
# Emphasize the 0% baseline.
fig.update_yaxes(zeroline=True, zerolinewidth=0.5, zerolinecolor='grey')
rel_output_by_domain = go.Figure(fig)
rel_output_by_domain.show(config= dict(displayModeBar = False))
# --- Stacked area chart: cumulative co-publication count per domain --------
fig = px.area(data.sort_values(ascending=[True,True], by=[group[0],group[-1]]),y=record_col+"_cumsum",x=group[0], color=group[-1],line_group=group[-1],
              labels={
                  record_col+"_cumsum": 'Cumulative number of co-publications',
                  group[-1]: "Domain",
              },
              title="Cumulative number of co-publications", template='plotly')
fig.update_traces(hovertemplate='%{y:d}')
fig.update_layout(hovermode='x unified')
# Thin black frame around the plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
fig.update_yaxes(
    showgrid=True,
    ticks="outside")
fig.update_xaxes(
    showgrid=True,
    ticks="outside")
cumsum_by_domain = go.Figure(fig)
cumsum_by_domain.show(config= dict(displayModeBar = False))
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# dom_distr
# year_output_by_domain
# rel_output_by_domain
# cumsum_by_domain
# --- 2x2 composite of the four domain-level figures built above ------------
figsuper = make_subplots(rows=2, cols=2, subplot_titles=["a","b",
                                                         "c","d"])
# Panel a: bar chart; legends suppressed on all panels except the area
# chart so the shared domain legend appears only once.
for trace in list(dom_distr.select_traces()):
    trace.showlegend=False
    # trace.barmode
    figsuper.add_trace(trace,
                       row=1, col=1
                       )
# Panel b: cumulative area chart (keeps its legend).
for trace in list(cumsum_by_domain.select_traces()):
    figsuper.add_trace(trace,
                       row=1, col=2
                       )
# Panel c: yearly output lines.
for trace in list(year_output_by_domain.select_traces()):
    trace.showlegend=False
    figsuper.add_trace(trace,
                       row=2, col=1
                       )
# Panel d: relative growth lines.
for trace in list(rel_output_by_domain.select_traces()):
    trace.showlegend=False
    figsuper.add_trace(trace,
                       row=2, col=2
                       )
# figsuper.update_layout(hovermode='x unified')
figsuper.update_layout(yaxis={'categoryorder':'total ascending'}, barmode='relative')
# Mirrored, framed axes on all panels.
figsuper.update_yaxes(
    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="outside")
figsuper.update_xaxes(
    showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
    ticks="outside")
figsuper.update_layout({'template':"plotly"})
figsuper.show(config= dict(displayModeBar = False, responsive = True))
# Wide year-by-domain count table (feeds the commented-out heatmap below).
pivot_data = pd.pivot_table(data, values=record_col, index=['Domain_English'],
                            columns=['Publication Year'], fill_value=0)
pivot_data
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Domain_English | ||||||||||||
Applied Sciences | 490 | 593 | 738 | 1031 | 1201 | 1535 | 1920 | 2808 | 3729 | 4446 | 5295 | 6199 |
Arts & Humanities | 0 | 0 | 0 | 4 | 1 | 3 | 7 | 4 | 11 | 11 | 16 | 13 |
Economic & Social Sciences | 20 | 22 | 29 | 28 | 34 | 40 | 84 | 105 | 160 | 211 | 252 | 375 |
Health Sciences | 116 | 120 | 155 | 184 | 216 | 243 | 321 | 403 | 611 | 755 | 1035 | 1182 |
Multidisciplinary | 15 | 21 | 43 | 52 | 57 | 64 | 75 | 76 | 83 | 97 | 115 | 149 |
Natural Sciences | 181 | 223 | 298 | 318 | 380 | 437 | 568 | 753 | 999 | 1232 | 1403 | 1665 |
# f, ax = plt.subplots(figsize=(9, 6))
# g = sns.heatmap(pivot_data, annot=True, fmt="d", linewidths=.5, ax=ax)
# g.set(xlabel="", ylabel="")
import numpy as np
# Share of each domain within each publication year (column-normalized, %).
# aggfunc="sum": the string alias is the supported spelling — passing the
# numpy callable (np.sum) is deprecated in modern pandas.
percent_pivot = pd.crosstab(data['Domain_English'], data['Publication Year'], values=data[record_col], aggfunc="sum", normalize='columns')*100
percent_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Domain_English | ||||||||||||
Applied Sciences | 59.610706 | 60.572012 | 58.432304 | 63.760049 | 63.578613 | 66.106804 | 64.537815 | 67.678959 | 66.672626 | 65.847156 | 65.241498 | 64.687467 |
Arts & Humanities | 0.000000 | 0.000000 | 0.000000 | 0.247372 | 0.052938 | 0.129199 | 0.235294 | 0.096409 | 0.196674 | 0.162915 | 0.197141 | 0.135657 |
Economic & Social Sciences | 2.433090 | 2.247191 | 2.296120 | 1.731602 | 1.799894 | 1.722653 | 2.823529 | 2.530730 | 2.860719 | 3.125000 | 3.104978 | 3.913180 |
Health Sciences | 14.111922 | 12.257406 | 12.272367 | 11.379097 | 11.434621 | 10.465116 | 10.789916 | 9.713184 | 10.924370 | 11.181872 | 12.752587 | 12.334342 |
Multidisciplinary | 1.824818 | 2.145046 | 3.404592 | 3.215832 | 3.017470 | 2.756245 | 2.521008 | 1.831767 | 1.483998 | 1.436611 | 1.416954 | 1.554837 |
Natural Sciences | 22.019465 | 22.778345 | 23.594616 | 19.666048 | 20.116464 | 18.819983 | 19.092437 | 18.148952 | 17.861613 | 18.246445 | 17.286841 | 17.374517 |
# f, ax = plt.subplots(figsize=(15, 6))
# # g = sns.heatmap(percent_pivot, annot=True, fmt='.2f', linewidths=.5, ax=ax, cbar=False)
# # for t in ax.texts: t.set_text(t.get_text() + " %")
# g.set(xlabel="", ylabel="")
# percent_pivot.T.plot(kind='bar',
# stacked=True,
# figsize=(10, 6))
# percent_pivot.T.plot(kind='bar',
# stacked=True,
# figsize=(15, 8))
#
# plt.legend(loc="lower left", ncol=2)
# # plt.ylabel("Release Year")
# # plt.xlabel("Proportion")
#
#
# for n, x in enumerate([*pivot_data.T.index.values]):
# for (proportion, count, y_loc) in zip(percent_pivot.T.loc[x],
# pivot_data.T.loc[x],
# percent_pivot.T.loc[x].cumsum()):
#
# plt.text(y=(y_loc - proportion) + (proportion / 2),
# x=n - 0.11,
# s=f'{count}',# ({np.round(proportion, 1)}%)',
# color="black",
# fontsize=8,
# fontweight="bold")
#
# plt.show()
# --- Field-level yearly series (fields nested within domains) --------------
group = ['Publication Year',"Domain_English",'Field_English']
# data = wos.groupby(['Publication Year',"Domain_English",'Field_English'], as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
# Zero-filled year x field counts (same unstack/stack trick as above).
data = (wos.groupby(['Publication Year','Field_English'],)[record_col].nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# Re-attach each field's parent domain.
data = data.merge(wos[["Domain_English",'Field_English']].drop_duplicates(),on="Field_English")
# Baseline year per field: first year with non-zero output.
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset='Field_English'),
                  on='Field_English', suffixes=[None,"_relative_growth"])
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data = data.sort_values(by =["Field_English","Publication Year"], ascending=[True,True])
# NOTE(review): the cumulative sum groups by Domain_English although the
# frame is sorted per field — the domain/country variants of this recipe
# group by the sorted key, so 'Field_English' was presumably intended here.
# Confirm before relying on the _cumsum column in this section.
data[record_col+"_cumsum"] = (data.groupby('Domain_English',as_index=False)[record_col].cumsum())
# Zero-fill missing (year, field) combinations per domain (pyjanitor .complete).
# NOTE(review): data_complete is rebuilt from scratch just below, so this
# loop's accumulated result is discarded.
data_complete = pd.DataFrame()
for cat in sorted(data[group[-2]].unique()):
    #data segment
    sub_data = data[data[group[-2]]==cat]
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    data_complete = pd.concat([data_complete,sub_data], ignore_index=True)
    # seaborn version plot
    # g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
    #                y=record_col,x=group[0], hue=group[-1], marker="o")
    # g.set(xticks=list(range(2012,2022+1,2)))
    # g.legend(title=None)
    # g.set_title(cat)
    # g.yaxis.set_major_locator(MaxNLocator(integer=True))
    # plt.show()
# --- Small multiples: one panel per domain, one line per field -------------
data_complete = pd.DataFrame()
# Creating subplot axes
fig, axes = plt.subplots(nrows=3,ncols=2,figsize=(15, 15))
for cat,ax in zip(sorted(data[group[-2]].unique()),axes.flatten()):
    #data segment
    sub_data = data[data[group[-2]]==cat]
    # Insert zero rows for missing (year, field) combinations (pyjanitor).
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    data_complete = pd.concat([data_complete,sub_data], ignore_index=True)
    #plot
    g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),
                   y=record_col,x=group[0], hue=group[-1], marker="o", ax=ax)
    g.set(xticks=list(range(2012,2022+1,2)))
    g.legend(title=None)
    g.set_title(cat)
    g.set_xlabel(None)
    g.set_ylabel(None)
    # Counts are integers; suppress fractional ticks.
    g.yaxis.set_major_locator(MaxNLocator(integer=True))
fig.suptitle("Number of co-publications in domains and respective fields", y=0.92)
plt.show()
# Subfield-level yearly counts (zero-filling happens per plot below).
group = ['Publication Year',"Domain_English",'Field_English',"SubField_English"]
data = wos.groupby(group, as_index=False)[record_col].nunique().sort_values(ascending=False, by=group+[record_col])
data
Publication Year | Domain_English | Field_English | SubField_English | UT (Unique WOS ID) | |
---|---|---|---|---|---|
1598 | 2022 | Natural Sciences | Physics & Astronomy | Optics | 134 |
1597 | 2022 | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics | 65 |
1596 | 2022 | Natural Sciences | Physics & Astronomy | Mathematical Physics | 10 |
1595 | 2022 | Natural Sciences | Physics & Astronomy | General Physics | 31 |
1594 | 2022 | Natural Sciences | Physics & Astronomy | Fluids & Plasmas | 79 |
... | ... | ... | ... | ... | ... |
4 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Forestry | 1 |
3 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Food Science | 1 |
2 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Fisheries | 2 |
1 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Dairy & Animal Science | 2 |
0 | 2011 | Applied Sciences | Agriculture, Fisheries & Forestry | Agronomy & Agriculture | 3 |
1599 rows × 5 columns
# One figure per field: subfield lines over time.
for cat in sorted(data[group[-2]].unique()):
    sub_data = data[data[group[-2]]==cat]
    sub_data = sub_data.complete({group[0]:range(int(data[group[0]].min()), int(data[group[0]].max()) + 1)}
                                 ,group[-1],fill_value=0)
    g=sns.lineplot(sub_data.sort_values(ascending=True, by=group[-1]),y=record_col,x=group[0],
                   hue=group[-1], marker="o", errorbar=None)
    g.set(xticks=list(range(2012,2022+1,2)))
    # Legend outside the axes, wrapped into columns of at most 12 entries.
    g.legend(title=None,bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., ncols=math.ceil(len(g.legend_.texts)/12))
    # NOTE(review): "Number or" looks like a typo for "Number of" in the title.
    g.set_title(f'Number or co-publications in {cat}')
    g.set_ylabel(None)
    plt.show()
from matplotlib.ticker import FuncFormatter
import math
def orderOfMagnitude(number):
    """Return the order of magnitude of *number*, i.e. floor(log10(number)).

    Raises ValueError for number <= 0.
    """
    # Fix: math.log(number, 10) is not exact for powers of ten
    # (math.log(1000, 10) == 2.999..., which floors to 2 instead of 3);
    # math.log10 is exact for them.
    return math.floor(math.log10(number))

def roundToNearest(number):
    """Round *number* up to its leading digit's magnitude.

    E.g. 42 -> 50, 101 -> 200, 7 -> 7.  Used to pick a "round" upper
    axis limit for the bar charts below.
    """
    order = orderOfMagnitude(number)
    near = math.ceil(number/10**order)*10**order
    return near
# Institution-publication rows enriched with country type and EuroVoc region.
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
# Eyeball a random sample of the merge result.
wos_univ_locations.sample(100)
UT (Unique WOS ID) | Institution | Country | Institution_harm | merge_iter | Country_Type | Eurovoc_Class | |
---|---|---|---|---|---|---|---|
32326 | WOS:000480328900026 | Chinese Univ Hong Kong | China | Chinese Univ Hong Kong | 0 | China | China |
168425 | WOS:000721883100002 | Natl Tech Univ Athens | Greece | Natl Tech Univ Athens | 0 | EU | Southern Europe |
114303 | WOS:000779000900001 | Cardiff Sch Technol | United Kingdom | Cardiff Sch Technol | 0 | Non-EU associate | Western Europe |
96915 | WOS:000418781800008 | Univ London | United Kingdom | Univ London | 0 | Non-EU associate | Western Europe |
37409 | WOS:000514081100008 | Changjiang Waterway Planning Design & Res Inst | China | Changjiang Waterway Planning Design & Res Inst | 0 | China | China |
... | ... | ... | ... | ... | ... | ... | ... |
56377 | WOS:000664002600088 | Univ Sci & Technol Beijing | China | Univ Sci & Technol Beijing | 0 | China | China |
37559 | WOS:000515393800112 | Southwest Minzu Univ | China | Southwest Minzu Univ | 0 | China | China |
50960 | WOS:000616422300002 | Xinxiang Med Univ | China | Xinxiang Med Univ | 0 | China | China |
13885 | WOS:000381268400001 | Shandong Univ | China | Shandong Univ | 0 | China | China |
124265 | WOS:000575964500001 | CNRS R 7225 | France | 7225 | 0 | EU | Western Europe |
100 rows × 7 columns
# --- Country-level collaboration summary (China's European partners) -------
# Unique (publication, country) pairs, ignoring "Other" countries.
wos_collabs = wos_univ_locations[wos_univ_locations["Country_Type"]!="Other"][[record_col,"Country"]].drop_duplicates()
# Co-publication appearances per non-Chinese partner country.
# NOTE(review): relies on value_counts().reset_index() yielding a "count"
# column, i.e. pandas >= 2.0 naming — confirm the pinned pandas version.
collab_desc = wos_collabs[wos_collabs["Country"]!="China"]["Country"].value_counts().reset_index()
# Share of distinct co-publications the country participates in.
collab_desc["percent_of_copubs"] = collab_desc["count"]/wos_collabs[record_col].nunique()*100
# Share of all (publication, country) participations.
collab_desc["percent_contrib_in_copubs"] = collab_desc["count"]/wos_collabs[record_col].size*100
collab_desc = collab_desc.merge(wos_country_types, on="Country")
collab_desc
# Metric column -> human-readable chart title/label.
c_dict = {"count":"Number of co-publications",
          "percent_of_copubs":"Percent of co-publications",
          "percent_contrib_in_copubs":"Contribution to co-publications"}
# Creating subplot axes
# fig, axes = plt.subplots(ncols=3,figsize=(15, 15))
# for c,ax in zip(c_dict.keys(),axes.flatten()):
# One horizontal bar chart per metric.
for c in c_dict.keys():
    data = collab_desc[["Country",c,"Country_Type"]]
    plt.figure(figsize=(9,12))
    g = sns.barplot(data, x=c, y="Country", hue="Country_Type", dodge=False)
    # Extend the axis to a "round" upper limit so bar labels fit.
    g.set_xlim(0,roundToNearest(data[c].max()))
    g.set_ylabel(None)
    g.set_xlabel(c_dict.get(c))
    g.set_title(c_dict.get(c))
    g.legend(title=None, loc="right")
    for i in g.containers:
        g.bar_label(i,fontsize=10, fmt='%.1f%%' if 'percent' in c else '%.0f')
    if 'percent' in c:
        # Integer percent ticks, suffixed with '%'.
        g.xaxis.set_major_locator(MaxNLocator(integer=True))
        vals = g.get_xticks()
        g.set_xticklabels([str(int(val))+'%' for val in vals])
    plt.show()
# wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
# wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
# EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique', normalize='all').fillna(0)
#
# # Generate a mask for the upper triangle
# mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
#
# # Set up the matplotlib figure
# f, ax = plt.subplots(figsize=(11, 9))
#
# # Draw the heatmap with the mask and correct aspect ratio
# g = sns.heatmap(EU_co_occur, mask=mask,
# square=True, linewidths=.5)
#
# g.set_ylabel(None)
# g.set_xlabel(None)
%%capture
# --- Intra-European co-occurrence matrix of partner countries --------------
# Unique (publication, country) pairs for European partners only.
wos_collabs_EU = wos_univ_locations[~wos_univ_locations["Country_Type"].isin(["Other","China"])][[record_col,"Country"]].drop_duplicates()
# Self-join on the publication ID: one row per country pair per publication.
wos_collabs_EU = wos_collabs_EU.merge(wos_collabs_EU, on=record_col)
EU_co_occur = pd.crosstab(wos_collabs_EU['Country_x'], wos_collabs_EU['Country_y'], values=wos_collabs_EU[record_col], aggfunc='nunique').fillna(0).astype(int)
# Throwaway clustermap used only to obtain a clustered row/column order
# (its rendering is suppressed by %%capture above).
pre_fig = sns.clustermap(EU_co_occur)
re_index = [i.get_text() for i in pre_fig.ax_heatmap.yaxis.get_majorticklabels()]
re_column = [i.get_text() for i in pre_fig.ax_heatmap.xaxis.get_majorticklabels()]
EU_co_occur = EU_co_occur.reindex(index = re_index, columns=re_column)
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(EU_co_occur, dtype=bool))
# Blank the redundant upper triangle (the matrix is symmetric).
data = np.where(mask,None,EU_co_occur)
EU_co_occur.columns
fig = px.imshow(data,
                labels=dict(x="Country", y="Country", color="Co-publication with China"),
                x=list(EU_co_occur.columns),
                y=list(EU_co_occur.index), title="Intraeuropean patterns<br>Co-occurences of countries in chinese co-publications"
                )
fig.update_layout(title_x=0.5,
                  width=1000, height=1000,
                  xaxis_showgrid=False,
                  yaxis_showgrid=False,
                  yaxis_autorange='reversed', template='plotly_white')
# fig.update_traces(hovertemplate='<b>%{y}</b><br>%{x}<br>Co-publications: %{hovertext}')
fig.update_xaxes(tickangle= -90)
fig.update_yaxes(
    ticks="outside")
fig.update_xaxes(
    ticks="outside")
fig.show(config= dict(displayModeBar = False,responsive=True))
# --- Yearly output by partner country type (EU / Non-EU associate) ---------
collab_year = wos_collabs[wos_collabs["Country"]!="China"].copy()
# Attach country type and publication year to each (publication, country) pair.
collab_year = collab_year.merge(wos_country_types, on="Country").merge(wos[[record_col,"Publication Year"]],on=record_col).drop_duplicates()
data = collab_year.groupby(["Publication Year",'Country_Type'],as_index=False)[record_col].nunique()
g=sns.lineplot(data,y=record_col,x="Publication Year", hue="Country_Type", marker="o")
g.set(xticks=list(range(2012,2022+1,2)))
g.legend(title=None)
g.set_xlabel(None)
g.set_ylabel(None)
g.set_title("Yearly output of co-publications with China")
Text(0.5, 1.0, 'Yearly output of co-publications with China')
# --- Country-level yearly series with growth and cumulative columns --------
# Zero-filled year x country counts (same unstack/stack trick as above).
data = (collab_year.groupby(['Publication Year',"Country"])[record_col]
        .nunique(dropna=False).unstack()
        .fillna(0)
        .stack()
        .reset_index()
        .rename(columns={0:record_col}))
# Baseline: each country's first year with a non-zero count.
data = data.merge(data[data[record_col]>0].sort_values(by=["Publication Year"], ascending=True).drop_duplicates(subset="Country"),
                  on=["Country"], suffixes=[None,"_relative_growth"])
# Percent growth relative to that baseline year.
data[record_col+"_relative_growth"] = (data[record_col]-data[record_col+"_relative_growth"])/data[record_col+"_relative_growth"]*100
data = data.sort_values(by =["Country","Publication Year"], ascending=[True,True])
# Running total per country (depends on the year ordering above).
data[record_col+"_cumsum"] = (data.groupby('Country',as_index=False)[record_col].cumsum())
data = data.merge(wos_country_types, on='Country')
data
Publication Year | Country | UT (Unique WOS ID) | Publication Year_relative_growth | UT (Unique WOS ID)_relative_growth | UT (Unique WOS ID)_cumsum | Country_Type | Eurovoc_Class | |
---|---|---|---|---|---|---|---|---|
0 | 2011 | Austria | 22.0 | 2011 | 0.000000 | 22.0 | EU | Western Europe |
1 | 2012 | Austria | 24.0 | 2011 | 9.090909 | 46.0 | EU | Western Europe |
2 | 2013 | Austria | 26.0 | 2011 | 18.181818 | 72.0 | EU | Western Europe |
3 | 2014 | Austria | 39.0 | 2011 | 77.272727 | 111.0 | EU | Western Europe |
4 | 2015 | Austria | 50.0 | 2011 | 127.272727 | 161.0 | EU | Western Europe |
... | ... | ... | ... | ... | ... | ... | ... | ... |
355 | 2018 | United Kingdom | 1837.0 | 2011 | 406.060606 | 6918.0 | Non-EU associate | Western Europe |
356 | 2019 | United Kingdom | 2430.0 | 2011 | 569.421488 | 9348.0 | Non-EU associate | Western Europe |
357 | 2020 | United Kingdom | 3108.0 | 2011 | 756.198347 | 12456.0 | Non-EU associate | Western Europe |
358 | 2021 | United Kingdom | 3718.0 | 2011 | 924.242424 | 16174.0 | Non-EU associate | Western Europe |
359 | 2022 | United Kingdom | 4245.0 | 2011 | 1069.421488 | 20419.0 | Non-EU associate | Western Europe |
360 rows × 8 columns
# data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
# fig = px.choropleth(data, locations="ISO3", color=record_col, hover_name="Country",
#                     animation_frame='Publication Year', scope="europe", template='plotly', range_color=[data[record_col].min(),data[record_col].max()])
# fig.show()
# Animated European choropleth of relative growth per country and year.
# ISO3 codes are required by px.choropleth's default locationmode.
data["ISO3"] = cc.pandas_convert(series=data["Country"], to='ISO3')
fig = px.choropleth(data, locations="ISO3", color=record_col+"_relative_growth", hover_name="Country",
                    animation_frame='Publication Year', scope="europe", template='plotly',
                    range_color=[data[record_col+"_relative_growth"].min(),data[record_col+"_relative_growth"].max()])
fig.show()
# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),y=record_col,x='Publication Year', color="Eurovoc_Class",line_group="Country", markers=True,
# labels={
# record_col: 'Number of co-publications',
# "Eurovoc_Class": "Region"
# },
# title="Yearly output of co-publications", template='plotly',hover_name= "Country")
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
# # fig.update_layout(hovermode='x unified')
# fig.add_shape(
# # Rectangle with reference to the plot
# type="rect",
# xref="paper",
# yref="paper",
# x0=0,
# y0=0,
# x1=1.0,
# y1=1.0,
# line=dict(
# color="black",
# width=0.5,
# )
# )
# fig.update_yaxes(
# showgrid=True,
# ticks="outside")
# fig.update_xaxes(
# showgrid=True,
# ticks="outside")
# fig.show(config= dict(displayModeBar = False))
# fig.data[0].hovertemplate
# fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
# y=record_col+"_relative_growth",
# x='Publication Year',
# color="Eurovoc_Class",line_group="Country",markers=True,
# labels={
# record_col+"_relative_growth": 'Relative growth of co-publications (%)',"Eurovoc_Class": "Region"
# },
# title="Relative growth of co-publications<br>(baseline: 2011)", template='plotly',hover_name= "Country")
# fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')
# fig.add_shape(
# # Rectangle with reference to the plot
# type="rect",
# xref="paper",
# yref="paper",
# x0=0,
# y0=0,
# x1=1.0,
# y1=1.0,
# line=dict(
# color="black",
# width=0.5,
# )
# )
# fig.update_yaxes(
# showgrid=True,
# ticks="outside")
# fig.update_xaxes(
# showgrid=True,
# ticks="outside")
# fig.show(config= dict(displayModeBar = False))
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# --- 2x2 composite of country-level figures (one line per country, colored
# by EuroVoc region).
# NOTE(review): panel (1,1) is titled "placeholder" and never receives
# traces in the visible code — presumably filled elsewhere or unfinished.
figsuper = make_subplots(rows=2, cols=2, subplot_titles=["placeholder","Cumulative number of co-publications",
                                                         "Yearly output of co-publications","Relative growth of co-publications<br>(baseline: 2011)"])
# Panel b: cumulative area chart (keeps its legend; the other panels'
# legends are suppressed so each region appears only once).
fig = px.area(data.sort_values(ascending=True, by='Publication Year'), y=record_col+"_cumsum",
              x='Publication Year',
              color="Eurovoc_Class",
              line_group="Country",
              labels={
                  record_col: 'Number of co-publications',
                  "Eurovoc_Class": "Region"
              },
              title="Cumulative number of co-publications",hover_name= "Country")
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
for trace in list(fig.select_traces()):
    figsuper.add_trace(trace,
                       row=1, col=2
                       )
# Panel c: yearly output lines.
fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
              y=record_col,
              x='Publication Year',
              color="Eurovoc_Class",
              line_group="Country",
              markers=True,
              labels={
                  record_col: 'Number of co-publications',
                  "Eurovoc_Class": "Region"
              },
              title="Yearly output of co-publications",hover_name= "Country")
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Co-publications: %{y}')
for trace in list(fig.select_traces()):
    trace.showlegend=False
    figsuper.add_trace(trace,
                       row=2, col=1
                       )
# Panel d: relative growth lines.
fig = px.line(data.sort_values(ascending=True, by='Publication Year'),
              y=record_col+"_relative_growth",
              x='Publication Year',
              color="Eurovoc_Class",line_group="Country",markers=True,
              labels={
                  record_col+"_relative_growth": 'Relative growth of co-publications (%)',"Eurovoc_Class": "Region"
              },
              title="Relative growth of co-publications<br>(baseline: 2011)", template='plotly',hover_name= "Country")
fig.update_traces(hovertemplate='<b>%{hovertext}</b><br>%{x}<br>Relative growth: %{y}%')
# Thin black frame around this panel's plotting area.
fig.add_shape(
    # Rectangle with reference to the plot
    type="rect",
    xref="paper",
    yref="paper",
    x0=0,
    y0=0,
    x1=1.0,
    y1=1.0,
    line=dict(
        color="black",
        width=0.5,
    )
)
for trace in list(fig.select_traces()):
    trace.showlegend=False
    figsuper.add_trace(trace,
                       row=2, col=2
                       )
figsuper.update_yaxes(
showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
ticks="outside")
figsuper.update_xaxes(
showgrid=True,showline=True, linewidth=1, linecolor='black', mirror=True,
ticks="outside")
figsuper.update_layout({'template':"plotly"})
figsuper.show(config= dict(displayModeBar = False))
# Country x publication-year matrix of distinct co-publication counts
# (number of unique record IDs per cell); missing cells become 0.
year_pivot = (
    pd.crosstab(collab_year['Country'],
                collab_year['Publication Year'],
                values=collab_year[record_col],
                aggfunc='nunique')
    .fillna(0)
    .astype(int)
)
year_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | ||||||||||||
Austria | 22 | 24 | 26 | 39 | 50 | 57 | 72 | 89 | 138 | 137 | 185 | 205 |
Belgium | 34 | 38 | 40 | 65 | 71 | 81 | 90 | 133 | 179 | 213 | 242 | 292 |
Bulgaria | 4 | 5 | 8 | 9 | 7 | 19 | 21 | 18 | 10 | 25 | 32 | 19 |
Croatia | 1 | 2 | 6 | 8 | 10 | 7 | 10 | 19 | 27 | 29 | 33 | 35 |
Cyprus | 2 | 1 | 5 | 5 | 5 | 5 | 8 | 7 | 15 | 28 | 36 | 43 |
Czech Republic | 13 | 15 | 16 | 21 | 20 | 36 | 37 | 56 | 64 | 81 | 93 | 123 |
Denmark | 35 | 33 | 40 | 59 | 68 | 74 | 101 | 195 | 234 | 245 | 293 | 343 |
Estonia | 3 | 3 | 7 | 10 | 12 | 10 | 15 | 15 | 16 | 38 | 45 | 39 |
Finland | 31 | 35 | 44 | 82 | 100 | 125 | 126 | 198 | 241 | 256 | 289 | 380 |
France | 117 | 130 | 174 | 231 | 269 | 325 | 348 | 491 | 648 | 691 | 807 | 858 |
Germany | 123 | 172 | 192 | 273 | 310 | 365 | 456 | 604 | 801 | 907 | 1210 | 1386 |
Greece | 15 | 18 | 19 | 32 | 35 | 50 | 47 | 81 | 114 | 122 | 139 | 181 |
Hungary | 11 | 11 | 21 | 16 | 20 | 38 | 34 | 47 | 61 | 61 | 83 | 90 |
Ireland | 13 | 16 | 22 | 31 | 27 | 45 | 66 | 72 | 84 | 116 | 167 | 187 |
Italy | 51 | 70 | 84 | 116 | 178 | 187 | 247 | 325 | 441 | 571 | 641 | 811 |
Latvia | 0 | 0 | 1 | 0 | 1 | 8 | 10 | 15 | 10 | 9 | 13 | 18 |
Lithuania | 1 | 2 | 10 | 4 | 4 | 13 | 12 | 23 | 38 | 36 | 38 | 38 |
Luxembourg | 2 | 3 | 3 | 1 | 8 | 9 | 13 | 15 | 18 | 22 | 35 | 51 |
Malta | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 6 | 2 | 7 | 10 |
Netherlands | 72 | 64 | 77 | 103 | 139 | 166 | 220 | 297 | 408 | 470 | 529 | 655 |
Norway | 30 | 42 | 60 | 76 | 67 | 88 | 104 | 134 | 222 | 253 | 304 | 311 |
Poland | 17 | 31 | 37 | 57 | 73 | 82 | 98 | 110 | 138 | 181 | 276 | 353 |
Portugal | 16 | 23 | 35 | 41 | 45 | 58 | 79 | 119 | 136 | 147 | 204 | 212 |
Romania | 7 | 15 | 13 | 16 | 25 | 26 | 37 | 57 | 64 | 55 | 48 | 62 |
Slovakia | 9 | 6 | 6 | 10 | 12 | 22 | 18 | 27 | 27 | 34 | 36 | 45 |
Slovenia | 7 | 7 | 10 | 12 | 17 | 27 | 22 | 47 | 54 | 31 | 48 | 40 |
Spain | 50 | 49 | 69 | 112 | 138 | 185 | 232 | 273 | 356 | 386 | 473 | 640 |
Sweden | 34 | 50 | 59 | 83 | 113 | 170 | 233 | 232 | 385 | 359 | 428 | 510 |
Switzerland | 37 | 50 | 54 | 74 | 74 | 95 | 155 | 195 | 233 | 263 | 349 | 447 |
United Kingdom | 363 | 417 | 531 | 660 | 781 | 979 | 1350 | 1837 | 2430 | 3108 | 3718 | 4245 |
# Annotated heatmap of the absolute counts; thick white horizontal rules
# visually separate the country rows.
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_pivot, annot=True, fmt="d", linewidths=0.5, ax=ax)
g.set(xlabel="", ylabel="")
for boundary in range(len(year_pivot) + 1):
    ax.axhline(boundary, color='white', lw=10)
# Same matrix as above but column-normalised, so each year column sums to
# 100 (percentage share of each country within that year).
year_percent_pivot = (
    pd.crosstab(collab_year['Country'],
                collab_year['Publication Year'],
                values=collab_year[record_col],
                aggfunc='nunique',
                normalize='columns')
    .fillna(0)
    .mul(100)
)
year_percent_pivot
Publication Year | 2011 | 2012 | 2013 | 2014 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Country | ||||||||||||
Austria | 1.962533 | 1.801802 | 1.557819 | 1.736420 | 1.865672 | 1.699970 | 1.689744 | 1.552958 | 1.816267 | 1.543488 | 1.712804 | 1.623248 |
Belgium | 3.033006 | 2.852853 | 2.396645 | 2.894034 | 2.649254 | 2.415747 | 2.112180 | 2.320712 | 2.355883 | 2.399730 | 2.240533 | 2.312139 |
Bulgaria | 0.356824 | 0.375375 | 0.479329 | 0.400712 | 0.261194 | 0.566657 | 0.492842 | 0.314081 | 0.131614 | 0.281658 | 0.296269 | 0.150447 |
Croatia | 0.089206 | 0.150150 | 0.359497 | 0.356189 | 0.373134 | 0.208768 | 0.234687 | 0.331530 | 0.355357 | 0.326724 | 0.305527 | 0.277140 |
Cyprus | 0.178412 | 0.075075 | 0.299581 | 0.222618 | 0.186567 | 0.149120 | 0.187749 | 0.122143 | 0.197420 | 0.315457 | 0.333302 | 0.340486 |
Czech Republic | 1.159679 | 1.126126 | 0.958658 | 0.934996 | 0.746269 | 1.073665 | 0.868341 | 0.977142 | 0.842327 | 0.912573 | 0.861031 | 0.973949 |
Denmark | 3.122212 | 2.477477 | 2.396645 | 2.626892 | 2.537313 | 2.206979 | 2.370336 | 3.402548 | 3.079758 | 2.760252 | 2.712712 | 2.715971 |
Estonia | 0.267618 | 0.225225 | 0.419413 | 0.445236 | 0.447761 | 0.298240 | 0.352030 | 0.261734 | 0.210582 | 0.428121 | 0.416628 | 0.308813 |
Finland | 2.765388 | 2.627628 | 2.636309 | 3.650935 | 3.731343 | 3.728005 | 2.957052 | 3.454894 | 3.171887 | 2.884182 | 2.675678 | 3.008948 |
France | 10.437110 | 9.759760 | 10.425404 | 10.284951 | 10.037313 | 9.692812 | 8.167097 | 8.567440 | 8.528560 | 7.785038 | 7.471530 | 6.793887 |
Germany | 10.972346 | 12.912913 | 11.503895 | 12.154942 | 11.567164 | 10.885774 | 10.701713 | 10.539173 | 10.542248 | 10.218567 | 11.202666 | 10.974741 |
Greece | 1.338091 | 1.351351 | 1.138406 | 1.424755 | 1.305970 | 1.491202 | 1.103027 | 1.413366 | 1.500395 | 1.374493 | 1.286918 | 1.433209 |
Hungary | 0.981267 | 0.825826 | 1.258238 | 0.712378 | 0.746269 | 1.133313 | 0.797935 | 0.820101 | 0.802843 | 0.687247 | 0.768447 | 0.712645 |
Ireland | 1.159679 | 1.201201 | 1.318155 | 1.380232 | 1.007463 | 1.342082 | 1.548932 | 1.256325 | 1.105554 | 1.306895 | 1.546153 | 1.480719 |
Italy | 4.549509 | 5.255255 | 5.032954 | 5.164737 | 6.641791 | 5.577095 | 5.796761 | 5.670913 | 5.804159 | 6.433078 | 5.934636 | 6.421728 |
Latvia | 0.000000 | 0.000000 | 0.059916 | 0.000000 | 0.037313 | 0.238592 | 0.234687 | 0.261734 | 0.131614 | 0.101397 | 0.120359 | 0.142529 |
Lithuania | 0.089206 | 0.150150 | 0.599161 | 0.178094 | 0.149254 | 0.387712 | 0.281624 | 0.401326 | 0.500132 | 0.405588 | 0.351819 | 0.300895 |
Luxembourg | 0.178412 | 0.225225 | 0.179748 | 0.044524 | 0.298507 | 0.268416 | 0.305093 | 0.261734 | 0.236904 | 0.247859 | 0.324044 | 0.403832 |
Malta | 0.089206 | 0.000000 | 0.000000 | 0.000000 | 0.037313 | 0.029824 | 0.000000 | 0.000000 | 0.078968 | 0.022533 | 0.064809 | 0.079183 |
Netherlands | 6.422837 | 4.804805 | 4.613541 | 4.585931 | 5.186567 | 4.950790 | 5.163107 | 5.182342 | 5.369834 | 5.295178 | 4.897695 | 5.186476 |
Norway | 2.676182 | 3.153153 | 3.594967 | 3.383793 | 2.500000 | 2.624515 | 2.440742 | 2.338161 | 2.921822 | 2.850383 | 2.814554 | 2.462586 |
Poland | 1.516503 | 2.327327 | 2.216896 | 2.537845 | 2.723881 | 2.445571 | 2.299930 | 1.919386 | 1.816267 | 2.039207 | 2.555319 | 2.795154 |
Portugal | 1.427297 | 1.726727 | 2.097064 | 1.825467 | 1.679104 | 1.729794 | 1.854025 | 2.076426 | 1.789945 | 1.656151 | 1.888714 | 1.678676 |
Romania | 0.624442 | 1.126126 | 0.778910 | 0.712378 | 0.932836 | 0.775425 | 0.868341 | 0.994591 | 0.842327 | 0.619648 | 0.444403 | 0.490934 |
Slovakia | 0.802855 | 0.450450 | 0.359497 | 0.445236 | 0.447761 | 0.656129 | 0.422436 | 0.471122 | 0.355357 | 0.383055 | 0.333302 | 0.356323 |
Slovenia | 0.624442 | 0.525526 | 0.599161 | 0.534283 | 0.634328 | 0.805249 | 0.516311 | 0.820101 | 0.710713 | 0.349256 | 0.444403 | 0.316731 |
Spain | 4.460303 | 3.678679 | 4.134212 | 4.986643 | 5.149254 | 5.517447 | 5.444731 | 4.763567 | 4.685444 | 4.348806 | 4.379224 | 5.067701 |
Sweden | 3.033006 | 3.753754 | 3.535051 | 3.695459 | 4.216418 | 5.070086 | 5.468200 | 4.048159 | 5.067123 | 4.044615 | 3.962596 | 4.038324 |
Switzerland | 3.300624 | 3.753754 | 3.235470 | 3.294746 | 2.761194 | 2.833284 | 3.637644 | 3.402548 | 3.066596 | 2.963046 | 3.231182 | 3.539473 |
United Kingdom | 32.381802 | 31.306306 | 31.815458 | 29.385574 | 29.141791 | 29.197733 | 31.682704 | 32.053743 | 31.982101 | 35.015773 | 34.422739 | 33.613113 |
# Percentage heatmap; thick white vertical rules separate the year columns.
f, ax = plt.subplots(figsize=(15, 15))
g = sns.heatmap(year_percent_pivot, annot=True, fmt='.1f', linewidths=0.5,
                ax=ax, cbar=False)
# Append a percent sign to every annotation cell.
for label in ax.texts:
    label.set_text(f"{label.get_text()} %")
g.set(xlabel="", ylabel="")
for boundary in range(year_percent_pivot.shape[1] + 1):
    ax.axvline(boundary, color='white', lw=10)
# Institutional collab
# Per-institution records for countries in scope (EU, associates, China);
# "Other" countries are dropped.
wos_univ_locations = wos_univ.merge(wos_country_types, on="Country")
in_scope = wos_univ_locations["Country_Type"] != "Other"
wos_univ_collabs = (
    wos_univ_locations.loc[in_scope,
                           [record_col, "Country", "Institution_harm",
                            "Country_Type", "Eurovoc_Class"]]
    .drop_duplicates()
)
wos_univ_collabs["ISO3"] = cc.pandas_convert(series=wos_univ_collabs["Country"], to='ISO3')
# Suffix the ISO3 code so same-named institutions in different countries stay distinct.
wos_univ_collabs["Institution_harm_label"] = (
    wos_univ_collabs["Institution_harm"] + " (" + wos_univ_collabs["ISO3"] + ")"
)
wos_univ_collabs.sample(100)
UT (Unique WOS ID) | Country | Institution_harm | Country_Type | Eurovoc_Class | ISO3 | Institution_harm_label | |
---|---|---|---|---|---|---|---|
62496 | WOS:000713807500098 | China | Tsinghua Univ | China | China | CHN | Tsinghua Univ (CHN) |
125594 | WOS:000694719000013 | France | Univ Paris | EU | Western Europe | FRA | Univ Paris (FRA) |
169955 | WOS:000766762800026 | Austria | Inst Adv Res Artificial Intelligence | EU | Western Europe | AUT | Inst Adv Res Artificial Intelligence (AUT) |
48357 | WOS:000596356000001 | China | Beijing Inst Technol | China | China | CHN | Beijing Inst Technol (CHN) |
148599 | WOS:000517228300013 | Germany | Univ Wurzburg | EU | Western Europe | DEU | Univ Wurzburg (DEU) |
... | ... | ... | ... | ... | ... | ... | ... |
92516 | WOS:000348141800002 | United Kingdom | Heriot Watt Univ | Non-EU associate | Western Europe | GBR | Heriot Watt Univ (GBR) |
153610 | WOS:000838053000004 | Germany | Cluster Excellence Hearing4all | EU | Western Europe | DEU | Cluster Excellence Hearing4all (DEU) |
92564 | WOS:000349389800004 | United Kingdom | European Ctr Medium Range Weather Forecasts | Non-EU associate | Western Europe | GBR | European Ctr Medium Range Weather Forecasts (GBR) |
56459 | WOS:000665034700031 | China | Nanjing Univ Sci & Technol | China | China | CHN | Nanjing Univ Sci & Technol (CHN) |
54749 | WOS:000651626700012 | China | Ningbo Univ | China | China | CHN | Ningbo Univ (CHN) |
100 rows × 7 columns
# Fixed colours per country type so the three charts are comparable.
color_discrete_map = {
    'China': '#EF553B',
    'EU': '#636EFA',
    'Non-EU associate': '#00CC96',
}
TOPN = 25

wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"] != "China"]
wos_univ_eu_strict = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "EU"]


def _top_institutions(frame, group_cols):
    # Distinct-publication count per institution, keeping only the TOPN largest.
    counts = frame.groupby(group_cols, as_index=False)[record_col].nunique()
    return counts.sort_values(by=record_col, ascending=False).head(TOPN).copy()


data_eu = _top_institutions(
    wos_univ_eu, ["Country", "Institution_harm_label", "Country_Type"]
).sort_values(by="Country_Type")
data_eu_strict = _top_institutions(
    wos_univ_eu_strict, ["Country", "Institution_harm_label", "Eurovoc_Class"]
)
data_ch = _top_institutions(
    wos_univ_ch, ["Country", "Institution_harm", "Country_Type"]
)

# (frame, scope label, y column, colour-by column, pattern column or None)
panel_specs = [
    (data_eu, "European countries in scope", "Institution_harm_label", "Country", "Country_Type"),
    (data_eu_strict, "EU-28 only", "Institution_harm_label", "Eurovoc_Class", None),
    (data_ch, "China", "Institution_harm", "Country_Type", None),
]
# NOTE(review): the loop variable deliberately keeps the name `data` so the
# post-loop module state matches the original notebook cell.
for data, c_scope, y_lab, col_by, pat in panel_specs:
    fig = px.bar(
        data, x=record_col, y=y_lab, color=col_by,
        color_discrete_map=color_discrete_map, pattern_shape=pat,
        labels={
            record_col: 'Number of co-publications',
            "Institution_harm": "Institution",
            "Institution_harm_label": "Institution",
            "Country_Type": "Country type",
            "Eurovoc_Class": "Region",
        },
        title=f"Most visible institutions (top {TOPN} within {c_scope})",
        template='plotly',
    )
    fig.update_layout(xaxis_tickformat='d', font_family="Montserrat",
                      yaxis={'categoryorder': 'total ascending'},
                      width=1000, height=1000)
    fig.update_traces(hovertemplate='%{x:d}')
    # Thin black frame around the plotting area.
    fig.add_shape(type="rect", xref="paper", yref="paper",
                  x0=0, y0=0, x1=1.0, y1=1.0,
                  line=dict(color="black", width=0.5))
    fig.update_yaxes(showgrid=True, ticks="outside")
    fig.update_xaxes(showgrid=True, ticks="outside")
    fig.show(config=dict(displayModeBar=False))
# Sanity check on name harmonisation: list every raw institution spelling
# that collapses into the harmonised name "Chinese Acad Sci", with its
# distinct-publication count.
wos_univ_test = (
    wos_univ_locations[wos_univ_locations["Country_Type"] != "Other"]
    [[record_col, "Country", "Institution", "Institution_harm", "Country_Type"]]
    .drop_duplicates()
)
www = wos_univ_test.groupby(["Institution", "Institution_harm"],
                            as_index=False)[record_col].nunique()
www[www["Institution_harm"] == "Chinese Acad Sci"]
Institution | Institution_harm | UT (Unique WOS ID) | |
---|---|---|---|
16 | Chinese Acad Sci | Chinese Acad Sci | 1 |
3149 | Chinese Acad Sci | Chinese Acad Sci | 4614 |
3153 | Chinese Acad Sci AIRCAS | Chinese Acad Sci | 2 |
3155 | Chinese Acad Sci CAREERI CAS | Chinese Acad Sci | 1 |
3157 | Chinese Acad Sci CASIA | Chinese Acad Sci | 8 |
3159 | Chinese Acad Sci GUCAS | Chinese Acad Sci | 2 |
3160 | Chinese Acad Sci IAP | Chinese Acad Sci | 1 |
3161 | Chinese Acad Sci IECAS | Chinese Acad Sci | 2 |
3162 | Chinese Acad Sci IME CAS | Chinese Acad Sci | 1 |
3163 | Chinese Acad Sci IMECAS | Chinese Acad Sci | 1 |
3164 | Chinese Acad Sci ITP CAS | Chinese Acad Sci | 1 |
3166 | Chinese Acad Sci NAOC | Chinese Acad Sci | 1 |
3167 | Chinese Acad Sci NAOC CAS | Chinese Acad Sci | 2 |
13501 | RCEES Chinese Acad Sci | Chinese Acad Sci | 1 |
19499 | ZIAT Chinese Acad Sci | Chinese Acad Sci | 1 |
# Rebuild the China / non-China splits and pair every non-Chinese institution
# with every Chinese institution that appears on the same publication, then
# attach the publication's subject classification.
wos_univ_ch = wos_univ_collabs[wos_univ_collabs["Country_Type"] == "China"]
wos_univ_eu = wos_univ_collabs[wos_univ_collabs["Country_Type"] != "China"]
wos_univ_dipol = (
    wos_univ_eu
    .merge(wos_univ_ch, on=record_col, suffixes=('_eu', '_ch'))
    .merge(wos[[record_col, "Domain_English", "Field_English", "SubField_English"]],
           on=record_col)
)
wos_univ_dipol.sample(100)
UT (Unique WOS ID) | Country_eu | Institution_harm_eu | Country_Type_eu | Eurovoc_Class_eu | ISO3_eu | Institution_harm_label_eu | Country_ch | Institution_harm_ch | Country_Type_ch | Eurovoc_Class_ch | ISO3_ch | Institution_harm_label_ch | Domain_English | Field_English | SubField_English | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
151306 | WOS:000387273800018 | Slovakia | Reg Author Publ Hlth | EU | Eastern Europe | SVK | Reg Author Publ Hlth (SVK) | China | Chinese Univ Hong Kong | China | China | CHN | Chinese Univ Hong Kong (CHN) | Health Sciences | Clinical Medicine | Environmental & Occupational Health |
288046 | WOS:000447568300005 | France | Sorbonne Univ | EU | Western Europe | FRA | Sorbonne Univ (FRA) | China | Xiamen Univ | China | China | CHN | Xiamen Univ (CHN) | Applied Sciences | Information & Communication Technologies | Networking & Telecommunications |
86767 | WOS:000552035900004 | Italy | Terrasystem Srl | EU | Southern Europe | ITA | Terrasystem Srl (ITA) | China | Nanjing Univ Informat Sci & Technol | China | China | CHN | Nanjing Univ Informat Sci & Technol (CHN) | Multidisciplinary | Multidisciplinary | Multidisciplinary |
74398 | WOS:000494411700001 | Germany | Georg August Univ Gottingen | EU | Western Europe | DEU | Georg August Univ Gottingen (DEU) | China | China Three Gorges Univ | China | China | CHN | China Three Gorges Univ (CHN) | Economic & Social Sciences | Social Sciences | Information & Library Sciences |
266078 | WOS:000639495800005 | United Kingdom | Univ Lincoln | Non-EU associate | Western Europe | GBR | Univ Lincoln (GBR) | China | Minist Educ | China | China | CHN | Minist Educ (CHN) | Applied Sciences | Information & Communication Technologies | Information Systems |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
38553 | WOS:000418982000012 | Spain | Reg Univ Carlos Haya | EU | Southern Europe | ESP | Reg Univ Carlos Haya (ESP) | China | Zhengzhou Univ | China | China | CHN | Zhengzhou Univ (CHN) | Health Sciences | Clinical Medicine | Oncology & Carcinogenesis |
19393 | WOS:000406549900063 | Italy | Azienda Ospedaliero Univ | EU | Southern Europe | ITA | Azienda Ospedaliero Univ (ITA) | China | Shanxi Prov Peoples Hosp | China | China | CHN | Shanxi Prov Peoples Hosp (CHN) | Health Sciences | Clinical Medicine | Anesthesiology |
77625 | WOS:000494411700001 | Germany | Martin Luther Univ Halle Wittenberg | EU | Western Europe | DEU | Martin Luther Univ Halle Wittenberg (DEU) | China | Nankai Univ | China | China | CHN | Nankai Univ (CHN) | Economic & Social Sciences | Social Sciences | Information & Library Sciences |
138669 | WOS:000355671300001 | France | Aix Marseille Univ | EU | Western Europe | FRA | Aix Marseille Univ (FRA) | China | Chinese Acad Sci | China | China | CHN | Chinese Acad Sci (CHN) | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics |
136858 | WOS:000347046200017 | Slovenia | Univ Ljubljana | EU | Eastern Europe | SVN | Univ Ljubljana (SVN) | China | Shanghai Jiao Tong Univ | China | China | CHN | Shanghai Jiao Tong Univ (CHN) | Natural Sciences | Physics & Astronomy | Nuclear & Particle Physics |
100 rows × 16 columns
# Parallel-categories flow: EU-side country -> science domain -> Chinese partner country.
dipol_cols = ["Country_eu", "Domain_English", "Country_ch"]
fig = px.parallel_categories(wos_univ_dipol[dipol_cols])
fig.show()