Check institution frequencies

utku_inst_name_exploration
Utku Bilen Demir 1 year ago
parent 9416a1c8b6
commit 0549d0f55f

@ -0,0 +1,14 @@
#%% Compare the frequency of the inst. names with the wos-analysis on the web interface
# For every institution name, count the number of distinct publications
# (values of "UT (Unique WOS ID)") it is listed on, then write the table to CSV.
import pandas as pd
# %%
wos_inst_df = pd.read_excel("../wos_processed_data/wos_institution_locations_harmonized.xlsx")
# %% Get rid of the duplicate institution name entries in each individual publication
# Combined "<institution>;<WOS ID>" key, kept as a convenience column on the
# frame; it is not used for the de-duplication below.
wos_inst_df["id_inst"] = wos_inst_df['Institution'] + ";" + wos_inst_df["UT (Unique WOS ID)"]
# De-duplicate on the two columns themselves instead of on a joined string:
# splitting a joined key on ";" truncates institution names that contain ";",
# and a missing value turns the key into NaN, which has no .split().
# Rows lacking either value cannot be attributed to a publication and are dropped.
wos_pub_inst_pairs = (
    wos_inst_df[["Institution", "UT (Unique WOS ID)"]]
    .dropna()
    .drop_duplicates()  # keeps the first occurrence, preserving row order
)
# One institution name per (institution, publication) pair.
wos_pubunique_inst = wos_pub_inst_pairs["Institution"].tolist()
# %% Calc. frequencies, and get an output
# value_counts() sorts by descending count; the result is reshaped into two
# columns, "institution" and "frequency".
wos_inst_freqdf = pd.DataFrame(wos_pubunique_inst).value_counts().rename_axis('institution').reset_index(name='frequency')
wos_inst_freqdf.to_csv("../wos_processed_data/wos_pubunique-institution_frequency.csv", index=False)

Binary file not shown.
1 version https://git-lfs.github.com/spec/v1
2 oid sha256:0d4a58f241d2e9b130869eedae2119be43919952d6dd236cdcd4d613a48d3e6b
3 size 453006
Loading…
Cancel
Save