From 0549d0f55fc4fdf453923a347219d7cc08db492c Mon Sep 17 00:00:00 2001
From: Utku Bilen Demir <84389167+UtkuBilenDemir@users.noreply.github.com>
Date: Mon, 24 Apr 2023 13:51:06 +0200
Subject: [PATCH] Check institution frequencies

---
 .../u_institution_frequency_exploration.py      | 20 ++++++++++++++++++++
 .../wos_pubunique-institution_frequency.csv     |  3 +++
 .../~$wos_institution_locations_harmonized.xlsx | Bin 0 -> 165 bytes
 3 files changed, 23 insertions(+)
 create mode 100644 WOS/wos_analysis/u_institution_frequency_exploration.py
 create mode 100644 WOS/wos_processed_data/wos_pubunique-institution_frequency.csv
 create mode 100644 WOS/wos_processed_data/~$wos_institution_locations_harmonized.xlsx

diff --git a/WOS/wos_analysis/u_institution_frequency_exploration.py b/WOS/wos_analysis/u_institution_frequency_exploration.py
new file mode 100644
index 0000000..5613e5d
--- /dev/null
+++ b/WOS/wos_analysis/u_institution_frequency_exploration.py
@@ -0,0 +1,20 @@
+#%% Compare the frequency of the inst. names with the wos-analysis on the web interface
+import pandas as pd
+
+# %%
+wos_inst_df = pd.read_excel("../wos_processed_data/wos_institution_locations_harmonized.xlsx")
+
+# %% Get rid of the duplicate institution name entries in each individual publication
+# De-duplicate on the (institution, publication ID) column pair itself. Joining both
+# into one ";"-separated string and splitting it again truncates institution names
+# that contain ";" and raises AttributeError on rows with a missing value (NaN).
+id_cols = ["Institution", "UT (Unique WOS ID)"]
+wos_pubunique_inst = (
+    wos_inst_df.dropna(subset=id_cols)
+    .drop_duplicates(subset=id_cols)["Institution"]
+    .tolist()
+)
+
+# %% Calc. frequencies, and get an output
+wos_inst_freqdf = pd.DataFrame(wos_pubunique_inst).value_counts().rename_axis('institution').reset_index(name='frequency')
+wos_inst_freqdf.to_csv("../wos_processed_data/wos_pubunique-institution_frequency.csv", index=False)
diff --git a/WOS/wos_processed_data/wos_pubunique-institution_frequency.csv b/WOS/wos_processed_data/wos_pubunique-institution_frequency.csv
new file mode 100644
index 0000000..a175556
--- /dev/null
+++ b/WOS/wos_processed_data/wos_pubunique-institution_frequency.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d4a58f241d2e9b130869eedae2119be43919952d6dd236cdcd4d613a48d3e6b
+size 453006
diff --git a/WOS/wos_processed_data/~$wos_institution_locations_harmonized.xlsx b/WOS/wos_processed_data/~$wos_institution_locations_harmonized.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..a20b14c8f8c665d4246ab607a6b78dae4140ec16
GIT binary patch
literal 165
zcmd-LEy*rbaMDw7NzKhHQXm%aGK4aeFk~~7GAJ-OG3WtF7lu@ZT!u`BA|RXE5CFxp
B67K*2

literal 0
HcmV?d00001
