{ "cells": [ { "cell_type": "code", "execution_count": 191, "outputs": [], "source": [ "import pandas as pd\n", "# Importing libraries and module and some setting for notebook\n", "\n", "import pandas as pd\n", "import re\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "import numpy as np\n", "from scipy.sparse import csr_matrix\n", "import sparse_dot_topn.sparse_dot_topn as ct #Cosine Similarity\n", "import time\n", "from tqdm import tqdm" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 192, "outputs": [], "source": [ "def wikinorm(univ_string):\n", " from googlesearch import search\n", " from nltk.metrics import edit_distance\n", " from operator import itemgetter\n", " from numpy.random import default_rng\n", " rng = default_rng()\n", " results = search(univ_string, lang=\"en\", num_results=3,advanced=True, sleep_interval=rng.uniform(1, 5))\n", " univ_name = univ_string.split(\",\")[0]\n", " u_results = [i.title for i in results if \"Category:\" not in i.title]\n", " return sorted([tuple((j,edit_distance(univ_name, j))) for j in u_results],key=itemgetter(1))[0][0]\n" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 193, "outputs": [], "source": [ "def replace_uppercase_words(text):\n", " words = text.split()\n", " all_uppercase = all(word.isupper() for word in words)\n", " all_lowercase = all(word.islower() for word in words)\n", " if all_uppercase or all_lowercase:\n", " return text\n", " else:\n", " result = []\n", " for word in words:\n", " w = word.strip()\n", " if not w.isupper() and not w.islower():\n", " result.append(w)\n", " return \" \".join(result).strip()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 194, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO: Pandarallel will run on 4 workers.\n", "INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main 
# Read the institution/location spreadsheet from the processed-data folder.
# `outdir` is reused by the export cell at the end of the notebook.
outdir="wos_processed_data"
univ = pd.read_excel(f"{outdir}/wos_institution_locations.xlsx")

# pandarallel supplies `parallel_apply`; it is initialised with 4 worker
# processes and a progress bar.
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True, nb_workers=4)

# Build the working name column: later cells match and merge on
# "Institution_harm" and leave the raw "Institution" column untouched.
univ["Institution_harm"] = univ["Institution"].parallel_apply(replace_uppercase_words)
print(len(univ))
\n29206 Germany Univ Duisburg Essen \n21658 United Kingdom Univ Southampton \n43289 United Kingdom Univ Strathclyde \n37200 Germany Goethe Univ Frankfurt \n95964 Netherlands Eindhoven Univ Technol \n\n[100 rows x 4 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
UT (Unique WOS ID)InstitutionCountryInstitution_harm
1094WOS:000292330300050Hong Kong Polytech UnivChinaHong Kong Polytech Univ
21547WOS:000374363900001Guangdong Univ TechnolChinaGuangdong Univ Technol
53778WOS:000459846300019Aarhus UnivDenmarkAarhus Univ
153776WOS:000907044000014Univ SienaItalyUniv Siena
81562WOS:000554591602038China Natl Elect Import Export CorpChinaChina Natl Elect Import Export Corp
...............
29206WOS:000397047200002Univ Duisburg EssenGermanyUniv Duisburg Essen
21658WOS:000374617600020Univ SouthamptonUnited KingdomUniv Southampton
43289WOS:000434742800004Univ StrathclydeUnited KingdomUniv Strathclyde
37200WOS:000418525100013Goethe Univ FrankfurtGermanyGoethe Univ Frankfurt
95964WOS:000616310200013Eindhoven Univ TechnolNetherlandsEindhoven Univ Technol
\n

100 rows × 4 columns

\n
" }, "execution_count": 195, "metadata": {}, "output_type": "execute_result" } ], "source": [ "univ.sample(100)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 196, "outputs": [ { "data": { "text/plain": " Country Institution_harm count\n12655 Poland Space Res Ctr 6\n12940 Portugal Ctr Invest Energia State Grid 1\n616 China Minist Nat Resources 78\n5561 China PowerChina Huadong Engn Corp Ltd 1\n514 China Chongqing Univ 478\n... ... ... ...\n476 Bulgaria Tech Univ 1\n12454 Norway Stavanger Univ Hosp 9\n5489 China Shanghai Sports Sch 1\n768 China Hubei Univ 25\n13527 Spain Jimenez Diaz Univ Hosp 2\n\n[100 rows x 3 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
CountryInstitution_harmcount
12655PolandSpace Res Ctr6
12940PortugalCtr Invest Energia State Grid1
616ChinaMinist Nat Resources78
5561ChinaPowerChina Huadong Engn Corp Ltd1
514ChinaChongqing Univ478
............
476BulgariaTech Univ1
12454NorwayStavanger Univ Hosp9
5489ChinaShanghai Sports Sch1
768ChinaHubei Univ25
13527SpainJimenez Diaz Univ Hosp2
\n

100 rows × 3 columns

\n
" }, "execution_count": 196, "metadata": {}, "output_type": "execute_result" } ], "source": [ "univ_norm = univ.groupby(\"Country\", as_index=False)[\"Institution_harm\"].value_counts()\n", "# univ_norm[\"search_for\"] = univ_norm[\"Institution\"]+\", \" + univ_norm[\"Country\"]+ \", wikipedia\"\n", "univ_norm.sample(100)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 197, "outputs": [], "source": [ "# from pandarallel import pandarallel\n", "# pandarallel.initialize(progress_bar=True, nb_workers=2)\n", "#\n", "# df_sample[\"search_result\"] = df_sample[\"search_for\"].parallel_apply(wikinorm)" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 198, "outputs": [], "source": [ "def ngrams(string, n=3):\n", "\n", " string = re.sub(r'[,-./]|\\sBD',r'', string)\n", " ngrams = zip(*[string[i:] for i in range(n)])\n", " return [''.join(ngram) for ngram in ngrams]\n", "\n", "# calculate the similarity between two vectors of TF-IDF values the Cosine Similarity is usually used.\n", "# result matrix in a very sparse terms and Scikit-learn deals with this nicely by returning a sparse CSR matrix.\n", "\n", "def awesome_cossim_top(A, B, ntop, lower_bound=0):\n", " # force A and B as a CSR matrix.\n", " # If they have already been CSR, there is no overhead\n", " A = A.tocsr()\n", " B = B.tocsr()\n", " M, _ = A.shape\n", " _, N = B.shape\n", "\n", " idx_dtype = np.int32\n", "\n", " nnz_max = M*ntop\n", "\n", " indptr = np.zeros(M+1, dtype=idx_dtype)\n", " indices = np.zeros(nnz_max, dtype=idx_dtype)\n", " data = np.zeros(nnz_max, dtype=A.dtype)\n", "\n", " ct.sparse_dot_topn(\n", " M, N, np.asarray(A.indptr, dtype=idx_dtype),\n", " np.asarray(A.indices, dtype=idx_dtype),\n", " A.data,\n", " np.asarray(B.indptr, dtype=idx_dtype),\n", " np.asarray(B.indices, dtype=idx_dtype),\n", " B.data,\n", " ntop,\n", " lower_bound,\n", " indptr, indices, data)\n", "\n", " return 
csr_matrix((data,indices,indptr),shape=(M,N))\n", "\n", "# unpacks the resulting sparse matrix\n", "\n", "def get_matches_df(sparse_matrix, name_vector, top=None):\n", " non_zeros = sparse_matrix.nonzero()\n", "\n", " sparserows = non_zeros[0]\n", " sparsecols = non_zeros[1]\n", "\n", " if top:\n", " nr_matches = top\n", " else:\n", " nr_matches = sparsecols.size\n", "\n", " left_side = np.empty([nr_matches], dtype=object)\n", " right_side = np.empty([nr_matches], dtype=object)\n", " similarity = np.zeros(nr_matches)\n", "\n", " for index in range(0, nr_matches):\n", " left_side[index] = name_vector[sparserows[index]]\n", " right_side[index] = name_vector[sparsecols[index]]\n", " similarity[index] = sparse_matrix.data[index]\n", "\n", " return pd.DataFrame({'left_side': left_side,\n", " 'right_side': right_side,\n", " 'similarity': similarity})\n", "\n", "\n", "def discrepancy_filter(df):\n", " f_df = df.copy()\n", " tokenlist = [\"Med\", \"Hosp\", \"Tech\", \"Univ\", \"Acad\", \"Poly\"]\n", " for token in tokenlist:\n", " f_df = f_df[~(((f_df[\"right_side\"].str.contains(token))&\n", " (~f_df[\"left_side\"].str.contains(token)))\n", " |\n", " ((f_df[\"left_side\"].str.contains(token))&\n", " (~f_df[\"right_side\"].str.contains(token))))].copy()\n", " return f_df\n", "\n", "\n", "# Define a function to get the high and low counts for each row\n", "def get_high_low_counts(row):\n", " if row['left_count'] > row['right_count']:\n", " high_count = row['left_count']\n", " low_count = row['right_count']\n", " else: #row['left_count'] < row['right_count']:\n", " high_count = row['right_count']\n", " low_count = row['left_count']\n", " # else:\n", " # if len(row['left_side']) > len(row['right_side']):\n", " # high_count = len(row['left_side'])\n", " # low_count = len(row['right_side'])\n", " # else:\n", " # high_count = len(row['right_side'])\n", " # low_count = len(row['left_side'])\n", " return pd.Series([high_count, low_count])" ], "metadata": { "collapsed": false } }, 
{ "cell_type": "code", "execution_count": 199, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 31/31 [00:00<00:00, 32.89it/s]\n" ] } ], "source": [ "merger = pd.DataFrame()\n", "\n", "# for i in tqdm(filter(lambda c: c!=\"China\", list(univ_norm[\"Country\"].unique()))):\n", "for i in tqdm(list(univ_norm[\"Country\"].unique())):\n", " sub_inst = univ_norm[univ_norm[\"Country\"]==i].reset_index()\n", " types = sub_inst['Institution_harm']\n", " vectorizer = TfidfVectorizer(min_df=1, analyzer=ngrams)\n", " tf_idf_matrix = vectorizer.fit_transform(types)\n", " t1 = time.time()\n", " matches = awesome_cossim_top(tf_idf_matrix, tf_idf_matrix.transpose(), 10, 0.8 if i!=\"China\" else 0.9)\n", " t = time.time()-t1\n", "\n", " # store the matches into new dataframe called matched_df and printing 10 samples\n", " matches_df = get_matches_df(matches, types)\n", " matches_df = matches_df[matches_df['similarity'] < 0.99999] # For removing all exact matches\n", " matches_df = discrepancy_filter(matches_df).reset_index(drop=True)\n", " matches_df[\"Country\"] = i\n", " # matches_df = matches_df[pd.DataFrame(np.sort(matches_df[['left_side','right_side']].values,1)).duplicated()]\n", " # matches_df = matches_df[~matches_df[['left_side', 'right_side']].apply(frozenset, axis=1).duplicated()]\n", " merger = pd.concat([merger,matches_df], ignore_index=True)\n", "\n", "for s in [\"left\",\"right\"]:\n", " merger[f\"{s}_count\"] = merger[f\"{s}_side\"].apply(lambda x: len(univ[univ[\"Institution_harm\"] == x]))\n", "\n", "# Apply the function to create a new column\n", "merger[['high_count', 'low_count']] = merger.apply(get_high_low_counts, axis=1)\n", "\n", "# Use apply again to create the high_side and low_side columns\n", "merger['high_side'] = merger.apply(lambda row: row['left_side'] if row['left_count'] > row['right_count'] else row['right_side'], axis=1)\n", "merger['low_side'] = merger.apply(lambda row: row['left_side'] if row['left_count'] 
<= row['right_count'] else row['right_side'], axis=1)\n", "\n", "# Drop the high_count and low_count columns if they are not needed\n", "# merger.drop(['high_count', 'low_count'], axis=1, inplace=True)" ], "metadata": { "collapsed": false } }, { "cell_type": "markdown", "source": [], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 200, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1538\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "1538it [01:04, 23.79it/s]\n" ] } ], "source": [ "fuzzymerger = merger[[\"Country\",\"low_side\",\"high_side\",\"high_count\",\"low_count\",\"similarity\"]].drop_duplicates()\n", "fuzzymerger = fuzzymerger.sort_values(by=[\"low_side\",\"high_count\"], ascending=[True,False])\n", "fuzzymerger = fuzzymerger.drop_duplicates(subset=[\"Country\",\"low_side\"]).sort_values(by=\"high_count\", ascending=True).reset_index(drop=True)\n", "print(len(fuzzymerger))\n", "univ_harm = univ.copy()\n", "univ_harm[\"merge_iter\"] = 0\n", "for i,row in tqdm(fuzzymerger.iterrows()):\n", " univ_harm.loc[((univ_harm[\"Country\"]==row[\"Country\"])&\n", " (univ_harm[\"Institution_harm\"]==row[\"low_side\"])),\"merge_iter\"] += 1\n", " univ_harm.loc[((univ_harm[\"Country\"]==row[\"Country\"])&\n", " (univ_harm[\"Institution_harm\"]==row[\"low_side\"])),\"Institution_harm\"] = row[\"high_side\"]" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 201, "outputs": [ { "data": { "text/plain": " Country low_side \n0 China Logist Univ Chinese Peoples Armed Police Forces \\\n9 China Flight Automat Control Res Inst \n10 China Northwest Elect Power Design Inst Co Ltd \n11 China Northwest Elect Power Design Inst Co Ltd China \n12 China Northwest Inst Ecoenvironm & Resources \n... ... ... 
\n1531 China Chinese Univ Hong Kong Hong \n1532 China Huazhong Univ Sci & Techno \n1533 China Hong Kong Polytech Univ Hong Kong \n1534 China Kong Kong Polytech Univ \n1537 China Univ Elect Sci & Technol Chin \n\n high_side high_count \n0 Logist Univ Chinese Peoples Armed Police Force 1 \\\n9 Xian Flight Automat Control Res Inst 1 \n10 Northwest Elect Power Design Inst Co Ltd China 1 \n11 Northwest Elect Power Design Inst Co Ltd 1 \n12 Northwest Inst Ecoenvironm & Resources Chinese Ac 1 \n... ... ... \n1531 Chinese Univ Hong Kong 728 \n1532 Huazhong Univ Sci & Technol 729 \n1533 Hong Kong Polytech Univ 809 \n1534 Hong Kong Polytech Univ 809 \n1537 Univ Elect Sci & Technol China 1076 \n\n low_count similarity \n0 1 0.988072 \n9 1 0.905747 \n10 1 0.926984 \n11 1 0.926984 \n12 1 0.910630 \n... ... ... \n1531 1 0.935944 \n1532 1 0.989260 \n1533 1 0.917345 \n1534 1 0.939416 \n1537 1 0.983258 \n\n[346 rows x 6 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Countrylow_sidehigh_sidehigh_countlow_countsimilarity
0ChinaLogist Univ Chinese Peoples Armed Police ForcesLogist Univ Chinese Peoples Armed Police Force110.988072
9ChinaFlight Automat Control Res InstXian Flight Automat Control Res Inst110.905747
10ChinaNorthwest Elect Power Design Inst Co LtdNorthwest Elect Power Design Inst Co Ltd China110.926984
11ChinaNorthwest Elect Power Design Inst Co Ltd ChinaNorthwest Elect Power Design Inst Co Ltd110.926984
12ChinaNorthwest Inst Ecoenvironm & ResourcesNorthwest Inst Ecoenvironm & Resources Chinese Ac110.910630
.....................
1531ChinaChinese Univ Hong Kong HongChinese Univ Hong Kong72810.935944
1532ChinaHuazhong Univ Sci & TechnoHuazhong Univ Sci & Technol72910.989260
1533ChinaHong Kong Polytech Univ Hong KongHong Kong Polytech Univ80910.917345
1534ChinaKong Kong Polytech UnivHong Kong Polytech Univ80910.939416
1537ChinaUniv Elect Sci & Technol ChinUniv Elect Sci & Technol China107610.983258
\n

346 rows × 6 columns

\n
" }, "execution_count": 201, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# fuzzymerger[fuzzymerger[\"Country\"]==\"China\"]" ], "metadata": { "collapsed": false } }, { "cell_type": "markdown", "source": [], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 202, "outputs": [ { "data": { "text/plain": " UT (Unique WOS ID) Institution Country \n244 WOS:000286472300003 Univ Trent Italy \\\n364 WOS:000287586100011 Univ Trent Italy \n410 WOS:000287939200011 Abdus Salam Int Ctr Theoret Phys Italy \n765 WOS:000290996200002 Univ Trent Italy \n907 WOS:000291698400013 INFN Sez Roma 1 Italy \n... ... ... ... \n153063 WOS:000900129900175 Univ Rome Campus Biomed Aquila Italy \n154775 WOS:000929737300001 Prevent & Res Inst Italy \n154813 WOS:000929737300001 Ist Super Sanit Italy \n154855 WOS:000933331200004 Univ Federio II Italy \n154857 WOS:000933331200004 INAF Osservatorio Astron Capodimonte Italy \n\n Institution_harm merge_iter \n244 Univ Trento 1 \n364 Univ Trento 1 \n410 Abdus Salaam Int Ctr Theoret Phys 1 \n765 Univ Trento 1 \n907 Sez Roma 1 \n... ... ... \n153063 Univ Rome Campus Biomed Aquila 2 \n154775 Prevent & Res Inst 2 \n154813 Ist Super Sanita 1 \n154855 Univ Federico 1 \n154857 Osserv Astron Capodimonte 1 \n\n[375 rows x 5 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
UT (Unique WOS ID)InstitutionCountryInstitution_harmmerge_iter
244WOS:000286472300003Univ TrentItalyUniv Trento1
364WOS:000287586100011Univ TrentItalyUniv Trento1
410WOS:000287939200011Abdus Salam Int Ctr Theoret PhysItalyAbdus Salaam Int Ctr Theoret Phys1
765WOS:000290996200002Univ TrentItalyUniv Trento1
907WOS:000291698400013INFN Sez Roma 1ItalySez Roma1
..................
153063WOS:000900129900175Univ Rome Campus Biomed AquilaItalyUniv Rome Campus Biomed Aquila2
154775WOS:000929737300001Prevent & Res InstItalyPrevent & Res Inst2
154813WOS:000929737300001Ist Super SanitItalyIst Super Sanita1
154855WOS:000933331200004Univ Federio IIItalyUniv Federico1
154857WOS:000933331200004INAF Osservatorio Astron CapodimonteItalyOsserv Astron Capodimonte1
\n

375 rows × 5 columns

\n
" }, "execution_count": 202, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# univ_harm[((univ_harm[\"merge_iter\"]>0) & (univ_harm[\"Country\"]==\"Italy\"))]" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 208, "outputs": [], "source": [ "univ_harm.loc[((univ_harm[\"merge_iter\"]>0) & (univ_harm[\"Country\"]==\"Italy\")&\n", " (univ_harm[\"Institution\"].str.lower().str.contains(\"sapien\"))&\n", " (univ_harm[\"Institution\"].str.lower().str.contains(\"univ\"))), \"Institution_harm\"] = \"Sapienza Univ Rome\"" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 209, "outputs": [ { "data": { "text/plain": "Institution 17083\nInstitution_harm 14449\ndtype: int64" }, "execution_count": 209, "metadata": {}, "output_type": "execute_result" } ], "source": [ "univ_harm[[\"Institution\",\"Institution_harm\"]].nunique()" ], "metadata": { "collapsed": false } }, { "cell_type": "code", "execution_count": 210, "outputs": [], "source": [ "univ_harm.to_excel(f\"{outdir}/wos_institution_locations_harmonized.xlsx\", index=False)" ], "metadata": { "collapsed": false } } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }