Spaces:

taskswithcode
/

semantic_search

Runtime error

App Files Files Community

taskswithcode committed on Sep 18, 2022

Commit

e227e49

1 Parent(s): 57eed52

Fixes

Browse files

Files changed (2) hide show

app.py +44 -22
doc_app_models.json +5 -5

app.py CHANGED Viewed

@@ -34,13 +34,11 @@ INFO_URL = "http://www.taskswithcode.com/stats/"
 def get_views(action):
-    print("in get views",action)
     ret_val = 0
     hostname = socket.gethostname()
     ip_address = socket.gethostbyname(hostname)
     if ("view_count" not in st.session_state):
         try:
-           print("inside get views")
            app_info = {'name': APP_NAME,"action":action,"host":hostname,"ip":ip_address}
            res = requests.post(INFO_URL, json = app_info).json()
            print(res)
@@ -61,7 +59,8 @@ def get_views(action):
 def construct_model_info_for_display(model_names):
     options_arr  = []
-    markdown_str = f"<div style=\"font-size:16px; color: #2f2f2f; text-align: left\"><br/><b>Models evaluated ({len(model_names)})</b></div>"
     for node in model_names:
         options_arr .append(node["name"])
         if (node["mark"] == "True"):
@@ -88,20 +87,19 @@ with col:
 @st.experimental_memo
-def load_model(model_name,model_names):
     try:
         ret_model = None
-        for node in model_names:
-            if (model_name.startswith(node["name"])):
-                obj_class = globals()[node["class"]]
-                ret_model = obj_class()
-                ret_model.init_model(node["model"])
         assert(ret_model is not None)
     except Exception as e:
-        st.error("Unable to load model:" + model_name + " " +  str(e))
         pass
     return ret_model
 @st.experimental_memo
 def cached_compute_similarity(sentences,_model,model_name,main_index):
@@ -117,18 +115,27 @@ def uncached_compute_similarity(sentences,_model,model_name,main_index):
     #st.success("Similarity computation complete")
     return results
 def get_model_info(model_names,model_name):
     for node in model_names:
         if (model_name == node["name"]):
-            return node
-def run_test(model_names,model_name,sentences,display_area,main_index,user_uploaded):
     display_area.text("Loading model:" + model_name)
-    model_info = get_model_info(model_names,model_name)
     if ("Note" in model_info):
         fail_link = f"{model_info['Note']} [link]({model_info['alt_url']})"
         display_area.write(fail_link)
-    model = load_model(model_name,model_names)
     display_area.text("Model " + model_name  + " load complete")
     try:
             if (user_uploaded):
@@ -148,9 +155,10 @@ def run_test(model_names,model_name,sentences,display_area,main_index,user_uploa
-def display_results(orig_sentences,main_index,results,response_info,app_mode):
     main_sent = f"<div style=\"font-size:14px; color: #2f2f2f; text-align: left\">{response_info}<br/><br/></div>"
-    score_text = "cosine_distance" if app_mode == SEM_SIMILARITY else "cosine_distance/score"
     pivot_name = "main sentence" if app_mode == SEM_SIMILARITY else "query"
     main_sent += f"<div style=\"font-size:14px; color: #6f6f6f; text-align: left\">Results sorted by {score_text}. Closest to furthest away from {pivot_name}</div>"
     pivot_name = pivot_name[0].upper() + pivot_name[1:]
@@ -172,10 +180,14 @@ def display_results(orig_sentences,main_index,results,response_info,app_mode):
 def init_session():
-    st.session_state["download_ready"] = None
-    st.session_state["model_name"] = "ss_test"
-    st.session_state["main_index"] = 1
-    st.session_state["file_name"] = "default"
 def app_main(app_mode,example_files,model_name_files):
   init_session()
@@ -185,6 +197,7 @@ def app_main(app_mode,example_files,model_name_files):
         model_names = json.load(fp)
   curr_use_case = use_case[app_mode].split(".")[0]
   st.markdown("<h5 style='text-align: center;'>Compare popular/state-of-the-art models for tasks using sentence embeddings</h5>", unsafe_allow_html=True)
   st.markdown(f"<div style='color: #4f4f4f; text-align: left'>Use cases for sentence embeddings<br/>&nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;<a href=\'{use_case_url['1']}\' target='_blank'>{use_case['1']}</a><br/>&nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;{use_case['2']}<br/>&nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;{use_case['3']}<br/><i>This app illustrates <b>'{curr_use_case}'</b> use case</i></div>", unsafe_allow_html=True)
   st.markdown(f"<div style='color: #9f9f9f; text-align: right'>views:&nbsp;{get_views('init')}</div>", unsafe_allow_html=True)
@@ -207,6 +220,9 @@ def app_main(app_mode,example_files,model_name_files):
         selected_model = st.selectbox(label=selection_label,
                     options = options_arr, index=0,  key = "twc_model")
         st.write("")
         if (app_mode == SEM_SIMILARITY):
             main_index = st.number_input('Step 3. Enter index of sentence in file to make it the main sentence',value=1,min_value = 1)
         else:
@@ -232,14 +248,20 @@ def app_main(app_mode,example_files,model_name_files):
             if (len(sentences) > MAX_INPUT):
                 st.info(f"Input sentence count exceeds maximum sentence limit. First {MAX_INPUT} out of {len(sentences)} sentences chosen")
                 sentences = sentences[:MAX_INPUT]
             st.session_state["model_name"] = selected_model
             st.session_state["main_index"] = main_index
-            results = run_test(model_names,selected_model,sentences,display_area,main_index - 1,(uploaded_file is not None))
             display_area.empty()
             with display_area.container():
                 device = 'GPU' if torch.cuda.is_available() else 'CPU'
                 response_info = f"Computation time on {device}: {time.time() - start:.2f} secs for {len(sentences)} sentences"
-                display_results(sentences,main_index - 1,results,response_info,app_mode)
                 #st.json(results)
       st.download_button(
          label="Download results as json",

 def get_views(action):
     ret_val = 0
     hostname = socket.gethostname()
     ip_address = socket.gethostbyname(hostname)
     if ("view_count" not in st.session_state):
         try:
            app_info = {'name': APP_NAME,"action":action,"host":hostname,"ip":ip_address}
            res = requests.post(INFO_URL, json = app_info).json()
            print(res)
 def construct_model_info_for_display(model_names):
     options_arr  = []
+    markdown_str = f"<div style=\"font-size:16px; color: #2f2f2f; text-align: left\"><br/><b>Models evaluated ({len(model_names)})</b><br/><i>These are either state-of-the-art or the most downloaded models on Huggingface</i></div>"
+    markdown_str += f"<div style=\"font-size:2px; color: #2f2f2f; text-align: left\"><br/></div>"
     for node in model_names:
         options_arr .append(node["name"])
         if (node["mark"] == "True"):
 @st.experimental_memo
 def load_model(model_name,model_class,load_model_name):
     """Instantiate and initialise the wrapper class for a model.

     Args:
         model_name: Name used only in the error message shown on failure.
         model_class: Name of the wrapper class; it is looked up in this
             module's ``globals()``.
         load_model_name: Identifier passed to the wrapper's ``init_model``.

     Returns:
         The initialised wrapper instance, or ``None`` if the class lookup,
         construction or ``init_model`` raised. The failure is reported with
         ``st.error`` rather than propagated.
     """
     try:
         obj_class = globals()[model_class]
         ret_model = obj_class()
         ret_model.init_model(load_model_name)
     except Exception as e:
         st.error("Unable to load model:" + model_name + " " + load_model_name + " " +  str(e))
         # Never hand back an instance whose init_model() failed part-way:
         # callers would otherwise receive an unusable, half-initialised model.
         return None
     return ret_model
 @st.experimental_memo
 def cached_compute_similarity(sentences,_model,model_name,main_index):
     #st.success("Similarity computation complete")
     return results
DEFAULT_HF_MODEL = "sentence-transformers/paraphrase-MiniLM-L6-v2"


def get_model_info(model_names, model_name):
    """Look up a model entry by name, falling back to the default entry.

    Args:
        model_names: Iterable of dict entries, each with a ``"name"`` key.
        model_name: Name to search for (compared for equality with
            ``node["name"]``).

    Returns:
        A ``(node, resolved_name)`` tuple. ``resolved_name`` equals
        ``model_name`` on a direct hit; otherwise it is ``DEFAULT_HF_MODEL``
        and ``node`` is the default model's entry.

    Raises:
        ValueError: If neither ``model_name`` nor ``DEFAULT_HF_MODEL`` is
            present in ``model_names``.
    """
    for node in model_names:
        if model_name == node["name"]:
            return node, model_name
    if model_name == DEFAULT_HF_MODEL:
        # The fallback itself is missing; recursing again would never
        # terminate, so fail with an explicit, actionable error instead.
        raise ValueError(
            "Default model " + DEFAULT_HF_MODEL + " is not present in the model list"
        )
    return get_model_info(model_names, DEFAULT_HF_MODEL)
+def run_test(model_names,model_name,sentences,display_area,main_index,user_uploaded,custom_model):
     display_area.text("Loading model:" + model_name)
+    #Note. model_name may get mapped to new name in the call below for custom models
+    orig_model_name = model_name
+    model_info,model_name = get_model_info(model_names,model_name)
+    if (model_name != orig_model_name):
+        load_model_name  = orig_model_name
+    else:
+        load_model_name = model_info["model"]
     if ("Note" in model_info):
         fail_link = f"{model_info['Note']} [link]({model_info['alt_url']})"
         display_area.write(fail_link)
+    model = load_model(model_name,model_info["class"],load_model_name)
     display_area.text("Model " + model_name  + " load complete")
     try:
             if (user_uploaded):
+def display_results(orig_sentences,main_index,results,response_info,app_mode,model_name):
     main_sent = f"<div style=\"font-size:14px; color: #2f2f2f; text-align: left\">{response_info}<br/><br/></div>"
+    main_sent += f"<div style=\"font-size:14px; color: #2f2f2f; text-align: left\">Showing results for model:&nbsp;<b>{model_name}</b></div>"
+    score_text = "cosine distance" if app_mode == SEM_SIMILARITY else "cosine distance/score"
     pivot_name = "main sentence" if app_mode == SEM_SIMILARITY else "query"
     main_sent += f"<div style=\"font-size:14px; color: #6f6f6f; text-align: left\">Results sorted by {score_text}. Closest to furthest away from {pivot_name}</div>"
     pivot_name = pivot_name[0].upper() + pivot_name[1:]
 def init_session():
     """Write default values into ``st.session_state`` if not already done.

     The presence of the ``"model_name"`` key is used as the marker that the
     defaults were written before; in that case nothing is modified and a
     message is printed.
     """
     if "model_name" in st.session_state:
         print("Skipping init session")
         return
     st.session_state["model_name"] = "ss_test"
     st.session_state["download_ready"] = None
     st.session_state["main_index"] = 1
     st.session_state["file_name"] = "default"
 def app_main(app_mode,example_files,model_name_files):
   init_session()
         model_names = json.load(fp)
   curr_use_case = use_case[app_mode].split(".")[0]
   st.markdown("<h5 style='text-align: center;'>Compare popular/state-of-the-art models for tasks using sentence embeddings</h5>", unsafe_allow_html=True)
+  st.markdown(f"<p style='font-size:14px; color: #4f4f4f; text-align: center'><i>Or compare your own model with state-of-the-art/popular models</p>", unsafe_allow_html=True)
   st.markdown(f"<div style='color: #4f4f4f; text-align: left'>Use cases for sentence embeddings<br/>&nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;<a href=\'{use_case_url['1']}\' target='_blank'>{use_case['1']}</a><br/>&nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;{use_case['2']}<br/>&nbsp;&nbsp;&nbsp;•&nbsp;&nbsp;{use_case['3']}<br/><i>This app illustrates <b>'{curr_use_case}'</b> use case</i></div>", unsafe_allow_html=True)
   st.markdown(f"<div style='color: #9f9f9f; text-align: right'>views:&nbsp;{get_views('init')}</div>", unsafe_allow_html=True)
         selected_model = st.selectbox(label=selection_label,
                     options = options_arr, index=0,  key = "twc_model")
         st.write("")
+        custom_model_selection = st.text_input("Model not listed above? Type any Huggingface semantic search model name ", "",key="custom_model")
+        hf_link_str = "<div style=\"font-size:12px; color: #9f9f9f; text-align: left\"><a href='https://huggingface.co/models?pipeline_tag=sentence-similarity' target = '_blank'>List of Huggingface semantic search models</a><br/><br/><br/></div>"
+        st.markdown(hf_link_str, unsafe_allow_html=True)
         if (app_mode == SEM_SIMILARITY):
             main_index = st.number_input('Step 3. Enter index of sentence in file to make it the main sentence',value=1,min_value = 1)
         else:
             if (len(sentences) > MAX_INPUT):
                 st.info(f"Input sentence count exceeds maximum sentence limit. First {MAX_INPUT} out of {len(sentences)} sentences chosen")
                 sentences = sentences[:MAX_INPUT]
+            if (len(custom_model_selection) != 0):
+                run_model = custom_model_selection
+            else:
+                run_model = selected_model
             st.session_state["model_name"] = selected_model
             st.session_state["main_index"] = main_index
+            results = run_test(model_names,run_model,sentences,display_area,main_index - 1,(uploaded_file is not None),(len(custom_model_selection) != 0))
             display_area.empty()
             with display_area.container():
                 device = 'GPU' if torch.cuda.is_available() else 'CPU'
                 response_info = f"Computation time on {device}: {time.time() - start:.2f} secs for {len(sentences)} sentences"
+                if (len(custom_model_selection) != 0):
+                    st.info("Custom model overrides model selection in step 2 above. So please clear the custom model text box to choose models from step 2")
+                display_results(sentences,main_index - 1,results,response_info,app_mode,run_model)
                 #st.json(results)
       st.download_button(
          label="Download results as json",

doc_app_models.json CHANGED Viewed

@@ -30,7 +30,7 @@
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
-                                 "task":"Over 3.8  million downloads from huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",
@@ -42,7 +42,7 @@
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
-                                 "task":"Over 2 million downloads from huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",
@@ -54,7 +54,7 @@
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
-                                 "task":"Over 700,000 downloads from huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",
@@ -66,7 +66,7 @@
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
-                                 "task":"Over 500,000 downloads from huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/all-mpnet-base-v2"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",
@@ -78,7 +78,7 @@
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
-                                 "task":"Over 500,000 downloads from huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",

                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
+                                 "task":"Over 3.8 million downloads from Huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
+                                 "task":"Over 2 million downloads from Huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
+                                 "task":"Over 700,000 downloads from Huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/bert-base-nli-mean-tokens"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
+                                 "task":"Over 500,000 downloads from Huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/all-mpnet-base-v2"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",
                 "orig_author_url":"https://github.com/UKPLab",
                 "orig_author":"Ubiquitous Knowledge Processing Lab",
                 "sota_info": {
+                                 "task":"Over 500,000 downloads from Huggingface",
                                  "sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2"
                             },
                 "paper_url":"https://arxiv.org/abs/1908.10084",