OpenSearch-AI

Running on CPU Upgrade

App Files Files

prasadnu committed on May 21

Commit

c86ed06

1 Parent(s): 97e72c0

mvectors

Browse files

Files changed (3) hide show

pages/Semantic_Search.py +58 -10
semantic_search/all_search_execute.py +2 -0
utilities/mvectors.py +1 -1

pages/Semantic_Search.py CHANGED Viewed

@@ -24,8 +24,8 @@ import base64
 import shutil
 import re
 from requests.auth import HTTPBasicAuth
-# from nltk.stem import PorterStemmer
-# from nltk.tokenize import word_tokenize
 import query_rewrite
 import amazon_rekognition
 from streamlit.components.v1 import html
@@ -71,7 +71,7 @@ st.markdown("""
-#ps = PorterStemmer()
 st.session_state.REGION = 'us-east-1'
 USER_ICON = "images/user.png"
@@ -113,6 +113,9 @@ if "chats" not in st.session_state:
 if "questions" not in st.session_state:
     st.session_state.questions = []
 if "clear_" not in st.session_state:
     st.session_state.clear_ = False
@@ -744,14 +747,14 @@ def write_user_message(md,ans):
         st.markdown('---')
-# def stem_(sentence):
-#     words = word_tokenize(sentence)
-#     words_stem = []
-#     for w in words:
-#         words_stem.append( ps.stem(w))
-#     return words_stem
 def render_answer(answer,index):
     column1, column2 = st.columns([6,90])
@@ -790,7 +793,52 @@ def render_answer(answer,index):
                 with inner_col_2:
                     st.image(ans['image_url'].replace("/home/ec2-user/SageMaker/","/home/user/"))
-                    if("highlight" in ans and 'Keyword Search' in st.session_state.input_searchType):
                         test_strs = ans["highlight"]
                         tag = "em"
                         res__ = []

 import shutil
 import re
 from requests.auth import HTTPBasicAuth
+from nltk.stem import PorterStemmer
+from nltk.tokenize import word_tokenize
 import query_rewrite
 import amazon_rekognition
 from streamlit.components.v1 import html
+ps = PorterStemmer()
 st.session_state.REGION = 'us-east-1'
 USER_ICON = "images/user.png"
 if "questions" not in st.session_state:
     st.session_state.questions = []
+if "input_mvector_rerank" not in st.session_state:
+    st.session_state.input_colBert_rerank = False
 if "clear_" not in st.session_state:
     st.session_state.clear_ = False
         st.markdown('---')
+def stem_(sentence):
+    words = word_tokenize(sentence)
+    words_stem = []
+    for w in words:
+        words_stem.append( ps.stem(w))
+    return words_stem
 def render_answer(answer,index):
     column1, column2 = st.columns([6,90])
                 with inner_col_2:
                     st.image(ans['image_url'].replace("/home/ec2-user/SageMaker/","/home/user/"))
+                    if('max_score_dict_list_sorted' in ans and 'Vector Search' in st.session_state.input_searchType):
+                        desc___ = ans['desc'].split(" ")
+                        res___ = []
+                        for o in ans['max_score_dict_list_sorted']:
+                            res___.append(o['doc_token'])
+                        final_desc_ = "<p></p><p>"
+                        for word_ in desc___:
+                            str_=re.sub('[^A-Za-z0-9]+', '', word_).lower()
+                            ###### stemming and highlighting
+                            # ans_text = ans['desc']
+                            # query_text = st.session_state.input_text
+                            stemmed_word = next(iter(set(stem_(str_))))
+                            # print("stemmed_word-------------------")
+                            # print(stemmed_word)
+                            # common = ans_text_stemmed.intersection( query_text_stemmed)
+                            # #unique = set(document_1_words).symmetric_difference(  )
+                            # desc__stemmed = stem_(desc__)
+                            #print(str_)
+                            if(stemmed_word in res___ or str_ in res___):
+                                if(stemmed_word in res___):
+                                    mod_word = stemmed_word
+                                else:
+                                    mod_word = str_
+                                #print(str_)
+                                if(res___.index(mod_word)==0):
+                                    #print(str_)
+                                    final_desc_ +=  "<span style='color:#ffffff;background-color:#8B0001;font-weight:bold'>"+word_+"</span> "
+                                elif(res___.index(mod_word)==1):
+                                    #print(str_)
+                                    final_desc_ +=  "<span style='color:#ffffff;background-color:#C34632;font-weight:bold'>"+word_+"</span> "
+                                else:
+                                    #print(str_)
+                                    final_desc_ +=  "<span style='color:#ffffff;background-color:#E97452;font-weight:bold'>"+word_+"</span> "
+                            else:
+                                final_desc_ += word_ + " "
+                        final_desc_ += "</p><br>"
+                        #print(final_desc_)
+                        st.markdown(final_desc_,unsafe_allow_html = True)
+                    elif("highlight" in ans and 'Keyword Search' in st.session_state.input_searchType):
                         test_strs = ans["highlight"]
                         tag = "em"
                         res__ = []

semantic_search/all_search_execute.py CHANGED Viewed

@@ -512,6 +512,8 @@ def handler(input_,session_id):
                 "style":doc['_source']['style'],
                 }
             if('highlight' in doc):
                 res_['highlight'] = doc['highlight']['product_description']
             if('NeuralSparse Search' in search_types):

                 "style":doc['_source']['style'],
                 }
+            if('max_score_dict_list_sorted' in doc):
+                res_['max_score_dict_list_sorted'] = doc['max_score_dict_list_sorted']
             if('highlight' in doc):
                 res_['highlight'] = doc['highlight']['product_description']
             if('NeuralSparse Search' in search_types):

utilities/mvectors.py CHANGED Viewed

@@ -56,7 +56,7 @@ def search(hits):
         doc={"_source":
             {
             "description":j["_source"]["description"],"caption":j["_source"]["title"],
-            "image_s3_url":j["_source"]["image_s3_url"],"price":j["_source"]["price"],
             "style":j["_source"]["style"],"category":j["_source"]["category"]},"_id":j["_id"],"_score":j["_score"]}
         if("gender_affinity" in j["_source"]):

         doc={"_source":
             {
             "description":j["_source"]["description"],"caption":j["_source"]["title"],
+            "image_url":j["_source"]["image_s3_url"],"price":j["_source"]["price"],
             "style":j["_source"]["style"],"category":j["_source"]["category"]},"_id":j["_id"],"_score":j["_score"]}
         if("gender_affinity" in j["_source"]):