Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
multilingual
Browse files
pages/Semantic_Search.py
CHANGED
|
@@ -285,12 +285,7 @@ if(search_all_type==True):
|
|
| 285 |
'NeuralSparse Search',
|
| 286 |
]
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
def generate_images(tab,inp_):
|
| 295 |
#write_top_bar()
|
| 296 |
seed = random.randint(1, 10)
|
|
|
|
| 285 |
'NeuralSparse Search',
|
| 286 |
]
|
| 287 |
|
| 288 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
def generate_images(tab,inp_):
|
| 290 |
#write_top_bar()
|
| 291 |
seed = random.randint(1, 10)
|
semantic_search/all_search_execute.py
CHANGED
|
@@ -68,7 +68,7 @@ def handler(input_,session_id):
|
|
| 68 |
print("*********")
|
| 69 |
print(input_)
|
| 70 |
search_types = input_["searchType"]
|
| 71 |
-
|
| 72 |
if("NormType" not in input_.keys()):
|
| 73 |
norm_type = "min_max"
|
| 74 |
else:
|
|
@@ -111,37 +111,70 @@ def handler(input_,session_id):
|
|
| 111 |
weights.append(weight)
|
| 112 |
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
|
|
|
|
|
|
| 128 |
}
|
| 129 |
}
|
| 130 |
-
|
| 131 |
-
}
|
| 132 |
-
]
|
| 133 |
|
| 134 |
-
|
| 135 |
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
if(st.session_state.input_rewritten_query!=""):
|
| 146 |
filter_ = {"filter": {
|
| 147 |
"bool": {
|
|
@@ -456,17 +489,30 @@ def handler(input_,session_id):
|
|
| 456 |
|
| 457 |
else:
|
| 458 |
if( st.session_state.input_hybridType == "OpenSearch Hybrid Query"):
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
| 470 |
r = requests.get(url_, auth=awsauth, json=hybrid_payload, headers=headers)
|
| 471 |
response_ = json.loads(r.text)
|
| 472 |
docs = response_['hits']['hits']
|
|
|
|
| 68 |
print("*********")
|
| 69 |
print(input_)
|
| 70 |
search_types = input_["searchType"]
|
| 71 |
+
print(type(search_types))
|
| 72 |
if("NormType" not in input_.keys()):
|
| 73 |
norm_type = "min_max"
|
| 74 |
else:
|
|
|
|
| 111 |
weights.append(weight)
|
| 112 |
|
| 113 |
|
| 114 |
+
if(num_queries>1):
|
| 115 |
+
######## Updating hybrid Search pipeline #######
|
| 116 |
+
print("Updating Search pipeline with new weights")
|
| 117 |
+
s_pipeline_payload = {"version": 1234}
|
| 118 |
+
s_pipeline_payload["phase_results_processors"] = [
|
| 119 |
+
{
|
| 120 |
+
"normalization-processor": {
|
| 121 |
+
"normalization": {
|
| 122 |
+
"technique": norm_type
|
| 123 |
+
},
|
| 124 |
+
"combination": {
|
| 125 |
+
"technique": combine_type,
|
| 126 |
+
"parameters": {
|
| 127 |
+
"weights": weights
|
| 128 |
+
}
|
| 129 |
+
}
|
| 130 |
}
|
| 131 |
}
|
| 132 |
+
]
|
|
|
|
|
|
|
| 133 |
|
| 134 |
+
hybrid_search_processor = s_pipeline_payload["phase_results_processors"]
|
| 135 |
|
| 136 |
+
opensearch_search_pipeline = (requests.get(host+'_search/pipeline/hybrid_search_pipeline', auth=awsauth,headers=headers)).text
|
| 137 |
+
if(opensearch_search_pipeline!='{}'):
|
| 138 |
+
path = "_search/pipeline/hybrid_search_pipeline"
|
| 139 |
+
url = host + path
|
| 140 |
+
r = requests.put(url, auth=awsauth, json=s_pipeline_payload, headers=headers)
|
| 141 |
+
print("Hybrid Search Pipeline updated: "+str(r.status_code))
|
| 142 |
+
######## Combining hybrid+rerank pipeline #######
|
| 143 |
+
path = "_search/pipeline/hybrid_rerank_pipeline"
|
| 144 |
+
url = host + path
|
| 145 |
+
s_pipeline_payload['response_processors'] = [
|
| 146 |
+
{
|
| 147 |
+
"rerank": {
|
| 148 |
+
"ml_opensearch": {
|
| 149 |
+
"model_id": "deBS3pYB5VHEj-qVuPHT"
|
| 150 |
+
},
|
| 151 |
+
"context": {
|
| 152 |
+
"document_fields": [
|
| 153 |
+
"product_description"
|
| 154 |
+
]
|
| 155 |
+
}
|
| 156 |
+
}
|
| 157 |
+
}
|
| 158 |
+
]
|
| 159 |
+
r = requests.put(url, auth=awsauth, json=s_pipeline_payload, headers=headers)
|
| 160 |
+
print("Hybrid Rerank Search Pipeline updated: "+str(r.status_code))
|
| 161 |
+
|
| 162 |
+
######## Updating opensearch_translation_pipeline Search pipeline #######
|
| 163 |
+
# Decode the GET response instead of keeping `.text`: the item assignment below needs a dict
# (a str raises TypeError there). Per the OpenSearch search-pipeline API the response is keyed
# by pipeline id, so keep only the definition that is PUT back. KeyError if the pipeline is missing.
opensearch_translation_pipeline = requests.get(host+'_search/pipeline/ml_inference_for_vector_search_and_language_translation', auth=awsauth, headers=headers).json()["ml_inference_for_vector_search_and_language_translation"]
|
| 164 |
+
path = "_search/pipeline/ml_inference_for_vector_search_and_language_translation"
|
| 165 |
+
url = host + path
|
| 166 |
+
opensearch_translation_pipeline["phase_results_processors"] = hybrid_search_processor
|
| 167 |
+
r = requests.put(url, auth=awsauth, json=opensearch_translation_pipeline, headers=headers)
|
| 168 |
+
print("translation hybrid Search Pipeline updated: "+str(r.status_code))
|
| 169 |
+
|
| 170 |
+
######## Updating opensearch_translation_pipeline_with_rerank Search pipeline #######
|
| 171 |
+
# Decode the GET response instead of keeping `.text`: the item assignment below needs a dict
# (a str raises TypeError there). Per the OpenSearch search-pipeline API the response is keyed
# by pipeline id, so keep only the definition that is PUT back. KeyError if the pipeline is missing.
opensearch_translation_pipeline_with_rerank = requests.get(host+'_search/pipeline/ml_inference_for_vector_search_and_language_translation_with_rerank', auth=awsauth, headers=headers).json()["ml_inference_for_vector_search_and_language_translation_with_rerank"]
|
| 172 |
+
path = "_search/pipeline/ml_inference_for_vector_search_and_language_translation_with_rerank"
|
| 173 |
+
url = host + path
|
| 174 |
+
opensearch_translation_pipeline_with_rerank["phase_results_processors"] = hybrid_search_processor
|
| 175 |
+
r = requests.put(url, auth=awsauth, json=opensearch_translation_pipeline_with_rerank, headers=headers)
|
| 176 |
+
print("translation hybrid rerank Search Pipeline updated: "+str(r.status_code))
|
| 177 |
+
######## start of Applying LLM filters #######
|
| 178 |
if(st.session_state.input_rewritten_query!=""):
|
| 179 |
filter_ = {"filter": {
|
| 180 |
"bool": {
|
|
|
|
| 489 |
|
| 490 |
else:
|
| 491 |
if( st.session_state.input_hybridType == "OpenSearch Hybrid Query"):
|
| 492 |
+
if(st.session_state.input_multilingual):
|
| 493 |
+
if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
|
| 494 |
+
path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation_with_rerank"
|
| 495 |
+
# Assign to url_ (the name the search request is sent to) rather than url, which the
# non-multilingual branch uses as its base and must not be overwritten.
# NOTE(review): assumes nothing after the request relies on url holding this value - confirm.
url_ = host + path
|
| 496 |
+
hybrid_payload["ext"] = {"rerank": {
|
| 497 |
+
"query_context": {
|
| 498 |
+
"query_text": query
|
| 499 |
+
}
|
| 500 |
+
}}
|
| 501 |
+
else:
|
| 502 |
+
path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation"
|
| 503 |
+
# Assign to url_ (the name the search request is sent to) rather than url, which the
# non-multilingual branch uses as its base and must not be overwritten.
# NOTE(review): assumes nothing after the request relies on url holding this value - confirm.
url_ = host + path
|
| 504 |
+
else:
|
| 505 |
+
url_ = url + "?search_pipeline=hybrid_search_pipeline"
|
| 506 |
|
| 507 |
+
if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
|
| 508 |
+
|
| 509 |
+
url_ = url + "?search_pipeline=hybrid_rerank_pipeline"
|
| 510 |
+
|
| 511 |
+
hybrid_payload["ext"] = {"rerank": {
|
| 512 |
+
"query_context": {
|
| 513 |
+
"query_text": query
|
| 514 |
+
}
|
| 515 |
+
}}
|
| 516 |
r = requests.get(url_, auth=awsauth, json=hybrid_payload, headers=headers)
|
| 517 |
response_ = json.loads(r.text)
|
| 518 |
docs = response_['hits']['hits']
|