Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
search pipeline updated
Browse files
- RAG/rag_DocumentSearcher.py +1 -1
- pages/Semantic_Search.py +1 -6
- semantic_search/all_search_execute.py +0 -25
- utilities/re_ranker.py +22 -26
RAG/rag_DocumentSearcher.py
CHANGED
|
@@ -12,7 +12,7 @@ import re
|
|
| 12 |
import torch
|
| 13 |
import base64
|
| 14 |
import requests
|
| 15 |
-
import utilities.re_ranker as re_ranker
|
| 16 |
import utilities.invoke_models as invoke_models
|
| 17 |
#import langchain
|
| 18 |
headers = {"Content-Type": "application/json"}
|
|
|
|
| 12 |
import torch
|
| 13 |
import base64
|
| 14 |
import requests
|
| 15 |
+
#import utilities.re_ranker as re_ranker
|
| 16 |
import utilities.invoke_models as invoke_models
|
| 17 |
#import langchain
|
| 18 |
headers = {"Content-Type": "application/json"}
|
pages/Semantic_Search.py
CHANGED
|
@@ -24,7 +24,7 @@ import base64
|
|
| 24 |
import shutil
|
| 25 |
import re
|
| 26 |
from requests.auth import HTTPBasicAuth
|
| 27 |
-
import utilities.re_ranker as re_ranker
|
| 28 |
# from nltk.stem import PorterStemmer
|
| 29 |
# from nltk.tokenize import word_tokenize
|
| 30 |
import query_rewrite
|
|
@@ -585,11 +585,6 @@ def handle_input():
|
|
| 585 |
})
|
| 586 |
|
| 587 |
st.session_state.answers_none_rank = st.session_state.answers
|
| 588 |
-
if(st.session_state.input_reranker == "None"):
|
| 589 |
-
st.session_state.answers = st.session_state.answers_none_rank
|
| 590 |
-
else:
|
| 591 |
-
if(st.session_state.input_reranker == 'Kendra Rescore'):
|
| 592 |
-
st.session_state.answers = re_ranker.re_rank("search",st.session_state.input_reranker,st.session_state.input_searchType,st.session_state.questions, st.session_state.answers)
|
| 593 |
if(st.session_state.input_evaluate == "enabled"):
|
| 594 |
llm_eval.eval(st.session_state.questions, st.session_state.answers)
|
| 595 |
#st.session_state.input_text=""
|
|
|
|
| 24 |
import shutil
|
| 25 |
import re
|
| 26 |
from requests.auth import HTTPBasicAuth
|
| 27 |
+
#import utilities.re_ranker as re_ranker
|
| 28 |
# from nltk.stem import PorterStemmer
|
| 29 |
# from nltk.tokenize import word_tokenize
|
| 30 |
import query_rewrite
|
|
|
|
| 585 |
})
|
| 586 |
|
| 587 |
st.session_state.answers_none_rank = st.session_state.answers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 588 |
if(st.session_state.input_evaluate == "enabled"):
|
| 589 |
llm_eval.eval(st.session_state.questions, st.session_state.answers)
|
| 590 |
#st.session_state.input_text=""
|
semantic_search/all_search_execute.py
CHANGED
|
@@ -408,23 +408,12 @@ def handler(input_,session_id):
|
|
| 408 |
hybrid_payload["query"]["hybrid"]["queries"].append(sparse_payload)
|
| 409 |
|
| 410 |
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
print("hybrid_payload")
|
| 418 |
-
print(st.session_state.re_ranker)
|
| 419 |
-
print("---------------")
|
| 420 |
docs = []
|
| 421 |
|
| 422 |
if(st.session_state.input_sql_query!=""):
|
| 423 |
url = host +"_plugins/_sql?format=json"
|
| 424 |
payload = {"query":st.session_state.input_sql_query}
|
| 425 |
r = requests.post(url, auth=awsauth, json=payload, headers=headers)
|
| 426 |
-
print("^^^^^")
|
| 427 |
-
print(r.text)
|
| 428 |
|
| 429 |
if(len(hybrid_payload["query"]["hybrid"]["queries"])==1):
|
| 430 |
single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
|
|
@@ -439,24 +428,14 @@ def handler(input_,session_id):
|
|
| 439 |
}
|
| 440 |
}}
|
| 441 |
|
| 442 |
-
print(hybrid_payload)
|
| 443 |
-
print(url)
|
| 444 |
r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
|
| 445 |
-
print(r.status_code)
|
| 446 |
-
print(r.text)
|
| 447 |
response_ = json.loads(r.text)
|
| 448 |
-
print("-------------------------------------------------------------------")
|
| 449 |
-
#print(response_)
|
| 450 |
docs = response_['hits']['hits']
|
| 451 |
|
| 452 |
|
| 453 |
else:
|
| 454 |
|
| 455 |
|
| 456 |
-
print("hybrid_payload")
|
| 457 |
-
print(hybrid_payload)
|
| 458 |
-
print("-------------------------------------------------------------------")
|
| 459 |
-
|
| 460 |
if( st.session_state.input_hybridType == "OpenSearch Hybrid Query"):
|
| 461 |
url_ = url + "?search_pipeline=hybrid_search_pipeline"
|
| 462 |
|
|
@@ -469,12 +448,8 @@ def handler(input_,session_id):
|
|
| 469 |
"query_text": query
|
| 470 |
}
|
| 471 |
}}
|
| 472 |
-
print(url_)
|
| 473 |
r = requests.get(url_, auth=awsauth, json=hybrid_payload, headers=headers)
|
| 474 |
-
print(r.status_code)
|
| 475 |
response_ = json.loads(r.text)
|
| 476 |
-
print("-------------------------------------------------------------------")
|
| 477 |
-
print(response_)
|
| 478 |
docs = response_['hits']['hits']
|
| 479 |
|
| 480 |
else:
|
|
|
|
| 408 |
hybrid_payload["query"]["hybrid"]["queries"].append(sparse_payload)
|
| 409 |
|
| 410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
docs = []
|
| 412 |
|
| 413 |
if(st.session_state.input_sql_query!=""):
|
| 414 |
url = host +"_plugins/_sql?format=json"
|
| 415 |
payload = {"query":st.session_state.input_sql_query}
|
| 416 |
r = requests.post(url, auth=awsauth, json=payload, headers=headers)
|
|
|
|
|
|
|
| 417 |
|
| 418 |
if(len(hybrid_payload["query"]["hybrid"]["queries"])==1):
|
| 419 |
single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
|
|
|
|
| 428 |
}
|
| 429 |
}}
|
| 430 |
|
|
|
|
|
|
|
| 431 |
r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
|
|
|
|
|
|
|
| 432 |
response_ = json.loads(r.text)
|
|
|
|
|
|
|
| 433 |
docs = response_['hits']['hits']
|
| 434 |
|
| 435 |
|
| 436 |
else:
|
| 437 |
|
| 438 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
if( st.session_state.input_hybridType == "OpenSearch Hybrid Query"):
|
| 440 |
url_ = url + "?search_pipeline=hybrid_search_pipeline"
|
| 441 |
|
|
|
|
| 448 |
"query_text": query
|
| 449 |
}
|
| 450 |
}}
|
|
|
|
| 451 |
r = requests.get(url_, auth=awsauth, json=hybrid_payload, headers=headers)
|
|
|
|
| 452 |
response_ = json.loads(r.text)
|
|
|
|
|
|
|
| 453 |
docs = response_['hits']['hits']
|
| 454 |
|
| 455 |
else:
|
utilities/re_ranker.py
CHANGED
|
@@ -5,7 +5,7 @@ import time
|
|
| 5 |
import streamlit as st
|
| 6 |
from sentence_transformers import CrossEncoder
|
| 7 |
|
| 8 |
-
model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
|
| 9 |
####### Add this Kendra Rescore ranking
|
| 10 |
#kendra_ranking = boto3.client("kendra-ranking",region_name = 'us-east-1')
|
| 11 |
#print("Create a rescore execution plan.")
|
|
@@ -48,11 +48,7 @@ model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
|
|
| 48 |
|
| 49 |
|
| 50 |
def re_rank(self_, rerank_type, search_type, question, answers):
|
| 51 |
-
|
| 52 |
-
print("start")
|
| 53 |
-
print()
|
| 54 |
-
|
| 55 |
-
|
| 56 |
ans = []
|
| 57 |
ids = []
|
| 58 |
ques_ans = []
|
|
@@ -94,34 +90,34 @@ def re_rank(self_, rerank_type, search_type, question, answers):
|
|
| 94 |
# re_ranked[0]['id'] = len(question)
|
| 95 |
# return re_ranked
|
| 96 |
|
| 97 |
-
if(rerank_type == 'Cross Encoder'):
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
|
| 108 |
-
|
| 109 |
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
|
| 126 |
|
| 127 |
|
|
|
|
| 5 |
import streamlit as st
|
| 6 |
from sentence_transformers import CrossEncoder
|
| 7 |
|
| 8 |
+
#model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", max_length=512)
|
| 9 |
####### Add this Kendra Rescore ranking
|
| 10 |
#kendra_ranking = boto3.client("kendra-ranking",region_name = 'us-east-1')
|
| 11 |
#print("Create a rescore execution plan.")
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
def re_rank(self_, rerank_type, search_type, question, answers):
|
| 51 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
ans = []
|
| 53 |
ids = []
|
| 54 |
ques_ans = []
|
|
|
|
| 90 |
# re_ranked[0]['id'] = len(question)
|
| 91 |
# return re_ranked
|
| 92 |
|
| 93 |
+
# if(rerank_type == 'Cross Encoder'):
|
| 94 |
|
| 95 |
+
# scores = model.predict(
|
| 96 |
+
# ques_ans
|
| 97 |
+
# )
|
| 98 |
|
| 99 |
+
# index__ = 0
|
| 100 |
+
# for i in ans:
|
| 101 |
+
# i['new_score'] = scores[index__]
|
| 102 |
+
# index__ = index__+1
|
| 103 |
|
| 104 |
+
# ans_sorted = sorted(ans, key=lambda d: d['new_score'],reverse=True)
|
| 105 |
|
| 106 |
|
| 107 |
+
# def retreive_only_text(item):
|
| 108 |
+
# return item['text']
|
| 109 |
|
| 110 |
+
# if(self_ == 'rag'):
|
| 111 |
+
# return list(map(retreive_only_text, ans_sorted))
|
| 112 |
|
| 113 |
|
| 114 |
+
# re_ranked[0]['answer']=[]
|
| 115 |
+
# for j in ans_sorted:
|
| 116 |
+
# pos_ = ids.index(j['Id'])
|
| 117 |
+
# re_ranked[0]['answer'].append(answers[0]['answer'][pos_])
|
| 118 |
+
# re_ranked[0]['search_type']= search_type,
|
| 119 |
+
# re_ranked[0]['id'] = len(question)
|
| 120 |
+
# return re_ranked
|
| 121 |
|
| 122 |
|
| 123 |
|