Spaces:
Sleeping
Sleeping
Update
Browse files
app.py
CHANGED
|
@@ -24,13 +24,8 @@ async def ask_api(request: AskRequest):
|
|
| 24 |
|
| 25 |
@app.post("/api/v2/ask")
|
| 26 |
async def ask_api(request: AskRequest):
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
df = util.read_df(xlsx_url)
|
| 30 |
-
df_update = util.split_page_breaks(df, request.page_content_column)
|
| 31 |
-
documents = faq.create_documents(df_update, request.page_content_column)
|
| 32 |
-
embedding_function = faq.define_embedding_function("sentence-transformers/all-mpnet-base-v2")
|
| 33 |
-
vectordb = faq.get_vectordb(faq_id=faq_id, embedding_function=embedding_function, documents=documents, vectordb_type=faq.VECTORDB_TYPE.Chroma)
|
| 34 |
documents = faq.similarity_search(vectordb, request.question, k=request.k)
|
| 35 |
df_doc = util.transform_documents_to_dataframe(documents)
|
| 36 |
df_filter = util.remove_duplicates_by_column(df_doc, "ID")
|
|
|
|
| 24 |
|
| 25 |
@app.post("/api/v2/ask")
|
| 26 |
async def ask_api(request: AskRequest):
|
| 27 |
+
util.SPLIT_PAGE_BREAKS = True
|
| 28 |
+
vectordb = faq.load_vectordb(request.sheet_url, request.page_content_column)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
documents = faq.similarity_search(vectordb, request.question, k=request.k)
|
| 30 |
df_doc = util.transform_documents_to_dataframe(documents)
|
| 31 |
df_filter = util.remove_duplicates_by_column(df_doc, "ID")
|
util.py
CHANGED
|
@@ -3,6 +3,7 @@ import pandas as pd
|
|
| 3 |
SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
|
| 4 |
SHEET_URL_Y = "/edit#gid="
|
| 5 |
SHEET_URL_Y_EXPORT = "/export?gid="
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def get_id(sheet_url: str) -> str:
|
|
@@ -16,8 +17,11 @@ def xlsx_url(get_id: str) -> str:
|
|
| 16 |
return SHEET_URL_X + get_id[0:y] + SHEET_URL_Y_EXPORT + get_id[y + 1 :]
|
| 17 |
|
| 18 |
|
| 19 |
-
def read_df(xlsx_url: str) -> pd.DataFrame:
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
def split_page_breaks(df, column_name):
|
|
|
|
| 3 |
SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
|
| 4 |
SHEET_URL_Y = "/edit#gid="
|
| 5 |
SHEET_URL_Y_EXPORT = "/export?gid="
|
| 6 |
+
SPLIT_PAGE_BREAKS = False
|
| 7 |
|
| 8 |
|
| 9 |
def get_id(sheet_url: str) -> str:
|
|
|
|
| 17 |
return SHEET_URL_X + get_id[0:y] + SHEET_URL_Y_EXPORT + get_id[y + 1 :]
|
| 18 |
|
| 19 |
|
| 20 |
+
def read_df(
    xlsx_url: str,
    split_page_breaks: "bool | None" = None,
    page_content_column: "str | None" = None,
) -> pd.DataFrame:
    """Load a spreadsheet export into a DataFrame, optionally splitting page breaks.

    Args:
        xlsx_url: Location passed straight to ``pandas.read_excel``.
        split_page_breaks: Whether to post-process the frame with the
            module-level ``split_page_breaks`` function. ``None`` (the
            default) means "use the current value of ``SPLIT_PAGE_BREAKS``",
            looked up at call time so that runtime changes to the module
            flag are honoured.
        page_content_column: Name of the column handed to
            ``split_page_breaks``. Required only when splitting is enabled.

    Returns:
        The DataFrame read from ``xlsx_url``; when splitting is enabled,
        whatever ``split_page_breaks(df, page_content_column)`` returns.

    Raises:
        ValueError: If splitting is enabled but ``page_content_column`` was
            not supplied.
    """
    # First row is the header; keep_default_na=False leaves empty cells as
    # empty strings instead of converting them to NaN.
    df = pd.read_excel(xlsx_url, header=0, keep_default_na=False)

    # A default of ``SPLIT_PAGE_BREAKS`` in the signature would be frozen at
    # definition time; resolving it here picks up later reassignment.
    if split_page_breaks is None:
        split_page_breaks = SPLIT_PAGE_BREAKS
    if not split_page_breaks:
        return df

    if page_content_column is None:
        raise ValueError(
            "page_content_column is required when split_page_breaks is enabled"
        )

    # The boolean parameter shadows the module-level function of the same
    # name, so fetch the function explicitly from the module namespace.
    splitter = globals()["split_page_breaks"]
    return splitter(df, page_content_column)
|
| 25 |
|
| 26 |
|
| 27 |
def split_page_breaks(df, column_name):
|