Spaces:
Sleeping
Sleeping
Commit
·
6323bc8
1
Parent(s):
41a8232
deepnote update
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import gradio as gr
|
|
| 7 |
app = FastAPI()
|
| 8 |
|
| 9 |
|
| 10 |
-
class
|
| 11 |
question: str
|
| 12 |
sheet_url: str
|
| 13 |
page_content_column: str
|
|
@@ -15,31 +15,43 @@ class Request(BaseModel):
|
|
| 15 |
|
| 16 |
|
| 17 |
@app.post("/api/v1/ask")
|
| 18 |
-
async def ask_api(request:
|
| 19 |
return ask(
|
| 20 |
request.sheet_url, request.page_content_column, request.k, request.question
|
| 21 |
)
|
| 22 |
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def ask(sheet_url: str, page_content_column: str, k: int, question: str):
    """Search the FAQ vector store for ``sheet_url`` with ``question``.

    The store is obtained from ``faq.load_vectordb`` and queried through
    ``faq.similarity_search``; ``k`` is forwarded to that search and its
    result is returned unchanged.
    """
    store = faq.load_vectordb(sheet_url, page_content_column)
    return faq.similarity_search(store, question, k=k)
|
| 28 |
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
app = gr.mount_gradio_app(app,
|
| 43 |
|
| 44 |
|
| 45 |
if __name__ == "__main__":
|
|
|
|
| 7 |
app = FastAPI()
|
| 8 |
|
| 9 |
|
| 10 |
+
class AskRequest(BaseModel):
|
| 11 |
question: str
|
| 12 |
sheet_url: str
|
| 13 |
page_content_column: str
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
@app.post("/api/v1/ask")
async def ask_api(request: AskRequest):
    """Handle ``POST /api/v1/ask`` by delegating to ``ask``.

    Reads ``sheet_url``, ``page_content_column``, ``k`` and ``question`` from
    the request model and returns whatever ``ask`` returns for them.

    ``ask`` is a plain synchronous function that loads a vector store and
    searches it, so it is run on the worker thread pool instead of being
    called inline; an inline call would hold the event loop (and every other
    request) for the full duration of that work.
    """
    # Function-scope import: fastapi is already a dependency of this module,
    # and this keeps the change local to the handler.
    from fastapi.concurrency import run_in_threadpool

    return await run_in_threadpool(
        ask,
        request.sheet_url,
        request.page_content_column,
        request.k,
        request.question,
    )
|
| 22 |
|
| 23 |
|
| 24 |
+
@app.delete("/api/v1/")
async def delete_vectordb_api():
    """Handle ``DELETE /api/v1/`` by forwarding to ``delete_vectordb``."""
    outcome = delete_vectordb()
    return outcome
|
| 27 |
+
|
| 28 |
+
|
| 29 |
def ask(sheet_url: str, page_content_column: str, k: int, question: str):
    """Search the FAQ vector store for ``sheet_url`` with ``question``.

    The store is obtained from ``faq.load_vectordb`` and queried through
    ``faq.similarity_search``; ``k`` is forwarded to that search and its
    result is returned unchanged.
    """
    store = faq.load_vectordb(sheet_url, page_content_column)
    return faq.similarity_search(store, question, k=k)
|
| 33 |
|
| 34 |
|
| 35 |
+
def delete_vectordb():
    """Forward to ``faq.delete_vectordb``; always returns ``None``."""
    faq.delete_vectordb()
    return None
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# Gradio front end: four inputs feed ``ask`` and its result is shown as JSON;
# a second button triggers ``delete_vectordb``.
with gr.Blocks() as block:
    sheet_url = gr.Textbox(label="Google Sheet URL")
    page_content_column = gr.Textbox(label="Question Column")
    k = gr.Slider(minimum=2, maximum=5, step=1, label="K")
    question = gr.Textbox(label="Question")
    ask_button = gr.Button(value="Ask")
    answer_output = gr.JSON(label="Answer")
    delete_button = gr.Button(value="Delete Vector DB")

    # Input order must match the positional parameters of ``ask``.
    ask_button.click(
        fn=ask,
        inputs=[sheet_url, page_content_column, k, question],
        outputs=answer_output,
    )
    delete_button.click(fn=delete_vectordb)

# Serve the UI from the same FastAPI application, at the root path.
app = gr.mount_gradio_app(app, block, path="/")
|
| 55 |
|
| 56 |
|
| 57 |
if __name__ == "__main__":
|
faq.py
CHANGED
|
@@ -7,12 +7,14 @@ from langchain.docstore.document import Document
|
|
| 7 |
from langchain.embeddings.base import Embeddings
|
| 8 |
from langchain.vectorstores.base import VectorStore
|
| 9 |
import os
|
|
|
|
| 10 |
|
| 11 |
SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
|
| 12 |
SHEET_URL_Y = "/edit#gid="
|
| 13 |
SHEET_URL_Y_EXPORT = "/export?gid="
|
| 14 |
CACHE_FOLDER = ".embedding-model"
|
| 15 |
VECTORDB_FOLDER = ".vectordb"
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
def faq_id(sheet_url: str) -> str:
|
|
@@ -69,17 +71,40 @@ def similarity_search(
|
|
| 69 |
return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
|
| 70 |
|
| 71 |
|
| 72 |
-
def load_vectordb_id(
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
vectordb = None
|
| 75 |
try:
|
| 76 |
vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
|
| 77 |
except Exception as e:
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
return vectordb
|
| 82 |
|
| 83 |
|
| 84 |
def load_vectordb(sheet_url: str, page_content_column: str) -> VectorStore:
    """Resolve ``sheet_url`` with ``faq_id`` and delegate to ``load_vectordb_id``."""
    sheet_id = faq_id(sheet_url)
    return load_vectordb_id(sheet_id, page_content_column)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from langchain.embeddings.base import Embeddings
|
| 8 |
from langchain.vectorstores.base import VectorStore
|
| 9 |
import os
|
| 10 |
+
import shutil
|
| 11 |
|
| 12 |
# Fixed URL fragments for Google Sheets links and their export form.
SHEET_URL_X = "https://docs.google.com/spreadsheets/d/"
SHEET_URL_Y = "/edit#gid="
SHEET_URL_Y_EXPORT = "/export?gid="
# NOTE(review): presumably the on-disk cache for the embedding model — confirm
# against its users.
CACHE_FOLDER = ".embedding-model"
# Directory tree removed by delete_vectordb().
VECTORDB_FOLDER = ".vectordb"
# Default value of load_vectordb_id's embedding_function_name parameter.
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
|
| 18 |
|
| 19 |
|
| 20 |
def faq_id(sheet_url: str) -> str:
|
|
|
|
| 71 |
return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
|
| 72 |
|
| 73 |
|
| 74 |
+
def load_vectordb_id(
    faq_id: str,
    page_content_column: str,
    embedding_function_name: str = EMBEDDING_MODEL,
) -> VectorStore:
    """Load the vector store for ``faq_id``, creating it when loading fails.

    Args:
        faq_id: Identifier passed to ``get_vectordb`` and, on fallback, to
            ``create_vectordb_id``.
        page_content_column: Column name forwarded to ``create_vectordb_id``
            when the store has to be built.
        embedding_function_name: Name handed to ``define_embedding_function``.

    Returns:
        The store returned by ``get_vectordb``, or by ``create_vectordb_id``
        if ``get_vectordb`` raised.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import logging

    embedding_function = define_embedding_function(embedding_function_name)
    try:
        return get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
    except Exception:
        # Any load failure is treated as "store not available yet". Record it
        # instead of discarding it so unexpected failures remain diagnosable.
        logging.getLogger(__name__).info(
            "Could not load vector store for %s; creating it instead",
            faq_id,
            exc_info=True,
        )

    # Built outside the handler so an error raised while creating the store
    # is not reported as a consequence of the earlier load failure.
    return create_vectordb_id(faq_id, page_content_column, embedding_function)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def create_vectordb_id(
    faq_id: str,
    page_content_column: str,
    embedding_function: HuggingFaceEmbeddings = None,
) -> VectorStore:
    """Build the vector store for ``faq_id`` from its sheet contents.

    The sheet is read via ``read_df(xlsx_url(faq_id))``, turned into documents
    with ``create_documents`` using ``page_content_column``, and handed to
    ``get_vectordb`` together with the embedding function. When no embedding
    function is supplied, one is defined from ``EMBEDDING_MODEL``.
    """
    embedder = (
        define_embedding_function(EMBEDDING_MODEL)
        if embedding_function is None
        else embedding_function
    )
    sheet_frame = read_df(xlsx_url(faq_id))
    sheet_documents = create_documents(sheet_frame, page_content_column)
    return get_vectordb(
        faq_id=faq_id,
        embedding_function=embedder,
        documents=sheet_documents,
    )
|
| 103 |
|
| 104 |
|
| 105 |
def load_vectordb(sheet_url: str, page_content_column: str) -> VectorStore:
    """Resolve ``sheet_url`` with ``faq_id`` and delegate to ``load_vectordb_id``."""
    sheet_id = faq_id(sheet_url)
    return load_vectordb_id(sheet_id, page_content_column)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def delete_vectordb():
    """Remove the ``VECTORDB_FOLDER`` directory tree, ignoring any errors."""
    shutil.rmtree(path=VECTORDB_FOLDER, ignore_errors=True)
|