Spaces:
Sleeping
Sleeping
Commit
·
6017dce
1
Parent(s):
8c2f0ba
deepnote update
Browse files- faq.py +19 -3
- requirements.txt +3 -1
faq.py
CHANGED
|
@@ -35,7 +35,7 @@ def create_documents(df: pd.DataFrame, page_content_column: str) -> pd.DataFrame
|
|
| 35 |
return loader.load()
|
| 36 |
|
| 37 |
|
| 38 |
-
def
|
| 39 |
return HuggingFaceEmbeddings(
|
| 40 |
model_name=model_name,
|
| 41 |
encode_kwargs={"normalize_embeddings": True},
|
|
@@ -43,7 +43,7 @@ def embedding_function(model_name: str) -> HuggingFaceEmbeddings:
|
|
| 43 |
)
|
| 44 |
|
| 45 |
|
| 46 |
-
def
|
| 47 |
faq_id: str, embedding_function: Embeddings, documents: List[Document] = None
|
| 48 |
) -> VectorStore:
|
| 49 |
vectordb = None
|
|
@@ -63,7 +63,23 @@ def vectordb(
|
|
| 63 |
|
| 64 |
|
| 65 |
def similarity_search(
|
| 66 |
-
vectordb: VectorStore, query: str, k: int
|
| 67 |
) -> List[Tuple[Document, float]]:
|
| 68 |
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
| 69 |
return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
return loader.load()
|
| 36 |
|
| 37 |
|
| 38 |
+
def define_embedding_function(model_name: str) -> HuggingFaceEmbeddings:
|
| 39 |
return HuggingFaceEmbeddings(
|
| 40 |
model_name=model_name,
|
| 41 |
encode_kwargs={"normalize_embeddings": True},
|
|
|
|
| 43 |
)
|
| 44 |
|
| 45 |
|
| 46 |
+
def get_vectordb(
|
| 47 |
faq_id: str, embedding_function: Embeddings, documents: List[Document] = None
|
| 48 |
) -> VectorStore:
|
| 49 |
vectordb = None
|
|
|
|
| 63 |
|
| 64 |
|
| 65 |
def similarity_search(
|
| 66 |
+
vectordb: VectorStore, query: str, k: int = 3
|
| 67 |
) -> List[Tuple[Document, float]]:
|
| 68 |
os.environ["TOKENIZERS_PARALLELISM"] = "true"
|
| 69 |
return vectordb.similarity_search_with_relevance_scores(query=query, k=k)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def load_vectordb_id(faq_id: str, page_content_column: str) -> VectorStore:
|
| 73 |
+
embedding_function = define_embedding_function("sentence-transformers/all-mpnet-base-v2")
|
| 74 |
+
vectordb = None
|
| 75 |
+
try:
|
| 76 |
+
vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
|
| 77 |
+
except Exception as e:
|
| 78 |
+
df = read_df(xlsx_url(faq_id))
|
| 79 |
+
documents = create_documents(df, page_content_column)
|
| 80 |
+
vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function, documents=documents)
|
| 81 |
+
return vectordb
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def load_vectordb(sheet_url: str, page_content_column: str) -> VectorStore:
|
| 85 |
+
return load_vectordb_id(faq_id(sheet_url), page_content_column)
|
requirements.txt
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
openpyxl
|
| 2 |
langchain
|
| 3 |
sentence_transformers
|
| 4 |
-
awadb
|
|
|
|
|
|
|
|
|
| 1 |
openpyxl
|
| 2 |
langchain
|
| 3 |
sentence_transformers
|
| 4 |
+
awadb
|
| 5 |
+
fastapi
|
| 6 |
+
uvicorn
|