Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ import pdfplumber
|
|
| 3 |
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
|
| 4 |
import torch
|
| 5 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 6 |
-
from langchain.embeddings import HuggingFaceEmbeddings
|
| 7 |
from langchain.vectorstores import Chroma
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
from langchain.memory import ConversationBufferMemory
|
|
@@ -41,8 +40,13 @@ if uploaded_file:
|
|
| 41 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
| 42 |
docs = text_splitter.create_documents([pdf_text])
|
| 43 |
|
| 44 |
-
embeddings
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
retriever = vector_store.as_retriever()
|
| 48 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|
|
|
|
| 3 |
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
|
| 4 |
import torch
|
| 5 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
| 6 |
from langchain.vectorstores import Chroma
|
| 7 |
from langchain.chains import ConversationalRetrievalChain
|
| 8 |
from langchain.memory import ConversationBufferMemory
|
|
|
|
| 40 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
| 41 |
docs = text_splitter.create_documents([pdf_text])
|
| 42 |
|
| 43 |
+
# Create embeddings using transformers
|
| 44 |
+
model_name = "sentence-transformers/paraphrase-xlm-r-multilingual-v1"
|
| 45 |
+
# Local imports: neither name is among the file-level imports visible in this
# diff (only AutoTokenizer and AutoModelForQuestionAnswering are imported from
# transformers), so the original line raised NameError on AutoModel.
from langchain.embeddings.base import Embeddings
from transformers import AutoModel


class _TransformersEmbeddings(Embeddings):
    """Adapter exposing a Hugging Face encoder through LangChain's Embeddings API.

    Chroma calls ``embed_documents`` / ``embed_query`` on the object passed as
    ``embedding``; a bare ``AutoModel`` has neither method, so it cannot be
    handed to ``Chroma.from_documents`` directly.
    """

    def __init__(self, name, batch_size=16):
        self._tokenizer = AutoTokenizer.from_pretrained(name)
        self._model = AutoModel.from_pretrained(name)
        self._model.eval()  # inference only: disables dropout
        self._batch_size = batch_size

    def embed_documents(self, texts):
        """Return one embedding (a list of floats) per input text."""
        texts = list(texts)
        vectors = []
        # Encode in small batches so a large PDF does not exhaust memory.
        for start in range(0, len(texts), self._batch_size):
            encoded = self._tokenizer(
                texts[start:start + self._batch_size],
                padding=True,
                truncation=True,
                return_tensors="pt",
            )
            with torch.no_grad():
                hidden = self._model(**encoded).last_hidden_state
            # Mean-pool token vectors, ignoring padding positions.
            mask = encoded["attention_mask"].unsqueeze(-1).to(hidden.dtype)
            pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
            vectors.extend(pooled.tolist())
        return vectors

    def embed_query(self, text):
        """Return the embedding for a single query string."""
        return self.embed_documents([text])[0]


embedding_model = _TransformersEmbeddings(model_name)
|
| 46 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 47 |
+
|
| 48 |
+
# Configure Chroma with your embeddings
|
| 49 |
+
vector_store = Chroma.from_documents(documents=docs, embedding=embedding_model)
|
| 50 |
|
| 51 |
retriever = vector_store.as_retriever()
|
| 52 |
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
|