Update retriever.py
Browse files- retriever.py +39 -38
retriever.py
CHANGED
|
@@ -1,38 +1,39 @@
|
|
| 1 |
-
from langchain_core.runnables import RunnablePassthrough
|
| 2 |
-
from langchain_core.output_parsers import StrOutputParser
|
| 3 |
-
from langchain_community.chat_models import ChatOllama
|
| 4 |
-
from langchain_core.prompts import ChatPromptTemplate
|
| 5 |
-
from langchain_pinecone import PineconeVectorStore
|
| 6 |
-
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
| 7 |
-
|
| 8 |
-
import os
|
| 9 |
-
from dotenv import load_dotenv
|
| 10 |
-
from
|
| 11 |
-
from
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
kiwi_bm25.
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
#
|
| 33 |
-
# )
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
| 1 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 2 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 3 |
+
from langchain_community.chat_models import ChatOllama
|
| 4 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 5 |
+
from langchain_pinecone import PineconeVectorStore
|
| 6 |
+
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
from langchain_community.retrievers import BM25Retriever, EnsembleRetriever
|
| 11 |
+
# from langchain.retrievers import BM25Retriever, EnsembleRetriever
|
| 12 |
+
from kiwipiepy import Kiwi
|
| 13 |
+
load_dotenv()
|
| 14 |
+
|
| 15 |
+
kiwi = Kiwi()
|
| 16 |
+
|
| 17 |
+
def kiwi_tokenize(text):
    """Tokenize ``text`` with the module-level ``kiwi`` analyzer.

    Args:
        text: The string to tokenize.

    Returns:
        A list holding the ``form`` attribute of every token produced by
        ``kiwi.tokenize(text)``, in the order the analyzer emitted them.
    """
    forms = []
    for piece in kiwi.tokenize(text):
        forms.append(piece.form)
    return forms
|
| 19 |
+
# embedding_model = SentenceTransformerEmbeddings(model_name='BM-K/KoSimCSE-roberta-multitask', model_kwargs={"trust_remote_code":True})
|
| 20 |
+
|
| 21 |
+
def retriever(pc, bm25):
    """Build a hybrid retriever combining BM25 keyword search and vector search.

    Args:
        pc: Vector store exposing ``as_retriever`` (presumably a
            ``PineconeVectorStore`` -- confirm against the caller).
        bm25: Documents passed to ``BM25Retriever.from_documents`` to build
            the keyword index.

    Returns:
        An ``EnsembleRetriever`` that weights the BM25 results at 0.3 and the
        vector-store results at 0.7.
    """
    # MMR (diversity-oriented search) is a mode of the vector-store retriever.
    # EnsembleRetriever declares no ``search_type`` field, so passing it there
    # had no effect; request it here instead.
    # NOTE(review): assumes ``pc`` supports MMR search -- confirm.
    pcretriever = pc.as_retriever(search_type="mmr", search_kwargs={"k": 4})

    # Index the documents using the Kiwi tokenizer as the BM25 preprocessing
    # step, and cap the keyword retriever at 4 results to match the vector side.
    kiwi_bm25 = BM25Retriever.from_documents(bm25, preprocess_func=kiwi_tokenize)
    kiwi_bm25.k = 4

    return EnsembleRetriever(
        retrievers=[kiwi_bm25, pcretriever],  # retrievers to combine
        weights=[0.3, 0.7],  # weight applied to each retriever's results
    )
|
| 39 |
+
|