Commit 617be15
Parent(s): 8705444
Update model.py

model.py CHANGED
@@ -1,3 +1,4 @@
+
 import os
 import subprocess
 from dotenv import load_dotenv
@@ -8,16 +9,36 @@ try:
 except:
     PINECONE_API_KEY = subprocess.check_output(["bash", "-c", "echo ${{ secrets.PINECONE_API_KEY }}"]).decode("utf-8").strip()

-
+from typing import Optional,List,Mapping,Any
 from langchain.embeddings import HuggingFaceEmbeddings
 import pinecone
 import torch
 from langchain import PromptTemplate, LLMChain,HuggingFacePipeline
 from langchain.vectorstores import Pinecone
-from langchain.
-from langchain.chains import RetrievalQA
+from langchain.llms.base import LLM
 from transformers import pipeline

+class CustomLLM(LLM):
+    # def __init__(self,model_name,pipeline):
+    model_name ="databricks/dolly-v2-3b"
+    num_output = 128
+    pipeline = pipeline(model=model_name, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto",
+                        return_full_text=True, do_sample=False, max_new_tokens=128)
+    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        prompt_length = len(prompt)
+        response = self.pipeline(prompt, max_new_tokens=self.num_output)[0]["generated_text"]
+
+        # only return newly generated tokens
+        return response[prompt_length:]
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        return {"name_of_model": self.model_name}
+
+    @property
+    def _llm_type(self) -> str:
+        return "custom"
+
 def get_llm(model_name,pinecone_index,llm):
     # model_name = "bert-large-uncased" #"t5-large"
     model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
@@ -31,7 +52,7 @@ def get_llm(model_name,pinecone_index,llm):
     )

     index = pinecone.Index(pinecone_index)
-    print(index.describe_index_stats())
+    # print(index.describe_index_stats())

     docsearch = Pinecone(index, embeddings.embed_query,"text")

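For context, a minimal hypothetical sketch of how the new CustomLLM class could be exercised against the Pinecone vector store built inside get_llm(). It is not part of this commit: the RetrievalQA import below is the one this commit removes, and the variable names, the example questions, and the assumption that CustomLLM() can be instantiated with no arguments are all illustrative.

# Hypothetical usage sketch -- assumes CustomLLM and docsearch (the Pinecone
# vectorstore created in get_llm) are importable/available from model.py.
from langchain.chains import RetrievalQA

llm = CustomLLM()                         # wraps the databricks/dolly-v2-3b pipeline
print(llm("What is a vector database?"))  # LangChain LLM instances are directly callable

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",                   # stuff retrieved chunks into a single prompt
    retriever=docsearch.as_retriever(),   # docsearch = Pinecone(index, embeddings.embed_query, "text")
)
print(qa.run("Summarize the indexed document."))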