Update src/simple_rag.py
src/simple_rag.py  CHANGED  +22 -40
@@ -9,6 +9,7 @@ from langchain.schema import Document
 from langchain.vectorstores.chroma import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.document_loaders import PyPDFDirectoryLoader
+from openai import OpenAI
 
 logging.basicConfig(level=logging.INFO)
 
@@ -19,33 +20,12 @@ if use_gpu:
 else:
     print("Running on CPU. No GPU detected.")
 
-
-
-
-
-
-
-if use_gpu:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        load_in_8bit=True,
-        torch_dtype=torch.float16,
-        trust_remote_code=True,
-    )
-else:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="cpu",  # Force CPU
-        torch_dtype=torch.float32,
-        trust_remote_code=True,
-    )
-
-
-pipeline = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
+# Load API key from HF Space secrets
+SEALION_API_KEY = os.environ.get("SEALION_API_KEY")
+
+client = OpenAI(
+    api_key=SEALION_API_KEY,
+    base_url="https://api.sea-lion.ai/v1"
 )
 
 # Use Hugging Face's writable directory
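The swap works because SEA-LION's hosted API is OpenAI-compatible: the stock `openai` client is simply pointed at `https://api.sea-lion.ai/v1`, so no local weights, GPU, or `transformers` pipeline are needed. A minimal standalone sketch of the same wiring, with one addition that is not in the commit, a fail-fast check for the missing secret:

```python
import os

from openai import OpenAI

# Same setup as the commit: the key comes from the Space's secrets,
# and base_url redirects the OpenAI client to SEA-LION's endpoint.
api_key = os.environ.get("SEALION_API_KEY")
if not api_key:
    # Hypothetical guard (not in the commit): fail fast here instead of
    # sending requests with api_key=None and hitting a 401 later.
    raise RuntimeError("SEALION_API_KEY is not set; add it as a Space secret.")

client = OpenAI(
    api_key=api_key,
    base_url="https://api.sea-lion.ai/v1",
)
```

The Space must also list `openai` in its requirements.txt for the new import to resolve.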
@@ -122,20 +102,22 @@ def ask_question(query_text: str, k: int = 3):
 
     messages = [{"role": "user", "content": prompt}]
     logging.info("Sending prompt to model...")
-
-
-
-
-
+
+    try:
+        logging.info("Sending prompt to SEA-LION API...")
+        completion = client.chat.completions.create(
+            model="aisingapore/Llama-SEA-LION-v3.5-8B-R",
+            messages=messages,
+            extra_body={
+                "chat_template_kwargs": {
+                    "thinking_mode": "off"
+                }
+            },
+            max_tokens=128
         )
+        answer = completion.choices[0].message.content.strip()
+    except Exception as e:
+        logging.error(f"Error calling SEA-LION API: {e}")
+        answer = "Sorry, something went wrong when contacting the language model."
 
-    output = pipeline(
-        prompt,
-        max_new_tokens=128,
-        return_full_text=False,
-        truncation=True,
-        do_sample=False,
-    )
-
-    answer = output[0]["generated_text"].strip()
     return answer, context_chunks
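Inside `ask_question`, generation is now a single chat-completions call. Two details carry the weight: `extra_body` forwards `chat_template_kwargs` to the serving stack, which the OpenAI client would otherwise reject as an unknown parameter, and `"thinking_mode": "off"` tells the reasoning-tuned (`-R`) model to skip its thinking trace so only the final answer comes back; the try/except means an API outage degrades to an apology string instead of crashing the Space. A sketch of that call path in isolation, assuming the module-level `client` from the previous hunk (model ID and flags copied from the diff; the helper name is made up):

```python
def query_sealion(prompt: str, max_tokens: int = 128) -> str:
    """Hypothetical helper mirroring the new call path in ask_question."""
    completion = client.chat.completions.create(
        model="aisingapore/Llama-SEA-LION-v3.5-8B-R",
        messages=[{"role": "user", "content": prompt}],
        # Passed through verbatim to the server-side chat template;
        # "off" suppresses the model's reasoning preamble.
        extra_body={"chat_template_kwargs": {"thinking_mode": "off"}},
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content.strip()
```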
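With these three hunks the rest of the RAG flow (load PDFs, embed into Chroma, retrieve top-k chunks, build the prompt) is untouched; only the generation step changed. A hypothetical smoke test, using `ask_question`'s signature and return value from the hunk header above:

```python
# Query text is made up; ask_question returns (answer, context_chunks),
# and context_chunks is assumed to be the list of k retrieved chunks.
answer, context_chunks = ask_question("What is the uploaded document about?", k=3)
print(answer)
print(f"Answer was grounded in {len(context_chunks)} retrieved chunks.")
```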