Update src/simple_rag.py
src/simple_rag.py  CHANGED  +22 -40
@@ -9,6 +9,7 @@ from langchain.schema import Document
 from langchain.vectorstores.chroma import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.document_loaders import PyPDFDirectoryLoader
+from openai import OpenAI
 
 logging.basicConfig(level=logging.INFO)
 
@@ -19,33 +20,12 @@ if use_gpu:
 else:
     print("Running on CPU. No GPU detected.")
 
-
-
-
-
-
-
-if use_gpu:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        load_in_8bit=True,
-        torch_dtype=torch.float16,
-        trust_remote_code=True,
-    )
-else:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="cpu",  # Force CPU
-        torch_dtype=torch.float32,
-        trust_remote_code=True,
-    )
-
-
-pipeline = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
+# Load API key from HF Space secrets
+SEALION_API_KEY = os.environ.get("SEALION_API_KEY")
+
+client = OpenAI(
+    api_key=SEALION_API_KEY,
+    base_url="https://api.sea-lion.ai/v1"
 )
 
 # Use Hugging Face's writable directory
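The swap works because SEA-LION's hosted API is OpenAI-compatible: the stock `openai` client is simply pointed at `https://api.sea-lion.ai/v1`, so no local weights, GPU, or `transformers` pipeline are needed. A minimal standalone sketch of the same wiring, with one addition that is not in the commit, a fail-fast check for the missing secret:

```python
import os

from openai import OpenAI

# Same setup as the commit: the key comes from the Space's secrets,
# and base_url redirects the OpenAI client to SEA-LION's endpoint.
api_key = os.environ.get("SEALION_API_KEY")
if not api_key:
    # Hypothetical guard (not in the commit): fail fast here instead of
    # sending requests with api_key=None and hitting a 401 later.
    raise RuntimeError("SEALION_API_KEY is not set; add it as a Space secret.")

client = OpenAI(
    api_key=api_key,
    base_url="https://api.sea-lion.ai/v1",
)
```

The Space must also list `openai` in its requirements.txt for the new import to resolve.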
@@ -122,20 +102,22 @@ def ask_question(query_text: str, k: int = 3):
 
     messages = [{"role": "user", "content": prompt}]
     logging.info("Sending prompt to model...")
-
-
-
-
-
+
+    try:
+        logging.info("Sending prompt to SEA-LION API...")
+        completion = client.chat.completions.create(
+            model="aisingapore/Llama-SEA-LION-v3.5-8B-R",
+            messages=messages,
+            extra_body={
+                "chat_template_kwargs": {
+                    "thinking_mode": "off"
+                }
+            },
+            max_tokens=128
         )
+        answer = completion.choices[0].message.content.strip()
+    except Exception as e:
+        logging.error(f"Error calling SEA-LION API: {e}")
+        answer = "Sorry, something went wrong when contacting the language model."
 
-    output = pipeline(
-        prompt,
-        max_new_tokens=128,
-        return_full_text=False,
-        truncation=True,
-        do_sample=False,
-    )
-
-    answer = output[0]["generated_text"].strip()
     return answer, context_chunks
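Inside `ask_question`, generation is now a single chat-completions call. Two details carry the weight: `extra_body` forwards `chat_template_kwargs` to the serving stack, which the OpenAI client would otherwise reject as an unknown parameter, and `"thinking_mode": "off"` tells the reasoning-tuned (`-R`) model to skip its thinking trace so only the final answer comes back; the try/except means an API outage degrades to an apology string instead of crashing the Space. A sketch of that call path in isolation, assuming the module-level `client` from the previous hunk (model ID and flags copied from the diff; the helper name is made up):

```python
def query_sealion(prompt: str, max_tokens: int = 128) -> str:
    """Hypothetical helper mirroring the new call path in ask_question."""
    completion = client.chat.completions.create(
        model="aisingapore/Llama-SEA-LION-v3.5-8B-R",
        messages=[{"role": "user", "content": prompt}],
        # Passed through verbatim to the server-side chat template;
        # "off" suppresses the model's reasoning preamble.
        extra_body={"chat_template_kwargs": {"thinking_mode": "off"}},
        max_tokens=max_tokens,
    )
    return completion.choices[0].message.content.strip()
```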
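With these three hunks the rest of the RAG flow (load PDFs, embed into Chroma, retrieve top-k chunks, build the prompt) is untouched; only the generation step changed. A hypothetical smoke test, using `ask_question`'s signature and return value from the hunk header above:

```python
# Query text is made up; ask_question returns (answer, context_chunks),
# and context_chunks is assumed to be the list of k retrieved chunks.
answer, context_chunks = ask_question("What is the uploaded document about?", k=3)
print(answer)
print(f"Answer was grounded in {len(context_chunks)} retrieved chunks.")
```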