Darayut committed (verified)
Commit a49fb2e · 1 Parent(s): 50ab722

Update src/simple_rag.py

Files changed (1):
  1. src/simple_rag.py (+22 -40)
src/simple_rag.py CHANGED
@@ -9,6 +9,7 @@ from langchain.schema import Document
 from langchain.vectorstores.chroma import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.document_loaders import PyPDFDirectoryLoader
+from openai import OpenAI
 
 logging.basicConfig(level=logging.INFO)
 
@@ -19,33 +20,12 @@ if use_gpu:
 else:
     print("Running on CPU. No GPU detected.")
 
-model_id = "aisingapore/SEA-LION-v1-3B"
-
-
-# # Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-
-if use_gpu:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        load_in_8bit=True,
-        torch_dtype=torch.float16,
-        trust_remote_code=True,
-    )
-else:
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="cpu",  # Force CPU
-        torch_dtype=torch.float32,
-        trust_remote_code=True,
-    )
-
-
-pipeline = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-)
+# Load API key from HF Space secrets
+SEALION_API_KEY = os.environ.get("SEALION_API_KEY")
+
+client = OpenAI(
+    api_key=SEALION_API_KEY,
+    base_url="https://api.sea-lion.ai/v1"
+)
 
 # Use Hugging Face's writable directory
@@ -122,20 +102,22 @@ def ask_question(query_text: str, k: int = 3):
 
     messages = [{"role": "user", "content": prompt}]
     logging.info("Sending prompt to model...")
-    prompt = tokenizer.apply_chat_template(
-        messages,
-        add_generation_prompt=True,
-        tokenize=False,
-        thinking_mode="off"
-    )
-
-    output = pipeline(
-        prompt,
-        max_new_tokens=128,
-        return_full_text=False,
-        truncation=True,
-        do_sample=False,
-    )
-    answer = output[0]["generated_text"].strip()
+
+    try:
+        logging.info("Sending prompt to SEA-LION API...")
+        completion = client.chat.completions.create(
+            model="aisingapore/Llama-SEA-LION-v3.5-8B-R",
+            messages=messages,
+            extra_body={
+                "chat_template_kwargs": {
+                    "thinking_mode": "off"
+                }
+            },
+            max_tokens=128
+        )
+        answer = completion.choices[0].message.content.strip()
+    except Exception as e:
+        logging.error(f"Error calling SEA-LION API: {e}")
+        answer = "Sorry, something went wrong when contacting the language model."
 
     return answer, context_chunks
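
For reference, the new code path can be smoke-tested on its own, outside the Space. This is a minimal sketch, not part of the commit: it assumes the SEALION_API_KEY secret is exported in the local environment, reuses the endpoint and model name exactly as committed above, and the prompt string is only a placeholder.

import os
from openai import OpenAI

# Same client construction as the committed code: an OpenAI-compatible
# client pointed at the SEA-LION API endpoint.
client = OpenAI(
    api_key=os.environ.get("SEALION_API_KEY"),
    base_url="https://api.sea-lion.ai/v1",
)

# Single-turn chat request mirroring ask_question(): thinking mode is
# switched off via chat_template_kwargs passed through extra_body.
completion = client.chat.completions.create(
    model="aisingapore/Llama-SEA-LION-v3.5-8B-R",
    messages=[{"role": "user", "content": "Hello, SEA-LION!"}],
    extra_body={"chat_template_kwargs": {"thinking_mode": "off"}},
    max_tokens=128,
)
print(completion.choices[0].message.content.strip())

Moving inference behind a hosted OpenAI-compatible endpoint is what lets this commit drop the tokenizer/model loading and the GPU/CPU branches entirely; the only generation-time dependencies left are the openai client and the API key.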