Update app.py
Browse files
app.py
CHANGED
|
@@ -21,6 +21,7 @@ from level_classifier_tool_2 import (
|
|
| 21 |
from task_temp import rag_temp, rag_cls_temp, cls_temp, gen_temp
|
| 22 |
from all_tools import classify_and_score, QuestionRetrieverTool
|
| 23 |
from phrases import BLOOMS_PHRASES, DOK_PHRASES
|
|
|
|
| 24 |
# Embed the Bloom's-taxonomy phrase bank a single time at import so every
# later classification call reuses the same prebuilt index.
_backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
|
|
@@ -46,16 +47,18 @@ all_questions = (
|
|
| 46 |
list(D["ScienceQA"]) +
|
| 47 |
list(D["PubmedQA"])
|
| 48 |
)
|
| 49 |
-
texts = all_questions


@spaces.GPU(duration=15)  # fix: `@spaces(15)` calls the module; spaces.GPU takes duration=...
def build_indexes_on_gpu(model="google/embeddinggemma-300m"):
    """Embed the question corpus and build a vector index on the GPU worker.

    Args:
        model: Hugging Face model id for the embedding backend.

    Returns:
        The vector index built over the module-level ``texts`` list.
    """
    # ZeroGPU allocates a CUDA device for the duration of this decorated call.
    device = "cuda"
    emb = HuggingFaceEmbeddings(
        model_name=model,  # fix: was the literal string "model", ignoring the parameter
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": True},
    )
    # NOTE(review): the original body stopped after constructing `emb` and
    # returned None, but the caller assigns `index = build_indexes_on_gpu(...)`.
    # Build and return the index as that caller expects.
    idx = VectorStoreIndex.from_documents(
        [Document(text=t) for t in texts], embed_model=emb
    )
    return idx
|
|
|
|
|
|
|
| 59 |
index = build_indexes_on_gpu(model="google/embeddinggemma-300m")
|
| 60 |
# ------------------------ Agent setup with timeout ------------------------
|
| 61 |
def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
|
|
|
|
| 21 |
from task_temp import rag_temp, rag_cls_temp, cls_temp, gen_temp
|
| 22 |
from all_tools import classify_and_score, QuestionRetrieverTool
|
| 23 |
from phrases import BLOOMS_PHRASES, DOK_PHRASES
|
| 24 |
+
import spaces
|
| 25 |
# Build the Bloom's-taxonomy phrase index once, at import time, so all
# classification calls share a single set of precomputed embeddings.
_backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
|
|
|
|
| 47 |
list(D["ScienceQA"]) +
|
| 48 |
list(D["PubmedQA"])
|
| 49 |
)
|
| 50 |
+
texts = all_questions


@spaces.GPU(duration=15)  # fix: spaces.GPU's duration is keyword-only; positional 15 is treated as the wrapped task
def build_indexes_on_gpu(model="google/embeddinggemma-300m"):
    """Embed the question corpus and build a vector index on the GPU worker.

    Args:
        model: Hugging Face model id for the embedding backend.

    Returns:
        The vector index built over the module-level ``texts`` list.
    """
    # ZeroGPU allocates a CUDA device for the duration of this decorated call.
    device = "cuda"
    emb = HuggingFaceEmbeddings(
        model_name=model,  # fix: was the literal string "model", ignoring the parameter
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": True},
    )
    idx = VectorStoreIndex.from_documents(
        [Document(text=t) for t in texts], embed_model=emb
    )
    return idx
|
| 60 |
+
# Module-level device tag; the index build itself runs on the device the
# ZeroGPU decorator allocates inside build_indexes_on_gpu.
device = "cuda"

# Build the retrieval index once at startup.
index = build_indexes_on_gpu(model="google/embeddinggemma-300m")
|
| 63 |
# ------------------------ Agent setup with timeout ------------------------
|
| 64 |
def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
|