Spaces:

bhardwaj08sarthak
/

STEM-Question-Generator

Running

App Files Files Community

bhardwaj08sarthak committed on Sep 25

Commit

1ee013c

verified ·

1 Parent(s): f10e473

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -39

app.py CHANGED Viewed

@@ -25,40 +25,7 @@ from phrases import BLOOMS_PHRASES, DOK_PHRASES
 _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
-D = {
-    "GSM8k": GSM8k['question'],
-    "Olympiad": Olympiad_math['question'],
-    "Olympiad2": Olympiad_math2['question'],
-    "DeepMind Math": clean_math['question'],
-    "MMMLU": MMMLU['question'],
-    "MMMU": MMMU['question'],
-    "ScienceQA": ScienceQA['question'],
-    "PubmedQA": PubmedQA['question']
-}
-all_questions = (
-    list(D["GSM8k"]) +
-    list(D["Olympiad"]) +
-    list(D["MMMLU"]) +
-    list(D["MMMU"]) +
-    list(D["DeepMind Math"]) +
-    list(D["Olympiad2"]) +
-    list(D["ScienceQA"]) +
-    list(D["PubmedQA"])
-)
-emb = HuggingFaceEmbeddings(
-    model_name="google/embeddinggemma-300m",
-    encode_kwargs={"normalize_embeddings": True},
-)
-texts = all_questions
-index = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
-# ------------------------ Scoring TOOL -----------------------------------
-emb = HuggingFaceEmbeddings(
-    model_name="google/embeddinggemma-300m",
-    encode_kwargs={"normalize_embeddings": True},
-)
 D = {
     "GSM8k": GSM8k['question'],
     "Olympiad": Olympiad_math['question'],
@@ -79,12 +46,17 @@ all_questions = (
     list(D["ScienceQA"]) +
     list(D["PubmedQA"])
 )
-texts = all_questions
-index = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
-# ------------------------ Retriever TOOL -----------------------------------
 # ------------------------ Agent setup with timeout ------------------------
 def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
     client = InferenceClient(

 _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
 D = {
     "GSM8k": GSM8k['question'],
     "Olympiad": Olympiad_math['question'],
     list(D["ScienceQA"]) +
     list(D["PubmedQA"])
 )
+texts = all_questions
+# `spaces` is a module and cannot be called directly; the ZeroGPU decorator
+# is `spaces.GPU`. NOTE(review): 15 is assumed to be the intended GPU
+# duration in seconds (carried over from the original `@spaces(15)`) - confirm.
+@spaces.GPU(duration=15)
+def build_indexes_on_gpu(model="google/embeddinggemma-300m"):
+    """Embed every entry of the module-level ``texts`` and build a vector index.
+
+    Args:
+        model: Hugging Face model id of the embedding model to load.
+
+    Returns:
+        The ``VectorStoreIndex`` built from one ``Document`` per entry in
+        ``texts``, embedded with normalized vectors.
+    """
+    # The embedding model is placed on CUDA unconditionally; this function is
+    # expected to run only inside the GPU-decorated context above.
+    device = "cuda"
+    emb = HuggingFaceEmbeddings(
+        # Pass the parameter itself, not the literal string "model".
+        model_name=model,
+        model_kwargs={"device": device},
+        encode_kwargs={"normalize_embeddings": True},
+    )
+    index = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
+    return index
+# Built once at import time so later code can use the module-level `index`.
+index = build_indexes_on_gpu(model="google/embeddinggemma-300m")
 # ------------------------ Agent setup with timeout ------------------------
 def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
     client = InferenceClient(