bhardwaj08sarthak committed
Commit 3d115e7 · verified · Parent: e2ade71

Update app.py

Files changed (1):
  1. app.py (+44 -52)
app.py CHANGED
@@ -1,8 +1,18 @@
-# Create a self-contained Gradio app that uses the agent-driven loop (Option A)
-# It expects `level_classifier_tool.py` to be colocated (or installed on PYTHONPATH).
+# --- MUST be first: disable Hugging Face Spaces ZeroGPU monkey-patch ---
+import os
+os.environ["SPACES_ZERO_DISABLED"] = "1"
+
+# (optional but helpful) steer PyTorch to math attention kernels (no Flash/MemEfficient)
+try:
+    import torch
+    torch.backends.cuda.sdp_kernel(enable_math=True, enable_flash=False, enable_mem_efficient=False)
+except Exception:
+    pass
+
+# If you truly need Spaces, import it AFTER disabling the patch.
 import spaces
+
 import sys
-import os
 from huggingface_hub import hf_hub_download
 import pickle
 from huggingface_hub import login
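Note on the attention-kernel line added in this hunk: `torch.backends.cuda.sdp_kernel(...)` is a context manager, so calling it outside a `with` block most likely leaves the global kernel selection unchanged. A minimal hedged sketch of the process-wide equivalent, assuming that is the intent:

    import torch
    # Global SDPA toggles: keep only the math kernel enabled
    torch.backends.cuda.enable_flash_sdp(False)
    torch.backends.cuda.enable_mem_efficient_sdp(False)
    torch.backends.cuda.enable_math_sdp(True)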
@@ -12,11 +22,11 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from smolagents import CodeAgent, InferenceClientModel, tool
 from langchain_community.embeddings import HuggingFaceEmbeddings
-#from llama_index.embeddings.huggingface import HuggingFaceEmbeddings
+# from llama_index.embeddings.huggingface import HuggingFaceEmbeddings
 from llama_index.core import StorageContext, load_index_from_storage
 from huggingface_hub import login, snapshot_download
 from smolagents import tool
-#from all_datasets import *
+# from all_datasets import *
 from level_classifier_tool_2 import (
     classify_levels_phrases,
     HFEmbeddingBackend,
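Note on this hunk: `login` is imported here a second time (it already comes in via `from huggingface_hub import login` in the first hunk), and `tool` is imported both in the `from smolagents import CodeAgent, InferenceClientModel, tool` line and again via `from smolagents import tool`. Harmless, but the duplicates could be consolidated in a follow-up.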
@@ -25,15 +35,21 @@ from level_classifier_tool_2 import (
 from task_temp import rag_temp, rag_cls_temp, cls_temp, gen_temp
 from all_tools import classify_and_score, QuestionRetrieverTool
 from phrases import BLOOMS_PHRASES, DOK_PHRASES
-# Prebuild embeddings once
+
+# ------------------------ Prebuild embeddings once ------------------------
 _backend = HFEmbeddingBackend(model_name="google/embeddinggemma-300m")
+# Belt-and-suspenders: ensure eager attention even if class wasn't patched
+try:
+    _backend.MODEL.config.attn_implementation = "eager"
+except Exception:
+    pass
+
 _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
 _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
 
 DATASET_REPO = "bhardwaj08sarthak/my-stem-index"  # your dataset repo id
-PERSIST_SUBDIR = "index_store"  # the folder you uploaded
-LOCAL_BASE = "/data/index"  # where to place files in the Space
-
+PERSIST_SUBDIR = "index_store"  # the folder you uploaded
+LOCAL_BASE = "/data/index"  # where to place files in the Space
 
 # Download the persisted index folder into ephemeral storage
 os.makedirs(LOCAL_BASE, exist_ok=True)
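The call that actually downloads the persisted index folder sits outside this hunk, so its exact arguments are not visible in the diff. Given the `snapshot_download` import and the `DATASET_REPO` / `PERSIST_SUBDIR` / `LOCAL_BASE` constants, it presumably looks roughly like this hedged sketch:

    from huggingface_hub import snapshot_download

    # Pull only the persisted index folder from the dataset repo into local disk
    snapshot_download(
        repo_id=DATASET_REPO,
        repo_type="dataset",
        allow_patterns=[f"{PERSIST_SUBDIR}/*"],
        local_dir=LOCAL_BASE,
    )
    persist_dir = os.path.join(LOCAL_BASE, PERSIST_SUBDIR)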
@@ -50,46 +66,25 @@ persist_dir = os.path.join(LOCAL_BASE, PERSIST_SUBDIR)
 # Recreate the SAME embedding model used to build the index
 emb = HuggingFaceEmbeddings(
     model_name="google/embeddinggemma-300m",
-    model_kwargs={"device": "cuda"},
+    model_kwargs={"device": "cuda", "attn_implementation": "eager"},
     encode_kwargs={"normalize_embeddings": True},
 )
 
 # Load the index from storage
 storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
 index = load_index_from_storage(storage_context, embed_model=emb)
-#D = {
-#    "GSM8k": GSM8k['question'],
-#    "Olympiad": Olympiad_math['question'],
-#    "Olympiad2": Olympiad_math2['question'],
-#    "DeepMind Math": clean_math['question'],
-#    "MMMLU": MMMLU['question'],
-#    "MMMU": MMMU['question'],
-#    "ScienceQA": ScienceQA['question'],
-#    "PubmedQA": PubmedQA['question']
-#}
-#all_questions = (
-#    list(D["GSM8k"]) +
-#    list(D["Olympiad"]) +
-#    list(D["MMMLU"]) +
-#    list(D["MMMU"]) +
-#    list(D["DeepMind Math"]) +
-#    list(D["Olympiad2"]) +
-#    list(D["ScienceQA"]) +
-#    list(D["PubmedQA"])
-#)
-#texts = all_questions
-#@spaces.GPU(15)
-#def build_indexes_on_gpu(model="google/embeddinggemma-300m"):
-#    device = 'cuda'
-#    emb = HuggingFaceEmbeddings(
-#        model_name="model",
-#        model_kwargs={"device": device},
-#        encode_kwargs={"normalize_embeddings": True})
-#    idx = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
-#    return idx
-#    device = "cuda"
-
-#index = build_indexes_on_gpu(model="google/embeddinggemma-300m")
+
+# Datasets & GPU build code remains commented out...
+# @spaces.GPU(15)
+# def build_indexes_on_gpu(model="google/embeddinggemma-300m"):
+#     device = 'cuda'
+#     emb = HuggingFaceEmbeddings(
+#         model_name="model",
+#         model_kwargs={"device": device, "attn_implementation": "eager"},
+#         encode_kwargs={"normalize_embeddings": True})
+#     idx = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
+#     return idx
+
 # ------------------------ Agent setup with timeout ------------------------
 def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
     client = InferenceClient(
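Note on the `attn_implementation` change above: `langchain_community`'s `HuggingFaceEmbeddings` forwards `model_kwargs` to the `sentence_transformers.SentenceTransformer` constructor, and depending on the installed sentence-transformers version, transformers-level options such as `attn_implementation` may need to be nested one level deeper rather than passed at the top level. A hedged, version-dependent variant (not what this commit does):

    emb = HuggingFaceEmbeddings(
        model_name="google/embeddinggemma-300m",
        # outer dict -> SentenceTransformer(...); inner "model_kwargs" -> the underlying transformers model
        model_kwargs={"device": "cuda", "model_kwargs": {"attn_implementation": "eager"}},
        encode_kwargs={"normalize_embeddings": True},
    )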
@@ -101,14 +96,11 @@ def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temper
 
     # Bind generation params by partially applying via model kwargs.
     # smolagents InferenceClientModel currently accepts client only; we pass runtime params in task text.
-    model = InferenceClientModel(model_id=model_id,client=client)
+    model = InferenceClientModel(model_id=model_id, client=client)
     agent = CodeAgent(model=model, tools=[classify_and_score, QuestionRetrieverTool])
     agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
     return agent
 
-
-# ------------------------ Agent task template -----------------------------
-
 # ------------------------ Gradio glue ------------------------------------
 def run_pipeline(
     hf_token,
@@ -146,7 +138,7 @@
 
     # The agent will internally call the tool
     try:
-        result_text = agent.run(task, max_steps=int(attempts)*4)
+        result_text = agent.run(task, max_steps=int(attempts) * 4)
     except Exception as e:
         result_text = f"ERROR: {e}"
 
@@ -164,7 +156,6 @@
 
     return final_json, result_text
 
-
 with gr.Blocks() as demo:
     gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
     gr.Markdown(
@@ -186,10 +177,11 @@
             value="Grade 7",
             label="Grade"
         )
-        subject= gr.Textbox(value="Math", label="Subject")
+        subject = gr.Textbox(value="Math", label="Subject")
         task_type = gr.Dropdown(
             choices=["TASK_TMPL", "CLASSIFY_TMPL", "GEN_TMPL", "RAG_TMPL"],
-            label= "task type")
+            label="task type"
+        )
 
     with gr.Row():
         target_bloom = gr.Dropdown(
@@ -215,7 +207,7 @@
 
     run_btn.click(
         fn=run_pipeline,
-        inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts, model_id, provider, timeout, temperature, max_tokens,task_type],
+        inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts, model_id, provider, timeout, temperature, max_tokens, task_type],
         outputs=[final_json, transcript]
     )
 
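Since Gradio passes `inputs` positionally, the updated list implies `run_pipeline` accepts its arguments in exactly this order. The full definition is outside this diff, but it presumably looks like this hypothetical reconstruction from the `inputs` list:

    def run_pipeline(
        hf_token, topic, grade, subject, target_bloom, target_dok,
        attempts, model_id, provider, timeout, temperature, max_tokens, task_type,
    ):
        ...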
213