bhardwaj08sarthak committed
Commit bfc2469 · verified · 1 parent: f46d15d

Create app.py

Files changed (1): app.py (+290 −0)
app.py ADDED
import os
import json
import gradio as gr
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, InferenceClientModel, tool

from level_classifier_tool import (
    classify_levels_phrases,
    HFEmbeddingBackend,
    build_phrase_index,
)
BLOOMS_PHRASES = {
    "Remember": [
        "define", "list", "recall", "identify", "state", "label", "name", "recognize",
        "find", "select", "match", "choose", "give", "write", "tell", "show"
    ],
    "Understand": [
        "classify", "interpret", "summarize", "explain", "estimate", "describe",
        "discuss", "predict", "paraphrase", "restate", "illustrate", "compare",
        "contrast", "report"
    ],
    "Apply": [
        "apply", "solve", "use", "demonstrate", "calculate", "implement", "perform",
        "execute", "carry out", "practice", "employ", "sketch"
    ],
    "Analyze": [
        "analyze", "differentiate", "organize", "structure", "break down",
        "distinguish", "dissect", "examine", "compare", "contrast", "attribute",
        "investigate"
    ],
    "Evaluate": [
        "evaluate", "judge", "critique", "assess", "defend", "argue", "select",
        "support", "appraise", "recommend", "conclude", "review"
    ],
    "Create": [
        "create", "design", "compose", "plan", "construct", "produce", "devise",
        "generate", "develop", "formulate", "invent", "build"
    ],
}

DOK_PHRASES = {
    "DOK1": [
        "define", "list", "recall", "compute", "identify", "state", "label", "how many",
        "name", "recognize", "find", "determine", "select", "match", "choose", "give",
        "write", "tell", "show", "point out"
    ],
    "DOK2": [
        "classify", "interpret", "estimate", "organise", "summarise", "explain", "solve",
        "categorize", "group", "compare", "contrast", "distinguish", "make observations",
        "collect data", "display data", "arrange", "sort", "paraphrase", "restate",
        "predict", "approximate", "demonstrate", "illustrate", "describe", "analyze data"
    ],
    "DOK3": [
        "justify", "analyze", "generalise", "compare", "construct", "investigate",
        "support", "defend", "argue", "examine", "differentiate", "criticize", "debate",
        "test", "experiment", "hypothesize", "draw conclusions", "break down", "dissect",
        "probe", "explore", "develop", "formulate"
    ],
    "DOK4": [
        "design", "synthesize", "model", "prove", "evaluate system", "critique", "create",
        "compose", "plan", "invent", "devise", "generate", "build", "construct", "produce",
        "formulate", "improve", "revise", "assess", "appraise", "judge", "recommend",
        "predict outcome", "simulate"
    ],
}

# Prebuild the phrase-embedding indices once at import time, so each request
# only needs to embed the incoming question.
_backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)

@tool
def classify_and_score(
    question: str,
    target_bloom: str,
    target_dok: str,
    agg: str = "max",
) -> dict:
    """Classify a question against Bloom’s and DOK targets and return guidance.

    Args:
        question: The question text to evaluate for cognitive demand.
        target_bloom: Target Bloom’s level or range. Accepts an exact level
            (e.g., "Analyze") or a plus form (e.g., "Apply+") meaning that
            level or higher.
        target_dok: Target DOK level or range. Accepts an exact level
            (e.g., "DOK3") or a span (e.g., "DOK2-DOK3").
        agg: Aggregation method over phrase similarities within a level
            (choices: "mean", "max", "topk_mean").

    Returns:
        A dictionary with:
            ok: True if both Bloom’s and DOK match the targets.
            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
            feedback: Brief guidance on how to adjust the question to hit the targets.
    """
    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
        DOK_PHRASES,
        backend=_backend,
        prebuilt_bloom_index=_BLOOM_INDEX,
        prebuilt_dok_index=_DOK_INDEX,
        agg=agg,
        return_phrase_matches=True,
    )

    def _parse_target_bloom(t: str):
        order = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
        if t.endswith("+"):
            base = t[:-1]
            return set(order[order.index(base):])
        return {t}

    def _parse_target_dok(t: str):
        order = ["DOK1", "DOK2", "DOK3", "DOK4"]
        if "-" in t:
            lo, hi = t.split("-")
            return set(order[order.index(lo):order.index(hi) + 1])
        return {t}

    bloom_target_set = _parse_target_bloom(target_bloom)
    dok_target_set = _parse_target_dok(target_dok)

    bloom_best = res["blooms"]["best_level"]
    dok_best = res["dok"]["best_level"]

    bloom_ok = bloom_best in bloom_target_set
    dok_ok = dok_best in dok_target_set

    feedback_parts = []
    if not bloom_ok:
        feedback_parts.append(
            f"Shift Bloom’s from {bloom_best} toward {sorted(bloom_target_set)}. "
            f"Top cues: {res['blooms']['top_phrases'].get(bloom_best, [])[:3]}"
        )
    if not dok_ok:
        feedback_parts.append(
            f"Shift DOK from {dok_best} toward {sorted(dok_target_set)}. "
            f"Top cues: {res['dok']['top_phrases'].get(dok_best, [])[:3]}"
        )

    return {
        "ok": bool(bloom_ok and dok_ok),
        "measured": {
            "bloom_best": bloom_best,
            "bloom_scores": res["blooms"]["scores"],
            "dok_best": dok_best,
            "dok_scores": res["dok"]["scores"],
        },
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }

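# Illustrative check (a sketch, not part of the app flow): smolagents' @tool
# yields a callable tool object, so the classifier can be probed directly.
# The question text and targets below are made-up examples.
#
#   probe = classify_and_score(
#       question="Compare two methods for adding unlike fractions and justify which generalizes better.",
#       target_bloom="Analyze",
#       target_dok="DOK2-DOK3",
#   )
#   # expected shape: {"ok": bool, "measured": {...}, "feedback": "..."}
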
# ------------------------ Agent setup with timeout ------------------------
def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
    client = InferenceClient(
        model=model_id,
        provider=provider,
        timeout=timeout,
        token=hf_token if hf_token else None,
    )

    model = InferenceClientModel(client=client)
    agent = CodeAgent(model=model, tools=[classify_and_score])
    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
    return agent

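# Note: CodeAgent does not consume the _ui_params attribute above; temperature
# and max_tokens only take effect if forwarded to the model call. A minimal
# sketch of a direct call that does honor them (plain huggingface_hub API,
# shown for orientation only):
#
#   reply = client.chat_completion(
#       messages=[{"role": "user", "content": "Say hi"}],
#       temperature=0.7,
#       max_tokens=64,
#   )
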
# ------------------------ Agent task template -----------------------------
TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".

After you propose a candidate, you MUST immediately call:
classify_and_score(
    question=<just the question text>,
    target_bloom="{target_bloom}",
    target_dok="{target_dok}",
    agg="max"
)

Use the returned dict:
- If ok == True: print ONLY compact JSON {{"question": "...", "answer": "...", "reasoning": "..."}} and finish.
- If ok == False: briefly explain the needed shift, revise the question, and call classify_and_score again.
Repeat up to {attempts} attempts.
Keep answers concise.
When you call classify_and_score, pass the exact question text you propose.
If you output JSON, make sure it is valid (no trailing commas, double quotes only).
'''

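# Example of the compact JSON the template asks for on success (values are
# illustrative only):
#
#   {"question": "Is 3/4 greater than 5/8? Justify your answer.",
#    "answer": "Yes, 3/4 is greater.",
#    "reasoning": "With denominator 8, 3/4 = 6/8, and 6/8 > 5/8."}
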
# ------------------------ Gradio glue ------------------------------------
def run_pipeline(
    hf_token,
    topic,
    grade,
    subject,
    target_bloom,
    target_dok,
    attempts,
    model_id,
    provider,
    timeout,
    temperature,
    max_tokens,
):
    # Build a fresh agent per run (cache it if you prefer).
    agent = make_agent(
        hf_token=hf_token.strip(),
        model_id=model_id,
        provider=provider,
        timeout=int(timeout),
        temperature=float(temperature),
        max_tokens=int(max_tokens),
    )

    task = TASK_TMPL.format(
        grade=grade,
        topic=topic,
        subject=subject,
        target_bloom=target_bloom,
        target_dok=target_dok,
        attempts=int(attempts),
    )

    # The agent calls the scoring tool internally; allow a few steps per attempt.
    # agent.run may return a non-string final answer, so coerce to str for the
    # JSON search below.
    try:
        result_text = str(agent.run(task, max_steps=int(attempts) * 4))
    except Exception as e:
        result_text = f"ERROR: {e}"

    # Try to extract the final JSON object from the transcript
    # (simple heuristic: the outermost brace span).
    final_json = ""
    try:
        start = result_text.find("{")
        end = result_text.rfind("}")
        if start != -1 and end != -1 and end > start:
            candidate = result_text[start:end + 1]
            final_json = json.dumps(json.loads(candidate), indent=2)
    except Exception:
        final_json = ""

    return final_json, result_text

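# Note: the brace heuristic above takes the span from the first "{" to the
# last "}", so it assumes the transcript ends with at most one JSON object.
# For example, 'Done. {"question": "Q", "answer": "A", "reasoning": "R"}'
# parses cleanly, while a transcript that also echoes tool-call dicts may not.
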
with gr.Blocks() as demo:
    gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
    gr.Markdown(
        "This app uses a **CodeAgent** that *calls the scoring tool* "
        "(`classify_and_score`) after each proposal and revises until it hits the target."
    )

    with gr.Accordion("API Settings", open=False):
        hf_token = gr.Textbox(label="Hugging Face Token (required if the endpoint needs auth)", type="password")
        model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
        provider = gr.Textbox(value="novita", label="Provider")
        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")

    with gr.Row():
        topic = gr.Textbox(value="Fractions", label="Topic")
        grade = gr.Dropdown(
            choices=["Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6",
                     "Grade 7", "Grade 8", "Grade 9", "Grade 10", "Grade 11", "Grade 12",
                     "Under Graduate", "Post Graduate"],
            value="Grade 7",
            label="Grade"
        )
        subject = gr.Textbox(value="Math", label="Subject")

    with gr.Row():
        target_bloom = gr.Dropdown(
            choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"],
            value="Analyze",
            label="Target Bloom’s"
        )
        target_dok = gr.Dropdown(
            choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
            value="DOK2-DOK3",
            label="Target Depth of Knowledge"
        )
        attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")

    with gr.Accordion("⚙️ Generation Controls", open=False):
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
        max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")

    run_btn = gr.Button("Run Agent 🚀")

    final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
    transcript = gr.Textbox(label="Agent Transcript", lines=18)

    run_btn.click(
        fn=run_pipeline,
        inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts,
                model_id, provider, timeout, temperature, max_tokens],
        outputs=[final_json, transcript]
    )

if __name__ == "__main__":
    demo.launch()
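To try the app locally (assuming `gradio`, `smolagents`, `huggingface_hub`, and the Space's `level_classifier_tool` module are available), run `python app.py` and open the local URL Gradio prints.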