Spaces:

bhardwaj08sarthak
/

STEM-Question-Generator

Sleeping

App Files Files Community

bhardwaj08sarthak committed on Sep 15

Commit

bfc2469

verified ·

1 Parent(s): f46d15d

Create app.py

Browse files

Files changed (1) hide show

app.py +290 -0

app.py ADDED Viewed

	@@ -0,0 +1,290 @@

+import os
+import json
+import gradio as gr
+from huggingface_hub import InferenceClient
+from smolagents import CodeAgent, InferenceClientModel, tool
+from level_classifier_tool import (
+    classify_levels_phrases,
+    HFEmbeddingBackend,
+    build_phrase_index
+)
# Cue phrases for each level of Bloom's taxonomy, keyed by level name.
# Keys run from the lowest level ("Remember") to the highest ("Create").
# A phrase may appear under more than one level (e.g. "compare", "select").
BLOOMS_PHRASES = {
    "Remember": [
        "define", "list", "recall", "identify",
        "state", "label", "name", "recognize",
        "find", "select", "match", "choose",
        "give", "write", "tell", "show",
    ],
    "Understand": [
        "classify", "interpret", "summarize", "explain",
        "estimate", "describe", "discuss", "predict",
        "paraphrase", "restate", "illustrate", "compare",
        "contrast", "report",
    ],
    "Apply": [
        "apply", "solve", "use", "demonstrate",
        "calculate", "implement", "perform", "execute",
        "carry out", "practice", "employ", "sketch",
    ],
    "Analyze": [
        "analyze", "differentiate", "organize", "structure",
        "break down", "distinguish", "dissect", "examine",
        "compare", "contrast", "attribute", "investigate",
    ],
    "Evaluate": [
        "evaluate", "judge", "critique", "assess",
        "defend", "argue", "select", "support",
        "appraise", "recommend", "conclude", "review",
    ],
    "Create": [
        "create", "design", "compose", "plan",
        "construct", "produce", "devise", "generate",
        "develop", "formulate", "invent", "build",
    ],
}
# Cue phrases for each Depth of Knowledge (DOK) level, keyed "DOK1".."DOK4".
# Spellings are kept exactly as authored (British and American forms are
# mixed) because these strings are what gets embedded and compared.
DOK_PHRASES = {
    "DOK1": [
        "define", "list", "recall", "compute", "identify",
        "state", "label", "how many", "name", "recognize",
        "find", "determine", "select", "match", "choose",
        "give", "write", "tell", "show", "point out",
    ],
    "DOK2": [
        "classify", "interpret", "estimate", "organise", "summarise",
        "explain", "solve", "categorize", "group", "compare",
        "contrast", "distinguish", "make observations", "collect data", "display data",
        "arrange", "sort", "paraphrase", "restate", "predict",
        "approximate", "demonstrate", "illustrate", "describe", "analyze data",
    ],
    "DOK3": [
        "justify", "analyze", "generalise", "compare", "construct",
        "investigate", "support", "defend", "argue", "examine",
        "differentiate", "criticize", "debate", "test", "experiment",
        "hypothesize", "draw conclusions", "break down", "dissect", "probe",
        "explore", "develop", "formulate",
    ],
    "DOK4": [
        "design", "synthesize", "model", "prove", "evaluate system",
        "critique", "create", "compose", "plan", "invent",
        "devise", "generate", "build", "construct", "produce",
        "formulate", "improve", "revise", "assess", "appraise",
        "judge", "recommend", "predict outcome", "simulate",
    ],
}
# Build the embedding backend and both phrase indexes a single time at import,
# so they can be handed to classify_levels_phrases as prebuilt indexes instead
# of being rebuilt per call. The Bloom's index is built first, then the DOK one.
_backend = HFEmbeddingBackend(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
_BLOOM_INDEX, _DOK_INDEX = (
    build_phrase_index(_backend, phrase_table)
    for phrase_table in (BLOOMS_PHRASES, DOK_PHRASES)
)
@tool
def classify_and_score(
    question: str,
    target_bloom: str,
    target_dok: str,
    agg: str = "max"
) -> dict:
    """Classify a question against Bloom’s and DOK targets and return guidance.

    Args:
        question: The question text to evaluate for cognitive demand.
        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
            or plus form (e.g., "Apply+") meaning that level or higher. Matching
            ignores letter case and surrounding whitespace.
        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
            or span (e.g., "DOK2-DOK3"). Matching ignores letter case and
            surrounding whitespace; a reversed span is treated as the same range.
        agg: Aggregation method over phrase similarities within a level
            (choices: "mean", "max", "topk_mean").

    Returns:
        A dictionary with:
            ok: True if both Bloom’s and DOK match the targets.
            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
            feedback: Brief guidance describing how to adjust the question to hit targets.

    Raises:
        ValueError: If target_bloom or target_dok names an unknown level.
    """
    bloom_order = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
    dok_order = ["DOK1", "DOK2", "DOK3", "DOK4"]

    def _level_index(name: str, order: list, label: str) -> int:
        # Case/whitespace-insensitive lookup; an unknown name fails loudly so the
        # caller is told what is accepted instead of silently never matching.
        wanted = name.strip().casefold()
        for position, level in enumerate(order):
            if level.casefold() == wanted:
                return position
        raise ValueError(
            f"Unknown {label} level {name!r}; expected one of {order}."
        )

    def _parse_target_bloom(t: str) -> list:
        # "Apply+" -> that level and everything above it, in taxonomy order.
        t = t.strip()
        if t.endswith("+"):
            return bloom_order[_level_index(t[:-1], bloom_order, "Bloom's"):]
        return [bloom_order[_level_index(t, bloom_order, "Bloom's")]]

    def _parse_target_dok(t: str) -> list:
        # "DOK2-DOK3" -> inclusive span; the bounds are ordered so a reversed
        # span does not collapse to an empty (unmatchable) target set.
        t = t.strip()
        if "-" in t:
            lo, _, hi = t.partition("-")
            first, last = sorted(
                (_level_index(lo, dok_order, "DOK"), _level_index(hi, dok_order, "DOK"))
            )
            return dok_order[first:last + 1]
        return [dok_order[_level_index(t, dok_order, "DOK")]]

    # Validate the targets before running the (comparatively expensive)
    # embedding-based classification.
    bloom_targets = _parse_target_bloom(target_bloom)
    dok_targets = _parse_target_dok(target_dok)

    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
        DOK_PHRASES,
        backend=_backend,
        prebuilt_bloom_index=_BLOOM_INDEX,
        prebuilt_dok_index=_DOK_INDEX,
        agg=agg,
        return_phrase_matches=True
    )

    bloom_best = res["blooms"]["best_level"]
    dok_best = res["dok"]["best_level"]
    bloom_ok = bloom_best in bloom_targets
    dok_ok = dok_best in dok_targets

    feedback_parts = []
    if not bloom_ok:
        # Targets are listed in taxonomy order (lowest to highest), not alphabetically.
        feedback_parts.append(
            f"Shift Bloom’s from {bloom_best} toward {bloom_targets}. "
            f"Top cues: {res['blooms']['top_phrases'].get(bloom_best, [])[:3]}"
        )
    if not dok_ok:
        feedback_parts.append(
            f"Shift DOK from {dok_best} toward {dok_targets}. "
            f"Top cues: {res['dok']['top_phrases'].get(dok_best, [])[:3]}"
        )

    return {
        "ok": bool(bloom_ok and dok_ok),
        "measured": {
            "bloom_best": bloom_best,
            "bloom_scores": res["blooms"]["scores"],
            "dok_best": dok_best,
            "dok_scores": res["dok"]["scores"],
        },
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }
# ------------------------ Agent setup with timeout ------------------------
def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
    """Build a CodeAgent wired to the scoring tool and the selected model.

    Args:
        hf_token: Hugging Face access token; an empty value means "no token".
        model_id: Model identifier to run inference against.
        provider: Inference provider name; an empty value lets the library choose.
        timeout: Request timeout in seconds.
        temperature: Sampling temperature forwarded to each completion call.
        max_tokens: Completion token limit forwarded to each completion call.

    Returns:
        A CodeAgent whose only tool is classify_and_score.
    """
    # Configure the model through its own constructor so that model_id,
    # provider, token and timeout are all recorded on the model, and so that
    # temperature / max_tokens are actually forwarded to generation (extra
    # keyword arguments are passed through to the completion call).
    model = InferenceClientModel(
        model_id=model_id,
        provider=provider or None,
        token=hf_token if hf_token else None,
        timeout=timeout,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    agent = CodeAgent(model=model, tools=[classify_and_score])
    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
    return agent
# ------------------------ Agent task template -----------------------------
# Prompt handed to the agent, filled in with str.format().
# Placeholders: {subject}, {grade}, {topic}, {target_bloom}, {target_dok},
# {attempts}. The doubled braces around the JSON example are format() escapes
# and render as single literal braces.
TASK_TMPL = """You generate {subject} question candidates for {grade} on "{topic}".
After you propose a candidate, you MUST immediately call:
classify_and_score(
    question=<just the question text>,
    target_bloom="{target_bloom}",
    target_dok="{target_dok}",
    agg="max"
)
Use the returned dict:
- If ok == True: print ONLY compact JSON {{"question": "...", "answer": "...", "reasoning": "..."}} and finish.
- If ok == False: briefly explain the needed shift, revise the question, and call classify_and_score again.
Repeat up to {attempts} attempts.
Keep answers concise.
Additionally, when you call classify_and_score, pass the exact question text you propose.
If you output JSON, ensure it is valid JSON (no trailing commas, use double quotes).
"""
# ------------------------ Gradio glue ------------------------------------
def _extract_final_json(result) -> str:
    """Return pretty-printed JSON found in an agent result, or "" if none.

    A dict/list result is serialized directly. For text, the span from the
    first "{" to the last "}" is tried first; if that is not valid JSON, the
    text is scanned and the last JSON object that decodes cleanly is used.
    """
    if isinstance(result, (dict, list)):
        try:
            return json.dumps(result, indent=2)
        except (TypeError, ValueError):
            return ""

    text = result if isinstance(result, str) else str(result)
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return ""

    try:
        return json.dumps(json.loads(text[start:end + 1]), indent=2)
    except ValueError:
        # Not a single clean object (extra prose, several objects, stray
        # braces); fall through to the scanning pass below.
        pass

    decoder = json.JSONDecoder()
    last_object = None
    position = start
    while position != -1:
        try:
            parsed, stop = decoder.raw_decode(text, position)
        except ValueError:
            position = text.find("{", position + 1)
            continue
        last_object = parsed
        # Resume after the decoded object so nested objects are not revisited.
        position = text.find("{", stop)

    if last_object is None:
        return ""
    return json.dumps(last_object, indent=2)


def run_pipeline(
    hf_token,
    topic,
    grade,
    subject,
    target_bloom,
    target_dok,
    attempts,
    model_id,
    provider,
    timeout,
    temperature,
    max_tokens
):
    """Run the question-generation agent once and return its outputs.

    Builds a fresh agent from the UI settings, formats the task prompt, runs
    the agent for at most ``attempts * 4`` steps, and tries to pull a JSON
    object out of whatever the agent returned.

    Returns:
        A ``(final_json, transcript)`` tuple of strings: ``final_json`` is
        pretty-printed JSON or "" when none was found; ``transcript`` is the
        agent result as text, or ``"ERROR: ..."`` if the run raised.
    """
    # Build agent per run (or cache if you prefer)
    agent = make_agent(
        hf_token=(hf_token or "").strip(),  # tolerate a missing token value
        model_id=model_id,
        provider=provider,
        timeout=int(timeout),
        temperature=float(temperature),
        max_tokens=int(max_tokens),
    )
    task = TASK_TMPL.format(
        grade=grade,
        topic=topic,
        subject=subject,
        target_bloom=target_bloom,
        target_dok=target_dok,
        attempts=int(attempts)
    )
    # The agent will internally call the tool
    try:
        result = agent.run(task, max_steps=int(attempts) * 4)
    except Exception as e:
        # UI boundary: surface any failure in the transcript instead of crashing.
        result = f"ERROR: {e}"

    # The agent's final answer is not guaranteed to be a string, so extract
    # JSON from the raw result and stringify separately for the transcript.
    final_json = _extract_final_json(result)
    result_text = result if isinstance(result, str) else str(result)
    return final_json, result_text
# ------------------------ Gradio UI ---------------------------------------
# Grade labels are generated so every school grade uses the same "Grade N"
# form (the hand-written list had "Grade4" without the space).
_GRADE_CHOICES = [f"Grade {number}" for number in range(1, 13)] + [
    "Under Graduate",
    "Post Graduate",
]

with gr.Blocks() as demo:
    gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
    gr.Markdown(
        "This app uses a **CodeAgent** that *calls the scoring tool* "
        "(`classify_and_score`) after each proposal, and revises until it hits the target."
    )

    # Connection settings for the inference endpoint.
    with gr.Accordion("API Settings", open=False):
        hf_token = gr.Textbox(label="Hugging Face Token (required if the endpoint needs auth)", type="password")
        model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
        provider = gr.Textbox(value="novita", label="Provider")
        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")

    # What to generate a question about.
    with gr.Row():
        topic = gr.Textbox(value="Fractions", label="Topic")
        grade = gr.Dropdown(
            choices=_GRADE_CHOICES,
            value="Grade 7",
            label="Grade"
        )
        subject = gr.Textbox(value="Math", label="Subject")

    # Target difficulty and how many revisions the agent may attempt.
    with gr.Row():
        target_bloom = gr.Dropdown(
            choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"],
            value="Analyze",
            label="Target Bloom’s"
        )
        target_dok = gr.Dropdown(
            choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
            value="DOK2-DOK3",
            label="Target Depth of Knowledge"
        )
        attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")

    with gr.Accordion("⚙️ Generation Controls", open=False):
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
        max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")

    run_btn = gr.Button("Run Agent 🚀")
    final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
    transcript = gr.Textbox(label="Agent Transcript", lines=18)

    # The input order must match run_pipeline's positional parameters.
    run_btn.click(
        fn=run_pipeline,
        inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts, model_id, provider, timeout, temperature, max_tokens],
        outputs=[final_json, transcript]
    )

if __name__ == "__main__":
    demo.launch()