import os, sys, importlib
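
# Opt out of the Spaces ZeroGPU wrapper before anything imports `spaces`.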
os.environ["SPACES_ZERO_DISABLED"] = "1"
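

# Best-effort teardown: if a ZeroGPU patch module is already loaded, call
# whichever disable hook it exposes; module and attribute names vary across
# `spaces` versions, so every step is wrapped in try/except.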
def _hard_disable_spaces_zero():
    candidates = [
        "spaces.zero", "spaces.zero.torch.patching", "spaces.zero.torch",
        "spaces.zero.patch", "spaces.zero.patching",
    ]
    for modname in candidates:
        try:
            m = sys.modules.get(modname) or importlib.import_module(modname)
        except Exception:
            continue
        for attr in ("disable", "unpatch", "deactivate"):
            fn = getattr(m, attr, None)
            if callable(fn):
                try:
                    fn()
                except Exception:
                    pass


_hard_disable_spaces_zero()
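
# Ask transformers for eager attention globally; whether this env var is
# honored depends on the transformers version, so the model config below
# also sets it explicitly.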
os.environ["TRANSFORMERS_ATTENTION_IMPLEMENTATION"] = "eager"
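
# Force PyTorch's math SDPA backend (no flash / memory-efficient kernels).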
try:
    import torch
    # sdp_kernel() is a context manager and only applies inside a `with`
    # block; these backend toggles persist for the whole process.
    torch.backends.cuda.enable_flash_sdp(False)
    torch.backends.cuda.enable_mem_efficient_sdp(False)
    torch.backends.cuda.enable_math_sdp(True)
except Exception:
    pass

import json
import gradio as gr
from pathlib import Path

from huggingface_hub import login, snapshot_download
from smolagents import CodeAgent, InferenceClientModel, tool
from langchain_community.embeddings import HuggingFaceEmbeddings
from llama_index.core import StorageContext, load_index_from_storage

from level_classifier_tool_2 import (
    classify_levels_phrases,
    HFEmbeddingBackend,
    build_phrase_index,
)
from task_temp import rag_temp, rag_cls_temp, cls_temp, gen_temp
from all_tools import classify_and_score, QuestionRetrieverTool, set_classifier_state, set_retrieval_index
from phrases import BLOOMS_PHRASES, DOK_PHRASES

# Log in only when the token is set; login(None) would try to prompt
# interactively, which fails in a headless Space.
_hf_token = os.getenv("HF_Token")
if _hf_token:
    login(_hf_token)
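
# Embedding backend shared by the Bloom's/DOK phrase classifier.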
_backend = HFEmbeddingBackend(model_name="google/embeddinggemma-300m")
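
# Best-effort switch to eager attention on the already-loaded model; the
# MODEL attribute layout is specific to HFEmbeddingBackend.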
try:
    _backend.MODEL.config.attn_implementation = "eager"
except Exception:
    pass
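
# Precompute phrase embeddings for Bloom's taxonomy and DOK level scoring.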
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
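
# The prebuilt llama-index store is published as a Hugging Face dataset repo.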
DATASET_REPO = "bhardwaj08sarthak/my-stem-index"
PERSIST_SUBDIR = "index_store"
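

# Pick a writable base directory (home, then /tmp) for the downloaded index.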
def _pick_writable_base() -> Path:
    for base in (Path.home(), Path("/tmp")):
        try:
            base.mkdir(parents=True, exist_ok=True)
            test = base / ".write_test"
            test.write_text("ok")
            test.unlink(missing_ok=True)
            return base
        except Exception:
            continue
    return Path.cwd()


WRITABLE_BASE = _pick_writable_base()
LOCAL_BASE = WRITABLE_BASE / "my_app_cache" / "index"
LOCAL_BASE.mkdir(parents=True, exist_ok=True)
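
# Download the prebuilt index. local_dir_use_symlinks is deprecated (and
# ignored) in recent huggingface_hub, but harmless here.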
snapshot_download(
    repo_id=DATASET_REPO,
    repo_type="dataset",
    local_dir=str(LOCAL_BASE),
    local_dir_use_symlinks=False,
)
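

# The store may sit at the repo root or under PERSIST_SUBDIR depending on how
# it was uploaded; check both, then fall back to a recursive scan.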
def _resolve_persist_dir(base: Path, subdir: str) -> Path:
    candidates = [
        base / subdir,
        base,
    ]
    for c in candidates:
        if (c / "docstore.json").exists():
            return c

    matches = list(base.rglob("docstore.json"))
    if matches:
        return matches[0].parent

    tree = "\n".join(str(p.relative_to(base)) for p in base.rglob("*") if p.is_file())
    raise FileNotFoundError(
        f"Could not find 'docstore.json' under {base}. "
        f"Expected '{subdir}/docstore.json'. Downloaded files:\n{tree}"
    )


persist_dir = _resolve_persist_dir(LOCAL_BASE, PERSIST_SUBDIR)
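
# Sanity-check the three llama-index store files before attempting a load.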
expected = ["docstore.json", "index_store.json", "vector_store.json"]
missing = [name for name in expected if not (persist_dir / name).exists()]
if missing:
    print(f"[warn] Missing in {persist_dir}: {missing}. If loading fails, re-upload the full '{PERSIST_SUBDIR}' folder.")
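
# Embed on GPU when available; fall back to CPU if torch is missing or CPU-only.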
try:
    import torch
    _emb_device = "cuda" if torch.cuda.is_available() else "cpu"
except Exception:
    _emb_device = "cpu"

emb = HuggingFaceEmbeddings(
    model_name="google/embeddinggemma-300m",
    model_kwargs={"device": _emb_device},
    encode_kwargs={"normalize_embeddings": True},
)
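
# Rehydrate the persisted index. The embed model must match the one the index
# was built with, or retrieval quality will silently degrade.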
storage_context = StorageContext.from_defaults(persist_dir=str(persist_dir))
index = load_index_from_storage(storage_context, embed_model=emb)
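
# Hand the classifier backend, phrase indices, and retrieval index to the
# tools module.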
set_classifier_state(_backend, _BLOOM_INDEX, _DOK_INDEX)
set_retrieval_index(index)
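
# Prompt templates keyed by the task-type dropdown value; each is a str.format
# template over {grade}, {topic}, {subject}, {target_bloom}, {target_dok}, {attempts}.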
TASK_TEMPLATES = {
    "rag_temp": rag_temp,
    "rag_cls_temp": rag_cls_temp,
    "cls_temp": cls_temp,
    "gen_temp": gen_temp,
}


def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
    # InferenceClientModel builds its own InferenceClient from these settings.
    # Forwarding temperature/max_tokens as extra kwargs to the completion call
    # is assumed smolagents behavior; drop them if your version rejects them.
    model = InferenceClientModel(
        model_id=model_id,
        provider=provider,
        token=hf_token if hf_token else None,
        timeout=timeout,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # QuestionRetrieverTool is assumed to be a ready-made tool instance from
    # all_tools; if it is a Tool subclass, instantiate it before passing.
    return CodeAgent(model=model, tools=[classify_and_score, QuestionRetrieverTool])
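

# Gradio callback: build a fresh agent, fill the chosen template, run the
# agent loop, and surface the final JSON candidate if one can be parsed.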
def run_pipeline(
    hf_token,
    topic,
    grade,
    subject,
    target_bloom,
    target_dok,
    attempts,
    model_id,
    provider,
    timeout,
    temperature,
    max_tokens,
    task_type,
):
    agent = make_agent(
        hf_token=hf_token.strip(),
        model_id=model_id,
        provider=provider,
        timeout=int(timeout),
        temperature=float(temperature),
        max_tokens=int(max_tokens),
    )
    template = TASK_TEMPLATES[task_type]
    task = template.format(
        grade=grade,
        topic=topic,
        subject=subject,
        target_bloom=target_bloom,
        target_dok=target_dok,
        attempts=int(attempts),
    )

    try:
        # agent.run may return a non-string final answer; coerce before parsing.
        result_text = str(agent.run(task, max_steps=int(attempts) * 4))
    except Exception as e:
        result_text = f"ERROR: {e}"

    # Best-effort: pretty-print the outermost {...} span if it parses as JSON.
    final_json = ""
    try:
        start = result_text.find("{")
        end = result_text.rfind("}")
        if start != -1 and end > start:
            final_json = json.dumps(json.loads(result_text[start:end + 1]), indent=2)
    except Exception:
        final_json = ""

    return final_json, result_text
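

# Gradio UI: model settings, topic/grade targets, and generation controls.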
with gr.Blocks() as demo:
    gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
    gr.Markdown(
        "This app uses a **CodeAgent** that *calls the scoring tool* "
        "(`classify_and_score`) after each proposal, and revises until it hits the target."
    )

    with gr.Accordion("API Settings", open=False):
        hf_token = gr.Textbox(label="Hugging Face Token (required)", type="password")
        model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
        provider = gr.Textbox(value="novita", label="Provider")
        timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")

    with gr.Row():
        topic = gr.Textbox(value="Fractions", label="Topic")
        grade = gr.Dropdown(
            choices=["Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6",
                     "Grade 7", "Grade 8", "Grade 9", "Grade 10", "Grade 11", "Grade 12",
                     "Under Graduate", "Post Graduate"],
            value="Grade 7",
            label="Grade",
        )
        subject = gr.Textbox(value="Math", label="Subject")
        task_type = gr.Dropdown(
            choices=[("RAG Template", "rag_temp"),
                     ("RAG+CLS Template", "rag_cls_temp"),
                     ("Classification Template", "cls_temp"),
                     ("Generation Template", "gen_temp")],
            label="Task Type",
        )

    with gr.Row():
        target_bloom = gr.Dropdown(
            choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create",
                     "Apply+", "Analyze+", "Evaluate+"],
            value="Analyze",
            label="Target Bloom’s",
        )
        target_dok = gr.Dropdown(
            choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
            value="DOK2-DOK3",
            label="Target DOK",
        )
        attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")

    with gr.Accordion("Generation Controls", open=False):
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
        max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")

    run_btn = gr.Button("Run Agent")

    final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
    transcript = gr.Textbox(label="Agent Transcript", lines=18)

    run_btn.click(
        fn=run_pipeline,
        inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts,
                model_id, provider, timeout, temperature, max_tokens, task_type],
        outputs=[final_json, transcript],
    )


if __name__ == "__main__":
    demo.launch(share=True)