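"""Gradio Space: a smolagents CodeAgent that drafts questions for a target Bloom's
level and Depth of Knowledge (DOK), scores each draft with `classify_and_score`, and
retrieves reference questions from a prebuilt LlamaIndex store."""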
import os, sys, importlib
# Disable Spaces ZeroGPU by env and by calling its disable/unpatch if preloaded
os.environ["SPACES_ZERO_DISABLED"] = "1"
def _hard_disable_spaces_zero():
    # Hit common modules and try disable/unpatch/deactivate if present
    candidates = [
        "spaces.zero", "spaces.zero.torch.patching", "spaces.zero.torch",
        "spaces.zero.patch", "spaces.zero.patching"
    ]
    for modname in candidates:
        try:
            m = sys.modules.get(modname) or importlib.import_module(modname)
        except Exception:
            continue
        for attr in ("disable", "unpatch", "deactivate"):
            fn = getattr(m, attr, None)
            if callable(fn):
                try:
                    fn()
                except Exception:
                    pass
_hard_disable_spaces_zero()
# Force Transformers to use eager attention globally (affects all future loads)
os.environ["TRANSFORMERS_ATTENTION_IMPLEMENTATION"] = "eager"
# Prefer simple math SDP kernels (avoid vmap-heavy paths)
try:
    import torch
    # torch.backends.cuda.sdp_kernel() is a context manager, so calling it bare has no
    # lasting effect; use the global backend toggles instead.
    torch.backends.cuda.enable_math_sdp(True)
    torch.backends.cuda.enable_flash_sdp(False)
    torch.backends.cuda.enable_mem_efficient_sdp(False)
except Exception:
    pass
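# Note: newer PyTorch releases also expose torch.nn.attention.sdpa_kernel as the
# preferred API; the backend-level toggles above are the older, still-supported
# global switches.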
# -------------------------------------------------------------------------------
import json
import pickle
import gradio as gr
from huggingface_hub import InferenceClient, hf_hub_download, login, snapshot_download
from smolagents import CodeAgent, InferenceClientModel, tool
from langchain_community.embeddings import HuggingFaceEmbeddings
# from llama_index.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import StorageContext, load_index_from_storage

login(os.getenv("HF_Token"))
# from all_datasets import *
from level_classifier_tool_2 import (
    classify_levels_phrases,
    HFEmbeddingBackend,
    build_phrase_index,
)
from task_temp import rag_temp, rag_cls_temp, cls_temp, gen_temp
from all_tools import classify_and_score, QuestionRetrieverTool, set_classifier_state, set_retrieval_index
from phrases import BLOOMS_PHRASES, DOK_PHRASES
from pathlib import Path
# ------------------------ Prebuild embeddings once ------------------------
_backend = HFEmbeddingBackend(model_name="google/embeddinggemma-300m")
# Belt-and-suspenders: ensure eager attention even if class wasn't patched
try:
    _backend.MODEL.config.attn_implementation = "eager"
except Exception:
    pass
_BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
_DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
DATASET_REPO = "bhardwaj08sarthak/my-stem-index" # your dataset repo id
PERSIST_SUBDIR = "index_store" # folder inside the dataset
# Writable cache base (home or /tmp)
def _pick_writable_base() -> Path:
    for base in (Path.home(), Path("/tmp")):
        try:
            base.mkdir(parents=True, exist_ok=True)
            test = base / ".write_test"
            test.write_text("ok")
            test.unlink(missing_ok=True)
            return base
        except Exception:
            continue
    return Path.cwd()
WRITABLE_BASE = _pick_writable_base()
LOCAL_BASE = WRITABLE_BASE / "my_app_cache" / "index"
LOCAL_BASE.mkdir(parents=True, exist_ok=True)
# Download the dataset snapshot that contains the persisted index folder
snapshot_download(
    repo_id=DATASET_REPO,
    repo_type="dataset",
    local_dir=str(LOCAL_BASE),
    local_dir_use_symlinks=False,
)
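# A sketch, not applied here: snapshot_download accepts an allow_patterns filter, so
# the download could be limited to just the persisted index folder, e.g.:
#   snapshot_download(
#       repo_id=DATASET_REPO,
#       repo_type="dataset",
#       local_dir=str(LOCAL_BASE),
#       allow_patterns=[f"{PERSIST_SUBDIR}/**"],
#   )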
# Resolve the actual persist dir by finding docstore.json
def _resolve_persist_dir(base: Path, subdir: str) -> Path:
    # Common candidates
    candidates = [
        base / subdir,  # <LOCAL_BASE>/index_store
        base,           # sometimes files land directly under local base
    ]
    for c in candidates:
        if (c / "docstore.json").exists():
            return c
    # Search anywhere under base for docstore.json
    matches = list(base.rglob("docstore.json"))
    if matches:
        return matches[0].parent
    # Nothing found: print what we actually downloaded
    tree = "\n".join(str(p.relative_to(base)) for p in base.rglob("*") if p.is_file())
    raise FileNotFoundError(
        f"Could not find 'docstore.json' under {base}. "
        f"Expected '{subdir}/docstore.json'. Downloaded files:\n{tree}"
    )
persist_dir = _resolve_persist_dir(Path(LOCAL_BASE), PERSIST_SUBDIR)
# Sanity-check typical LlamaIndex files (names may vary by version/vector store)
expected = ["docstore.json", "index_store.json", "vector_store.json"]
missing = [name for name in expected if not (persist_dir / name).exists()]
if missing:
    # Not fatal for every setup, but warn loudly so you know if the upload was incomplete
    print(f"[warn] Missing in {persist_dir}: {missing}. If loading fails, re-upload the full '{PERSIST_SUBDIR}' folder.")
# Pick a device that exists for embeddings
try:
    import torch
    _emb_device = "cuda" if torch.cuda.is_available() else "cpu"
except Exception:
    _emb_device = "cpu"
emb = HuggingFaceEmbeddings(
    model_name="google/embeddinggemma-300m",
    model_kwargs={"device": _emb_device},  # optionally also "attn_implementation": "eager"
    encode_kwargs={"normalize_embeddings": True},
)
# Finally load the index
storage_context = StorageContext.from_defaults(persist_dir=str(persist_dir))
index = load_index_from_storage(storage_context, embed_model=emb)
set_classifier_state(_backend, _BLOOM_INDEX, _DOK_INDEX)
set_retrieval_index(index)
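# set_classifier_state / set_retrieval_index hand the embedding backend, phrase
# indexes, and loaded index to all_tools (presumably as module-level state) so the
# agent's tools can reuse them instead of rebuilding on every call.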
# Datasets & GPU build code remains commented out...
# @spaces.GPU(15)
# def build_indexes_on_gpu(model="google/embeddinggemma-300m"):
#     device = 'cuda'
#     emb = HuggingFaceEmbeddings(
#         model_name=model,
#         model_kwargs={"device": device, "attn_implementation": "eager"},
#         encode_kwargs={"normalize_embeddings": True})
#     idx = VectorStoreIndex.from_documents([Document(text=t) for t in texts], embed_model=emb)
#     return idx
TASK_TEMPLATES = {
"rag_temp": rag_temp,
"rag_cls_temp": rag_cls_temp,
"cls_temp": cls_temp,
"gen_temp": gen_temp,
}
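# Each entry is a format-string template; run_pipeline fills it with the UI inputs,
# e.g. template.format(grade="Grade 7", topic="Fractions", subject="Math",
#                      target_bloom="Analyze", target_dok="DOK2-DOK3", attempts=5).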
# ------------------------ Agent setup with timeout ------------------------
def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
    client = InferenceClient(
        model=model_id,
        provider=provider,
        timeout=timeout,
        token=hf_token if hf_token else None,
    )
    # Generation params are not bound to the model here; smolagents' InferenceClientModel
    # is given the client only, and temperature/max_tokens are attached below for reference.
    model = InferenceClientModel(model_id=model_id, client=client)
    agent = CodeAgent(model=model, tools=[classify_and_score, QuestionRetrieverTool])
    agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}  # attach for reference
    return agent
# ------------------------ Gradio glue ------------------------------------
def run_pipeline(
    hf_token,
    topic,
    grade,
    subject,
    target_bloom,
    target_dok,
    attempts,
    model_id,
    provider,
    timeout,
    temperature,
    max_tokens,
    task_type,
):
    # Build agent per run (or cache if you prefer)
    agent = make_agent(
        hf_token=hf_token.strip(),
        model_id=model_id,
        provider=provider,
        timeout=int(timeout),
        temperature=float(temperature),
        max_tokens=int(max_tokens),
    )
    template = TASK_TEMPLATES[task_type]
    task = template.format(
        grade=grade,
        topic=topic,
        subject=subject,
        target_bloom=target_bloom,
        target_dok=target_dok,
        attempts=int(attempts),
    )
    # The agent will internally call the tool
    try:
        # agent.run may return a non-string final answer, so coerce to str before parsing
        result_text = str(agent.run(task, max_steps=int(attempts) * 4))
    except Exception as e:
        result_text = f"ERROR: {e}"
    # Try to extract final JSON
    final_json = ""
    try:
        # find JSON object in result_text (simple heuristic)
        start = result_text.find("{")
        end = result_text.rfind("}")
        if start != -1 and end != -1 and end > start:
            candidate = result_text[start:end + 1]
            final_json = json.dumps(json.loads(candidate), indent=2)
    except Exception:
        final_json = ""
    return final_json, result_text
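# Note on run_pipeline's JSON extraction: it keeps the span from the first '{' to the
# last '}', so a transcript containing several JSON objects will usually fail to parse
# and leave the JSON panel empty; the full transcript is always returned alongside.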
with gr.Blocks() as demo:
gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
gr.Markdown(
"This app uses a **CodeAgent** that *calls the scoring tool* "
"(`classify_and_score`) after each proposal, and revises until it hits the target."
)
with gr.Accordion("API Settings", open=False):
hf_token = gr.Textbox(label="Hugging Face Token (required)", type="password")
model_id = gr.Textbox(value="meta-llama/Llama-4-Scout-17B-16E-Instruct", label="Model ID")
provider = gr.Textbox(value="novita", label="Provider")
timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
with gr.Row():
topic = gr.Textbox(value="Fractions", label="Topic")
        grade = gr.Dropdown(
            choices=["Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6", "Grade 7", "Grade 8", "Grade 9",
                     "Grade 10", "Grade 11", "Grade 12", "Under Graduate", "Post Graduate"],
            value="Grade 7",
            label="Grade"
        )
        subject = gr.Textbox(value="Math", label="Subject")
        task_type = gr.Dropdown(
            choices=[("RAG Template", "rag_temp"),
                     ("RAG+CLS Template", "rag_cls_temp"),
                     ("Classification Template", "cls_temp"),
                     ("Generation Template", "gen_temp")],
            label="Task Type"
        )
    with gr.Row():
        target_bloom = gr.Dropdown(
            choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create", "Apply+", "Analyze+", "Evaluate+"],
            value="Analyze",
            label="Target Bloom’s"
        )
        target_dok = gr.Dropdown(
            choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
            value="DOK2-DOK3",
            label="Target DOK"
        )
        attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")
    with gr.Accordion("Generation Controls", open=False):
        temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
        max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")
    run_btn = gr.Button("Run Agent")
    final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
    transcript = gr.Textbox(label="Agent Transcript", lines=18)
    run_btn.click(
        fn=run_pipeline,
        inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts, model_id, provider, timeout, temperature, max_tokens, task_type],
        outputs=[final_json, transcript]
    )
if __name__ == "__main__":
    demo.launch(share=True)