# all_tools.py — retrieval and Bloom's/DOK scoring tools for the question-generation agent.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Third-party imports.
from llama_index.core import VectorStoreIndex, Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from smolagents import tool

# Local project imports.
from all_datasets import *
from level_classifier_tool_2 import (
    classify_levels_phrases,
    HFEmbeddingBackend,
    build_phrase_index
)
from phrases import BLOOMS_PHRASES, DOK_PHRASES
# Map a display name for each source dataset to its question column.
# NOTE(review): the dataset objects (GSM8k, Olympiad_math, ...) arrive via the
# star import of `all_datasets`; presumably each supports `obj['question']` —
# confirm against that module.
D = {
    name: dataset['question']
    for name, dataset in (
        ("GSM8k", GSM8k),
        ("Olympiad", Olympiad_math),
        ("Olympiad2", Olympiad_math2),
        ("DeepMind Math", clean_math),
        ("MMMLU", MMMLU),
        ("MMMU", MMMU),
        ("ScienceQA", ScienceQA),
        ("PubmedQA", PubmedQA),
    )
}
# Flatten every dataset's questions into a single list. The key sequence below
# reproduces the original hand-written concatenation order exactly (it is NOT
# the same as D's insertion order), which fixes each question's position in
# the vector index built further down.
all_questions = [
    question
    for key in (
        "GSM8k",
        "Olympiad",
        "MMMLU",
        "MMMU",
        "DeepMind Math",
        "Olympiad2",
        "ScienceQA",
        "PubmedQA",
    )
    for question in D[key]
]
# Embedding model used to vectorize questions for retrieval.
# BUG FIX(review): the original referenced `HuggingFaceEmbeddings` — the
# LangChain class name — which is never imported anywhere in this file, so the
# module raised NameError on import. Use LlamaIndex's own HuggingFace wrapper,
# which is what VectorStoreIndex's `embed_model=` expects.
emb = HuggingFaceEmbedding(
    model_name="google/embeddinggemma-300m",
    normalize=True,  # same intent as encode_kwargs={"normalize_embeddings": True}
)

# Build an in-memory vector index with one Document per question so the
# retriever returns raw question text nodes.
texts = all_questions
index = VectorStoreIndex.from_documents(
    [Document(text=t) for t in texts],
    embed_model=emb,
)
#Retriever tool
@tool
def QuestionRetrieverTool(subject: str, topic: str, grade: str) -> dict:
    """Retrieve up to 5 closely-related example questions from the source datasets.

    BUG FIX(review): the original docstring was garbled — it contained a pasted-in
    code fragment (``description = (...)``) and a stray ``}``, which breaks the
    docstring parsing smolagents' @tool relies on. Rewritten; runtime logic is
    unchanged.

    Args:
        subject: The subject area (e.g., "Math", "Science").
        topic: The specific topic within the subject (e.g., "Algebra", "Biology").
        grade: The grade level (e.g., "5th", "8th").

    Returns:
        A dictionary with:
            "closest questions found for": dict echoing subject, topic, and grade.
            "questions": list of up to 5 dicts, each with a "text" key holding
            one retrieved question.
    """
    # Phrase the lookup as a natural-language query for the semantic retriever.
    query = f"{topic} question for {grade} of the {subject}"
    results = index.as_retriever(similarity_top_k=5).retrieve(query)
    question_texts = [r.node.text for r in results]
    return {
        "closest questions found for": {
            "subject": subject,
            "topic": topic,
            "grade": grade,
        },
        "questions": [{"text": question} for question in question_texts],
    }
# Scoring tool

# Canonical level orderings, hoisted so target parsing is not rebuilt per call.
_BLOOM_ORDER = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
_DOK_ORDER = ["DOK1", "DOK2", "DOK3", "DOK4"]


def _parse_target_bloom(t: str) -> set:
    """Expand a Bloom's target ("Apply+" means Apply or higher) into a level set."""
    if t.endswith("+"):
        return set(_BLOOM_ORDER[_BLOOM_ORDER.index(t[:-1]):])
    return {t}


def _parse_target_dok(t: str) -> set:
    """Expand a DOK target ("DOK2-DOK3" is an inclusive span) into a level set."""
    if "-" in t:
        lo, hi = t.split("-")
        return set(_DOK_ORDER[_DOK_ORDER.index(lo):_DOK_ORDER.index(hi) + 1])
    return {t}


@tool
def classify_and_score(
    question: str,
    target_bloom: str,
    target_dok: str,
    agg: str = "max"
) -> dict:
    """Classify a question against Bloom’s and DOK targets and return guidance.

    Args:
        question: The question text to evaluate for cognitive demand.
        target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
            or plus form (e.g., "Apply+") meaning that level or higher.
        target_dok: Target DOK level or range. Accepts exact (e.g., "DOK3")
            or span (e.g., "DOK2-DOK3").
        agg: Aggregation method over phrase similarities within a level
            (choices: "mean", "max", "topk_mean").

    Returns:
        A dictionary with:
            ok: True if both Bloom’s and DOK match the targets.
            measured: Dict with best levels and per-level scores for Bloom’s and DOK.
            feedback: Brief guidance describing how to adjust the question to hit targets.

    Raises:
        ValueError: If a target names a level missing from the canonical orderings
            (propagated from list.index).
    """
    # NOTE(review): `_backend`, `_BLOOM_INDEX`, and `_DOK_INDEX` are not defined
    # anywhere in this file — presumably they should be module-level singletons
    # built via HFEmbeddingBackend() / build_phrase_index(). As written this
    # call raises NameError; confirm and add those definitions.
    res = classify_levels_phrases(
        question,
        BLOOMS_PHRASES,
        DOK_PHRASES,
        backend=_backend,
        prebuilt_bloom_index=_BLOOM_INDEX,
        prebuilt_dok_index=_DOK_INDEX,
        agg=agg,
        return_phrase_matches=True
    )

    bloom_target_set = _parse_target_bloom(target_bloom)
    dok_target_set = _parse_target_dok(target_dok)

    bloom_best = res["blooms"]["best_level"]
    dok_best = res["dok"]["best_level"]

    bloom_ok = bloom_best in bloom_target_set
    dok_ok = dok_best in dok_target_set

    # Build actionable feedback only for the dimensions that missed the target.
    feedback_parts = []
    if not bloom_ok:
        feedback_parts.append(
            f"Shift Bloom’s from {bloom_best} toward {sorted(bloom_target_set)}. "
            f"Top cues: {res['blooms']['top_phrases'].get(bloom_best, [])[:3]}"
        )
    if not dok_ok:
        feedback_parts.append(
            f"Shift DOK from {dok_best} toward {sorted(dok_target_set)}. "
            f"Top cues: {res['dok']['top_phrases'].get(dok_best, [])[:3]}"
        )

    return {
        "ok": bool(bloom_ok and dok_ok),
        "measured": {
            "bloom_best": bloom_best,
            "bloom_scores": res["blooms"]["scores"],
            "dok_best": dok_best,
            "dok_scores": res["dok"]["scores"],
        },
        "feedback": " ".join(feedback_parts) if feedback_parts else "On target.",
    }