added examples
- app.py +42 -6
- router_backend.py +1 -1
app.py
CHANGED
@@ -12,6 +12,7 @@ This Space lets a user:
 By default, a deterministic "mock mode" produces stable pseudo-random percentages from the prompt.
 """

+import os
 import hashlib
 from typing import Dict, List, Tuple, Union
 import gradio as gr
@@ -33,6 +34,8 @@ from router_backend import get_expert_routing
 # BACKEND_AVAILABLE = False
 # _backend_import_error = e

+HF_TOKEN: str = os.getenv("HF_TOKEN")
+
 EXPERTS = ["Language", "Logic", "Social", "World"]

 DEFAULT_MODELS = [
@@ -83,7 +86,6 @@ def _compose_prompt(user_prompt: str, assistant_prompt: str) -> str:

 def route_and_plot(
     model_choice: str,
-    hf_token: str,
     user_prompt: str,
     assistant_prompt: str,
     ablate_language: bool,
@@ -97,8 +99,8 @@ def route_and_plot(
     - Call backend (real or mock)
     - Return a table + bar plot + status message
     """
-    if hf_token.strip() == "":
-        hf_token = None
+    if HF_TOKEN.strip() == "":
+        HF_TOKEN = None  # allow empty token

     ablations = []
     if ablate_language:
@@ -131,7 +133,7 @@ def route_and_plot(
         generation = None
     else:
         try:
-            raw, generation = get_expert_routing(model_id, hf_token, prompt, ablations)  # <-- your real function
+            raw, generation = get_expert_routing(model_id, HF_TOKEN, prompt, ablations)  # <-- your real function
             vals = _normalize_output(raw)
             msg = "Routed with real backend."
         except Exception as e:
@@ -167,7 +169,7 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:

     with gr.Row():
         model_choice = gr.Dropdown(choices=DEFAULT_MODELS, label="Select a model", value=DEFAULT_MODELS[0])
-        hf_token = gr.Textbox(label="Huggingface token for authentication", placeholder="Required for Llama-based models", lines=1)
+        # hf_token = gr.Textbox(label="Huggingface token for authentication", placeholder="Required for Llama-based models", lines=1)

     with gr.Column():
         with gr.Row():
@@ -204,9 +206,43 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:

     run.click(
         route_and_plot,
-        inputs=[model_choice, hf_token, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
+        inputs=[model_choice, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
         outputs=[generation_output, table, plot, status],
     )

+    # example prompts
+    examples = [
+        [
+            "micro-llama-1b",  # dropdown model
+            "Correct the grammar: \"She go to the park every morning.\"",  # user prompt
+            "She goes to the park every morning.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "What is 27 multiplied by 14?",  # user prompt
+            "First, break it down: 27 * 10 = 270. Then 27 * 4 = 108. Add them together: 270 + 108 = 378. So the answer is 378.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "Why did Sarah look away when John asked if she was okay?",  # user prompt
+            "Because she didn't want him to see that she was upset.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "Why do people usually eat breakfast in the morning?",  # user prompt
+            "Because after sleeping, the body needs energy to start the day.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+    ]
+
+    gr.Examples(
+        examples=examples,
+        inputs=[model_choice, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
+        label="Try these examples:"
+    )
+
 if __name__ == "__main__":
     demo.launch()
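The token is now read from the HF_TOKEN environment variable (on Spaces, usually a repository secret) instead of a Textbox. Two details are easy to trip over: os.getenv returns None when the variable is unset, so calling .strip() on the result can raise AttributeError, and rebinding the module-level HF_TOKEN inside route_and_plot would need a global declaration. A minimal sketch of normalizing the token once at import time, under those assumptions (the names mirror the diff, but this is not the Space's actual code):

import os

# Read the token from the environment (e.g. a Space secret named HF_TOKEN).
# Fall back to "" so .strip() is safe when the variable is unset, and map an
# empty or whitespace-only value to None so callers see a token or nothing.
HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip() or None

def route_and_plot(model_choice: str, user_prompt: str) -> str:
    # Hypothetical, trimmed-down stand-in for the Space's handler: it only
    # reads the module-level HF_TOKEN and never rebinds it, so no `global`
    # statement is needed.
    mode = "authenticated" if HF_TOKEN else "anonymous"
    return f"routing {model_choice!r} ({mode})"

In the gr.Examples call added above, each example row fills the components listed in inputs positionally, so the four trailing booleans map onto the four ablation checkboxes.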
router_backend.py
CHANGED
@@ -110,7 +110,7 @@ def generate_continuation(model,
         attention_mask=attention_mask,
         max_new_tokens=max_tokens,
         use_cache=use_cache,
-        stop_strings=["</s>","<|eot_id|>", "<|im_start|>user"],
+        stop_strings=["</s>","<|eot_id|>", "<|im_start|>user", "user"],
         tokenizer=tokenizer,
         pad_token_id=tokenizer.pad_token_id,
         temperature=0,
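The extra "user" stop string trims generations in which the model starts writing a new conversational turn without first emitting a special token. In transformers, stop_strings is matched against the decoded text as it is generated and requires the tokenizer to be passed to generate(). A minimal, self-contained sketch of the mechanism (the model name is only an illustrative stand-in, not this Space's backend):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "sshleifer/tiny-gpt2"  # tiny stand-in model; any causal LM works
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Q: What is 2 + 2?\nA:", return_tensors="pt")
out = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=False,                 # greedy decoding
    stop_strings=["\nQ:", "user"],   # stop once a new turn starts
    tokenizer=tokenizer,             # required so stop strings can be matched
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(out[0], skip_special_tokens=True))

Note that generation stops after a stop string has been produced, so the string itself may still appear at the end of the returned text and typically needs to be stripped by the caller.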