bkhmsi committed on
Commit 5f411d7 · 1 Parent(s): a269509

added examples

Files changed (2)
  1. app.py +42 -6
  2. router_backend.py +1 -1
app.py CHANGED
@@ -12,6 +12,7 @@ This Space lets a user:
 By default, a deterministic "mock mode" produces stable pseudo-random percentages from the prompt.
 """
 
+import os
 import hashlib
 from typing import Dict, List, Tuple, Union
 import gradio as gr
@@ -33,6 +34,8 @@ from router_backend import get_expert_routing
 # BACKEND_AVAILABLE = False
 # _backend_import_error = e
 
+HF_TOKEN: str = os.getenv("HF_TOKEN")
+
 EXPERTS = ["Language", "Logic", "Social", "World"]
 
 DEFAULT_MODELS = [
@@ -83,7 +86,6 @@ def _compose_prompt(user_prompt: str, assistant_prompt: str) -> str:
 
 def route_and_plot(
     model_choice: str,
-    hf_token: str,
     user_prompt: str,
     assistant_prompt: str,
     ablate_language: bool,
@@ -97,8 +99,8 @@
     - Call backend (real or mock)
     - Return a table + bar plot + status message
     """
-    if hf_token.strip() == "":
-        hf_token = None  # allow empty token
+    if HF_TOKEN.strip() == "":
+        HF_TOKEN = None  # allow empty token
 
     ablations = []
     if ablate_language:
@@ -131,7 +133,7 @@
         generation = None
     else:
         try:
-            raw, generation = get_expert_routing(model_id, hf_token, prompt, ablations)  # <-- your real function
+            raw, generation = get_expert_routing(model_id, HF_TOKEN, prompt, ablations)  # <-- your real function
             vals = _normalize_output(raw)
             msg = "Routed with real backend."
         except Exception as e:
@@ -167,7 +169,7 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
 
     with gr.Row():
         model_choice = gr.Dropdown(choices=DEFAULT_MODELS, label="Select a model", value=DEFAULT_MODELS[0])
-        hf_token = gr.Textbox(label="Huggingface token for authentication", placeholder="Required for Llama-based models", lines=1)
+        # hf_token = gr.Textbox(label="Huggingface token for authentication", placeholder="Required for Llama-based models", lines=1)
 
     with gr.Column():
         with gr.Row():
@@ -204,9 +206,43 @@
 
     run.click(
         route_and_plot,
-        inputs=[model_choice, hf_token, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
+        inputs=[model_choice, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
         outputs=[generation_output, table, plot, status],
     )
 
+    # example prompts
+    examples = [
+        [
+            "micro-llama-1b",  # dropdown model
+            "Correct the grammar: \"She go to the park every morning.\"",  # user prompt
+            "She goes to the park every morning.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "What is 27 multiplied by 14?",  # user prompt
+            "First, break it down: 27 * 10 = 270. Then 27 * 4 = 108. Add them together: 270 + 108 = 378. So the answer is 378.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "Why did Sarah look away when John asked if she was okay?",  # user prompt
+            "Because she didn't want him to see that she was upset.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "Why do people usually eat breakfast in the morning?",  # user prompt
+            "Because after sleeping, the body needs energy to start the day.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+    ]
+
+    gr.Examples(
+        examples=examples,
+        inputs=[model_choice, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
+        label="Try these examples:"
+    )
+
 if __name__ == "__main__":
     demo.launch()
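
Note on the app.py changes: authentication moves from a UI textbox to the HF_TOKEN environment variable (a Space secret), the corresponding input is dropped from route_and_plot and run.click, and four example prompts (grammar correction, arithmetic, social reasoning, everyday world knowledge) are registered via gr.Examples. Two details of the new token handling are worth flagging: os.getenv("HF_TOKEN") returns None when the secret is unset, and rebinding HF_TOKEN inside route_and_plot without a `global HF_TOKEN` declaration makes the name local to that function, so the HF_TOKEN.strip() check can fail before a token is ever read. A minimal sketch (an assumption, not part of this commit) that normalizes the token once at module level instead:

import os

# Sketch, not in the commit: read the Space secret once and normalize it here,
# so route_and_plot can pass the global HF_TOKEN through without rebinding it.
_raw_token = os.getenv("HF_TOKEN", "")   # "" when the secret is missing
HF_TOKEN = _raw_token.strip() or None    # empty or whitespace-only -> None

With this normalization, get_expert_routing(model_id, HF_TOKEN, prompt, ablations) receives either a usable token or None, and the in-function check can be dropped entirely.
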
router_backend.py CHANGED
@@ -110,7 +110,7 @@ def generate_continuation(model,
         attention_mask=attention_mask,
         max_new_tokens=max_tokens,
         use_cache=use_cache,
-        stop_strings=["</s>", "<|eot_id|>", "<|im_start|>user"],
+        stop_strings=["</s>", "<|eot_id|>", "<|im_start|>user", "user"],
         tokenizer=tokenizer,
         pad_token_id=tokenizer.pad_token_id,
         temperature=0,
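
Note on the router_backend.py change: it only extends the stop_strings passed to transformers' generate() (stop_strings requires the tokenizer, which the call already supplies). Generation halts once the decoded continuation contains any of the listed strings, so the bare "user" entry will also cut off outputs that merely mention that word. The matched stop string is left in the returned tokens, so the decoded text typically needs a trim; a small helper along these lines (hypothetical, not part of the repo) covers the common case:

STOP_STRINGS = ["</s>", "<|eot_id|>", "<|im_start|>user", "user"]

def strip_stop_strings(text: str, stop_strings=STOP_STRINGS) -> str:
    # Hypothetical helper: remove a trailing stop string that generate()
    # leaves in the decoded output, then drop any leftover whitespace.
    for stop in stop_strings:
        if text.endswith(stop):
            return text[: -len(stop)].rstrip()
    return text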