added examples
- app.py +42 -6
- router_backend.py +1 -1
app.py
CHANGED
@@ -12,6 +12,7 @@ This Space lets a user:
 By default, a deterministic "mock mode" produces stable pseudo-random percentages from the prompt.
 """

+import os
 import hashlib
 from typing import Dict, List, Tuple, Union
 import gradio as gr
@@ -33,6 +34,8 @@ from router_backend import get_expert_routing
 # BACKEND_AVAILABLE = False
 # _backend_import_error = e

+HF_TOKEN: str = os.getenv("HF_TOKEN")
+
 EXPERTS = ["Language", "Logic", "Social", "World"]

 DEFAULT_MODELS = [
@@ -83,7 +86,6 @@ def _compose_prompt(user_prompt: str, assistant_prompt: str) -> str:

 def route_and_plot(
     model_choice: str,
-    hf_token: str,
     user_prompt: str,
     assistant_prompt: str,
     ablate_language: bool,
@@ -97,8 +99,8 @@ def route_and_plot(
     - Call backend (real or mock)
     - Return a table + bar plot + status message
     """
-    if hf_token.strip() == "":
-        hf_token = None
+    if HF_TOKEN.strip() == "":
+        HF_TOKEN = None  # allow empty token

     ablations = []
     if ablate_language:
@@ -131,7 +133,7 @@ def route_and_plot(
         generation = None
     else:
         try:
-            raw, generation = get_expert_routing(model_id, hf_token, prompt, ablations)  # <-- your real function
+            raw, generation = get_expert_routing(model_id, HF_TOKEN, prompt, ablations)  # <-- your real function
             vals = _normalize_output(raw)
             msg = "Routed with real backend."
         except Exception as e:
@@ -167,7 +169,7 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:

     with gr.Row():
         model_choice = gr.Dropdown(choices=DEFAULT_MODELS, label="Select a model", value=DEFAULT_MODELS[0])
-        hf_token = gr.Textbox(label="Huggingface token for authentication", placeholder="Required for Llama-based models", lines=1)
+        # hf_token = gr.Textbox(label="Huggingface token for authentication", placeholder="Required for Llama-based models", lines=1)

     with gr.Column():
         with gr.Row():
@@ -204,9 +206,43 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:

     run.click(
         route_and_plot,
-        inputs=[model_choice, hf_token, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
+        inputs=[model_choice, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
         outputs=[generation_output, table, plot, status],
     )

+    # example prompts
+    examples = [
+        [
+            "micro-llama-1b",  # dropdown model
+            "Correct the grammar: \"She go to the park every morning.\"",  # user prompt
+            "She goes to the park every morning.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "What is 27 multiplied by 14?",  # user prompt
+            "First, break it down: 27 * 10 = 270. Then 27 * 4 = 108. Add them together: 270 + 108 = 378. So the answer is 378.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "Why did Sarah look away when John asked if she was okay?",  # user prompt
+            "Because she didn't want him to see that she was upset.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+        [
+            "micro-llama-1b",  # dropdown model
+            "Why do people usually eat breakfast in the morning?",  # user prompt
+            "Because after sleeping, the body needs energy to start the day.",  # assistant prompt
+            False, False, False, False  # no ablations
+        ],
+    ]
+
+    gr.Examples(
+        examples=examples,
+        inputs=[model_choice, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
+        label="Try these examples:"
+    )
+
 if __name__ == "__main__":
     demo.launch()
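The token is now read from the HF_TOKEN environment variable (on Spaces, usually a repository secret) instead of a Textbox. Two details are easy to trip over: os.getenv returns None when the variable is unset, so calling .strip() on the result can raise AttributeError, and rebinding the module-level HF_TOKEN inside route_and_plot would need a global declaration. A minimal sketch of normalizing the token once at import time, under those assumptions (the names mirror the diff, but this is not the Space's actual code):

import os

# Read the token from the environment (e.g. a Space secret named HF_TOKEN).
# Fall back to "" so .strip() is safe when the variable is unset, and map an
# empty or whitespace-only value to None so callers see a token or nothing.
HF_TOKEN = (os.getenv("HF_TOKEN") or "").strip() or None

def route_and_plot(model_choice: str, user_prompt: str) -> str:
    # Hypothetical, trimmed-down stand-in for the Space's handler: it only
    # reads the module-level HF_TOKEN and never rebinds it, so no `global`
    # statement is needed.
    mode = "authenticated" if HF_TOKEN else "anonymous"
    return f"routing {model_choice!r} ({mode})"

In the gr.Examples call added above, each example row fills the components listed in inputs positionally, so the four trailing booleans map onto the four ablation checkboxes.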
router_backend.py
CHANGED
@@ -110,7 +110,7 @@ def generate_continuation(model,
         attention_mask=attention_mask,
         max_new_tokens=max_tokens,
         use_cache=use_cache,
-        stop_strings=["</s>","<|eot_id|>", "<|im_start|>user"],
+        stop_strings=["</s>","<|eot_id|>", "<|im_start|>user", "user"],
         tokenizer=tokenizer,
         pad_token_id=tokenizer.pad_token_id,
         temperature=0,
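The extra "user" stop string trims generations in which the model starts writing a new conversational turn without first emitting a special token. In transformers, stop_strings is matched against the decoded text as it is generated and requires the tokenizer to be passed to generate(). A minimal, self-contained sketch of the mechanism (the model name is only an illustrative stand-in, not this Space's backend):

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "sshleifer/tiny-gpt2"  # tiny stand-in model; any causal LM works
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Q: What is 2 + 2?\nA:", return_tensors="pt")
out = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=False,                 # greedy decoding
    stop_strings=["\nQ:", "user"],   # stop once a new turn starts
    tokenizer=tokenizer,             # required so stop strings can be matched
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(out[0], skip_special_tokens=True))

Note that generation stops after a stop string has been produced, so the string itself may still appear at the end of the returned text and typically needs to be stripped by the caller.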