Spaces: VIRTUS
VIRTUS committed
Commit 88ce0e7 · Parent(s): 49079dd
fix: HF AUTH and GUI
app.py CHANGED

@@ -7,7 +7,6 @@ from tools import get_weather, CurrencyConverterTool
 
 
 model_path = "Qwen/Qwen3-4B-Instruct-2507"
-AGENT_SYSTEM_PROMPT = "You are a friendly and helpful Chatbot"
 
 cuda = torch.cuda.is_available()
 if cuda:
@@ -15,12 +14,25 @@ if cuda:
 else:
     print("\nRunning on Hugging Face Ecosystem\n")
 
-def interact_with_agent(prompt, history, hf_token: gr.OAuthToken,):
+def interact_with_agent(
+    message,
+    history: list[dict[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    hf_token: gr.OAuthToken,
+):
     if cuda:
         quantization = BitsAndBytesConfig(load_in_8bit=True)
         model = TransformersModel(
             model_id=model_path,
             max_new_tokens=1024,
+            temperature=temperature,
+            hf_token=hf_token.token,
+            top_p=top_p,
+            max_tokens=max_tokens,
+            system_message=system_message,
             model_kwargs={
                 "quantization_config": quantization
             })
@@ -42,19 +54,26 @@ def interact_with_agent(prompt, history, hf_token: gr.OAuthToken,):
     )
     messages = []
     yield messages
-    for msg in stream_to_gradio(agent,
+    for msg in stream_to_gradio(agent, message):
         messages.append(asdict(msg))
         yield messages
     yield messages
 
 chatbot = gr.ChatInterface(
     interact_with_agent,
-
-
-
-
-
-
+    type="messages",
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
 )
 
 with gr.Blocks() as demo:
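
Note on the AUTH part of this fix: the new hf_token: gr.OAuthToken parameter is only filled in when the Space runs with Hugging Face OAuth enabled and the user has signed in. The diff stops at "with gr.Blocks() as demo:", so the code below is only a minimal sketch of how that block is typically completed; the gr.LoginButton() call, chatbot.render(), the demo.launch() guard, and the hf_oauth: true Space metadata are assumptions, not part of this commit.

# Minimal sketch (assumed, not shown in this commit) of how the OAuth token
# can reach interact_with_agent in a Gradio Space.
# The Space README metadata is assumed to contain:
#   hf_oauth: true
import gradio as gr  # already imported at the top of app.py

with gr.Blocks() as demo:
    # Starts the Hugging Face OAuth flow; once the user is logged in, Gradio
    # automatically injects a gr.OAuthToken into any event handler whose
    # parameter is annotated with gr.OAuthToken (here: interact_with_agent).
    gr.LoginButton()
    # Render the gr.ChatInterface defined above inside this Blocks layout.
    chatbot.render()

if __name__ == "__main__":
    demo.launch()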