Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -259,7 +259,8 @@ phi4_model = AutoModelForCausalLM.from_pretrained(
 # ------------------------------------------------------------------------------
 
 DESCRIPTION = """
-# Agent Dino 🌠
+# Agent Dino 🌠
+"""
 
 css = '''
 h1 {
@@ -468,7 +469,7 @@ def generate(
     - "@web": triggers a web search or webpage visit.
     - "@rAgent": initiates a reasoning chain using Llama mode.
     - "@yolo": triggers object detection using YOLO.
-    - **"@phi4": triggers multimodal (image/audio) processing using the Phi-4 model.**
+    - **"@phi4": triggers multimodal (image/audio) processing using the Phi-4 model with streaming output.**
     """
     text = input_dict["text"]
     files = input_dict.get("files", [])
@@ -564,7 +565,7 @@ def generate(
         yield gr.Image(result_img)
         return
 
-    # --- Phi-4 Multimodal branch (Image/Audio) ---
+    # --- Phi-4 Multimodal branch (Image/Audio) with streaming ---
     if text.strip().lower().startswith("@phi4"):
        question = text[len("@phi4"):].strip()
        if not files:
@@ -603,18 +604,17 @@ def generate(
            yield "Invalid file type for @phi4 multimodal processing."
            return
 
-
-
-
-
-
-
-
-
-
-
-
-        yield response
+        # Set up a streamer for the phi4 model
+        streamer_phi4 = TextIteratorStreamer(phi4_processor, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+        generation_kwargs_phi4 = {**inputs, "streamer": streamer_phi4, "max_new_tokens": 200}
+        thread_phi4 = Thread(target=phi4_model.generate, kwargs=generation_kwargs_phi4)
+        thread_phi4.start()
+
+        outputs_phi4 = []
+        yield "🤔 Thinking..."
+        for new_text in streamer_phi4:
+            outputs_phi4.append(new_text)
+            yield "".join(outputs_phi4)
        return
 
    # --- Text and TTS branch ---
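For readers unfamiliar with the streaming pattern added above: transformers' TextIteratorStreamer receives decoded text from model.generate(), which runs in a background thread so the Gradio handler can yield partial output as it arrives. Below is a minimal, self-contained sketch of that pattern, not the actual Phi-4 branch; the "gpt2" model and the stream_reply() helper are illustrative stand-ins for the phi4_model / phi4_processor objects defined elsewhere in app.py.

# Minimal sketch of the TextIteratorStreamer + Thread streaming pattern.
# "gpt2" and stream_reply() are stand-ins for illustration, not part of app.py.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def stream_reply(prompt: str):
    """Yield progressively longer partial outputs, mirroring the Gradio generate() handler."""
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": 200}

    # generate() blocks until finished, so it runs on a worker thread while we drain the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    outputs = []
    for new_text in streamer:            # blocks until the next decoded chunk (or timeout)
        outputs.append(new_text)
        yield "".join(outputs)           # emit the accumulated text so the UI updates in place
    thread.join()

for partial in stream_reply("Write one sentence about dinosaurs."):
    print(partial)

Yielding the full accumulated string each iteration, rather than just the newest chunk, matches how the @phi4 branch in the diff rewrites the chat message in place instead of appending fragments.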