bradnow committed on
Commit
d055fb3
·
1 Parent(s): 9a0917a

Update to use a multimodal prompt input, and update API data processing to support images

Browse files
Files changed (2) hide show
  1. app.py +209 -11
  2. styles.css +9 -0
app.py CHANGED
@@ -1,12 +1,17 @@
1
  import random
 
2
  from uuid import uuid4
3
 
4
  from openai import OpenAI
5
  import gradio as gr
 
 
 
 
6
 
7
  from theme import apriel
8
  from utils import COMMUNITY_POSTFIX_URL, get_model_config, check_format, models_config, \
9
- logged_event_handler, DEBUG_MODE, DEBUG_MODEL, log_debug, log_info, log_error
10
  from log_chat import log_chat
11
 
12
  MODEL_TEMPERATURE = 0.8
@@ -119,14 +124,45 @@ def run_chat_inference(history, message, state):
119
  gr.Warning("Client UI is stale, please refresh the page")
120
  return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
121
 
 
 
 
122
  # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn, session_state
123
  log_debug(f"{'-' * 80}")
124
  log_debug(f"chat_fn() --> Message: {message}")
125
  log_debug(f"chat_fn() --> History: {history}")
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  try:
128
  # Check if the message is empty
129
- if not message.strip():
130
  gr.Info("Please enter a message before sending")
131
  yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
132
  return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
@@ -140,7 +176,16 @@ def run_chat_inference(history, message, state):
140
  # Remove any assistant messages with metadata from history for multiple turns
141
  log_debug(f"Initial History: {history}")
142
  check_format(history, "messages")
143
- history.append({"role": "user", "content": message})
 
 
 
 
 
 
 
 
 
144
  log_debug(f"History with user message: {history}")
145
  check_format(history, "messages")
146
 
@@ -155,9 +200,152 @@ def run_chat_inference(history, message, state):
155
  check_format(history_no_thoughts, "messages")
156
  log_debug(f"history_no_thoughts with user message: {history_no_thoughts}")
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  stream = openai_client.chat.completions.create(
159
  model=model_name,
160
- messages=history_no_thoughts,
161
  temperature=MODEL_TEMPERATURE,
162
  stream=True
163
  )
@@ -322,20 +510,30 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
322
  )
323
 
324
  with gr.Row():
325
- with gr.Column(scale=10, min_width=400):
326
  with gr.Row():
327
- user_input = gr.Textbox(
 
 
 
 
328
  show_label=False,
329
- placeholder="Type your message here and press Enter",
330
- container=False
331
  )
 
 
 
 
 
 
 
332
  with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
333
  with gr.Row():
334
  with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
335
- send_btn = gr.Button("Send", variant="primary")
336
- stop_btn = gr.Button("Stop", variant="cancel", visible=False)
337
  with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
338
- clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary")
339
  with gr.Row():
340
  with gr.Column(min_width=400, elem_classes="opt-out-container"):
341
  with gr.Row():
 
1
  import random
2
+ from collections.abc import Mapping
3
  from uuid import uuid4
4
 
5
  from openai import OpenAI
6
  import gradio as gr
7
+ import base64
8
+ import mimetypes
9
+ import copy
10
+ import os
11
 
12
  from theme import apriel
13
  from utils import COMMUNITY_POSTFIX_URL, get_model_config, check_format, models_config, \
14
+ logged_event_handler, DEBUG_MODE, DEBUG_MODEL, log_debug, log_info, log_error, log_warning
15
  from log_chat import log_chat
16
 
17
  MODEL_TEMPERATURE = 0.8
 
124
  gr.Warning("Client UI is stale, please refresh the page")
125
  return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
126
 
127
+ # files will be the newly added files from the user
128
+ files = []
129
+
130
  # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn, session_state
131
  log_debug(f"{'-' * 80}")
132
  log_debug(f"chat_fn() --> Message: {message}")
133
  log_debug(f"chat_fn() --> History: {history}")
134
 
135
+ # We have multimodal input in this case
136
+ if isinstance(message, Mapping):
137
+ files = message.get("files") or []
138
+ message = message.get("text") or ""
139
+ log_debug(f"chat_fn() --> Message (text only): {message}")
140
+ log_debug(f"chat_fn() --> Files: {files}")
141
+
142
+ # Validate that any uploaded files are images
143
+ if len(files) > 0:
144
+ invalid_files = []
145
+ for path in files:
146
+ try:
147
+ mime, _ = mimetypes.guess_type(path)
148
+ mime = mime or ""
149
+ if not mime.startswith("image/"):
150
+ invalid_files.append((os.path.basename(path), mime or "unknown"))
151
+ except Exception as e:
152
+ log_error(f"Failed to inspect file '{path}': {e}")
153
+ invalid_files.append((os.path.basename(path), "unknown"))
154
+
155
+ if invalid_files:
156
+ msg = "Only image files are allowed. Invalid uploads: " + \
157
+ ", ".join([f"{p} (type: {m})" for p, m in invalid_files])
158
+ log_warning(msg)
159
+ gr.Warning(msg)
160
+ yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
161
+ return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
162
+
163
  try:
164
  # Check if the message is empty
165
+ if not message.strip() and len(files) == 0:
166
  gr.Info("Please enter a message before sending")
167
  yield history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
168
  return history, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, state
 
176
  # Remove any assistant messages with metadata from history for multiple turns
177
  log_debug(f"Initial History: {history}")
178
  check_format(history, "messages")
179
+ # Build UI history: add text (if any) and per-file image placeholders {"path": ...}
180
+ # Build API parts separately later to avoid Gradio issues with arrays in content
181
+ if len(files) == 0:
182
+ history.append({"role": "user", "content": message})
183
+ else:
184
+ if message.strip():
185
+ history.append({"role": "user", "content": message})
186
+ for path in files:
187
+ history.append({"role": "user", "content": {"path": path}})
188
+
189
  log_debug(f"History with user message: {history}")
190
  check_format(history, "messages")
191
 
 
200
  check_format(history_no_thoughts, "messages")
201
  log_debug(f"history_no_thoughts with user message: {history_no_thoughts}")
202
 
203
+ # Build API-specific messages:
204
+ # - Convert any UI image placeholders {"path": ...} to image_url parts
205
+ # - Convert any user string content that is a valid file path to image_url parts
206
+ # - Coalesce consecutive image paths into a single image-only user message
207
+ api_messages = []
208
+ image_parts_buffer = []
209
+
210
+ def flush_image_buffer():
211
+ if len(image_parts_buffer) > 0:
212
+ api_messages.append({"role": "user", "content": list(image_parts_buffer)})
213
+ image_parts_buffer.clear()
214
+
215
+ def to_image_part(path: str):
216
+ try:
217
+ mime, _ = mimetypes.guess_type(path)
218
+ mime = mime or "application/octet-stream"
219
+ with open(path, "rb") as f:
220
+ b64 = base64.b64encode(f.read()).decode("utf-8")
221
+ data_url = f"data:{mime};base64,{b64}"
222
+ return {"type": "image_url", "image_url": {"url": data_url}}
223
+ except Exception as e:
224
+ log_error(f"Failed to load file '{path}': {e}")
225
+ return None
226
+
227
+ def normalize_msg(msg):
228
+ # Returns (role, content, as_dict) where as_dict is a message dict suitable to pass through when unmodified
229
+ if isinstance(msg, dict):
230
+ return msg.get("role"), msg.get("content"), msg
231
+ # Gradio ChatMessage-like object
232
+ role = getattr(msg, "role", None)
233
+ content = getattr(msg, "content", None)
234
+ if role is not None:
235
+ return role, content, {"role": role, "content": content}
236
+ return None, None, msg
237
+
238
+ for m in copy.deepcopy(history_no_thoughts):
239
+ role, content, as_dict = normalize_msg(m)
240
+ # Unknown structure: pass through
241
+ if role is None:
242
+ flush_image_buffer()
243
+ api_messages.append(as_dict)
244
+ continue
245
+
246
+ # Assistant messages pass through as-is
247
+ if role == "assistant":
248
+ flush_image_buffer()
249
+ api_messages.append(as_dict)
250
+ continue
251
+
252
+ # Only user messages have potential image paths to convert
253
+ if role == "user":
254
+ # Case A: {'path': ...}
255
+ if isinstance(content, dict) and isinstance(content.get("path"), str):
256
+ p = content["path"]
257
+ part = to_image_part(p) if os.path.isfile(p) else None
258
+ if part:
259
+ image_parts_buffer.append(part)
260
+ else:
261
+ flush_image_buffer()
262
+ api_messages.append({"role": "user", "content": str(content)})
263
+ continue
264
+
265
+ # Case B: string or tuple content that may be a file path
266
+ if isinstance(content, str):
267
+ if os.path.isfile(content):
268
+ part = to_image_part(content)
269
+ if part:
270
+ image_parts_buffer.append(part)
271
+ continue
272
+ # Not a file path: pass through as text
273
+ flush_image_buffer()
274
+ api_messages.append({"role": "user", "content": content})
275
+ continue
276
+ if isinstance(content, tuple):
277
+ # Common case: a single-element tuple containing a path string
278
+ tuple_items = list(content)
279
+ tmp_parts = []
280
+ text_accum = []
281
+ for item in tuple_items:
282
+ if isinstance(item, str) and os.path.isfile(item):
283
+ part = to_image_part(item)
284
+ if part:
285
+ tmp_parts.append(part)
286
+ else:
287
+ text_accum.append(item)
288
+ else:
289
+ text_accum.append(str(item))
290
+ if tmp_parts:
291
+ flush_image_buffer()
292
+ api_messages.append({"role": "user", "content": tmp_parts})
293
+ if not text_accum:
294
+ continue
295
+ if text_accum:
296
+ flush_image_buffer()
297
+ api_messages.append({"role": "user", "content": "\n".join(text_accum)})
298
+ continue
299
+
300
+ # Case C: list content
301
+ if isinstance(content, list):
302
+ # If it's already a list of parts, let it pass through
303
+ all_dicts = all(isinstance(c, dict) for c in content)
304
+ if all_dicts:
305
+ flush_image_buffer()
306
+ api_messages.append({"role": "user", "content": content})
307
+ continue
308
+ # It might be a list of strings (paths/text). Convert string paths to image parts, others to text parts
309
+ tmp_parts = []
310
+ text_accum = []
311
+
312
+ def flush_text_accum():
313
+ if text_accum:
314
+ api_messages.append({"role": "user", "content": "\n".join(text_accum)})
315
+ text_accum.clear()
316
+ for item in content:
317
+ if isinstance(item, str) and os.path.isfile(item):
318
+ part = to_image_part(item)
319
+ if part:
320
+ tmp_parts.append(part)
321
+ else:
322
+ text_accum.append(item)
323
+ else:
324
+ text_accum.append(str(item))
325
+ if tmp_parts:
326
+ flush_image_buffer()
327
+ api_messages.append({"role": "user", "content": tmp_parts})
328
+ if text_accum:
329
+ flush_text_accum()
330
+ continue
331
+
332
+ # Fallback: pass through
333
+ flush_image_buffer()
334
+ api_messages.append(as_dict)
335
+ continue
336
+
337
+ # Other roles
338
+ flush_image_buffer()
339
+ api_messages.append(as_dict)
340
+
341
+ # Flush any trailing images
342
+ flush_image_buffer()
343
+
344
+ log_debug(f"sending api_messages to model {model_name}: {api_messages}")
345
+
346
  stream = openai_client.chat.completions.create(
347
  model=model_name,
348
+ messages=api_messages,
349
  temperature=MODEL_TEMPERATURE,
350
  stream=True
351
  )
 
510
  )
511
 
512
  with gr.Row():
513
+ with gr.Column(scale=10, min_width=400, elem_classes="user-input-container"):
514
  with gr.Row():
515
+ user_input = gr.MultimodalTextbox(
516
+ interactive=True,
517
+ container=False,
518
+ file_count="multiple",
519
+ placeholder="Type your message here and press Enter or upload file...",
520
  show_label=False,
521
+ sources=["upload"]
 
522
  )
523
+
524
+ # Original text-only input
525
+ # user_input = gr.Textbox(
526
+ # show_label=False,
527
+ # placeholder="Type your message here and press Enter",
528
+ # container=False
529
+ # )
530
  with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
531
  with gr.Row():
532
  with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
533
+ send_btn = gr.Button("Send", variant="primary", elem_classes="control-button")
534
+ stop_btn = gr.Button("Stop", variant="cancel", elem_classes="control-button", visible=False)
535
  with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
536
+ clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary", elem_classes="control-button")
537
  with gr.Row():
538
  with gr.Column(min_width=400, elem_classes="opt-out-container"):
539
  with gr.Row():
styles.css CHANGED
@@ -30,6 +30,15 @@
30
  max-height: 1400px;
31
  }
32
 
 
 
 
 
 
 
 
 
 
33
  button.cancel {
34
  border: var(--button-border-width) solid var(--button-cancel-border-color);
35
  background: var(--button-cancel-background-fill);
 
30
  max-height: 1400px;
31
  }
32
 
33
+ .user-input-container .multimodal-textbox{
34
+ border: none !important;
35
+ }
36
+
37
+ /* Match the height of the modified multimodal input box on the same row */
38
+ .control-button {
39
+ height: 51px;
40
+ }
41
+
42
  button.cancel {
43
  border: var(--button-border-width) solid var(--button-cancel-border-color);
44
  background: var(--button-cancel-background-fill);