Spaces:

ServiceNow-AI
/

Apriel-Chat

Running

App Files Community

bradnow committed on May 9

Commit

78f63db

1 Parent(s): ddbdc31

Fix problem with multiple turns for some models

Browse files

Files changed (1) hide show

app.py +35 -28

app.py CHANGED Viewed

@@ -5,21 +5,24 @@ import gradio as gr
 from utils import COMMUNITY_POSTFIX_URL, get_model_config, log_message, check_format, models_config
-print(f"Gradio version: {gr.__version__}")
 DEFAULT_MODEL_NAME = "Apriel-Nemotron-15b-Thinker"
-BUTTON_WIDTH = 160
 chat_start_count = 0
-model_config = None
-client = None
 def setup_model(model_name, intial=False):
-    global model_config, client
     model_config = get_model_config(model_name)
     log_message(f"update_model() --> Model config: {model_config}")
-    client = OpenAI(
         api_key=model_config.get('AUTH_TOKEN'),
         base_url=model_config.get('VLLM_API_URL')
     )
@@ -44,7 +47,8 @@ def chat_fn(message, history):
     # Check if the message is empty
     if not message.strip():
         gr.Warning("Please enter a message before sending.")
-        return history
     global chat_start_count
     chat_start_count = chat_start_count + 1
@@ -54,34 +58,32 @@ def chat_fn(message, history):
     is_reasoning = model_config.get("REASONING")
     # Remove any assistant messages with metadata from history for multiple turns
-    log_message(f"Original History: {history}")
     check_format(history, "messages")
-    history = [item for item in history if
-               not (isinstance(item, dict) and
-                    item.get("role") == "assistant" and
-                    isinstance(item.get("metadata"), dict) and
-                    item.get("metadata", {}).get("title") is not None)]
-    log_message(f"Updated History: {history}")
-    check_format(history, "messages")
     history.append({"role": "user", "content": message})
     log_message(f"History with user message: {history}")
     check_format(history, "messages")
     # Create the streaming response
     try:
-        stream = client.chat.completions.create(
             model=model_config.get('MODEL_NAME'),
-            messages=history,
-            temperature=0.8,
             stream=True
         )
     except Exception as e:
         print(f"Error: {e}")
-        yield gr.ChatMessage(
-            role="assistant",
-            content="😔 The model is unavailable at the moment. Please try again later.",
-        )
         return
     if is_reasoning:
@@ -92,6 +94,13 @@ def chat_fn(message, history):
         ))
         log_message(f"History added thinking: {history}")
         check_format(history, "messages")
     output = ""
     completion_started = False
@@ -135,11 +144,8 @@ def chat_fn(message, history):
                 content=output
             )
-        # only yield the most recent assistant messages
-        messages_to_yield = history[-1:] if not completion_started else history[-2:]
-        # check_format(messages_to_yield, "messages")
-        # log_message(f"Yielding messages: {messages_to_yield}")
-        yield messages_to_yield
     log_message(f"Final History: {history}")
     check_format(history, "messages")
@@ -281,6 +287,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="green")) as demo:
         desc = setup_model(actual_model_name)
         return desc, []
     model_dropdown.change(
         fn=update_model_and_clear,
         inputs=[model_dropdown],

 from utils import COMMUNITY_POSTFIX_URL, get_model_config, log_message, check_format, models_config
+MODEL_TEMPERATURE = 0.8
+BUTTON_WIDTH = 160
 DEFAULT_MODEL_NAME = "Apriel-Nemotron-15b-Thinker"
+# DEFAULT_MODEL_NAME = "Apriel-5b"
+print(f"Gradio version: {gr.__version__}")
 chat_start_count = 0
+model_config = {}
+openai_client = None
 def setup_model(model_name, intial=False):
+    global model_config, openai_client
     model_config = get_model_config(model_name)
     log_message(f"update_model() --> Model config: {model_config}")
+    openai_client = OpenAI(
         api_key=model_config.get('AUTH_TOKEN'),
         base_url=model_config.get('VLLM_API_URL')
     )
     # Check if the message is empty
     if not message.strip():
         gr.Warning("Please enter a message before sending.")
+        yield history
+        return
     global chat_start_count
     chat_start_count = chat_start_count + 1
     is_reasoning = model_config.get("REASONING")
     # Remove any assistant messages with metadata from history for multiple turns
+    log_message(f"Initial History: {history}")
     check_format(history, "messages")
     history.append({"role": "user", "content": message})
     log_message(f"History with user message: {history}")
     check_format(history, "messages")
     # Create the streaming response
     try:
+        history_no_thoughts = [item for item in history if
+                               not (isinstance(item, dict) and
+                                    item.get("role") == "assistant" and
+                                    isinstance(item.get("metadata"), dict) and
+                                    item.get("metadata", {}).get("title") is not None)]
+        log_message(f"Updated History: {history_no_thoughts}")
+        check_format(history_no_thoughts, "messages")
+        log_message(f"history_no_thoughts with user message: {history_no_thoughts}")
+        stream = openai_client.chat.completions.create(
             model=model_config.get('MODEL_NAME'),
+            messages=history_no_thoughts,
+            temperature=MODEL_TEMPERATURE,
             stream=True
         )
     except Exception as e:
         print(f"Error: {e}")
+        yield [{"role": "assistant", "content": "😔 The model is unavailable at the moment. Please try again later."}]
         return
     if is_reasoning:
         ))
         log_message(f"History added thinking: {history}")
         check_format(history, "messages")
+    else:
+        history.append(gr.ChatMessage(
+            role="assistant",
+            content="",
+        ))
+        log_message(f"History added empty assistant: {history}")
+        check_format(history, "messages")
     output = ""
     completion_started = False
                 content=output
             )
+        # log_message(f"Yielding messages: {history}")
+        yield history
     log_message(f"Final History: {history}")
     check_format(history, "messages")
         desc = setup_model(actual_model_name)
         return desc, []
     model_dropdown.change(
         fn=update_model_and_clear,
         inputs=[model_dropdown],