Nymbo committed on
Commit 6fe806a · verified · 1 Parent(s): 3fbe0e2

Update chat_handler.py

Files changed (1)
  1. chat_handler.py +446 -45
chat_handler.py CHANGED
@@ -1,6 +1,7 @@
 """
 Chat handling logic for Universal MCP Client - Fixed Version with File Upload Support
 """
+import asyncio
 import re
 import logging
 import traceback
@@ -188,6 +189,7 @@ class ChatHandler:
         recent_history = history[-max_history:] if len(history) > max_history else history
 
         last_role = None
+        is_gpt_oss = AppConfig.is_gpt_oss_model(self.mcp_client.current_model) if self.mcp_client.current_model else False
         for msg in recent_history:
             # Handle both ChatMessage objects and dictionary format for backward compatibility
             if hasattr(msg, 'role'):  # ChatMessage object
@@ -200,31 +202,52 @@ class ChatHandler:
                 continue  # Skip invalid messages
 
             if role == "user":
-                # Build multimodal user messages with parts
-                part = None
-                if isinstance(content, dict) and "path" in content:
-                    file_path = content.get("path", "")
-                    if isinstance(file_path, str) and file_path.startswith("http") and AppConfig.is_image_file(file_path):
-                        part = {"type": "image_url", "image_url": {"url": file_path}}
+                if is_gpt_oss:
+                    # Text-only content for GPT-OSS (no multimodal parts)
+                    if isinstance(content, dict) and "path" in content:
+                        file_path = content.get("path", "")
+                        # Omit media content; optionally note the upload as text
+                        text_piece = ""
+                        # Choose to ignore media fully to avoid confusing the model
+                    elif isinstance(content, (list, tuple)):
+                        text_piece = f"[List: {str(content)[:50]}...]"
+                    elif content is None:
+                        text_piece = "[Empty]"
                     else:
-                        # Non-image or non-URL: fallback to text description
-                        part = {"type": "text", "text": f"[File: {file_path}]"}
-                elif isinstance(content, (list, tuple)):
-                    part = {"type": "text", "text": f"[List: {str(content)[:50]}...]"}
-                elif content is None:
-                    part = {"type": "text", "text": "[Empty]"}
-                else:
-                    part = {"type": "text", "text": str(content)}
-
-                if messages and last_role == "user" and isinstance(messages[-1].get("content"), list):
-                    messages[-1]["content"].append(part)
-                elif messages and last_role == "user" and isinstance(messages[-1].get("content"), str):
-                    # Convert existing string content to parts and append
-                    existing_text = messages[-1]["content"]
-                    messages[-1]["content"] = [{"type": "text", "text": existing_text}, part]
+                        text_piece = str(content)
+
+                    if messages and last_role == "user" and isinstance(messages[-1].get("content"), str):
+                        # Concatenate text
+                        if text_piece:
+                            messages[-1]["content"] = (messages[-1]["content"] + "\n" + text_piece) if messages[-1]["content"] else text_piece
+                    else:
+                        messages.append({"role": "user", "content": text_piece})
+                    last_role = "user"
                 else:
-                    messages.append({"role": "user", "content": [part]})
-                last_role = "user"
+                    # Build multimodal user messages with parts (for non-GPT-OSS)
+                    part = None
+                    if isinstance(content, dict) and "path" in content:
+                        file_path = content.get("path", "")
+                        if isinstance(file_path, str) and file_path.startswith("http") and AppConfig.is_image_file(file_path):
+                            part = {"type": "image_url", "image_url": {"url": file_path}}
+                        else:
+                            part = {"type": "text", "text": f"[File: {file_path}]"}
+                    elif isinstance(content, (list, tuple)):
+                        part = {"type": "text", "text": f"[List: {str(content)[:50]}...]"}
+                    elif content is None:
+                        part = {"type": "text", "text": "[Empty]"}
+                    else:
+                        part = {"type": "text", "text": str(content)}
+
+                    if messages and last_role == "user" and isinstance(messages[-1].get("content"), list):
+                        messages[-1]["content"].append(part)
+                    elif messages and last_role == "user" and isinstance(messages[-1].get("content"), str):
+                        # Convert existing string content to parts and append
+                        existing_text = messages[-1]["content"]
+                        messages[-1]["content"] = [{"type": "text", "text": existing_text}, part]
+                    else:
+                        messages.append({"role": "user", "content": [part]})
+                    last_role = "user"
 
             elif role == "assistant":
                 # Assistant content remains text for chat.completions API
@@ -252,40 +275,54 @@ class ChatHandler:
         return self._call_hf_with_mcp(messages, uploaded_file_urls)
 
     def _call_hf_without_mcp(self, messages: List[Dict[str, Any]]) -> List[ChatMessage]:
-        """Call HF Inference API without MCP servers"""
-        logger.info("💬 No MCP servers available, using regular HF Inference chat")
-
+        """Call HF Inference API without MCP servers. Streams tokens for faster feedback."""
+        logger.info("💬 No MCP servers available, using streaming HF Inference chat when possible")
+
         system_prompt = self._get_native_system_prompt()
-
+
         # Add system prompt to messages
         if messages and messages[0].get("role") == "system":
             messages[0]["content"] = system_prompt + "\n\n" + messages[0]["content"]
         else:
             messages.insert(0, {"role": "system", "content": system_prompt})
-
+
         # Get optimal token settings
         if self.mcp_client.current_model and self.mcp_client.current_provider:
             context_settings = AppConfig.get_optimal_context_settings(
-                self.mcp_client.current_model,
+                self.mcp_client.current_model,
                 self.mcp_client.current_provider,
                 0  # No MCP servers
             )
             max_tokens = context_settings['max_response_tokens']
         else:
             max_tokens = 8192
-
-        # Use HF Inference API
+
+        # Try streaming first; fall back to non-streaming on error
         try:
-            response = self.mcp_client.generate_chat_completion(messages, **{"max_tokens": max_tokens})
-            response_text = response.choices[0].message.content
-
-            if not response_text:
-                response_text = "I understand your request and I'm here to help."
-
-            return [ChatMessage(role="assistant", content=response_text)]
+            stream = self.mcp_client.generate_chat_completion_stream(messages, **{"max_tokens": max_tokens})
+            accumulated = ""
+            for chunk in stream:
+                try:
+                    delta = chunk.choices[0].delta.content or ""
+                except Exception:
+                    # Some SDK variants stream as message deltas differently
+                    delta = getattr(getattr(chunk.choices[0], "delta", None), "content", "") or ""
+                if delta:
+                    accumulated += delta
+            if not accumulated:
+                accumulated = "I understand your request and I'm here to help."
+            return [ChatMessage(role="assistant", content=accumulated)]
         except Exception as e:
-            logger.error(f"HF Inference API call failed: {e}")
-            return [ChatMessage(role="assistant", content=f"❌ API call failed: {str(e)}")]
+            logger.warning(f"Streaming failed, retrying without stream: {e}")
+            try:
+                response = self.mcp_client.generate_chat_completion(messages, **{"max_tokens": max_tokens})
+                response_text = response.choices[0].message.content
+                if not response_text:
+                    response_text = "I understand your request and I'm here to help."
+                return [ChatMessage(role="assistant", content=response_text)]
+            except Exception as e2:
+                logger.error(f"HF Inference API call failed: {e2}")
+                return [ChatMessage(role="assistant", content=f"❌ API call failed: {str(e2)}")]
 
     def _call_hf_with_mcp(self, messages: List[Dict[str, Any]], uploaded_file_urls: List[str] = None) -> List[ChatMessage]:
         """Call HF Inference API with MCP servers and return structured responses"""
@@ -433,13 +470,13 @@
                     # Tool execution failed
                     error_details = tool_info['result']
 
-                    # Create main tool message with error status
+                    # Create main tool message with pending status (error reflected in content)
                     chat_messages.append(ChatMessage(
                         role="assistant",
                         content="",
                         metadata={
                             "title": f"❌ Used {tool_info['tool']}",
-                            "status": "error",
+                            "status": "pending",
                             "duration": duration,
                             "id": tool_id
                         }
@@ -452,7 +489,7 @@
                         metadata={
                             "title": "📊 Server Response",
                             "parent_id": tool_id,
-                            "status": "error"
+                            "status": "done"
                         }
                     ))
 
@@ -463,7 +500,7 @@
                         metadata={
                             "title": "💡 Possible Solutions",
                             "parent_id": tool_id,
-                            "status": "info"
+                            "status": "done"
                         }
                     ))
                 else:
@@ -482,6 +519,370 @@
                     ))
 
         return chat_messages
+
+    def process_multimodal_message_stream(self, message: Dict[str, Any], history: List):
+        """Generator that streams assistant output to the UI as it arrives.
+        - Streams for plain LLM chats
+        - Streams initial planning/tool JSON for MCP flows, executes tool, then streams final answer
+        - Attempts to surface reasoning/thinking traces when available
+        """
+        try:
+            # Pre-checks
+            if not self.mcp_client.hf_client:
+                error_msg = "❌ HuggingFace token not configured. Please set HF_TOKEN environment variable or login."
+                history.append(ChatMessage(role="assistant", content=error_msg))
+                yield history, gr.MultimodalTextbox(value=None, interactive=False)
+                return
+
+            if not self.mcp_client.current_provider or not self.mcp_client.current_model:
+                error_msg = "❌ Please select an inference provider and model first."
+                history.append(ChatMessage(role="assistant", content=error_msg))
+                yield history, gr.MultimodalTextbox(value=None, interactive=False)
+                return
+
+            # Parse user input
+            user_text = message.get("text", "") if message else ""
+            user_files = message.get("files", []) if message else []
+
+            # Upload files and update history similarly to non-stream path
+            self.file_url_mapping = {}
+            uploaded_file_urls: List[str] = []
+            if isinstance(message, str):
+                user_text = message
+                user_files = []
+
+            if user_files:
+                for file_path in user_files:
+                    try:
+                        uploaded_url = self._upload_file_to_gradio_server(file_path)
+                        self.file_url_mapping[file_path] = uploaded_url
+                        uploaded_file_urls.append(uploaded_url)
+                        history.append(ChatMessage(role="user", content={"path": uploaded_url}))
+                    except Exception:
+                        history.append(ChatMessage(role="user", content={"path": file_path}))
+
+            if user_text and user_text.strip():
+                history.append(ChatMessage(role="user", content=user_text))
+
+            if not user_text.strip() and not user_files:
+                yield history, gr.MultimodalTextbox(value=None, interactive=False)
+                return
+
+            # Prepare messages for HF
+            messages = self._prepare_hf_messages(history, uploaded_file_urls)
+            # Choose streaming path based on MCP servers
+            if self.mcp_client.get_enabled_servers():
+                # Stream with MCP planning/tool execution
+                yield from self._stream_with_mcp(messages, uploaded_file_urls, history)
+            else:
+                # Plain LLM streaming with optional thinking trace
+                yield from self._stream_without_mcp(messages, history)
+        except Exception as e:
+            history.append(ChatMessage(role="assistant", content=f"❌ Error: {str(e)}"))
+            yield history, gr.MultimodalTextbox(value=None, interactive=True)
+
+    def _stream_without_mcp(self, messages: List[Dict[str, Any]], history: List):
+        """Stream tokens for plain LLM chats; attempts to surface reasoning traces if available."""
+        # Add system prompt
+        system_prompt = self._get_native_system_prompt()
+        if messages and messages[0].get("role") == "system":
+            messages[0]["content"] = system_prompt + "\n\n" + messages[0]["content"]
+        else:
+            messages.insert(0, {"role": "system", "content": system_prompt})
+
+        # Compute max tokens
+        if self.mcp_client.current_model and self.mcp_client.current_provider:
+            ctx = AppConfig.get_optimal_context_settings(
+                self.mcp_client.current_model, self.mcp_client.current_provider, 0
+            )
+            max_tokens = ctx["max_response_tokens"]
+        else:
+            max_tokens = 8192
+
+        # Insert placeholders: optional thinking + main assistant
+        thinking_index = None
+        # Prepare a thinking message only when we actually receive thinking tokens
+        history.append(ChatMessage(role="assistant", content=""))
+        main_index = len(history) - 1
+        yield history, gr.MultimodalTextbox(value=None, interactive=False)
+
+        accumulated = ""
+        thinking_accum = ""
+        try:
+            stream = self.mcp_client.generate_chat_completion_stream(messages, **{"max_tokens": max_tokens})
+            for chunk in stream:
+                delta = getattr(chunk.choices[0], "delta", None)
+                # Reasoning/thinking traces (best-effort extraction)
+                reason_delta = None
+                if delta is not None:
+                    # Some providers expose .reasoning or .thinking
+                    reason_delta = (
+                        getattr(delta, "reasoning", None)
+                        or getattr(delta, "thinking", None)
+                    )
+                if reason_delta:
+                    thinking_accum += str(reason_delta)
+                    if thinking_index is None:
+                        history.insert(main_index, ChatMessage(
+                            role="assistant",
+                            content=f"{thinking_accum}",
+                            metadata={"title": "🧠 Reasoning", "status": "pending"}
+                        ))
+                        thinking_index = main_index
+                        main_index += 1
+                    else:
+                        history[thinking_index] = ChatMessage(
+                            role="assistant",
+                            content=f"{thinking_accum}",
+                            metadata={"title": "🧠 Reasoning", "status": "pending"}
+                        )
+
+                # Main content
+                delta_text = ""
+                try:
+                    delta_text = delta.content or ""
+                except Exception:
+                    delta_text = getattr(delta, "content", "") or ""
+                if not delta_text:
+                    yield history, gr.MultimodalTextbox(value=None, interactive=False)
+                    continue
+                accumulated += delta_text
+                history[main_index] = ChatMessage(role="assistant", content=accumulated)
+                yield history, gr.MultimodalTextbox(value=None, interactive=False)
+        except Exception as e:
+            # Fallback to non-stream
+            try:
+                resp = self.mcp_client.generate_chat_completion(messages, **{"max_tokens": max_tokens})
+                final_text = resp.choices[0].message.content or "I understand your request and I'm here to help."
+                history[main_index] = ChatMessage(role="assistant", content=final_text)
+                yield history, gr.MultimodalTextbox(value=None, interactive=True)
+                return
+            except Exception as e2:
+                history[main_index] = ChatMessage(role="assistant", content=f"❌ API call failed: {str(e2)}")
+                yield history, gr.MultimodalTextbox(value=None, interactive=True)
+                return
+
+        # Final yield
+        yield history, gr.MultimodalTextbox(value=None, interactive=True)
+
+    def _stream_with_mcp(self, messages: List[Dict[str, Any]], uploaded_file_urls: List[str], history: List):
+        """Stream initial planning/tool JSON, execute MCP tool, then stream final response."""
+        # Enhanced system prompt with MCP guidance
+        system_prompt = self._get_mcp_system_prompt(uploaded_file_urls)
+        if messages and messages[0].get("role") == "system":
+            messages[0]["content"] = system_prompt + "\n\n" + messages[0]["content"]
+        else:
+            messages.insert(0, {"role": "system", "content": system_prompt})
+
+        # Compute max tokens taking enabled servers into account
+        enabled_servers = self.mcp_client.get_enabled_servers()
+        if self.mcp_client.current_model and self.mcp_client.current_provider:
+            ctx = AppConfig.get_optimal_context_settings(
+                self.mcp_client.current_model, self.mcp_client.current_provider, len(enabled_servers)
+            )
+            max_tokens = ctx["max_response_tokens"]
+        else:
+            max_tokens = 8192
+
+        # Placeholders: planning/tool JSON + main assistant
+        planning_index = None
+        thinking_index = None
+        history.append(ChatMessage(role="assistant", content=""))
+        main_index = len(history) - 1
+        yield history, gr.MultimodalTextbox(value=None, interactive=False)
+
+        text_accum = ""
+        tool_json_accum = ""
+        in_tool_json = False
+        tool_json_detected = False
+        try:
+            stream = self.mcp_client.generate_chat_completion_stream(messages, **{"max_tokens": max_tokens})
+            for chunk in stream:
+                delta = getattr(chunk.choices[0], "delta", None)
+                # Optional reasoning
+                reason_delta = None
+                if delta is not None:
+                    reason_delta = (
+                        getattr(delta, "reasoning", None)
+                        or getattr(delta, "thinking", None)
+                    )
+                if reason_delta:
+                    if thinking_index is None:
+                        history.insert(main_index, ChatMessage(
+                            role="assistant",
+                            content=str(reason_delta),
+                            metadata={"title": "🧠 Reasoning", "status": "pending"}
+                        ))
+                        thinking_index = main_index
+                        main_index += 1
+                    else:
+                        history[thinking_index] = ChatMessage(
+                            role="assistant",
+                            content=(history[thinking_index].content + str(reason_delta)),
+                            metadata={"title": "🧠 Reasoning", "status": "pending"}
+                        )
+
+                # Main content streaming and tool JSON detection (content-based JSON protocol)
+                piece = ""
+                try:
+                    piece = delta.content or ""
+                except Exception:
+                    piece = getattr(delta, "content", "") or ""
+                if not piece:
+                    yield history, gr.MultimodalTextbox(value=None, interactive=False)
+                    continue
+
+                # Detect start of tool JSON
+                if not tool_json_detected and '{"use_tool":' in piece:
+                    in_tool_json = True
+                    tool_json_detected = True
+                if in_tool_json:
+                    tool_json_accum += piece
+                    # Initialize planning message
+                    if planning_index is None:
+                        history.insert(main_index, ChatMessage(
+                            role="assistant",
+                            content=tool_json_accum,
+                            metadata={"title": "🔧 Tool call (planning)", "status": "pending"}
+                        ))
+                        planning_index = main_index
+                        main_index += 1
+                    else:
+                        history[planning_index] = ChatMessage(
+                            role="assistant",
+                            content=tool_json_accum,
+                            metadata={"title": "🔧 Tool call (planning)", "status": "pending"}
+                        )
+
+                    # Try to reconstruct JSON when braces close
+                    reconstructed = self.mcp_client._reconstruct_json_from_start(tool_json_accum)
+                    if reconstructed:
+                        # We have a complete JSON
+                        in_tool_json = False
+                        # Clean planning content to the reconstructed JSON (for clarity)
+                        history[planning_index] = ChatMessage(
+                            role="assistant",
+                            content=reconstructed,
+                            metadata={"title": "🔧 Tool call", "status": "done"}
+                        )
+                        yield history, gr.MultimodalTextbox(value=None, interactive=False)
+
+                        # Execute tool now
+                        import json as _json
+                        try:
+                            tool_req = _json.loads(reconstructed)
+                        except Exception:
+                            tool_req = None
+                        if tool_req and tool_req.get("use_tool"):
+                            server_name = tool_req.get("server")
+                            tool_name = tool_req.get("tool")
+                            arguments = tool_req.get("arguments", {})
+
+                            # Status message
+                            exec_msg = ChatMessage(
+                                role="assistant",
+                                content=f"Executing {tool_name} on {server_name}…",
+                                metadata={"title": "🔧 Tool execution", "status": "pending"}
+                            )
+                            history.insert(main_index, exec_msg)
+                            exec_index = main_index
+                            main_index += 1
+                            yield history, gr.MultimodalTextbox(value=None, interactive=False)
+
+                            # Replace any local paths with uploaded URLs
+                            if hasattr(self, 'file_url_mapping'):
+                                for k, v in list(arguments.items()):
+                                    if isinstance(v, str) and v.startswith('/tmp/gradio/'):
+                                        for lpath, url in self.file_url_mapping.items():
+                                            if lpath in v or v in lpath:
+                                                arguments[k] = url
+                                                break
+
+                            # Run tool (blocking)
+                            def _run_tool():
+                                loop = asyncio.new_event_loop()
+                                asyncio.set_event_loop(loop)
+                                try:
+                                    return loop.run_until_complete(
+                                        self.mcp_client.call_mcp_tool_async(server_name, tool_name, arguments)
+                                    )
+                                finally:
+                                    loop.close()
+
+                            success, result = _run_tool()
+                            # Update exec message
+                            if success:
+                                content = str(result)
+                                history[exec_index] = ChatMessage(
+                                    role="assistant",
+                                    content=content if len(content) < 800 else content[:800] + "…",
+                                    metadata={"title": "📊 Server Response", "status": "done"}
+                                )
+                            else:
+                                history[exec_index] = ChatMessage(
+                                    role="assistant",
+                                    content=f"❌ Tool failed: {result}",
+                                    metadata={"title": "📊 Server Response", "status": "done"}
+                                )
+                            yield history, gr.MultimodalTextbox(value=None, interactive=False)
+
+                            # Start final streamed response using tool result
+                            final_messages = messages.copy()
+                            # Remove tools instruction portion from system if present
+                            if final_messages and final_messages[0].get("role") == "system":
+                                sys_text = final_messages[0]["content"]
+                                cut = sys_text.split("You have access to the following MCP tools:")[0].strip()
+                                final_messages[0]["content"] = cut
+                            # Add prior assistant (planning) and user tool result follow-up
+                            final_messages.append({"role": "assistant", "content": text_accum})
+                            final_messages.append({
+                                "role": "user",
+                                "content": f"Tool '{tool_name}' from server '{server_name}' completed. Result: {result}. Please provide a helpful response."
+                            })
+
+                            # Stream final answer into main message
+                            final_accum = ""
+                            try:
+                                final_stream = self.mcp_client.generate_chat_completion_stream(final_messages, **{"max_tokens": max_tokens})
+                                for fchunk in final_stream:
+                                    fdelta = getattr(fchunk.choices[0], "delta", None)
+                                    ftext = getattr(fdelta, "content", "") if fdelta is not None else ""
+                                    if not ftext:
+                                        yield history, gr.MultimodalTextbox(value=None, interactive=False)
+                                        continue
+                                    final_accum += ftext
+                                    history[main_index] = ChatMessage(role="assistant", content=(text_accum + final_accum))
+                                    yield history, gr.MultimodalTextbox(value=None, interactive=False)
+                            except Exception:
+                                # Fallback non-stream finalization
+                                try:
+                                    fresp = self.mcp_client.generate_chat_completion(final_messages, **{"max_tokens": max_tokens})
+                                    ftxt = fresp.choices[0].message.content or ""
+                                    history[main_index] = ChatMessage(role="assistant", content=(text_accum + ftxt))
+                                    yield history, gr.MultimodalTextbox(value=None, interactive=True)
+                                    return
+                                except Exception as e3:
+                                    history[main_index] = ChatMessage(role="assistant", content=(text_accum + f"\n❌ Finalization failed: {e3}"))
+                                    yield history, gr.MultimodalTextbox(value=None, interactive=True)
+                                    return
+
+                            # Done
+                            yield history, gr.MultimodalTextbox(value=None, interactive=True)
+                            return
+                else:
+                    # Normal assistant visible text outside of tool JSON
+                    text_accum += piece
+                    history[main_index] = ChatMessage(role="assistant", content=text_accum)
+                    yield history, gr.MultimodalTextbox(value=None, interactive=False)
+        except Exception as e:
+            # Fallback: Use non-streaming MCP path
+            responses = self._call_hf_with_mcp(messages, uploaded_file_urls)
+            history.extend(responses)
+            yield history, gr.MultimodalTextbox(value=None, interactive=True)
+            return
+
+        # If we streamed without any tool usage, finalize
+        yield history, gr.MultimodalTextbox(value=None, interactive=True)
 
     def _extract_media_url(self, result_text: str, server_name: str) -> Optional[str]:
         """Extract media URL from MCP response with improved pattern matching"""
@@ -644,4 +1045,4 @@ IMPORTANT NOTES:
 - ALWAYS provide a descriptive message before the JSON tool call
 - After tool execution, you can provide additional context or ask if the user needs anything else
 Current time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
-Current model: {self.mcp_client.current_model} via {self.mcp_client.current_provider}"""
+Current model: {self.mcp_client.current_model} via {self.mcp_client.current_provider}"""
 
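Usage note (not part of the commit): the new process_multimodal_message_stream generator yields (history, MultimodalTextbox update) tuples on every token and tool step, so it can drive a Gradio UI directly. A minimal wiring sketch follows, assuming a ChatHandler(mcp_client) constructor and standard Gradio Blocks components; the UniversalMCPClient import path and constructor are assumptions and are not shown in this diff.

# Illustrative wiring only; component names and the UniversalMCPClient import
# are assumptions, not part of this commit.
import gradio as gr

from mcp_client import UniversalMCPClient   # assumed module/class name
from chat_handler import ChatHandler

mcp_client = UniversalMCPClient()            # assumes HF_TOKEN is already configured
handler = ChatHandler(mcp_client)            # assumed constructor signature

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")    # "messages" mode renders ChatMessage metadata panels
    user_box = gr.MultimodalTextbox(file_types=["image"], interactive=True)

    # Because the handler is a generator, Gradio re-renders the chat and the
    # textbox state on every yield, giving streaming output in the UI.
    user_box.submit(
        handler.process_multimodal_message_stream,
        inputs=[user_box, chatbot],
        outputs=[chatbot, user_box],
    )

demo.launch()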