"""
Configuration module for ChatMCP's Universal MCP Client - enhanced for GPT-OSS models with full 128k context support
"""
import os
from dataclasses import dataclass
from typing import Optional, Dict, List
import logging
# Set up enhanced logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
@dataclass
class MCPServerConfig:
"""Configuration for an MCP server connection"""
name: str
url: str
description: str
space_id: Optional[str] = None
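
# Illustrative only: a hypothetical MCP server entry pointing at a Gradio
# Space's MCP endpoint. Every value below is a placeholder, not a real server.
# example_server = MCPServerConfig(
#     name="image-tools",
#     url="https://example-space.hf.space/gradio_api/mcp/sse",
#     description="Hypothetical image-generation tools exposed over MCP",
#     space_id="example-user/example-space",
# )
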
class AppConfig:
"""Application configuration settings"""
# HuggingFace Configuration
HF_TOKEN = os.getenv("HF_TOKEN")
# OpenAI GPT OSS Models with enhanced configurations
AVAILABLE_MODELS = {
"openai/gpt-oss-120b": {
"name": "GPT OSS 120B",
"description": "117B parameters, 5.1B active - Production use with reasoning",
"size": "120B",
"context_length": 128000, # Full 128k context length
"supports_reasoning": True,
"supports_tool_calling": True,
"active_params": "5.1B",
"is_gpt_oss": True
},
"openai/gpt-oss-20b": {
"name": "GPT OSS 20B",
"description": "21B parameters, 3.6B active - Lower latency with reasoning",
"size": "20B",
"context_length": 128000, # Full 128k context length
"supports_reasoning": True,
"supports_tool_calling": True,
"active_params": "3.6B",
"is_gpt_oss": True
},
"Qwen/Qwen3-VL-235B-A22B-Thinking": {
"name": "Qwen3-VL 235B A22B Thinking",
"description": "Vision-Language reasoning model (Qwen), strong multimodal understanding",
"size": "235B",
"context_length": 256000,
"supports_reasoning": True,
"supports_tool_calling": True,
"is_gpt_oss": False,
"active_params": "22B"
},
"Qwen/Qwen3-VL-235B-A22B-Instruct": {
"name": "Qwen3-VL 235B A22B Instruct",
"description": "Vision-Language instruct-tuned model (Qwen)",
"size": "235B",
"context_length": 256000,
"supports_reasoning": True,
"supports_tool_calling": True,
"is_gpt_oss": False,
"active_params": "22B"
}
}
# Enhanced Inference Providers supporting GPT OSS models
INFERENCE_PROVIDERS = {
"auto": {
"name": "Auto (Router)",
"description": "Let the HF router automatically choose the best provider",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": None, # Omit suffix to use router auto selection
"speed": "Varies",
"recommended_for": ["getting-started", "automatic"],
"max_context_support": 128000
},
"cerebras": {
"name": "Cerebras",
"description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "cerebras",
"speed": "Very Fast",
"recommended_for": ["production", "high-throughput"],
"max_context_support": 128000 # Full context support
},
"fireworks-ai": {
"name": "Fireworks AI",
"description": "Fast and reliable inference with excellent reliability",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "fireworks-ai",
"speed": "Fast",
"recommended_for": ["production", "general-use"],
"max_context_support": 128000 # Full context support
},
"together-ai": {
"name": "Together AI",
"description": "Collaborative AI inference with good performance",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "together-ai",
"speed": "Fast",
"recommended_for": ["development", "experimentation"],
"max_context_support": 128000 # Full context support
},
"replicate": {
"name": "Replicate",
"description": "Machine learning deployment platform",
"supports_120b": True,
"supports_20b": True,
"endpoint_suffix": "replicate",
"speed": "Medium",
"recommended_for": ["prototyping", "low-volume"],
"max_context_support": 128000 # Full context support
}
}
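
    # A minimal sketch (kept commented out) of how a "model:provider" string is
    # consumed downstream, assuming the OpenAI-compatible Hugging Face router at
    # https://router.huggingface.co/v1 and the `openai` client package:
    #
    #   from openai import OpenAI
    #   client = OpenAI(
    #       base_url="https://router.huggingface.co/v1",
    #       api_key=os.getenv("HF_TOKEN"),
    #   )
    #   completion = client.chat.completions.create(
    #       model="openai/gpt-oss-120b:cerebras",  # see get_model_endpoint()
    #       messages=[{"role": "user", "content": "Hello"}],
    #   )
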
# Enhanced Model Configuration for GPT-OSS - Utilizing full context
MAX_TOKENS = 128000 # Full context length for GPT-OSS models
# Response token allocation - increased for longer responses
DEFAULT_MAX_RESPONSE_TOKENS = 16384 # Increased from 8192 for longer responses
MIN_RESPONSE_TOKENS = 4096 # Minimum response size
# Context management - optimized for full 128k usage
SYSTEM_PROMPT_RESERVE = 3000 # Reserve for system prompt (includes MCP tool descriptions)
MCP_TOOLS_RESERVE = 2000 # Additional reserve when MCP servers are enabled
# History management - much larger with 128k context
MAX_HISTORY_MESSAGES = 100 # Increased from 50 for better context retention
DEFAULT_HISTORY_MESSAGES = 50 # Default for good performance
# Reasoning configuration
DEFAULT_REASONING_EFFORT = "medium" # low, medium, high
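
    # Note: how reasoning effort is applied is up to the chat layer, not this
    # config; for GPT-OSS models it is typically conveyed via the system prompt
    # (e.g. a "Reasoning: medium" line in the harmony prompt format).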
# UI Configuration
GRADIO_THEME = "ocean"
DEBUG_MODE = True
    # Gradio hosting defaults; env GRADIO_SERVER_NAME/GRADIO_SERVER_PORT take
    # precedence when set
    GRADIO_SERVER_NAME = os.getenv("GRADIO_SERVER_NAME", "localhost")
    GRADIO_SERVER_PORT = int(os.getenv("GRADIO_SERVER_PORT", "7860"))
# MCP Server recommendations
OPTIMAL_MCP_SERVER_COUNT = 6 # Recommended maximum for good performance
WARNING_MCP_SERVER_COUNT = 10 # Show warning if more than this
# File Support
SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']
@classmethod
def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
"""Get models available for a specific provider"""
if provider_id not in cls.INFERENCE_PROVIDERS:
return []
provider = cls.INFERENCE_PROVIDERS[provider_id]
available_models = []
# Auto provider exposes all models (router decides)
if provider_id == "auto":
return list(cls.AVAILABLE_MODELS.keys())
for model_id, model_info in cls.AVAILABLE_MODELS.items():
# If model declares explicit providers, honor that
if "providers" in model_info:
if provider_id in model_info["providers"]:
available_models.append(model_id)
continue
            # Legacy gating by size for GPT-OSS entries; other models are
            # included by default unless explicitly incompatible
            size = model_info.get("size")
            if size == "120B":
                if provider.get("supports_120b"):
                    available_models.append(model_id)
            elif size == "20B":
                if provider.get("supports_20b"):
                    available_models.append(model_id)
            else:
                available_models.append(model_id)
return available_models
@classmethod
def is_gpt_oss_model(cls, model_id: str) -> bool:
info = cls.AVAILABLE_MODELS.get(model_id, {})
if info.get("is_gpt_oss"):
return True
return model_id.startswith("openai/gpt-oss-")
@classmethod
def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
"""Get the full model endpoint for HF Inference Providers"""
if provider_id not in cls.INFERENCE_PROVIDERS:
raise ValueError(f"Unknown provider: {provider_id}")
provider = cls.INFERENCE_PROVIDERS[provider_id]
# For 'auto', omit provider suffix and let router choose
endpoint_suffix = provider.get('endpoint_suffix')
if provider_id == 'auto' or not endpoint_suffix:
return model_id
return f"{model_id}:{endpoint_suffix}"
@classmethod
def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, int]:
"""Get optimal context settings for a model/provider combination"""
model_info = cls.AVAILABLE_MODELS.get(model_id, {})
provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})
# Get the minimum of model and provider context support
model_context = model_info.get("context_length", 128000)
provider_context = provider_info.get("max_context_support", 128000)
context_length = min(model_context, provider_context)
# Calculate reserves based on MCP server count
system_reserve = cls.SYSTEM_PROMPT_RESERVE
if mcp_servers_count > 0:
# Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)
# Dynamic response token allocation based on available context
if context_length >= 100000:
max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS # 16384
elif context_length >= 50000:
max_response_tokens = 12288
elif context_length >= 20000:
max_response_tokens = 8192
else:
max_response_tokens = cls.MIN_RESPONSE_TOKENS # 4096
# Calculate available context for history
available_context = context_length - system_reserve - max_response_tokens
# Calculate recommended history limit
# Assume average message is ~200 tokens
avg_message_tokens = 200
recommended_history = min(
cls.MAX_HISTORY_MESSAGES,
available_context // avg_message_tokens
)
return {
"max_context": context_length,
"available_context": available_context,
"max_response_tokens": max_response_tokens,
"system_reserve": system_reserve,
"recommended_history_limit": max(10, recommended_history), # At least 10 messages
"context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
}
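
    # Worked example, following directly from the constants above:
    #   get_optimal_context_settings("openai/gpt-oss-120b", "cerebras", mcp_servers_count=3)
    #   system_reserve      = 3000 + 2000 + 3 * 300 = 5900
    #   max_response_tokens = 16384 (context >= 100k)
    #   available_context   = 128000 - 5900 - 16384 = 105716
    #   recommended_history_limit = min(100, 105716 // 200) = 100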
@classmethod
def get_all_media_extensions(cls):
"""Get all supported media file extensions"""
return (cls.SUPPORTED_IMAGE_EXTENSIONS +
cls.SUPPORTED_AUDIO_EXTENSIONS +
cls.SUPPORTED_VIDEO_EXTENSIONS)
@classmethod
def is_image_file(cls, file_path: str) -> bool:
"""Check if file is an image"""
if not file_path:
return False
        # Match on the file suffix to avoid false positives on names that merely
        # contain an extension substring (e.g. "x.jpg.txt")
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)
@classmethod
def is_audio_file(cls, file_path: str) -> bool:
"""Check if file is an audio file"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)
@classmethod
def is_video_file(cls, file_path: str) -> bool:
"""Check if file is a video file"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)
@classmethod
def is_media_file(cls, file_path: str) -> bool:
"""Check if file is any supported media type"""
if not file_path:
return False
        return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())
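
    # Quick checks based on the suffix matching above:
    #   is_image_file("photo.JPG") -> True   (case-insensitive)
    #   is_media_file("notes.txt") -> False  (documents are not media)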
@classmethod
def get_provider_recommendation(cls, use_case: str) -> List[str]:
"""Get recommended providers for specific use cases"""
recommendations = {
"production": ["cerebras", "fireworks-ai"],
"development": ["together-ai", "fireworks-ai"],
"experimentation": ["together-ai", "replicate"],
"high-throughput": ["cerebras"],
"cost-effective": ["together-ai", "replicate"],
"maximum-context": ["cerebras", "fireworks-ai"] # Providers with best context support
}
return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
# Check for dependencies
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
logger.warning("httpx not available - file upload functionality limited")
# Enhanced CSS Configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
display: none !important;
}
/* Make chatbot expand to fill available space */
.gradio-container {
height: 100vh !important;
}
/* Ensure proper flex layout */
.main-content {
display: flex;
flex-direction: column;
height: 100%;
}
/* Input area stays at bottom with minimal padding */
.input-area {
margin-top: auto;
padding-top: 0.25rem !important;
padding-bottom: 0 !important;
margin-bottom: 0 !important;
}
/* Reduce padding around chatbot */
.chatbot {
margin-bottom: 0 !important;
padding-bottom: 0 !important;
}
/* Provider and model selection styling */
.provider-model-selection {
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
border-left: 4px solid #007bff;
}
/* Secret input section styling */
.login-section {
padding: 10px;
border-radius: 8px;
margin-bottom: 10px;
border-left: 4px solid #4caf50;
}
/* Tool usage indicator */
.tool-usage {
background: #fff3cd;
border: 1px solid #ffeaa7;
border-radius: 4px;
padding: 8px;
margin: 4px 0;
}
/* Media display improvements */
.media-container {
max-width: 100%;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Enhanced audio player styling */
audio {
width: 100%;
max-width: 500px;
height: 54px;
border-radius: 27px;
outline: none;
margin: 10px 0;
}
/* Enhanced video player styling */
video {
width: 100%;
max-width: 700px;
height: auto;
object-fit: contain;
border-radius: 8px;
margin: 10px 0;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
/* Server status indicators */
.server-status {
display: inline-block;
padding: 2px 8px;
border-radius: 12px;
font-size: 12px;
font-weight: bold;
}
.server-status.online {
background: #d4edda;
color: #155724;
}
.server-status.offline {
background: #f8d7da;
color: #721c24;
}
/* Message metadata styling */
.message-metadata {
font-size: 0.85em;
color: #666;
margin-top: 4px;
padding: 4px 8px;
background: #f0f0f0;
border-radius: 4px;
}
"""