| """ | |
| Configuration module for Universal MCP Client - Enhanced for GPT-OSS models with full context support | |
| """ | |
| import os | |
| from dataclasses import dataclass | |
| from typing import Optional, Dict, List | |
| import logging | |
| # Set up enhanced logging | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') | |
| logger = logging.getLogger(__name__) | |

@dataclass
class MCPServerConfig:
    """Configuration for an MCP server connection"""
    name: str
    url: str
    description: str
    space_id: Optional[str] = None

class AppConfig:
    """Application configuration settings"""

    # HuggingFace Configuration
    HF_TOKEN = os.getenv("HF_TOKEN")

    # OpenAI GPT-OSS models with enhanced configurations
    AVAILABLE_MODELS = {
        "openai/gpt-oss-120b": {
            "name": "GPT OSS 120B",
            "description": "117B parameters, 5.1B active - Production use with reasoning",
            "size": "120B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "5.1B"
        },
        "openai/gpt-oss-20b": {
            "name": "GPT OSS 20B",
            "description": "21B parameters, 3.6B active - Lower latency with reasoning",
            "size": "20B",
            "context_length": 128000,  # Full 128k context length
            "supports_reasoning": True,
            "supports_tool_calling": True,
            "active_params": "3.6B"
        }
    }
    # Inference Providers that support the GPT-OSS models
    INFERENCE_PROVIDERS = {
        "cerebras": {
            "name": "Cerebras",
            "description": "World-record inference speeds (2-4k tokens/sec for GPT-OSS)",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "cerebras",
            "speed": "Very Fast",
            "recommended_for": ["production", "high-throughput"],
            "max_context_support": 128000  # Full context support
        },
        "fireworks-ai": {
            "name": "Fireworks AI",
            "description": "Fast inference with excellent reliability",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "fireworks-ai",
            "speed": "Fast",
            "recommended_for": ["production", "general-use"],
            "max_context_support": 128000  # Full context support
        },
        "together-ai": {
            "name": "Together AI",
            "description": "Collaborative AI inference with good performance",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "together-ai",
            "speed": "Fast",
            "recommended_for": ["development", "experimentation"],
            "max_context_support": 128000  # Full context support
        },
        "replicate": {
            "name": "Replicate",
            "description": "Machine learning deployment platform",
            "supports_120b": True,
            "supports_20b": True,
            "endpoint_suffix": "replicate",
            "speed": "Medium",
            "recommended_for": ["prototyping", "low-volume"],
            "max_context_support": 128000  # Full context support
        }
    }

    # Enhanced model configuration for GPT-OSS - utilizing the full context
    MAX_TOKENS = 128000  # Full context length for GPT-OSS models

    # Response token allocation - increased for longer responses
    DEFAULT_MAX_RESPONSE_TOKENS = 16384  # Increased from 8192 for longer responses
    MIN_RESPONSE_TOKENS = 4096  # Minimum response size

    # Context management - optimized for full 128k usage
    SYSTEM_PROMPT_RESERVE = 3000  # Reserve for system prompt (includes MCP tool descriptions)
    MCP_TOOLS_RESERVE = 2000  # Additional reserve when MCP servers are enabled

    # History management - much larger with 128k context
    MAX_HISTORY_MESSAGES = 100  # Increased from 50 for better context retention
    DEFAULT_HISTORY_MESSAGES = 50  # Default for good performance

    # Reasoning configuration
    DEFAULT_REASONING_EFFORT = "medium"  # low, medium, high

    # UI Configuration
    GRADIO_THEME = "ocean"
    DEBUG_MODE = True

    # MCP server recommendations
    OPTIMAL_MCP_SERVER_COUNT = 6  # Recommended maximum for good performance
    WARNING_MCP_SERVER_COUNT = 10  # Show a warning above this count

    # File Support
    SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg']
    SUPPORTED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.ogg', '.m4a', '.flac', '.aac', '.opus', '.wma']
    SUPPORTED_VIDEO_EXTENSIONS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.m4v', '.wmv']
    SUPPORTED_DOCUMENT_EXTENSIONS = ['.pdf', '.txt', '.docx', '.md', '.rtf', '.odt']

    @classmethod
    def get_available_models_for_provider(cls, provider_id: str) -> List[str]:
        """Get models available for a specific provider"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            return []
        provider = cls.INFERENCE_PROVIDERS[provider_id]
        available_models = []
        for model_id, model_info in cls.AVAILABLE_MODELS.items():
            if model_info["size"] == "120B" and provider["supports_120b"]:
                available_models.append(model_id)
            elif model_info["size"] == "20B" and provider["supports_20b"]:
                available_models.append(model_id)
        return available_models

    @classmethod
    def get_model_endpoint(cls, model_id: str, provider_id: str) -> str:
        """Get the full model endpoint for HF Inference Providers"""
        if provider_id not in cls.INFERENCE_PROVIDERS:
            raise ValueError(f"Unknown provider: {provider_id}")
        provider = cls.INFERENCE_PROVIDERS[provider_id]
        return f"{model_id}:{provider['endpoint_suffix']}"

    @classmethod
    def get_optimal_context_settings(cls, model_id: str, provider_id: str, mcp_servers_count: int = 0) -> Dict[str, object]:
        """Get optimal context settings for a model/provider combination"""
        model_info = cls.AVAILABLE_MODELS.get(model_id, {})
        provider_info = cls.INFERENCE_PROVIDERS.get(provider_id, {})

        # Use the smaller of the model's and the provider's context limits
        model_context = model_info.get("context_length", 128000)
        provider_context = provider_info.get("max_context_support", 128000)
        context_length = min(model_context, provider_context)

        # Calculate reserves based on MCP server count
        system_reserve = cls.SYSTEM_PROMPT_RESERVE
        if mcp_servers_count > 0:
            # Add extra reserve for MCP tools (roughly 300 tokens per server for tool descriptions)
            system_reserve += cls.MCP_TOOLS_RESERVE + (mcp_servers_count * 300)

        # Dynamic response token allocation based on available context
        if context_length >= 100000:
            max_response_tokens = cls.DEFAULT_MAX_RESPONSE_TOKENS  # 16384
        elif context_length >= 50000:
            max_response_tokens = 12288
        elif context_length >= 20000:
            max_response_tokens = 8192
        else:
            max_response_tokens = cls.MIN_RESPONSE_TOKENS  # 4096

        # Context left over for conversation history
        available_context = context_length - system_reserve - max_response_tokens

        # Recommended history limit, assuming an average message is ~200 tokens
        avg_message_tokens = 200
        recommended_history = min(
            cls.MAX_HISTORY_MESSAGES,
            available_context // avg_message_tokens
        )

        return {
            "max_context": context_length,
            "available_context": available_context,
            "max_response_tokens": max_response_tokens,
            "system_reserve": system_reserve,
            "recommended_history_limit": max(10, recommended_history),  # At least 10 messages
            "context_utilization": f"{((system_reserve + max_response_tokens) / context_length * 100):.1f}% reserved"
        }
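
    # Worked example (values follow directly from the constants above), for
    # "openai/gpt-oss-120b" on "cerebras" with 3 MCP servers:
    #   context_length = min(128000, 128000)          = 128000
    #   system_reserve = 3000 + 2000 + 3 * 300        = 5900
    #   max_response   = 16384 (context >= 100k)
    #   available      = 128000 - 5900 - 16384        = 105716
    #   history limit  = min(100, 105716 // 200)      = 100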

    @classmethod
    def get_all_media_extensions(cls) -> List[str]:
        """Get all supported media file extensions"""
        return (cls.SUPPORTED_IMAGE_EXTENSIONS +
                cls.SUPPORTED_AUDIO_EXTENSIONS +
                cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_image_file(cls, file_path: str) -> bool:
        """Check if file is an image"""
        if not file_path:
            return False
        # Match on the file suffix; a bare substring test would also match
        # extensions embedded mid-name (e.g. "clip.mp4.txt")
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_IMAGE_EXTENSIONS)

    @classmethod
    def is_audio_file(cls, file_path: str) -> bool:
        """Check if file is an audio file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_AUDIO_EXTENSIONS)

    @classmethod
    def is_video_file(cls, file_path: str) -> bool:
        """Check if file is a video file"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.SUPPORTED_VIDEO_EXTENSIONS)

    @classmethod
    def is_media_file(cls, file_path: str) -> bool:
        """Check if file is any supported media type"""
        if not file_path:
            return False
        return any(file_path.lower().endswith(ext) for ext in cls.get_all_media_extensions())
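
    # Examples: AppConfig.is_video_file("clip.webm") -> True;
    # AppConfig.is_media_file("notes.pdf") -> False (documents are listed
    # separately in SUPPORTED_DOCUMENT_EXTENSIONS and are not media here)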

    @classmethod
    def get_provider_recommendation(cls, use_case: str) -> List[str]:
        """Get recommended providers for specific use cases"""
        recommendations = {
            "production": ["cerebras", "fireworks-ai"],
            "development": ["together-ai", "fireworks-ai"],
            "experimentation": ["together-ai", "replicate"],
            "high-throughput": ["cerebras"],
            "cost-effective": ["together-ai", "replicate"],
            "maximum-context": ["cerebras", "fireworks-ai"]  # Providers with best context support
        }
        return recommendations.get(use_case, list(cls.INFERENCE_PROVIDERS.keys()))
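
    # Unknown use cases fall back to every configured provider, e.g.:
    #   AppConfig.get_provider_recommendation("batch")
    #   -> ["cerebras", "fireworks-ai", "together-ai", "replicate"]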

# Check for optional dependencies
try:
    import httpx
    HTTPX_AVAILABLE = True
except ImportError:
    HTTPX_AVAILABLE = False
    logger.warning("httpx not available - file upload functionality limited")

try:
    import huggingface_hub
    HF_HUB_AVAILABLE = True
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub not available - login functionality disabled")
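
# Downstream modules can gate optional features on these flags, for example:
# hide the login UI when HF_HUB_AVAILABLE is False, or disable file uploads
# when HTTPX_AVAILABLE is False.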

# Enhanced CSS configuration with better media display
CUSTOM_CSS = """
/* Hide Gradio footer */
footer {
    display: none !important;
}

/* Make chatbot expand to fill available space */
.gradio-container {
    height: 100vh !important;
}

/* Ensure proper flex layout */
.main-content {
    display: flex;
    flex-direction: column;
    height: 100%;
}

/* Input area stays at bottom with minimal padding */
.input-area {
    margin-top: auto;
    padding-top: 0.25rem !important;
    padding-bottom: 0 !important;
    margin-bottom: 0 !important;
}

/* Reduce padding around chatbot */
.chatbot {
    margin-bottom: 0 !important;
    padding-bottom: 0 !important;
}

/* Provider and model selection styling */
.provider-model-selection {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #007bff;
}

/* Login section styling */
.login-section {
    padding: 10px;
    border-radius: 8px;
    margin-bottom: 10px;
    border-left: 4px solid #4caf50;
}

/* Tool usage indicator */
.tool-usage {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 4px;
    padding: 8px;
    margin: 4px 0;
}

/* Media display improvements */
.media-container {
    max-width: 100%;
    border-radius: 8px;
    overflow: hidden;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Enhanced audio player styling */
audio {
    width: 100%;
    max-width: 500px;
    height: 54px;
    border-radius: 27px;
    outline: none;
    margin: 10px 0;
}

/* Enhanced video player styling */
video {
    width: 100%;
    max-width: 700px;
    height: auto;
    object-fit: contain;
    border-radius: 8px;
    margin: 10px 0;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}

/* Server status indicators */
.server-status {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 12px;
    font-size: 12px;
    font-weight: bold;
}

.server-status.online {
    background: #d4edda;
    color: #155724;
}

.server-status.offline {
    background: #f8d7da;
    color: #721c24;
}

/* Message metadata styling */
.message-metadata {
    font-size: 0.85em;
    color: #666;
    margin-top: 4px;
    padding: 4px 8px;
    background: #f0f0f0;
    border-radius: 4px;
}
"""