"""Gradio dashboard showing per-model test results on AMD and NVIDIA devices.

For each model in ``MODELS`` the dashboard renders two pie charts (one per
device) with the number of passed / failed / skipped / errored tests, plus
two text boxes listing the names of the failed tests on each device.
"""

import matplotlib.pyplot as plt
import numpy as np
import gradio as gr

# Sample test results with test names.
# Layout: model name -> device ("amd" / "nvidia") -> status -> list of test names.
MODELS = {
    "llama": {
        "amd": {
            "passed": [
                "auth_login", "data_validation", "api_response", "file_upload",
                "cache_hit", "user_permissions", "db_query", "session_mgmt",
                "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
                "thread_safety", "backup_restore",
            ],
            "failed": ["network_timeout"],
            "skipped": [
                "gpu_accel", "cuda_ops", "ml_inference", "tensor_ops",
                "distributed", "multi_gpu",
            ],
            "error": [],
        },
        "nvidia": {
            "passed": [
                "auth_login", "data_validation", "api_response", "file_upload",
                "cache_hit", "user_permissions", "db_query", "session_mgmt",
                "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
                "thread_safety", "backup_restore", "gpu_accel", "cuda_ops",
                "ml_inference", "tensor_ops",
            ],
            "failed": ["network_timeout", "distributed"],
            "skipped": ["multi_gpu"],
            "error": [],
        },
    },
    "gemma3": {
        "amd": {
            "passed": [
                "auth_login", "data_validation", "api_response", "file_upload",
                "cache_hit", "user_permissions", "db_query", "session_mgmt",
                "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
                "thread_safety", "backup_restore", "config_load", "log_rotation",
                "health_check", "metrics", "alerts", "monitoring",
                "security_scan", "password_hash", "jwt_token", "oauth_flow",
                "csrf_protect", "xss_filter", "sql_injection", "rate_limiter",
                "load_balance", "circuit_break", "retry_logic", "timeout_handle",
                "graceful_shutdown", "hot_reload", "config_watch", "env_vars",
                "secrets_mgmt", "tls_cert", "encryption", "compression",
                "serialization", "deserialization", "validation",
            ],
            "failed": [
                "gpu_accel", "cuda_ops", "ml_inference", "tensor_ops",
                "distributed", "multi_gpu",
            ],
            "skipped": [
                "perf_test", "stress_test", "load_test", "endurance",
                "benchmark", "profiling", "memory_leak", "cpu_usage",
                "disk_io", "network_bw", "latency", "throughput",
            ],
            "error": [],
        },
        "nvidia": {
            "passed": [
                "auth_login", "data_validation", "api_response", "file_upload",
                "cache_hit", "user_permissions", "db_query", "session_mgmt",
                "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
                "thread_safety", "backup_restore", "config_load", "log_rotation",
                "health_check", "metrics", "alerts", "monitoring",
                "security_scan", "password_hash", "jwt_token", "oauth_flow",
                "csrf_protect", "xss_filter", "sql_injection", "rate_limiter",
                "load_balance", "circuit_break", "retry_logic", "timeout_handle",
                "graceful_shutdown", "hot_reload", "config_watch", "env_vars",
                "secrets_mgmt", "tls_cert", "encryption", "compression",
                "serialization", "deserialization", "validation", "gpu_accel",
                "cuda_ops", "ml_inference", "tensor_ops",
            ],
            "failed": ["distributed", "multi_gpu"],
            "skipped": [
                "perf_test", "stress_test", "load_test", "endurance",
                "benchmark", "profiling", "memory_leak", "cpu_usage",
                "disk_io", "network_bw",
            ],
            "error": [],
        },
    },
    "csm": {
        "amd": {
            "passed": [],
            "failed": [],
            "skipped": [],
            "error": ["system_crash"],
        },
        "nvidia": {
            "passed": [],
            "failed": [],
            "skipped": [],
            "error": ["system_crash"],
        },
    },
}

# Wedge colour for each test status.
_STATUS_COLORS = {
    'passed': '#4CAF50',   # Medium green
    'failed': '#E53E3E',   # More red
    'skipped': '#FFD54F',  # Medium yellow
    'error': '#8B0000',    # Dark red
}


def _draw_device_pie(ax, device_label: str, filtered_stats: dict[str, int]) -> None:
    """Draw one device's pie chart (or a 'no results' placeholder) on *ax*.

    Args:
        ax: Matplotlib axes to draw on.
        device_label: Title shown above the chart (e.g. ``"amd"``).
        filtered_stats: Mapping of status name to a non-zero test count.
            An empty mapping produces a text placeholder instead of a pie.
    """
    if not filtered_stats:
        ax.text(0.5, 0.5, 'No test results',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=14, color='#888888',
                fontfamily='monospace', weight='normal')
        ax.set_title(device_label, fontsize=28, weight='bold', pad=2,
                     color='#FFFFFF', fontfamily='monospace')
        ax.axis('off')
        return

    categories = list(filtered_stats)
    counts = list(filtered_stats.values())
    total = sum(counts)

    def count_label(pct) -> str:
        # Matplotlib hands autopct a percentage computed in float32, so the
        # reconstructed count can land just below the true integer
        # (e.g. 0.9999...). Round instead of truncating to show the real count.
        return f'{int(round(float(pct) / 100 * total))}'

    # Minimal pie chart - full pie, no donut effect
    _wedges, texts, autotexts = ax.pie(
        counts,
        labels=[category.lower() for category in categories],  # Lowercase for minimal look
        colors=[_STATUS_COLORS[category] for category in categories],
        autopct=count_label,
        startangle=90,
        explode=None,  # No separation
        shadow=False,
        wedgeprops=dict(edgecolor='#1a1a1a', linewidth=0.5),  # Minimal borders
        textprops={'fontsize': 12, 'weight': 'normal',
                   'color': '#CCCCCC', 'fontfamily': 'monospace'},
    )

    # Count labels inside the wedges: black and bold for contrast
    for autotext in autotexts:
        autotext.set_color('#000000')
        autotext.set_weight('bold')
        autotext.set_fontsize(14)
        autotext.set_fontfamily('monospace')

    # Minimal category labels
    for text in texts:
        text.set_color('#AAAAAA')
        text.set_weight('normal')
        text.set_fontsize(13)
        text.set_fontfamily('monospace')

    # Device label closer to chart and bigger
    ax.set_title(device_label, fontsize=28, weight='bold', pad=2,
                 color='#FFFFFF', fontfamily='monospace')


def plot_model_stats(model_name: str) -> tuple[plt.Figure, str, str]:
    """Draw side-by-side AMD / NVIDIA pie charts of a model's test results.

    Args:
        model_name: Key into ``MODELS``.

    Returns:
        A ``(figure, amd_failed_info, nvidia_failed_info)`` tuple. The two
        strings list the failed test names per device (or ``"None"``). When
        neither device has any results, a placeholder figure and two empty
        strings are returned.

    Raises:
        KeyError: If *model_name* is not in ``MODELS``.
    """
    model_stats = MODELS[model_name]

    # Convert test lists to counts, dropping empty categories for a cleaner chart
    amd_filtered = {
        status: len(tests)
        for status, tests in model_stats['amd'].items() if tests
    }
    nvidia_filtered = {
        status: len(tests)
        for status, tests in model_stats['nvidia'].items() if tests
    }

    if not amd_filtered and not nvidia_filtered:
        # All counts are 0 - minimal empty state
        fig, ax = plt.subplots(figsize=(10, 8), facecolor='#000000')
        ax.set_facecolor('#000000')
        ax.text(0.5, 0.5, 'No test results available',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=16, color='#888888',
                fontfamily='monospace', weight='normal')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')
        # Unregister from pyplot so figures do not pile up across calls; the
        # Figure object itself stays usable for rendering by the caller.
        plt.close(fig)
        return fig, "", ""

    # Two subplots side by side
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9), facecolor='#000000')
    ax1.set_facecolor('#000000')
    ax2.set_facecolor('#000000')

    _draw_device_pie(ax1, "amd", amd_filtered)
    _draw_device_pie(ax2, "nvidia", nvidia_filtered)

    # Subtle separation line between charts - stops at device labels level
    line_x = 0.5
    fig.add_artist(plt.Line2D([line_x, line_x], [0.0, 0.85],
                              color='#333333', linewidth=1, alpha=0.5,
                              transform=fig.transFigure))

    # Central shared title for model name
    fig.suptitle(model_name.lower(), fontsize=18, weight='normal',
                 color='#CCCCCC', fontfamily='monospace', y=0.95)

    # Clean layout with padding and space for the central title. Operate on
    # this figure explicitly rather than on pyplot's implicit current figure.
    fig.tight_layout()
    fig.subplots_adjust(top=0.85, wspace=0.4)  # wspace pads between charts

    # Separate failed-tests info for AMD and NVIDIA
    amd_failed = model_stats['amd']['failed']
    nvidia_failed = model_stats['nvidia']['failed']
    amd_failed_info = "Fails on AMD:\n────────────\n" + (
        "\n".join(amd_failed) if amd_failed else "None")
    nvidia_failed_info = "Fails on NVIDIA:\n────────────────\n" + (
        "\n".join(nvidia_failed) if nvidia_failed else "None")

    # Unregister from pyplot so repeated button clicks do not accumulate open
    # figures; the returned Figure can still be saved/rendered.
    plt.close(fig)
    return fig, amd_failed_info, nvidia_failed_info


def get_model_stats_summary(model_name: str) -> tuple[int, float, str]:
    """Get summary stats for a model, combining AMD and NVIDIA results.

    Args:
        model_name: Key into ``MODELS``.

    Returns:
        ``(total, success_rate, status_class)`` where *total* is the number of
        test entries across both devices, *success_rate* is the percentage of
        them that passed (0 when there are no tests) and *status_class* is
        ``"success-high"`` (>= 80), ``"success-medium"`` (>= 50) or
        ``"success-low"``.

    Raises:
        KeyError: If *model_name* is not in ``MODELS``.
    """
    stats = MODELS[model_name]

    total_passed = 0
    total = 0
    for device in ('amd', 'nvidia'):
        device_stats = stats[device]
        total_passed += len(device_stats['passed'])
        total += sum(
            len(device_stats[status])
            for status in ('passed', 'failed', 'skipped', 'error')
        )

    success_rate = (total_passed / total * 100) if total > 0 else 0.0

    # Determine status indicator colour class
    if success_rate >= 80:
        status_class = "success-high"
    elif success_rate >= 50:
        status_class = "success-medium"
    else:
        status_class = "success-low"

    return total, success_rate, status_class


# Custom CSS for dark theme
dark_theme_css = """
/* Global dark theme */
.gradio-container {
    background-color: #000000 !important;
    color: white !important;
}

/* Remove borders from all components */
.gr-box, .gr-form, .gr-panel {
    border: none !important;
    background-color: #000000 !important;
}

/* Sidebar styling */
.sidebar {
    background: linear-gradient(145deg, #111111, #1a1a1a) !important;
    border: none !important;
    padding: 25px !important;
    box-shadow: inset 2px 2px 5px rgba(0, 0, 0, 0.3) !important;
    margin: 0 !important;
    height: 100vh !important;
    position: fixed !important;
    left: 0 !important;
    top: 0 !important;
    width: 300px !important;
}

/* Enhanced model button styling */
.model-button {
    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
    color: white !important;
    border: 2px solid transparent !important;
    margin: 2px 0 !important;
    border-radius: 5px !important;
    padding: 8px 12px !important;
    transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1) !important;
    position: relative !important;
    overflow: hidden !important;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2), inset 0 1px 0 rgba(255, 255, 255, 0.1) !important;
    font-weight: 600 !important;
    font-size: 16px !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
    font-family: monospace !important;
}

.model-button:hover {
    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
    color: #74b9ff !important;
}

.model-button:active {
    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
    color: #5a9bd4 !important;
}

/* Model stats badge */
.model-stats {
    display: flex !important;
    justify-content: space-between !important;
    align-items: center !important;
    margin-top: 8px !important;
    font-size: 12px !important;
    opacity: 0.8 !important;
}

.stats-badge {
    background: rgba(116, 185, 255, 0.2) !important;
    padding: 4px 8px !important;
    border-radius: 10px !important;
    font-weight: 500 !important;
    font-size: 11px !important;
    color: #74b9ff !important;
}

.success-indicator {
    width: 8px !important;
    height: 8px !important;
    border-radius: 50% !important;
    display: inline-block !important;
    margin-right: 6px !important;
}

.success-high { background-color: #4CAF50 !important; }
.success-medium { background-color: #FF9800 !important; }
.success-low { background-color: #F44336 !important; }

/* Regular button styling for non-model buttons */
.gr-button:not(.model-button) {
    background-color: #222222 !important;
    color: white !important;
    border: 1px solid #444444 !important;
    margin: 5px 0 !important;
    border-radius: 8px !important;
    transition: all 0.3s ease !important;
}

.gr-button:not(.model-button):hover {
    background-color: #333333 !important;
    border-color: #666666 !important;
}

/* Plot container */
.plot-container {
    background-color: #000000 !important;
    border: none !important;
}

/* Text elements */
h1, h2, h3, p, .markdown {
    color: white !important;
}

/* Sidebar header enhancement */
.sidebar h1 {
    background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    text-align: center !important;
    margin-bottom: 15px !important;
    font-size: 28px !important;
    font-weight: 700 !important;
    font-family: monospace !important;
}

/* Sidebar description text */
.sidebar p {
    text-align: center !important;
    margin-bottom: 20px !important;
    line-height: 1.5 !important;
    font-size: 14px !important;
    font-family: monospace !important;
}

.sidebar strong {
    color: #74b9ff !important;
    font-weight: 600 !important;
    font-family: monospace !important;
}

.sidebar em {
    color: #a29bfe !important;
    font-style: normal !important;
    opacity: 0.9 !important;
    font-family: monospace !important;
}

/* Remove all borders globally */
* {
    border-color: transparent !important;
}

/* Main content area */
.main-content {
    background-color: #000000 !important;
    padding: 20px !important;
    margin-left: 300px !important;
}

/* Failed tests display - seamless appearance */
.failed-tests textarea {
    background-color: #000000 !important;
    color: #FFFFFF !important;
    font-family: monospace !important;
    font-size: 14px !important;
    border: none !important;
    padding: 10px !important;
    outline: none !important;
    line-height: 1.4 !important;
}

.failed-tests {
    background-color: #000000 !important;
}

.failed-tests .gr-textbox {
    background-color: #000000 !important;
    border: none !important;
}
"""

# Create the Gradio interface with sidebar and dark theme
with gr.Blocks(title="Model Test Results Dashboard", css=dark_theme_css) as demo:
    with gr.Row():
        # Sidebar for model selection
        with gr.Column(scale=1, elem_classes=["sidebar"]):
            gr.Markdown("# 🤖 AI Models")
            gr.Markdown("**Select a model to analyze test results**\n\n*Interactive dashboard with detailed metrics*")

            # One selection button per model, in MODELS order
            model_buttons = []
            for model_name in MODELS:
                btn = gr.Button(
                    model_name.lower(),
                    variant="secondary",
                    size="lg",
                    elem_classes=["model-button"],
                )
                model_buttons.append(btn)

        # Main content area
        with gr.Column(scale=4, elem_classes=["main-content"]):
            gr.Markdown("# 📈 Test Results Dashboard")

            # Plot output
            plot_output = gr.Plot(
                label="",
                format="png",
                elem_classes=["plot-container"],
            )

            # Two separate failed-tests displays in a row layout. The initial
            # values are placeholders; demo.load below overwrites them with
            # the first model's results.
            with gr.Row():
                with gr.Column(scale=1):
                    amd_failed_tests_output = gr.Textbox(
                        value="Fails on AMD:\n────────────\nnetwork_timeout\ngpu_initialization\nmemory_overflow",
                        lines=6,
                        interactive=False,
                        container=False,
                        elem_classes=["failed-tests"],
                    )
                with gr.Column(scale=1):
                    nvidia_failed_tests_output = gr.Textbox(
                        value="Fails on NVIDIA:\n────────────────\ndistributed\nmulti_gpu\ndriver_conflict",
                        lines=6,
                        interactive=False,
                        container=False,
                        elem_classes=["failed-tests"],
                    )

    # Click handlers: model_buttons was built in MODELS order, so zip pairs
    # each button with its model. The default argument binds the current name
    # at definition time (avoids the late-binding closure pitfall).
    for model_name, button in zip(MODELS, model_buttons):
        button.click(
            fn=lambda name=model_name: plot_model_stats(name),
            outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output],
        )

    # Initialize with the first model
    demo.load(
        fn=lambda: plot_model_stats(next(iter(MODELS))),
        outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output],
    )

if __name__ == "__main__":
    demo.launch()