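# Hugging Face file-viewer header (page residue, kept as comments so the file parses):
# ror's picture
# ror (HF Staff)
# Version 0.1
# c7954ca
# raw | history | blame
# 22.7 kB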
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import gradio as gr
# Global matplotlib setup: silence the "too many open figures" warning and
# make figures, axes and saved images black by default.
matplotlib.rcParams.update({
    'figure.max_open_warning': 0,
    'figure.facecolor': '#000000',
    'axes.facecolor': '#000000',
    'savefig.facecolor': '#000000',
})
# Disable pyplot's interactive mode.
plt.ioff()
def _sample_results() -> dict:
    """Build the sample results table: model -> device -> outcome -> test names.

    Groups of test names that recur across entries are written once and
    unpacked into a fresh list for every entry, so no two entries share a
    list object.
    """
    core = [
        "auth_login", "data_validation", "api_response", "file_upload",
        "cache_hit", "user_permissions", "db_query", "session_mgmt",
        "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
        "thread_safety", "backup_restore",
    ]
    services = [
        "config_load", "log_rotation", "health_check", "metrics", "alerts",
        "monitoring", "security_scan", "password_hash", "jwt_token",
        "oauth_flow", "csrf_protect", "xss_filter", "sql_injection",
        "rate_limiter", "load_balance", "circuit_break", "retry_logic",
        "timeout_handle", "graceful_shutdown", "hot_reload", "config_watch",
        "env_vars", "secrets_mgmt", "tls_cert", "encryption", "compression",
        "serialization", "deserialization", "validation",
    ]
    gpu = ["gpu_accel", "cuda_ops", "ml_inference", "tensor_ops"]
    perf = [
        "perf_test", "stress_test", "load_test", "endurance", "benchmark",
        "profiling", "memory_leak", "cpu_usage", "disk_io", "network_bw",
    ]

    return {
        "llama": {
            "amd": {
                "passed": [*core],
                "failed": ["network_timeout"],
                "skipped": [*gpu, "distributed", "multi_gpu"],
                "error": [],
            },
            "nvidia": {
                "passed": [*core, *gpu],
                "failed": ["network_timeout", "distributed"],
                "skipped": ["multi_gpu"],
                "error": [],
            },
        },
        "gemma3": {
            "amd": {
                "passed": [*core, *services],
                "failed": [
                    *gpu, "distributed", "multi_gpu", "opencl_init",
                    "driver_conflict", "memory_bandwidth", "compute_units",
                    "rocm_version", "hip_compile", "kernel_launch",
                    "buffer_transfer", "atomic_ops", "wavefront_sync",
                ],
                "skipped": [*perf, "latency", "throughput"],
                "error": [],
            },
            "nvidia": {
                "passed": [*core, *services, *gpu],
                "failed": [
                    "distributed", "multi_gpu", "cuda_version", "nvcc_compile",
                    "stream_sync", "device_reset", "peer_access",
                    "unified_memory", "texture_bind", "surface_write",
                    "constant_mem", "shared_mem",
                ],
                "skipped": [*perf],
                "error": [],
            },
        },
        "csm": {
            "amd": {
                "passed": [],
                "failed": [],
                "skipped": [],
                "error": ["system_crash"],
            },
            "nvidia": {
                "passed": [],
                "failed": [],
                "skipped": [],
                "error": ["system_crash"],
            },
        },
    }


# Sample test results with test names
MODELS = _sample_results()
def generate_underlined_line(text: str) -> str:
    """Return *text* on one line and a rule of "─" of equal length on the next.

    Both lines are newline-terminated, so the result can be prepended directly
    to the body that follows the heading.
    """
    rule = "─" * len(text)
    return f"{text}\n{rule}\n"
def _format_failure_report(device_label: str, exclusive: set, common: set, has_errors: bool) -> str:
    """Render one device's failed-test summary as plain text.

    Args:
        device_label: Device name inserted into the section headings.
        exclusive: Tests that failed on this device only.
        common: Tests that failed on both devices.
        has_errors: Whether the device has any entry under "error"; only
            consulted when there is no failure to list.

    Returns:
        Either a single underlined status line ("Error(s) detected" or
        "No failures"), or the "exclusive" and/or "common" sections, each an
        underlined heading followed by the sorted test names, separated by a
        blank line.
    """
    if not exclusive and not common:
        headline = "Error(s) detected" if has_errors else "No failures"
        return generate_underlined_line(headline)

    sections = []
    if exclusive:
        sections.append(
            generate_underlined_line(f"Failures on {device_label} (exclusive):")
            + "\n".join(sorted(exclusive))
        )
    if common:
        sections.append(
            generate_underlined_line(f"Failures on {device_label} (common):")
            + "\n".join(sorted(common))
        )
    return "\n\n".join(sections)


def _draw_device_pie(ax, device_label: str, filtered_stats: dict, colors: dict) -> None:
    """Draw one device's outcome pie (or a "No test results" notice) on *ax*.

    Args:
        ax: Matplotlib axes to draw on.
        device_label: Title shown above the chart.
        filtered_stats: Outcome name -> non-zero test count.
        colors: Outcome name -> wedge colour.
    """
    if not filtered_stats:
        ax.text(0.5, 0.5, 'No test results',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=14, color='#888888',
                fontfamily='monospace', weight='normal')
        ax.set_title(device_label,
                     fontsize=28, weight='bold', pad=2, color='#FFFFFF',
                     fontfamily='monospace')
        ax.axis('off')
        return

    total = sum(filtered_stats.values())

    def count_label(pct) -> str:
        # Matplotlib hands back the slice's share as a (rounded) float
        # percentage. Converting it back to a count must round to nearest:
        # truncating turns e.g. 9.9999997 into 9 and under-reports the slice.
        return str(int(round(float(pct) * total / 100)))

    # Full pie (no donut hole), thin dark borders between wedges.
    wedges, texts, autotexts = ax.pie(
        list(filtered_stats.values()),
        labels=[label.lower() for label in filtered_stats],  # Lowercase for minimal look
        colors=[colors[category] for category in filtered_stats],
        autopct=count_label,
        startangle=90,
        explode=None,  # No separation
        shadow=False,
        wedgeprops=dict(edgecolor='#1a1a1a', linewidth=0.5),
        textprops={'fontsize': 12, 'weight': 'normal', 'color': '#CCCCCC', 'fontfamily': 'monospace'}
    )

    # Count labels inside the wedges: bold black for contrast on the fills.
    for autotext in autotexts:
        autotext.set_color('#000000')
        autotext.set_weight('bold')
        autotext.set_fontsize(14)
        autotext.set_fontfamily('monospace')

    # Category labels outside the wedges.
    for text in texts:
        text.set_color('#AAAAAA')
        text.set_weight('normal')
        text.set_fontsize(13)
        text.set_fontfamily('monospace')

    # Device label, large and close to the chart.
    ax.set_title(device_label,
                 fontsize=28, weight='normal', pad=2, color='#FFFFFF',
                 fontfamily='monospace')


def plot_model_stats(model_name: str) -> tuple[plt.Figure, str, str]:
    """Draw AMD and NVIDIA outcome pies for a model and summarise its failures.

    Args:
        model_name: Key into ``MODELS``.

    Returns:
        ``(figure, amd_failed_info, nvidia_failed_info)``. The figure holds one
        pie per device, labelled with test counts. Each info string lists the
        tests that failed only on that device and those that failed on both.
        When neither device has any result, the figure shows a placeholder
        message and both strings are empty.

    Raises:
        KeyError: If *model_name* is not a key of ``MODELS``.
    """
    model_stats = MODELS[model_name]

    colors = {
        'passed': '#4CAF50',   # Medium green
        'failed': '#E53E3E',   # More red
        'skipped': '#FFD54F',  # Medium yellow
        'error': '#8B0000'     # Dark red
    }

    # Convert test lists to counts and drop empty categories so that no
    # zero-width wedge (and stray label) is drawn.
    amd_filtered = {k: len(v) for k, v in model_stats['amd'].items() if len(v) > 0}
    nvidia_filtered = {k: len(v) for k, v in model_stats['nvidia'].items() if len(v) > 0}

    if not amd_filtered and not nvidia_filtered:
        # Nothing to chart for either device: minimal empty state.
        fig, ax = plt.subplots(figsize=(10, 8), facecolor='#000000')
        ax.set_facecolor('#000000')
        ax.text(0.5, 0.5, 'No test results available',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=16, color='#888888',
                fontfamily='monospace', weight='normal')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')
        # Unregister from pyplot so repeated calls do not accumulate figures;
        # the returned Figure object can still be rendered by the caller.
        plt.close(fig)
        return fig, "", ""

    # Two charts side by side.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9), facecolor='#000000')
    ax1.set_facecolor('#000000')
    ax2.set_facecolor('#000000')

    _draw_device_pie(ax1, "amd", amd_filtered, colors)
    _draw_device_pie(ax2, "nvidia", nvidia_filtered, colors)

    # Subtle vertical separator between the charts, stopping below the titles.
    line_x = 0.5
    fig.add_artist(plt.Line2D([line_x, line_x], [0.0, 0.85],
                              color='#333333', linewidth=1, alpha=0.5,
                              transform=fig.transFigure))

    # Shared title with the model name.
    fig.suptitle(f'{model_name.lower()}',
                 fontsize=32, weight='bold', color='#CCCCCC',
                 fontfamily='monospace', y=0.95)

    # Lay out, then reserve room for the shared title and pad between charts.
    fig.tight_layout()
    fig.subplots_adjust(top=0.85, wspace=0.4)

    # Split failures into device-exclusive and common sets.
    amd_failed = set(model_stats['amd']['failed'])
    nvidia_failed = set(model_stats['nvidia']['failed'])
    common_failures = amd_failed & nvidia_failed

    amd_failed_info = _format_failure_report(
        "AMD", amd_failed - nvidia_failed, common_failures,
        bool(model_stats["amd"]["error"]),
    )
    nvidia_failed_info = _format_failure_report(
        "NVIDIA", nvidia_failed - amd_failed, common_failures,
        bool(model_stats["nvidia"]["error"]),
    )

    # Unregister from pyplot so repeated calls do not accumulate figures;
    # the returned Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig, amd_failed_info, nvidia_failed_info
def get_model_stats_summary(model_name: str) -> tuple:
    """Summarise a model's results across both devices.

    Returns a ``(total, success_rate, status_class)`` tuple: the number of
    recorded tests over AMD and NVIDIA combined, the percentage of them that
    passed (``0`` when nothing was recorded), and a CSS class name derived
    from that percentage.
    """
    per_device = MODELS[model_name]

    # Per-outcome counts, AMD + NVIDIA.
    counts = {
        outcome: len(per_device['amd'][outcome]) + len(per_device['nvidia'][outcome])
        for outcome in ('passed', 'failed', 'skipped', 'error')
    }
    total = counts['passed'] + counts['failed'] + counts['skipped'] + counts['error']

    if total > 0:
        success_rate = counts['passed'] / total * 100
    else:
        success_rate = 0

    # Highest threshold first; anything under 50% falls through to "low".
    status_class = "success-low"
    for threshold, css_class in ((80, "success-high"), (50, "success-medium")):
        if success_rate >= threshold:
            status_class = css_class
            break

    return total, success_rate, status_class
# Custom CSS for the dark theme, passed to gr.Blocks(css=...) where the
# interface is built below.
# The .sidebar, .model-button, .main-content, .plot-container and
# .failed-tests selectors correspond to the elem_classes given to components
# in that layout; .success-high/.success-medium/.success-low are the class
# names returned by get_model_stats_summary().
# NOTE(review): .model-stats, .stats-badge, .success-indicator and
# .scroll-reset are not attached to any component in the layout visible in
# this file - confirm whether they are still needed.
dark_theme_css = """
/* Global dark theme */
.gradio-container {
background-color: #000000 !important;
color: white !important;
}
/* Remove borders from all components */
.gr-box, .gr-form, .gr-panel {
border: none !important;
background-color: #000000 !important;
}
/* Sidebar styling */
.sidebar {
background: linear-gradient(145deg, #111111, #1a1a1a) !important;
border: none !important;
padding: 25px !important;
box-shadow: inset 2px 2px 5px rgba(0, 0, 0, 0.3) !important;
margin: 0 !important;
height: 100vh !important;
position: fixed !important;
left: 0 !important;
top: 0 !important;
width: 300px !important;
}
/* Enhanced model button styling */
.model-button {
background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
color: white !important;
border: 2px solid transparent !important;
margin: 2px 0 !important;
border-radius: 5px !important;
padding: 8px 12px !important;
transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1) !important;
position: relative !important;
overflow: hidden !important;
box-shadow:
0 4px 15px rgba(0, 0, 0, 0.2),
inset 0 1px 0 rgba(255, 255, 255, 0.1) !important;
font-weight: 600 !important;
font-size: 16px !important;
text-transform: uppercase !important;
letter-spacing: 0.5px !important;
font-family: monospace !important;
}
.model-button:hover {
background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
color: #74b9ff !important;
}
.model-button:active {
background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
color: #5a9bd4 !important;
}
/* Model stats badge */
.model-stats {
display: flex !important;
justify-content: space-between !important;
align-items: center !important;
margin-top: 8px !important;
font-size: 12px !important;
opacity: 0.8 !important;
}
.stats-badge {
background: rgba(116, 185, 255, 0.2) !important;
padding: 4px 8px !important;
border-radius: 10px !important;
font-weight: 500 !important;
font-size: 11px !important;
color: #74b9ff !important;
}
.success-indicator {
width: 8px !important;
height: 8px !important;
border-radius: 50% !important;
display: inline-block !important;
margin-right: 6px !important;
}
.success-high { background-color: #4CAF50 !important; }
.success-medium { background-color: #FF9800 !important; }
.success-low { background-color: #F44336 !important; }
/* Regular button styling for non-model buttons */
.gr-button:not(.model-button) {
background-color: #222222 !important;
color: white !important;
border: 1px solid #444444 !important;
margin: 5px 0 !important;
border-radius: 8px !important;
transition: all 0.3s ease !important;
}
.gr-button:not(.model-button):hover {
background-color: #333333 !important;
border-color: #666666 !important;
}
/* Plot container with smooth transitions */
.plot-container {
background-color: #000000 !important;
border: none !important;
transition: opacity 0.6s ease-in-out !important;
}
/* Gradio plot component styling */
.gr-plot {
background-color: #000000 !important;
transition: opacity 0.6s ease-in-out !important;
}
.gr-plot .gradio-plot {
background-color: #000000 !important;
transition: opacity 0.6s ease-in-out !important;
}
.gr-plot img {
transition: opacity 0.6s ease-in-out !important;
}
/* Target the plot wrapper */
div[data-testid="plot"] {
background-color: #000000 !important;
}
/* Target all possible plot containers */
.plot-container img,
.gr-plot img,
.gradio-plot img {
background-color: #000000 !important;
}
/* Ensure plot area background */
.gr-plot > div,
.plot-container > div {
background-color: #000000 !important;
}
/* Prevent white flash during plot updates */
.plot-container::before {
content: "";
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: #000000;
z-index: -1;
}
/* Force all plot elements to have black background */
.plot-container *,
.gr-plot *,
div[data-testid="plot"] * {
background-color: #000000 !important;
}
/* Override any white backgrounds in matplotlib */
.plot-container canvas,
.gr-plot canvas {
background-color: #000000 !important;
}
/* Text elements */
h1, h2, h3, p, .markdown {
color: white !important;
}
/* Sidebar header enhancement */
.sidebar h1 {
background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
-webkit-background-clip: text !important;
-webkit-text-fill-color: transparent !important;
background-clip: text !important;
text-align: center !important;
margin-bottom: 15px !important;
font-size: 28px !important;
font-weight: 700 !important;
font-family: monospace !important;
}
/* Sidebar description text */
.sidebar p {
text-align: center !important;
margin-bottom: 20px !important;
line-height: 1.5 !important;
font-size: 14px !important;
font-family: monospace !important;
}
.sidebar strong {
color: #74b9ff !important;
font-weight: 600 !important;
font-family: monospace !important;
}
.sidebar em {
color: #a29bfe !important;
font-style: normal !important;
opacity: 0.9 !important;
font-family: monospace !important;
}
/* Remove all borders globally */
* {
border-color: transparent !important;
}
/* Main content area */
.main-content {
background-color: #000000 !important;
padding: 20px !important;
margin-left: 300px !important;
}
/* Failed tests display - seamless appearance with fixed height */
.failed-tests textarea {
background-color: #000000 !important;
color: #FFFFFF !important;
font-family: monospace !important;
font-size: 14px !important;
border: none !important;
padding: 10px !important;
outline: none !important;
line-height: 1.4 !important;
height: 200px !important;
max-height: 200px !important;
min-height: 200px !important;
overflow-y: auto !important;
resize: none !important;
scrollbar-width: thin !important;
scrollbar-color: #333333 #000000 !important;
scroll-behavior: auto;
transition: opacity 0.5s ease-in-out !important;
}
/* WebKit scrollbar styling for failed tests */
.failed-tests textarea::-webkit-scrollbar {
width: 8px !important;
}
.failed-tests textarea::-webkit-scrollbar-track {
background: #000000 !important;
}
.failed-tests textarea::-webkit-scrollbar-thumb {
background-color: #333333 !important;
border-radius: 4px !important;
}
.failed-tests textarea::-webkit-scrollbar-thumb:hover {
background-color: #555555 !important;
}
/* Prevent white flash in text boxes during updates */
.failed-tests::before {
content: "";
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: #000000;
z-index: -1;
}
.failed-tests {
background-color: #000000 !important;
height: 220px !important;
max-height: 220px !important;
position: relative;
transition: opacity 0.5s ease-in-out !important;
}
.failed-tests .gr-textbox {
background-color: #000000 !important;
border: none !important;
height: 200px !important;
max-height: 200px !important;
transition: opacity 0.5s ease-in-out !important;
}
/* Force all textbox elements to have black background */
.failed-tests *,
.failed-tests .gr-textbox *,
.failed-tests textarea * {
background-color: #000000 !important;
}
/* JavaScript to reset scroll position */
.scroll-reset {
animation: resetScroll 0.1s ease;
}
@keyframes resetScroll {
0% { scroll-behavior: auto; }
100% { scroll-behavior: auto; }
}
"""
# Build the Gradio interface: a sidebar with one button per model and a main
# area holding the chart plus one failed-tests box per device.
with gr.Blocks(title="Model Test Results Dashboard", css=dark_theme_css) as demo:
    with gr.Row():
        # Sidebar for model selection
        with gr.Column(scale=1, elem_classes=["sidebar"]):
            gr.Markdown("# 🤖 AI Models")
            gr.Markdown("**Select a model to analyze test results**\n\n*Interactive dashboard with detailed metrics*")

            # One button per model, in MODELS order; kept in a list so the
            # click handlers can be attached once the outputs exist.
            model_buttons = []
            for model_name in MODELS:
                btn = gr.Button(
                    model_name.lower(),
                    variant="secondary",
                    size="lg",
                    elem_classes=["model-button"]
                )
                model_buttons.append(btn)

        # Main content area
        with gr.Column(scale=4, elem_classes=["main-content"]):
            gr.Markdown("# 📈 Test Results Dashboard")

            # Chart output
            plot_output = gr.Plot(
                label="",
                format="png",
                elem_classes=["plot-container"]
            )

            # Two read-only failed-tests boxes side by side.
            # NOTE(review): the initial values below are hard-coded placeholder
            # text, not derived from MODELS; they are overwritten by the
            # demo.load handler further down.
            with gr.Row():
                with gr.Column(scale=1):
                    amd_failed_tests_output = gr.Textbox(
                        value="Failures on AMD (exclusive):\n─────────────────────────────\nnetwork_timeout\n\nFailures on AMD (common):\n────────────────────────\ndistributed",
                        lines=8,
                        max_lines=8,
                        interactive=False,
                        container=False,
                        elem_classes=["failed-tests"]
                    )
                with gr.Column(scale=1):
                    nvidia_failed_tests_output = gr.Textbox(
                        value="Failures on NVIDIA (exclusive):\n─────────────────────────────────\nmulti_gpu\n\nFailures on NVIDIA (common):\n────────────────────────────\ndistributed",
                        lines=8,
                        max_lines=8,
                        interactive=False,
                        container=False,
                        elem_classes=["failed-tests"]
                    )

    # Wire each button to redraw the chart and refresh both text boxes, then
    # run a client-side snippet that scrolls the failed-tests boxes to the top.
    for model_name, button in zip(MODELS, model_buttons):
        button.click(
            # Bind the name as a default argument: a plain closure would see
            # only the loop variable's final value.
            fn=lambda name=model_name: plot_model_stats(name),
            outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
        ).then(
            fn=None,
            js="() => { setTimeout(() => { document.querySelectorAll('textarea').forEach(t => { if (t.closest('.failed-tests')) { t.scrollTop = 0; setTimeout(() => { t.style.scrollBehavior = 'smooth'; t.scrollTo({ top: 0, behavior: 'smooth' }); t.style.scrollBehavior = 'auto'; }, 50); } }); }, 300); }"
        )

    # On page load, show the first model in MODELS.
    demo.load(
        fn=lambda: plot_model_stats(next(iter(MODELS))),
        outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
    )

if __name__ == "__main__":
    demo.launch()