Spaces:

transformers-community
/

transformers-ci-dashboard

Running

App Files Files Community

transformers-ci-dashboard / app.py

ror HF Staff

Scroll is nice but animation to cover bottom start

c8a335c 5 months ago

raw

history blame

18.7 kB

	import matplotlib.pyplot as plt
	import numpy as np

	import gradio as gr


	# Sample test results with test names.
	#
	# Several name groups recur across models and devices, so they are spelled out
	# once here and unpacked into a *fresh* list for every entry below (no list
	# object is shared between entries).
	_CORE_TESTS = (
	    "auth_login", "data_validation", "api_response", "file_upload",
	    "cache_hit", "user_permissions", "db_query", "session_mgmt",
	    "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
	    "thread_safety", "backup_restore",
	)
	_EXTENDED_TESTS = (
	    "config_load", "log_rotation", "health_check", "metrics", "alerts",
	    "monitoring", "security_scan", "password_hash", "jwt_token",
	    "oauth_flow", "csrf_protect", "xss_filter", "sql_injection",
	    "rate_limiter", "load_balance", "circuit_break", "retry_logic",
	    "timeout_handle", "graceful_shutdown", "hot_reload", "config_watch",
	    "env_vars", "secrets_mgmt", "tls_cert", "encryption", "compression",
	    "serialization", "deserialization", "validation",
	)
	_GPU_TESTS = ("gpu_accel", "cuda_ops", "ml_inference", "tensor_ops")
	_PERF_TESTS = (
	    "perf_test", "stress_test", "load_test", "endurance", "benchmark",
	    "profiling", "memory_leak", "cpu_usage", "disk_io", "network_bw",
	)

	# Layout: model name -> device ("amd" / "nvidia") -> category -> test names.
	# The category order (passed, failed, skipped, error) is the order in which
	# the categories are iterated when the charts are drawn.
	MODELS = {
	    "llama": {
	        "amd": {
	            "passed": [*_CORE_TESTS],
	            "failed": ["network_timeout"],
	            "skipped": [*_GPU_TESTS, "distributed", "multi_gpu"],
	            "error": [],
	        },
	        "nvidia": {
	            "passed": [*_CORE_TESTS, *_GPU_TESTS],
	            "failed": ["network_timeout", "distributed"],
	            "skipped": ["multi_gpu"],
	            "error": [],
	        },
	    },
	    "gemma3": {
	        "amd": {
	            "passed": [*_CORE_TESTS, *_EXTENDED_TESTS],
	            "failed": [
	                *_GPU_TESTS, "distributed", "multi_gpu", "opencl_init",
	                "driver_conflict", "memory_bandwidth", "compute_units",
	                "rocm_version", "hip_compile", "kernel_launch",
	                "buffer_transfer", "atomic_ops", "wavefront_sync",
	            ],
	            "skipped": [*_PERF_TESTS, "latency", "throughput"],
	            "error": [],
	        },
	        "nvidia": {
	            "passed": [*_CORE_TESTS, *_EXTENDED_TESTS, *_GPU_TESTS],
	            "failed": [
	                "distributed", "multi_gpu", "cuda_version", "nvcc_compile",
	                "stream_sync", "device_reset", "peer_access",
	                "unified_memory", "texture_bind", "surface_write",
	                "constant_mem", "shared_mem",
	            ],
	            "skipped": [*_PERF_TESTS],
	            "error": [],
	        },
	    },
	    "csm": {
	        "amd": {
	            "passed": [],
	            "failed": [],
	            "skipped": [],
	            "error": ["system_crash"],
	        },
	        "nvidia": {
	            "passed": [],
	            "failed": [],
	            "skipped": [],
	            "error": ["system_crash"],
	        },
	    },
	}

	def plot_model_stats(model_name: str) -> tuple[plt.Figure, str, str]:
	    """Draw side-by-side AMD / NVIDIA pie charts of one model's test results.

	    Args:
	        model_name: Key into the module-level ``MODELS`` mapping.

	    Returns:
	        A ``(figure, amd_failed_info, nvidia_failed_info)`` tuple. Each info
	        string is a header, an underline of the same length, and then one
	        failed test name per line (or ``"None"`` when nothing failed). When
	        neither device has any result in any category, the figure is an
	        empty-state placeholder and both strings are empty.

	    Raises:
	        KeyError: If ``model_name`` is not a key of ``MODELS``.
	    """
	    model_stats = MODELS[model_name]

	    # Softer color palette - less pastel, more vibrant
	    colors = {
	        'passed': '#4CAF50',   # Medium green
	        'failed': '#E53E3E',   # More red
	        'skipped': '#FFD54F',  # Medium yellow
	        'error': '#8B0000',    # Dark red
	    }

	    # Convert test lists to counts, leaving out empty categories so that the
	    # pie charts do not get zero-width wedges.
	    amd_filtered = {k: len(v) for k, v in model_stats['amd'].items() if len(v) > 0}
	    nvidia_filtered = {k: len(v) for k, v in model_stats['nvidia'].items() if len(v) > 0}

	    # NOTE(review): figures created through pyplot stay registered with pyplot
	    # until they are closed, and nothing in this function closes them - confirm
	    # that the caller releases them, otherwise each call keeps one more figure
	    # alive.
	    if not amd_filtered and not nvidia_filtered:
	        # Handle case where all values are 0 - minimal empty state
	        fig, ax = plt.subplots(figsize=(10, 8), facecolor='#000000')
	        ax.set_facecolor('#000000')
	        ax.text(0.5, 0.5, 'No test results available',
	                horizontalalignment='center', verticalalignment='center',
	                transform=ax.transAxes, fontsize=16, color='#888888',
	                fontfamily='monospace', weight='normal')
	        ax.set_xlim(0, 1)
	        ax.set_ylim(0, 1)
	        ax.axis('off')
	        return fig, "", ""

	    # Create figure with two subplots side by side with padding
	    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9), facecolor='#000000')
	    ax1.set_facecolor('#000000')
	    ax2.set_facecolor('#000000')

	    # Device label closer to chart and bigger; shared by both chart states.
	    title_style = dict(fontsize=28, weight='bold', pad=2, color='#FFFFFF',
	                       fontfamily='monospace')

	    def create_pie_chart(ax, device_label, filtered_stats):
	        """Render one device's pie chart, or a placeholder when it has no data."""
	        if not filtered_stats:
	            ax.text(0.5, 0.5, 'No test results',
	                    horizontalalignment='center', verticalalignment='center',
	                    transform=ax.transAxes, fontsize=14, color='#888888',
	                    fontfamily='monospace', weight='normal')
	            ax.set_title(device_label, **title_style)
	            ax.axis('off')
	            return

	        categories = list(filtered_stats)
	        # Hand matplotlib a concrete list rather than a dict view.
	        counts = list(filtered_stats.values())
	        total = sum(counts)

	        def count_label(pct):
	            # autopct receives the wedge's share as a float percentage, so
	            # pct / 100 * total can land just below the true integer count.
	            # Round to the nearest integer instead of truncating, otherwise
	            # the label can come out one too small.
	            return f'{round(float(pct) * total / 100)}'

	        # Create minimal pie chart - full pie, no donut effect
	        _, texts, autotexts = ax.pie(
	            counts,
	            labels=[category.lower() for category in categories],  # Lowercase for minimal look
	            colors=[colors[category] for category in categories],
	            autopct=count_label,
	            startangle=90,
	            explode=None,  # No separation
	            shadow=False,
	            wedgeprops=dict(edgecolor='#1a1a1a', linewidth=0.5),  # Minimal borders
	            textprops={'fontsize': 12, 'weight': 'normal', 'color': '#CCCCCC', 'fontfamily': 'monospace'}
	        )

	        # Count labels inside the wedges: black and bold for contrast
	        for autotext in autotexts:
	            autotext.set_color('#000000')
	            autotext.set_weight('bold')
	            autotext.set_fontsize(14)
	            autotext.set_fontfamily('monospace')

	        # Minimal category labels
	        for text in texts:
	            text.set_color('#AAAAAA')
	            text.set_weight('normal')
	            text.set_fontsize(13)
	            text.set_fontfamily('monospace')

	        ax.set_title(device_label, **title_style)

	    # Create both pie charts with device labels
	    create_pie_chart(ax1, "amd", amd_filtered)
	    create_pie_chart(ax2, "nvidia", nvidia_filtered)

	    # Add subtle separation line between charts - stops at device labels level
	    line_x = 0.5
	    fig.add_artist(plt.Line2D([line_x, line_x], [0.0, 0.85],
	                              color='#333333', linewidth=1, alpha=0.5,
	                              transform=fig.transFigure))

	    # Add central shared title for model name
	    fig.suptitle(model_name.lower(),
	                 fontsize=32, weight='normal', color='#CCCCCC',
	                 fontfamily='monospace', y=0.95)

	    # Clean layout with padding and space for central title
	    plt.tight_layout()
	    plt.subplots_adjust(top=0.85, wspace=0.4)  # wspace adds padding between charts

	    def failed_report(header, failed_tests):
	        """Return the header, a same-length underline, then the failed names."""
	        underline = "─" * len(header)
	        body = "\n".join(failed_tests) if failed_tests else "None"
	        return f"{header}\n{underline}\n{body}"

	    # Generate separate failed tests info for AMD and NVIDIA
	    amd_failed_info = failed_report("Fails on AMD:", model_stats['amd']['failed'])
	    nvidia_failed_info = failed_report("Fails on NVIDIA:", model_stats['nvidia']['failed'])

	    return fig, amd_failed_info, nvidia_failed_info

	def get_model_stats_summary(model_name: str) -> tuple:
	    """Summarise one model's results across both devices.

	    The AMD and NVIDIA lists stored under ``MODELS[model_name]`` are counted
	    together.

	    Returns:
	        ``(total, success_rate, status_class)`` where ``total`` is the number
	        of entries over all four categories, ``success_rate`` is the passed
	        share in percent (``0`` when there are no entries), and
	        ``status_class`` is ``"success-high"`` from 80 upwards,
	        ``"success-medium"`` from 50 upwards and ``"success-low"`` otherwise.
	    """
	    per_device = MODELS[model_name]
	    amd_results = per_device['amd']
	    nvidia_results = per_device['nvidia']

	    def combined(category):
	        # Number of tests in this category, AMD and NVIDIA together.
	        return len(amd_results[category]) + len(nvidia_results[category])

	    passed_count = combined('passed')
	    total = passed_count + combined('failed') + combined('skipped') + combined('error')

	    if total > 0:
	        success_rate = passed_count / total * 100
	    else:
	        success_rate = 0

	    # Map the rate onto the status indicator class.
	    if success_rate >= 80:
	        return total, success_rate, "success-high"
	    if success_rate >= 50:
	        return total, success_rate, "success-medium"
	    return total, success_rate, "success-low"

	# Custom CSS for the dark theme; passed to gr.Blocks(css=...) further down.
	#
	# The selectors .sidebar, .model-button, .main-content, .plot-container and
	# .failed-tests match the elem_classes given to the components in the Blocks
	# layout. The .sidebar rule pins the sidebar to the left edge at a width of
	# 300px, and .main-content carries a matching 300px left margin.
	#
	# NOTE(review): .model-stats, .stats-badge, .success-indicator, .success-*
	# and .scroll-reset are not attached to any component in the visible part of
	# this file - confirm whether they are still needed.
	dark_theme_css = """
	/* Global dark theme */
	.gradio-container {
	background-color: #000000 !important;
	color: white !important;
	}

	/* Remove borders from all components */
	.gr-box, .gr-form, .gr-panel {
	border: none !important;
	background-color: #000000 !important;
	}

	/* Sidebar styling */
	.sidebar {
	background: linear-gradient(145deg, #111111, #1a1a1a) !important;
	border: none !important;
	padding: 25px !important;
	box-shadow: inset 2px 2px 5px rgba(0, 0, 0, 0.3) !important;
	margin: 0 !important;
	height: 100vh !important;
	position: fixed !important;
	left: 0 !important;
	top: 0 !important;
	width: 300px !important;
	}

	/* Enhanced model button styling */
	.model-button {
	background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
	color: white !important;
	border: 2px solid transparent !important;
	margin: 2px 0 !important;
	border-radius: 5px !important;
	padding: 8px 12px !important;
	transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1) !important;
	position: relative !important;
	overflow: hidden !important;
	box-shadow:
	0 4px 15px rgba(0, 0, 0, 0.2),
	inset 0 1px 0 rgba(255, 255, 255, 0.1) !important;
	font-weight: 600 !important;
	font-size: 16px !important;
	text-transform: uppercase !important;
	letter-spacing: 0.5px !important;
	font-family: monospace !important;
	}

	.model-button:hover {
	background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
	color: #74b9ff !important;
	}

	.model-button:active {
	background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
	color: #5a9bd4 !important;
	}

	/* Model stats badge */
	.model-stats {
	display: flex !important;
	justify-content: space-between !important;
	align-items: center !important;
	margin-top: 8px !important;
	font-size: 12px !important;
	opacity: 0.8 !important;
	}

	.stats-badge {
	background: rgba(116, 185, 255, 0.2) !important;
	padding: 4px 8px !important;
	border-radius: 10px !important;
	font-weight: 500 !important;
	font-size: 11px !important;
	color: #74b9ff !important;
	}

	.success-indicator {
	width: 8px !important;
	height: 8px !important;
	border-radius: 50% !important;
	display: inline-block !important;
	margin-right: 6px !important;
	}

	.success-high { background-color: #4CAF50 !important; }
	.success-medium { background-color: #FF9800 !important; }
	.success-low { background-color: #F44336 !important; }

	/* Regular button styling for non-model buttons */
	.gr-button:not(.model-button) {
	background-color: #222222 !important;
	color: white !important;
	border: 1px solid #444444 !important;
	margin: 5px 0 !important;
	border-radius: 8px !important;
	transition: all 0.3s ease !important;
	}

	.gr-button:not(.model-button):hover {
	background-color: #333333 !important;
	border-color: #666666 !important;
	}

	/* Plot container */
	.plot-container {
	background-color: #000000 !important;
	border: none !important;
	}

	/* Text elements */
	h1, h2, h3, p, .markdown {
	color: white !important;
	}

	/* Sidebar header enhancement */
	.sidebar h1 {
	background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
	-webkit-background-clip: text !important;
	-webkit-text-fill-color: transparent !important;
	background-clip: text !important;
	text-align: center !important;
	margin-bottom: 15px !important;
	font-size: 28px !important;
	font-weight: 700 !important;
	font-family: monospace !important;
	}

	/* Sidebar description text */
	.sidebar p {
	text-align: center !important;
	margin-bottom: 20px !important;
	line-height: 1.5 !important;
	font-size: 14px !important;
	font-family: monospace !important;
	}

	.sidebar strong {
	color: #74b9ff !important;
	font-weight: 600 !important;
	font-family: monospace !important;
	}

	.sidebar em {
	color: #a29bfe !important;
	font-style: normal !important;
	opacity: 0.9 !important;
	font-family: monospace !important;
	}

	/* Remove all borders globally */
	* {
	border-color: transparent !important;
	}

	/* Main content area */
	.main-content {
	background-color: #000000 !important;
	padding: 20px !important;
	margin-left: 300px !important;
	}

	/* Failed tests display - seamless appearance with fixed height */
	.failed-tests textarea {
	background-color: #000000 !important;
	color: #FFFFFF !important;
	font-family: monospace !important;
	font-size: 14px !important;
	border: none !important;
	padding: 10px !important;
	outline: none !important;
	line-height: 1.4 !important;
	height: 200px !important;
	max-height: 200px !important;
	min-height: 200px !important;
	overflow-y: auto !important;
	resize: none !important;
	scrollbar-width: thin !important;
	scrollbar-color: #333333 #000000 !important;
	}

	/* WebKit scrollbar styling for failed tests */
	.failed-tests textarea::-webkit-scrollbar {
	width: 8px !important;
	}

	.failed-tests textarea::-webkit-scrollbar-track {
	background: #000000 !important;
	}

	.failed-tests textarea::-webkit-scrollbar-thumb {
	background-color: #333333 !important;
	border-radius: 4px !important;
	}

	.failed-tests textarea::-webkit-scrollbar-thumb:hover {
	background-color: #555555 !important;
	}

	.failed-tests {
	background-color: #000000 !important;
	height: 220px !important;
	max-height: 220px !important;
	}

	.failed-tests .gr-textbox {
	background-color: #000000 !important;
	border: none !important;
	height: 200px !important;
	max-height: 200px !important;
	}

	/* JavaScript to reset scroll position */
	.scroll-reset {
	animation: resetScroll 0.1s ease;
	}

	@keyframes resetScroll {
	0% { scroll-behavior: auto; }
	100% { scroll-behavior: auto; }
	}


	"""

	# Create the Gradio interface with sidebar and dark theme.
	# Layout: one Row holding a narrow sidebar column (one button per model) and a
	# wide main column (plot on top, AMD / NVIDIA failed-test boxes underneath).
	with gr.Blocks(title="Model Test Results Dashboard", css=dark_theme_css) as demo:

	    with gr.Row():
	        # Sidebar for model selection
	        with gr.Column(scale=1, elem_classes=["sidebar"]):
	            gr.Markdown("# 🤖 AI Models")
	            gr.Markdown("Select a model to analyze test results\n\nInteractive dashboard with detailed metrics")

	            # Model selection buttons in sidebar, one per MODELS key and in
	            # the same order, so they can be zipped with MODELS below.
	            model_buttons = []
	            for model_name in MODELS:
	                btn = gr.Button(
	                    model_name.lower(),
	                    variant="secondary",
	                    size="lg",
	                    elem_classes=["model-button"]
	                )
	                model_buttons.append(btn)

	        # Main content area
	        with gr.Column(scale=4, elem_classes=["main-content"]):
	            gr.Markdown("# 📈 Test Results Dashboard")

	            # Create the plot output
	            plot_output = gr.Plot(
	                label="",
	                format="png",
	                elem_classes=["plot-container"]
	            )

	            # Create two separate failed tests displays in a row layout.
	            # The initial values are placeholder text only: both boxes are
	            # outputs of the demo.load handler registered below.
	            with gr.Row():
	                with gr.Column(scale=1):
	                    amd_failed_tests_output = gr.Textbox(
	                        value="Fails on AMD:\n────────────\nnetwork_timeout\ngpu_initialization\nmemory_overflow",
	                        lines=8,
	                        max_lines=8,
	                        interactive=False,
	                        container=False,
	                        elem_classes=["failed-tests"]
	                    )
	                with gr.Column(scale=1):
	                    nvidia_failed_tests_output = gr.Textbox(
	                        value="Fails on NVIDIA:\n────────────────\ndistributed\nmulti_gpu\ndriver_conflict",
	                        lines=8,
	                        max_lines=8,
	                        interactive=False,
	                        container=False,
	                        elem_classes=["failed-tests"]
	                    )

	    # Set up click handlers for each button
	    for model_name, button in zip(MODELS, model_buttons):
	        button.click(
	            # Bind the name as a default argument: a plain closure would see
	            # the loop variable's final value for every button.
	            fn=lambda name=model_name: plot_model_stats(name),
	            outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
	        ).then(
	            # Client-side only: after a short delay, scroll every textarea
	            # inside a .failed-tests element back to the top.
	            fn=None,
	            js="() => { setTimeout(() => { document.querySelectorAll('textarea').forEach(t => { if (t.closest('.failed-tests')) { t.scrollTo({ top: 0, behavior: 'smooth' }); } }); }, 100); }"
	        )

	    # Initialize with the first model
	    demo.load(
	        fn=lambda: plot_model_stats(next(iter(MODELS))),
	        outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
	    )

	if __name__ == "__main__":
	    demo.launch()