drbh's picture
drbh HF Staff
Upload folder using huggingface_hub
bf16265 verified
raw
history blame
5.38 kB
# /// script
# dependencies = [
# "matplotlib",
# "numpy",
# ]
# ///
import matplotlib.pyplot as plt
import numpy as np
import os
# Get the input directory from the UVNOTE_INPUT_SETUP env var; fall back to
# the current directory when the variable is unset.
setup_path = os.getenv("UVNOTE_INPUT_SETUP", ".")
print(f"Reading benchmark data from: {setup_path}")

num_runs = 5  # NOTE(review): not referenced anywhere in the visible script
max_tokens = 64  # numerator of the per-run tokens/sec computation

times_file = os.path.join(setup_path, "benchmark_times.txt")
memory_file = os.path.join(setup_path, "benchmark_memory.txt")

# One timing (seconds) per line. Blank lines, such as a trailing newline at
# the end of the file, are skipped instead of crashing float().
with open(times_file, "r") as f:
    times = [float(line) for line in f if line.strip()]

# Latency statistics derived from the loaded timings. They feed the dashed
# AVG line on the latency chart and the printed summary; they stay at 0.0
# only when no timings were read.
if times:
    avg_time = sum(times) / len(times)
    min_time = min(times)
    max_time = max(times)
else:
    avg_time = min_time = max_time = 0.0

# NOTE(review): memory statistics are never loaded. `memory_file` is defined
# but not read, so the memory chart and summary always show zeros. The file's
# format is not visible from this script; confirm it with whatever writes
# benchmark_memory.txt before adding a parser.
final_mem = {"allocated_gb": 0.0, "peak_gb": 0.0, "reserved_gb": 0.0}

# The average throughput is stored as a single float in its own file.
with open(os.path.join(setup_path, "benchmark_avg_tokens_per_sec.txt"), "r") as f:
    avg_tokens_per_sec = float(f.read().strip())
# Minimal brutalist palette (dark theme): grayscale plus a single accent.
ACCENT = '#5ec8f8'  # calm cyan-blue accent
FG = '#e6e6e6'      # light gray for text and lines
MUTED = '#9aa0a6'   # muted gray for secondary elements
GRID = '#333333'    # grid lines

# Global matplotlib styling: start from the built-in dark theme, then apply
# the overrides in one batch. Backgrounds are set to 'none' and savefig is
# told to write transparent output; fonts are bold monospace and every
# stroke width is raised for a heavy, high-contrast look.
plt.style.use('dark_background')
plt.rcParams.update({
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    'font.family': 'monospace',
    'font.weight': 'bold',
    'axes.linewidth': 3,
    'grid.linewidth': 2,
    'lines.linewidth': 3,
    'patch.linewidth': 2,
})
# Prepare data
# Run indices are 1-based so the x-axis starts at run 1.
runs = [run_idx for run_idx, _ in enumerate(times, start=1)]
# Per-run throughput: max_tokens divided by that run's elapsed time
# (assumes every run generated exactly max_tokens tokens -- TODO confirm).
tokens_per_sec_all = [max_tokens / elapsed for elapsed in times]
# Chart 1: Throughput Performance
# Line plot of tokens/sec per run, shaded down to zero, with a dashed
# horizontal line at the average throughput. Saved as throughput.png.
fig1, ax1 = plt.subplots(1, 1, figsize=(12, 6))
# Zero alpha on both patches keeps the figure and axes backgrounds transparent.
for background in (fig1.patch, ax1.patch):
    background.set_alpha(0)

marker_style = dict(
    marker='o',
    markersize=12,
    markerfacecolor=ACCENT,
    markeredgecolor=FG,
    markeredgewidth=3,
)
ax1.plot(runs, tokens_per_sec_all, color=ACCENT, linewidth=5, label='tok/s', **marker_style)
ax1.fill_between(runs, 0, tokens_per_sec_all, alpha=0.2, color=ACCENT)
ax1.axhline(
    y=avg_tokens_per_sec,
    color=FG,
    linestyle='--',
    linewidth=3,
    label=f'AVG: {avg_tokens_per_sec:.1f}',
)

axis_label_style = dict(color=FG, fontsize=14, fontweight='bold')
ax1.set_title('THROUGHPUT PERFORMANCE', color=FG, fontsize=18, pad=20, fontweight='bold')
ax1.set_xlabel('RUN NUMBER', **axis_label_style)
ax1.set_ylabel('TOKENS/SEC', **axis_label_style)
ax1.grid(True, color=GRID, alpha=0.5, linewidth=2)
ax1.tick_params(colors=FG, labelsize=12)

# Frameless legend; restyle its entries to match the foreground text.
legend1 = ax1.legend(frameon=False, loc='lower right')
for legend_entry in legend1.get_texts():
    legend_entry.set_color(FG)
    legend_entry.set_fontweight('bold')

plt.tight_layout()
plt.savefig('throughput.png', dpi=150, bbox_inches='tight', transparent=True)
plt.show()
# Chart 2: Generation Latency
# Bar chart of elapsed seconds per run, each bar labelled with its value,
# plus a dashed horizontal line at avg_time. Saved as latency.png.
fig2, ax2 = plt.subplots(1, 1, figsize=(12, 6))
fig2.patch.set_alpha(0)
ax2.patch.set_alpha(0)

# Alternate accent / muted fills so neighbouring bars are easy to tell apart.
bar_colors = [ACCENT if i % 2 == 0 else MUTED for i in range(len(times))]
bars = ax2.bar(runs, times, color=bar_colors, edgecolor=FG, linewidth=3, width=0.6)
ax2.axhline(y=avg_time, color=FG, linestyle='--', linewidth=3,
            label=f'AVG: {avg_time:.2f}s')

# Value label just above each bar. The loop variable is named `elapsed`
# rather than `time` so it does not shadow the stdlib module name.
for run, elapsed in zip(runs, times):
    ax2.text(run, elapsed + 0.02, f'{elapsed:.2f}s', ha='center', va='bottom',
             color=FG, fontweight='bold', fontsize=11)

ax2.set_title('GENERATION LATENCY', color=FG, fontsize=18, pad=20, fontweight='bold')
ax2.set_xlabel('RUN NUMBER', color=FG, fontsize=14, fontweight='bold')
ax2.set_ylabel('TIME (SECONDS)', color=FG, fontsize=14, fontweight='bold')
ax2.grid(True, axis='y', color=GRID, alpha=0.5, linewidth=2)
ax2.tick_params(colors=FG, labelsize=12)

# Leave 15% headroom above the tallest bar for its label. max() raises on an
# empty sequence, so keep matplotlib's default limits when there are no runs.
if times:
    ax2.set_ylim(0, max(times) * 1.15)

legend2 = ax2.legend(frameon=False, loc='upper right')
for text in legend2.get_texts():
    text.set_color(FG)
    text.set_fontweight('bold')

plt.tight_layout()
plt.savefig('latency.png', dpi=150, bbox_inches='tight', transparent=True)
plt.show()
# Chart 3: Memory Usage
# Horizontal bar chart of the three values in final_mem (allocated, peak,
# reserved), each bar labelled with its value in GB. Saved as memory.png.
fig3, ax3 = plt.subplots(1, 1, figsize=(12, 6))
fig3.patch.set_alpha(0)
ax3.patch.set_alpha(0)

memory_labels = ['ALLOCATED', 'PEAK', 'RESERVED']
memory_values = [final_mem['allocated_gb'], final_mem['peak_gb'], final_mem['reserved_gb']]
colors_mem = [MUTED, ACCENT, FG]

# 30% headroom to the right of the longest bar leaves room for its label.
# When every value is zero the limits would be (0, 0), which matplotlib
# warns about and auto-expands, so fall back to a unit-wide axis instead.
x_upper = max(memory_values) * 1.3
if x_upper <= 0:
    x_upper = 1.0
# Pad labels relative to the axis span so they sit just past the bar end
# whatever the magnitude of the values.
label_pad = 0.02 * x_upper

bars = ax3.barh(memory_labels, memory_values, color=colors_mem, edgecolor=FG, linewidth=3, height=0.5)
# Categorical bars sit at y = 0, 1, 2, so the row index is the label's y.
for row, value in enumerate(memory_values):
    ax3.text(value + label_pad, row, f'{value:.1f} GB', va='center',
             color=FG, fontweight='bold', fontsize=13)

ax3.set_title('MEMORY USAGE', color=FG, fontsize=18, pad=20, fontweight='bold')
ax3.set_xlabel('GIGABYTES', color=FG, fontsize=14, fontweight='bold')
ax3.set_xlim(0, x_upper)
ax3.grid(True, axis='x', color=GRID, alpha=0.5, linewidth=2)
ax3.tick_params(colors=FG, labelsize=12)
# Re-apply the tick labels explicitly so they can be set bold.
ax3.set_yticks(range(len(memory_labels)))
ax3.set_yticklabels(memory_labels, fontweight='bold')

plt.tight_layout()
plt.savefig('memory.png', dpi=150, bbox_inches='tight', transparent=True)
plt.show()
# Console report: list the image files written by the chart sections, then
# print the headline numbers (average throughput, fastest/slowest run time,
# peak memory).
print("\n".join((
    "\n📊 Charts saved as:",
    " • throughput.png",
    " • latency.png",
    " • memory.png",
)))
print("\n".join((
    "\nBenchmark Summary:",
    f" avg tokens/sec: {avg_tokens_per_sec:.1f}",
    f" min time: {min_time:.3f}s",
    f" max time: {max_time:.3f}s",
    f" peak memory: {final_mem['peak_gb']:.2f}GB",
)))