1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
- 96
- 97
- 98
- 99
-100
-101
-102
-103
-104
-105
-106
-107
-108
-109
-110
-111
-112
-113
-114
-115
-116
-117
-118
-119
-120
-121
-122
-123
-124
-125
-126
-127
-128
-129
-130
-131
-132
-133 | import matplotlib.pyplot as plt
-import numpy as np
-import os
-
# Resolve the directory that holds the benchmark output files from the
# UVNOTE_INPUT_SETUP environment variable (defaults to the current directory).
setup_path = os.getenv("UVNOTE_INPUT_SETUP", ".")
print(f"Reading benchmark data from: {setup_path}")

num_runs = 5
max_tokens = 64  # numerator used later to turn each run time into tokens/sec

times_file = os.path.join(setup_path, "benchmark_times.txt")
memory_file = os.path.join(setup_path, "benchmark_memory.txt")

# One run time per line. Blank lines (e.g. a trailing newline at the end of
# the file) are skipped instead of crashing float().
with open(times_file, "r") as f:
    times = [float(line) for line in f if line.strip()]

# Derive the latency statistics from the measured runs. They used to be
# hard-coded to 0.0, so the latency chart's AVG line and the printed min/max
# never reflected the data. Fall back to 0.0 when the file has no samples.
avg_time = sum(times) / len(times) if times else 0.0
min_time = min(times, default=0.0)
max_time = max(times, default=0.0)

# NOTE(review): memory_file is never read in the visible script, so the memory
# chart and the summary only ever show these placeholder zeros. The on-disk
# format of benchmark_memory.txt is not visible here - confirm it before
# adding a parser.
final_mem = {"allocated_gb": 0.0, "peak_gb": 0.0, "reserved_gb": 0.0}

# The average throughput is read as a single float from its own file.
with open(os.path.join(setup_path, "benchmark_avg_tokens_per_sec.txt"), "r") as f:
    avg_tokens_per_sec = float(f.read().strip())
-
-
# Dark-theme palette, intentionally tiny: three grays and a single accent.
ACCENT = '#5ec8f8'  # cyan-blue highlight
FG = '#e6e6e6'      # light gray for text and strokes
MUTED = '#9aa0a6'   # dimmer gray for secondary elements
GRID = '#333333'    # grid line color

# Start from matplotlib's dark style, then override the defaults in one go:
# transparent backgrounds, bold monospace text and heavy strokes.
plt.style.use('dark_background')
plt.rcParams.update({
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    'font.family': 'monospace',
    'font.weight': 'bold',
    'axes.linewidth': 3,
    'grid.linewidth': 2,
    'lines.linewidth': 3,
    'patch.linewidth': 2,
})
-
# Derived series: 1-based run indices and the throughput of each run
# (max_tokens divided by that run's time).
runs = list(range(1, 1 + len(times)))
tokens_per_sec_all = [max_tokens / elapsed for elapsed in times]

# Chart 1: throughput per run, with the average drawn as a dashed line.
fig1, ax1 = plt.subplots(1, 1, figsize=(12, 6))
# Make both the figure and the axes backgrounds fully transparent.
fig1.patch.set_alpha(0)
ax1.patch.set_alpha(0)

ax1.plot(
    runs,
    tokens_per_sec_all,
    color=ACCENT,
    marker='o',
    markersize=12,
    markerfacecolor=ACCENT,
    markeredgecolor=FG,
    markeredgewidth=3,
    linewidth=5,
    label='tok/s',
)
ax1.fill_between(runs, 0, tokens_per_sec_all, alpha=0.2, color=ACCENT)
ax1.axhline(
    y=avg_tokens_per_sec,
    color=FG,
    linestyle='--',
    linewidth=3,
    label=f'AVG: {avg_tokens_per_sec:.1f}',
)

ax1.set_title('THROUGHPUT PERFORMANCE', color=FG, fontsize=18, pad=20, fontweight='bold')
ax1.set_xlabel('RUN NUMBER', color=FG, fontsize=14, fontweight='bold')
ax1.set_ylabel('TOKENS/SEC', color=FG, fontsize=14, fontweight='bold')
ax1.grid(True, color=GRID, alpha=0.5, linewidth=2)
ax1.tick_params(colors=FG, labelsize=12)

# Frameless legend; recolor its entries so they match the rest of the text.
legend1 = ax1.legend(frameon=False, loc='lower right')
for entry in legend1.get_texts():
    entry.set_color(FG)
    entry.set_fontweight('bold')

plt.tight_layout()
plt.savefig('throughput.png', dpi=150, bbox_inches='tight', transparent=True)
plt.show()
-
# Chart 2: one bar per run showing its time, plus a dashed average line.
fig2, ax2 = plt.subplots(1, 1, figsize=(12, 6))
# Make both the figure and the axes backgrounds fully transparent.
fig2.patch.set_alpha(0)
ax2.patch.set_alpha(0)

# Alternate accent / muted fill so neighbouring bars are easy to tell apart.
bar_colors = [(ACCENT, MUTED)[idx % 2] for idx, _ in enumerate(times)]
bars = ax2.bar(runs, times, color=bar_colors, edgecolor=FG, linewidth=3, width=0.6)
ax2.axhline(
    y=avg_time,
    color=FG,
    linestyle='--',
    linewidth=3,
    label=f'AVG: {avg_time:.2f}s',
)

# Print each bar's value just above its top edge.
for run, elapsed in zip(runs, times):
    ax2.text(
        run,
        elapsed + 0.02,
        f'{elapsed:.2f}s',
        ha='center',
        va='bottom',
        color=FG,
        fontweight='bold',
        fontsize=11,
    )

ax2.set_title('GENERATION LATENCY', color=FG, fontsize=18, pad=20, fontweight='bold')
ax2.set_xlabel('RUN NUMBER', color=FG, fontsize=14, fontweight='bold')
ax2.set_ylabel('TIME (SECONDS)', color=FG, fontsize=14, fontweight='bold')
ax2.grid(True, axis='y', color=GRID, alpha=0.5, linewidth=2)
ax2.tick_params(colors=FG, labelsize=12)
# Leave 15% headroom above the tallest bar for its value label.
ax2.set_ylim(0, max(times) * 1.15)

# Frameless legend; recolor its entries so they match the rest of the text.
legend2 = ax2.legend(frameon=False, loc='upper right')
for entry in legend2.get_texts():
    entry.set_color(FG)
    entry.set_fontweight('bold')

plt.tight_layout()
plt.savefig('latency.png', dpi=150, bbox_inches='tight', transparent=True)
plt.show()
-
# Chart 3: horizontal bars for the allocated / peak / reserved memory figures
# held in final_mem.
fig3, ax3 = plt.subplots(1, 1, figsize=(12, 6))
# Make both the figure and the axes backgrounds fully transparent.
fig3.patch.set_alpha(0)
ax3.patch.set_alpha(0)

memory_labels = ['ALLOCATED', 'PEAK', 'RESERVED']
memory_values = [final_mem['allocated_gb'], final_mem['peak_gb'], final_mem['reserved_gb']]
colors_mem = [MUTED, ACCENT, FG]

# Upper x-limit: 30% headroom past the longest bar for the value labels.
# When every value is zero the old expression produced set_xlim(0, 0), a
# degenerate axis, so fall back to a unit-wide axis in that case.
largest_value = max(memory_values)
x_max = largest_value * 1.3 if largest_value > 0 else 1.0
# Scale the label gap with the axis instead of a fixed 0.5 offset, which
# pushed labels outside the axes whenever the values were small.
label_pad = 0.02 * x_max

bars = ax3.barh(memory_labels, memory_values, color=colors_mem, edgecolor=FG, linewidth=3, height=0.5)
# Write each value just to the right of the end of its bar.
for row, value in enumerate(memory_values):
    ax3.text(value + label_pad, row, f'{value:.1f} GB', va='center',
             color=FG, fontweight='bold', fontsize=13)

ax3.set_title('MEMORY USAGE', color=FG, fontsize=18, pad=20, fontweight='bold')
ax3.set_xlabel('GIGABYTES', color=FG, fontsize=14, fontweight='bold')
ax3.set_xlim(0, x_max)
ax3.grid(True, axis='x', color=GRID, alpha=0.5, linewidth=2)
ax3.tick_params(colors=FG, labelsize=12)
ax3.set_yticks(range(len(memory_labels)))
ax3.set_yticklabels(memory_labels, fontweight='bold')
plt.tight_layout()
plt.savefig('memory.png', dpi=150, bbox_inches='tight', transparent=True)
plt.show()
-
# Console summary: list the image files written by the charts, then the
# headline numbers.
#
# The bullet characters were mojibake ("β’" is the UTF-8 bytes of "•" decoded
# as ISO-8859-7) and have been restored.
# NOTE(review): the heading's leading character was also mojibake ("π" is the
# first byte of a 4-byte UTF-8 emoji; the remaining bytes were lost). It is
# restored here as the bar-chart emoji - confirm against the upstream script.
print("\n📊 Charts saved as:")
print(" • throughput.png")
print(" • latency.png")
print(" • memory.png")
print("\nBenchmark Summary:")
print(f" avg tokens/sec: {avg_tokens_per_sec:.1f}")
print(f" min time: {min_time:.3f}s")
print(f" max time: {max_time:.3f}s")
print(f" peak memory: {final_mem['peak_gb']:.2f}GB")
- |