# File size: 2,807 Bytes
# 58b76f1
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "numpy",
# "torch",
# "kernels-benchmark-tools",
# ]
#
# [tool.uv.sources]
# kernels-benchmark-tools = { git = "https://github.com/drbh/kernels-benchmark-tools.git", branch = "main" }
# ///
import os
import csv
from pathlib import Path
import json
# --- Locate benchmark artifacts --------------------------------------------------
# Each implementation label is paired with the environment variable that is
# looked up for it; the value is later used as the directory containing that
# implementation's results file.
_BENCHMARK_ENV_VARS = (
    ("Flash (PyTorch SDPA)", "UVNOTE_FILE_FLASH_ATTENTION_BENCHMARK"),
    ("MemEff (PyTorch SDPA)", "UVNOTE_FILE_MEM_EFFICIENT_ATTENTION_BENCHMARK"),
    ("Flash Attn 2", "UVNOTE_FILE_FLASH_ATTN2_BENCHMARK"),
    ("xFormers", "UVNOTE_FILE_XFORMERS_BENCHMARK"),
    ("SageAttention", "UVNOTE_FILE_SAGE_ATTENTION_BENCHMARK"),
    ("Compiled (default)", "UVNOTE_FILE_COMPILED_VARIANTS_BENCHMARK_DEFAULT"),
    ("Compiled (max-autotune)", "UVNOTE_FILE_COMPILED_VARIANTS_BENCHMARK_MAX_AUTOTUNE"),
    ("HF Kernels Flash Attn", "UVNOTE_FILE_HF_KERNELS_FLASH_ATTN_BENCHMARK"),
    ("HF Kernels Flash Attn3", "UVNOTE_FILE_HF_KERNELS_FLASH_ATTN3_BENCHMARK"),
)

# Label -> value of its environment variable, or None when the variable is unset.
cache_dirs = {
    label: os.environ.get(env_var) for label, env_var in _BENCHMARK_ENV_VARS
}
# Name of the results file read from each implementation's directory.
# Every implementation uses "attn.jsonl" except the two compiled variants,
# which each have a dedicated file name.
file_mapping = dict.fromkeys(
    (
        "Flash (PyTorch SDPA)",
        "MemEff (PyTorch SDPA)",
        "Flash Attn 2",
        "xFormers",
        "SageAttention",
        "Compiled (default)",
        "Compiled (max-autotune)",
        "HF Kernels Flash Attn",
        "HF Kernels Flash Attn3",
    ),
    "attn.jsonl",
)
# Reassigning an existing key keeps its position, so key order is unchanged.
file_mapping["Compiled (default)"] = "attn_default.jsonl"
file_mapping["Compiled (max-autotune)"] = "attn_max_autotune.jsonl"
# Collect all benchmark data
# Label -> list of parsed JSON records, for every benchmark whose results file
# was found and contained at least one record.
all_data = {}
for name, cache_dir in cache_dirs.items():
    # A missing (None) or empty value means there is nothing to look up.
    if not cache_dir:
        continue

    path = Path(cache_dir) / file_mapping[name]
    # Skip benchmarks whose results file is absent or empty.
    if not path.exists() or path.stat().st_size == 0:
        continue

    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        # One JSON document per line. Blank lines (for example a trailing
        # newline at the end of the file) are skipped, because json.loads
        # raises JSONDecodeError on whitespace-only input.
        records = [json.loads(line) for line in f if line.strip()]

    # A file holding only whitespace yields no records; treat it like an
    # empty file rather than registering an implementation with no data.
    if records:
        all_data[name] = records
# Export to CSV
# Relative path, so the file is created in the current working directory.
csv_path = Path("latency.csv")

# Record keys read for the columns after "Implementation", in header order.
# NOTE(review): assumes each record is a flat JSON object exposing these keys;
# confirm against the benchmark output schema.
record_keys = ('seqlen', 'latency', 'min', 'max', 'median')

with csv_path.open('w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    # Header row first.
    writer.writerow(
        [
            "Implementation",
            "Sequence Length",
            "Latency (ms)",
            "Min (ms)",
            "Max (ms)",
            "Median (ms)",
        ]
    )
    # One row per record; a key missing from a record becomes an empty cell.
    writer.writerows(
        [impl_name, *(record.get(key, '') for key in record_keys)]
        for impl_name, records in all_data.items()
        for record in records
    )

# Summary of what was exported.
print(f"✓ CSV export complete: {csv_path}")
print(f"Total implementations: {len(all_data)}")
record_total = sum(len(entries) for entries in all_data.values())
print(f"Total records: {record_total}")