Spaces:
Runtime error
Runtime error
File size: 3,980 Bytes
b4867cc 61e6275 b4867cc 61e6275 b4867cc 61e6275 b4867cc fd2180b 61e6275 b4867cc 61e6275 fd2180b 61e6275 b4867cc 61e6275 fd2180b b4867cc fd2180b b4867cc fd2180b b4867cc fd2180b b4867cc 61e6275 b4867cc fd2180b b4867cc fd2180b b4867cc fd2180b 61e6275 b4867cc 61e6275 b4867cc fd2180b 61e6275 b4867cc fd2180b b4867cc 61e6275 b4867cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import { pipeline, env } from "@huggingface/transformers";
import { performance } from "node:perf_hooks";
import fs from "node:fs";
import path from "node:path";
// Node benchmark with warm/cold modes, repeats, p50/p90
/**
 * Collects the positional (non-flag) arguments from an argument vector.
 *
 * Entries before index 2 are skipped. Tokens starting with `--` are treated as
 * flags and never returned. A flag written as `--name value` also consumes the
 * following token as its value, unless that token is itself a flag; a flag
 * written as `--name=value` is a single token.
 *
 * @param argv Full argument vector (e.g. `process.argv`).
 * @returns The positional arguments, in their original order.
 */
function positionalArgs(argv: readonly string[]): string[] {
  const positional: string[] = [];
  for (let i = 2; i < argv.length; i++) {
    const token = argv[i];
    if (!token.startsWith("--")) {
      positional.push(token);
      continue;
    }
    // `--name value`: skip the value as well, but never swallow another flag.
    const next = argv[i + 1];
    if (!token.includes("=") && next !== undefined && !next.startsWith("--")) i++;
  }
  return positional;
}

// First positional = model id, second positional = task. Flags such as
// `--mode cold` may appear anywhere without being mistaken for either one.
const [modelArg, taskArg] = positionalArgs(process.argv);
// `||` (not `??`) so that an empty-string argument also falls back to the default.
const modelId = modelArg || "Xenova/distilbert-base-uncased";
const task = taskArg || "feature-extraction";
/**
 * Looks up the value of a `--name` command-line option in `process.argv`.
 *
 * Both `--name value` and `--name=value` are accepted. A `--name` that is the
 * last token, or that is directly followed by another `--flag`, carries no
 * value and is ignored, so the neighbouring flag is never mistaken for a value.
 *
 * @param name Option name without the leading dashes (e.g. `"cache-dir"`).
 * @param def  Value returned when the option is absent or has no value.
 * @returns The option value, or `def` (possibly `undefined`) when none is found.
 */
function getArg(name: string, def: string): string;
function getArg(name: string, def?: string): string | undefined;
function getArg(name: string, def?: string): string | undefined {
  const flag = `--${name}`;
  const inlinePrefix = `${flag}=`;
  const args = process.argv;
  for (let i = 0; i < args.length; i++) {
    const token = args[i];
    if (token.startsWith(inlinePrefix)) return token.slice(inlinePrefix.length);
    if (token === flag) {
      const value = args[i + 1];
      // Only accept the next token as a value if it is not itself a flag.
      if (value !== undefined && !value.startsWith("--")) return value;
    }
  }
  return def;
}
// --mode: "warm" or "cold". Validated instead of cast, because any other value
// would otherwise silently run the non-warm (cold) branch of the benchmark.
const modeArg = getArg("mode", "warm");
if (modeArg !== "warm" && modeArg !== "cold") {
  throw new Error(`Invalid --mode "${modeArg}": expected "warm" or "cold"`);
}
const mode: "warm" | "cold" = modeArg;

// --repeats: number of measured runs, at least 1. A non-numeric value is rejected:
// parseInt would yield NaN, Math.max(1, NaN) is NaN, and every measurement loop
// would then run zero times.
const repeatsArg = getArg("repeats", "3") || "3";
const parsedRepeats = parseInt(repeatsArg, 10);
if (Number.isNaN(parsedRepeats)) {
  throw new Error(`Invalid --repeats "${repeatsArg}": expected an integer`);
}
const repeats = Math.max(1, parsedRepeats);

// --cache-dir: where the library stores downloaded model files.
// NOTE: the benchmark deletes and recreates this directory (see ensureEmptyDir
// calls in main), so it must never point at a directory holding other data.
const cacheDir = getArg("cache-dir") ?? path.resolve(".bench-cache/default");

const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.

// Point library cache to a dedicated directory for controllable cold/warm behavior
env.cacheDir = cacheDir;
/**
 * Guarantees that `dir` exists and contains nothing.
 *
 * Any existing directory at that path is removed together with its contents,
 * then the directory (including missing parents) is created again.
 *
 * @param dir Path of the directory to reset.
 */
function ensureEmptyDir(dir: string) {
  const alreadyPresent = fs.existsSync(dir);
  if (alreadyPresent) {
    fs.rmSync(dir, { force: true, recursive: true });
  }
  fs.mkdirSync(dir, { recursive: true });
}
/**
 * Computes the `q`-quantile of `values` using linear interpolation between
 * the two closest ranks of the sorted data.
 *
 * The input array is copied before sorting and is never modified.
 *
 * @param values Samples to summarise.
 * @param q      Quantile as a fraction, e.g. 0.5 for the median, 0.9 for p90.
 * @returns The interpolated quantile value.
 */
function percentile(values: number[], q: number) {
  const sorted = values.slice().sort((lhs, rhs) => lhs - rhs);
  // Fractional position of the quantile within the sorted samples.
  const rank = (sorted.length - 1) * q;
  const below = Math.floor(rank);
  const above = Math.ceil(rank);
  if (below === above) {
    // The rank lands exactly on a sample; no interpolation required.
    return sorted[below];
  }
  const fraction = rank - below;
  return sorted[below] + (sorted[above] - sorted[below]) * fraction;
}
/**
 * Performs one measured benchmark pass.
 *
 * Times the construction of a pipeline for the configured `task` / `modelId`
 * (optionally with `dtype`), then the first inference, then a fixed number of
 * follow-up inferences on the same input. The pipeline is disposed once the
 * measurements are taken, so repeated passes do not accumulate loaded models.
 *
 * @returns Timings in milliseconds rounded to one decimal: `load_ms`,
 *          `first_infer_ms`, and one `subsequent_infer_ms` entry per follow-up run.
 */
async function benchOnce() {
  const sampleText = "The quick brown fox jumps over the lazy dog.";
  const subsequentRuns = 3;

  // Only pass `dtype` when the user asked for one.
  const options: any = dtype ? { dtype } : {};

  const loadStart = performance.now();
  const pipe = await pipeline(task, modelId, options);
  const loadMs = performance.now() - loadStart;

  try {
    const firstStart = performance.now();
    await pipe(sampleText);
    const firstMs = performance.now() - firstStart;

    // Run additional inferences to measure subsequent performance
    const subsequentTimes: number[] = [];
    for (let i = 0; i < subsequentRuns; i++) {
      const start = performance.now();
      await pipe(sampleText);
      subsequentTimes.push(+(performance.now() - start).toFixed(1));
    }

    return {
      load_ms: +loadMs.toFixed(1),
      first_infer_ms: +firstMs.toFixed(1),
      subsequent_infer_ms: subsequentTimes
    };
  } finally {
    // Disposal happens after all timers have stopped, so it does not affect the numbers.
    await pipe.dispose();
  }
}
/**
 * Benchmark entry point.
 *
 * Prints the configuration, collects `repeats` measurements via `benchOnce`,
 * and prints a JSON report with p50/p90 and raw values for each metric.
 *
 * - warm mode: the cache directory is emptied once, the model is fetched by an
 *   unmeasured warm-up run, and every measured run reuses that cache.
 * - otherwise (cold): the cache directory is emptied before every measured run.
 *
 * @throws Error when no measurement was collected (e.g. `repeats` is not a usable number).
 */
async function main() {
  console.log(`Model : ${modelId}`);
  console.log(`Task : ${task}`);
  console.log(`Mode : ${mode}`);
  console.log(`Repeats: ${repeats}`);
  console.log(`DType : ${dtype || 'auto'}`);
  console.log(`Cache : ${cacheDir}`);

  const loads: number[] = [];
  const firsts: number[] = [];
  const subsequents: number[] = [];

  const isWarm = mode === "warm";
  if (isWarm) {
    // Fresh cache dir, prefetch once (not measured), then measure N times
    ensureEmptyDir(cacheDir);
    const warmOptions: any = dtype ? { dtype } : {};
    const warm = await pipeline(task, modelId, warmOptions);
    try {
      await warm("warmup");
    } finally {
      // Release the warm-up model so it does not stay loaded during the measured runs.
      await warm.dispose();
    }
  }

  for (let i = 0; i < repeats; i++) {
    // cold: delete cache dir before each measured run
    if (!isWarm) ensureEmptyDir(cacheDir);
    const r = await benchOnce();
    loads.push(r.load_ms);
    firsts.push(r.first_infer_ms);
    subsequents.push(...r.subsequent_infer_ms);
  }

  // Without samples every percentile would be NaN and serialise as null.
  if (loads.length === 0) {
    throw new Error(`No measurements collected (repeats=${repeats})`);
  }

  // p50/p90 rounded to one decimal, plus the raw samples they were derived from.
  const summarize = (samples: number[]) => ({
    p50: +percentile(samples, 0.5).toFixed(1),
    p90: +percentile(samples, 0.9).toFixed(1),
    raw: samples
  });

  const result = {
    platform: "node",
    runtime: `node-${process.versions.node}`,
    model: modelId,
    task,
    mode,
    repeats,
    cacheDir,
    metrics: {
      load_ms: summarize(loads),
      first_infer_ms: summarize(firsts),
      subsequent_infer_ms: summarize(subsequents)
    },
    // `dtype` is reported only when explicitly requested, and stays the last key.
    ...(dtype ? { dtype } : {})
  };
  console.log(JSON.stringify(result, null, 2));
}
// Start the benchmark; on any failure log the error and exit with status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|