File size: 3,019 Bytes
b4867cc
61e6275
b4867cc
 
61e6275
b4867cc
61e6275
 
 
 
b4867cc
 
 
 
 
 
 
 
 
61e6275
b4867cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61e6275
b4867cc
61e6275
 
 
b4867cc
61e6275
 
b4867cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61e6275
b4867cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61e6275
 
 
 
b4867cc
 
 
61e6275
b4867cc
 
61e6275
b4867cc
 
 
61e6275
 
b4867cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import { pipeline, env } from "@huggingface/transformers";
import { performance } from "node:perf_hooks";
import fs from "node:fs";
import path from "node:path";

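// Node.js benchmark for pipeline load time and first-inference latency.
// Usage: <script> [modelId] [task] [--mode warm|cold] [--repeats N] [--cache-dir DIR]
// Runs the measurement N times and prints p50/p90 (plus raw samples) as JSON.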

/**
 * Returns the positional command-line arguments that appear before the first
 * `--option` in `argv` (after skipping the runtime and script entries).
 *
 * Stopping at the first option keeps an option name or its value from being
 * mistaken for the model id or task when positionals are omitted.
 *
 * @param argv - Full argument vector, in the shape of `process.argv`.
 * @returns The leading positional arguments, possibly empty.
 */
function leadingPositionals(argv: readonly string[]): string[] {
  const userArgs = argv.slice(2);
  const firstOption = userArgs.findIndex((arg) => arg.startsWith("--"));
  return firstOption === -1 ? userArgs : userArgs.slice(0, firstOption);
}

const [modelArg, taskArg] = leadingPositionals(process.argv);

// Model id and pipeline task; both are optional positionals with defaults.
// `||` (not `??`) is intentional so an empty-string argument also falls back.
const modelId = modelArg || "Xenova/distilbert-base-uncased";
const task = taskArg || "feature-extraction";

/**
 * Looks up the value of a `--name` option in `process.argv`.
 *
 * Two spellings are accepted:
 *   - `--name value` (checked first)
 *   - `--name=value` (used only when the spaced form yields nothing)
 *
 * @param name - Option name without the leading dashes.
 * @param def - Value returned when the option is absent, or when `--name` is
 *   the last argument and therefore has no value following it.
 * @returns The option value, or `def` when none is found.
 */
function getArg(name: string, def?: string): string | undefined {
  const flag = `--${name}`;
  const argv = process.argv;

  // Spaced form: the value is the argument immediately after the flag.
  const flagIndex = argv.indexOf(flag);
  if (flagIndex !== -1 && flagIndex + 1 < argv.length) {
    return argv[flagIndex + 1];
  }

  // Inline form: `--name=value`; everything after the first `=` is the value.
  const inlinePrefix = `${flag}=`;
  const inline = argv.find((arg) => arg.startsWith(inlinePrefix));
  if (inline !== undefined) {
    return inline.slice(inlinePrefix.length);
  }

  return def;
}

/**
 * Validates the raw `--mode` value.
 *
 * @param raw - Value supplied on the command line (or the default).
 * @returns The value narrowed to a supported mode.
 * @throws Error when the value is neither "warm" nor "cold"; otherwise a typo
 *   would silently be benchmarked as a cold run.
 */
function parseMode(raw: string): "warm" | "cold" {
  if (raw === "warm" || raw === "cold") return raw;
  throw new Error(`Invalid --mode "${raw}": expected "warm" or "cold"`);
}

const mode = parseMode(getArg("mode", "warm") ?? "warm");

// A non-numeric --repeats parses to NaN, which would skip every measured run
// and produce NaN metrics; fall back to the default of 3 in that case.
// Values below 1 are clamped to 1.
const parsedRepeats = Number.parseInt(getArg("repeats", "3") || "3", 10);
const repeats = Number.isNaN(parsedRepeats) ? 3 : Math.max(1, parsedRepeats);

const cacheDir = getArg("cache-dir") ?? path.resolve(".bench-cache/default");

// Point library cache to a dedicated directory for controllable cold/warm behavior
env.cacheDir = cacheDir;

/**
 * Leaves `dir` existing and empty: removes it (with all contents) if present,
 * then recreates it, including any missing parent directories.
 *
 * No prior existence check is made: `force: true` already makes removal of a
 * missing path a no-op, and skipping the check avoids a check-then-act race.
 *
 * @param dir - Directory path to reset.
 */
function ensureEmptyDir(dir: string): void {
  fs.rmSync(dir, { recursive: true, force: true });
  fs.mkdirSync(dir, { recursive: true });
}

/**
 * Computes the q-quantile of `values` using linear interpolation between the
 * two closest ranks. The input array is copied before sorting and is not
 * modified.
 *
 * @param values - Samples to summarize.
 * @param q - Quantile as a fraction (e.g. 0.5 for the median, 0.9 for p90).
 * @returns The interpolated value at fractional rank `(length - 1) * q`.
 */
function percentile(values: number[], q: number) {
  const sorted = values.slice().sort((lhs, rhs) => lhs - rhs);

  // Fractional position of the requested quantile within the sorted samples.
  const rank = (sorted.length - 1) * q;
  const lower = Math.floor(rank);
  const upper = Math.ceil(rank);

  // Rank lands exactly on a sample: no interpolation needed.
  if (lower === upper) {
    return sorted[lower];
  }

  const weight = rank - lower;
  return sorted[lower] + (sorted[upper] - sorted[lower]) * weight;
}

/**
 * Performs one measured run: builds a pipeline for the configured task/model,
 * then runs a single inference on a fixed sentence.
 *
 * The pipeline is disposed afterwards (outside both timed regions) so that
 * model resources from earlier repeats do not accumulate during the benchmark.
 *
 * @returns Elapsed milliseconds, rounded to one decimal place, for pipeline
 *   construction (`load_ms`) and for the first inference (`first_infer_ms`).
 */
async function benchOnce(): Promise<{ load_ms: number; first_infer_ms: number }> {
  const loadStart = performance.now();
  const pipe = await pipeline(task, modelId, {});
  const loadEnd = performance.now();

  try {
    const inferStart = performance.now();
    await pipe("The quick brown fox jumps over the lazy dog.");
    const inferEnd = performance.now();

    return {
      load_ms: +(loadEnd - loadStart).toFixed(1),
      first_infer_ms: +(inferEnd - inferStart).toFixed(1),
    };
  } finally {
    // Release the pipeline's model whether or not inference succeeded.
    await pipe.dispose();
  }
}

/**
 * Runs the benchmark and prints the results.
 *
 * - warm: the cache directory is reset once, the model is loaded and run once
 *   without being measured (populating the cache), then `repeats` measured
 *   runs follow.
 * - cold: the cache directory is reset before every measured run.
 *
 * Prints the run configuration, then a JSON document with p50/p90 and raw
 * samples for load time and first-inference time.
 */
async function main() {
  console.log(`Model  : ${modelId}`);
  console.log(`Task   : ${task}`);
  console.log(`Mode   : ${mode}`);
  console.log(`Repeats: ${repeats}`);
  console.log(`Cache  : ${cacheDir}`);

  const loads: number[] = [];
  const firsts: number[] = [];
  const isWarm = mode === "warm";

  if (isWarm) {
    // Fresh cache dir, prefetch once (not measured)
    ensureEmptyDir(cacheDir);
    const warm = await pipeline(task, modelId, {});
    try {
      await warm("warmup");
    } finally {
      // Release the warm-up pipeline so it is not kept alive during the
      // measured runs.
      await warm.dispose();
    }
  }

  for (let i = 0; i < repeats; i++) {
    // cold: delete cache dir before each measured run
    if (!isWarm) ensureEmptyDir(cacheDir);
    const run = await benchOnce();
    loads.push(run.load_ms);
    firsts.push(run.first_infer_ms);
  }

  // Summarize one metric's samples: median, 90th percentile, and raw values.
  const summarize = (samples: number[]) => ({
    p50: +percentile(samples, 0.5).toFixed(1),
    p90: +percentile(samples, 0.9).toFixed(1),
    raw: samples,
  });

  const result = {
    platform: "node",
    runtime: `node-${process.versions.node}`,
    model: modelId,
    task,
    mode,
    repeats,
    cacheDir,
    metrics: {
      load_ms: summarize(loads),
      first_infer_ms: summarize(firsts),
    },
  };

  console.log(JSON.stringify(result, null, 2));
}

// Entry point: run the benchmark; on any failure, log the error and exit with status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});