File size: 3,980 Bytes
b4867cc
61e6275
b4867cc
 
61e6275
b4867cc
61e6275
 
 
 
b4867cc
 
 
 
 
 
 
 
 
fd2180b
61e6275
b4867cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61e6275
fd2180b
 
 
61e6275
 
 
b4867cc
61e6275
 
fd2180b
 
 
 
 
 
 
 
 
 
 
 
 
 
b4867cc
 
 
 
 
 
 
fd2180b
b4867cc
 
 
 
fd2180b
b4867cc
 
 
 
fd2180b
 
 
b4867cc
61e6275
b4867cc
 
 
 
fd2180b
b4867cc
 
 
 
 
 
 
 
fd2180b
b4867cc
 
 
fd2180b
61e6275
 
 
 
b4867cc
 
 
61e6275
b4867cc
fd2180b
 
61e6275
b4867cc
fd2180b
b4867cc
 
61e6275
 
b4867cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import { pipeline, env } from "@huggingface/transformers";
import { performance } from "node:perf_hooks";
import fs from "node:fs";
import path from "node:path";

// Node benchmark with warm/cold modes, repeats, p50/p90

// Positional CLI args: argv[2] = Hugging Face model id, argv[3] = pipeline task name.
// `||` (not `??`) is intentional here: an empty-string arg also falls back to the default.
const modelId = process.argv[2] || "Xenova/distilbert-base-uncased";
const task = process.argv[3] || "feature-extraction";

/**
 * Look up a `--name value` pair anywhere in process.argv.
 *
 * @param name Flag name without the leading dashes.
 * @param def  Fallback returned when the flag is absent or has no value after it.
 * @returns The token following `--name`, or `def`.
 */
function getArg(name: string, def?: string) {
  const at = process.argv.indexOf(`--${name}`);
  const hasValue = at !== -1 && at + 1 < process.argv.length;
  return hasValue ? process.argv[at + 1] : def;
}

// Flag-style options (parsed by getArg):
//   --mode warm|cold   measurement strategy (default "warm"); the cast is unchecked,
//                      so any other value silently behaves like the `else` (cold) path
//   --repeats N        measured iterations, clamped to >= 1
//   --cache-dir PATH   model-cache location (default .bench-cache/default)
//   --dtype X          optional quantization/precision hint passed to pipeline()
const mode = (getArg("mode", "warm") as "warm" | "cold");
const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
const cacheDir = getArg("cache-dir", path.resolve(".bench-cache/default"))!;
const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.

// Point library cache to a dedicated directory for controllable cold/warm behavior
env.cacheDir = cacheDir;

/**
 * Recreate `dir` as an empty directory, deleting any previous contents.
 *
 * @param dir Absolute or relative path; parents are created as needed.
 */
function ensureEmptyDir(dir: string) {
  // `force: true` makes rmSync a no-op when the path does not exist, so the
  // previous `fs.existsSync` pre-check was redundant (and a TOCTOU race).
  fs.rmSync(dir, { recursive: true, force: true });
  fs.mkdirSync(dir, { recursive: true });
}

/**
 * Linear-interpolated quantile of a sample.
 *
 * @param values Sample values (left unmodified; a sorted copy is used).
 *               Must be non-empty — an empty array yields NaN.
 * @param q      Quantile in [0, 1], e.g. 0.5 for the median.
 * @returns The interpolated value at rank (length - 1) * q.
 */
function percentile(values: number[], q: number) {
  const sorted = values.slice().sort((a, b) => a - b);
  const pos = (sorted.length - 1) * q;
  const lo = Math.floor(pos);
  const hi = Math.ceil(pos);
  if (lo === hi) return sorted[lo];
  // Interpolate between the two neighboring ranks.
  return sorted[lo] + (sorted[hi] - sorted[lo]) * (pos - lo);
}

/**
 * One measured cycle: construct the pipeline, run a first inference, then
 * three more inferences on the same fixed sentence.
 *
 * Reads module-level `modelId`, `task`, and `dtype`.
 * @returns Millisecond timings rounded to one decimal place:
 *          pipeline load, first inference, and the three follow-up inferences.
 */
async function benchOnce() {
  const pipelineOptions: any = {};
  if (dtype) pipelineOptions.dtype = dtype;

  const loadStart = performance.now();
  const pipe = await pipeline(task, modelId, pipelineOptions);
  const loadEnd = performance.now();

  const firstStart = performance.now();
  await pipe("The quick brown fox jumps over the lazy dog.");
  const firstEnd = performance.now();

  // Extra runs capture steady-state latency once the first call has
  // paid any lazy-initialization cost.
  const subsequentTimes: number[] = [];
  for (let run = 0; run < 3; run++) {
    const start = performance.now();
    await pipe("The quick brown fox jumps over the lazy dog.");
    subsequentTimes.push(+(performance.now() - start).toFixed(1));
  }

  return {
    load_ms: +(loadEnd - loadStart).toFixed(1),
    first_infer_ms: +(firstEnd - firstStart).toFixed(1),
    subsequent_infer_ms: subsequentTimes
  };
}

/**
 * Entry point: print the run configuration, collect `repeats` measurements
 * in warm or cold mode, and emit a JSON summary (p50/p90 plus raw samples)
 * on stdout.
 */
async function main() {
  console.log(`Model  : ${modelId}`);
  console.log(`Task   : ${task}`);
  console.log(`Mode   : ${mode}`);
  console.log(`Repeats: ${repeats}`);
  console.log(`DType  : ${dtype || 'auto'}`);
  console.log(`Cache  : ${cacheDir}`);

  const loads: number[] = [];
  const firsts: number[] = [];
  const subsequents: number[] = [];

  // Fold one benchOnce result into the three accumulators.
  const record = (r: Awaited<ReturnType<typeof benchOnce>>) => {
    loads.push(r.load_ms);
    firsts.push(r.first_infer_ms);
    subsequents.push(...r.subsequent_infer_ms);
  };

  if (mode === "warm") {
    // Warm: wipe the cache once, prefetch the model (unmeasured), then
    // every measured run hits a fully populated cache.
    ensureEmptyDir(cacheDir);
    const warmOptions: any = {};
    if (dtype) warmOptions.dtype = dtype;
    const warm = await pipeline(task, modelId, warmOptions);
    await warm("warmup");

    for (let run = 0; run < repeats; run++) {
      record(await benchOnce());
    }
  } else {
    // Cold: wipe the cache before every measured run so each one pays the
    // full download/initialization cost.
    for (let run = 0; run < repeats; run++) {
      ensureEmptyDir(cacheDir);
      record(await benchOnce());
    }
  }

  // p50/p90 summary plus the raw samples for one metric series.
  const summarize = (xs: number[]) => ({
    p50: +percentile(xs, 0.5).toFixed(1),
    p90: +percentile(xs, 0.9).toFixed(1),
    raw: xs
  });

  const result: any = {
    platform: "node",
    runtime: `node-${process.versions.node}`,
    model: modelId,
    task,
    mode,
    repeats,
    cacheDir,
    metrics: {
      load_ms: summarize(loads),
      first_infer_ms: summarize(firsts),
      subsequent_infer_ms: summarize(subsequents)
    }
  };
  if (dtype) result.dtype = dtype;

  console.log(JSON.stringify(result, null, 2));
}

main().catch((e) => { console.error(e); process.exit(1); });