whitphx HF Staff committed on
Commit
fd2180b
·
1 Parent(s): 29e20e0

Specify dtype param

Browse files
.claude/settings.local.json CHANGED
@@ -6,7 +6,16 @@
6
  "Bash(npm run bench:cli:*)",
7
  "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device wasm)",
8
  "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device webgpu)",
9
- "Bash(timeout 180 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode cold --repeats 2 --device wasm)"
 
 
 
 
 
 
 
 
 
10
  ],
11
  "deny": [],
12
  "ask": []
 
6
  "Bash(npm run bench:cli:*)",
7
  "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device wasm)",
8
  "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device webgpu)",
9
+ "Bash(timeout 180 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode cold --repeats 2 --device wasm)",
10
+ "Bash(timeout 180 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm)",
11
+ "Bash(timeout 180 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu)",
12
+ "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype fp32)",
13
+ "Bash(cd:*)",
14
+ "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype q8)",
15
+ "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype q8)",
16
+ "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype fp32)",
17
+ "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype fp32)",
18
+ "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype q8)"
19
  ],
20
  "deny": [],
21
  "ask": []
bench-node/src/index.ts CHANGED
@@ -17,6 +17,7 @@ function getArg(name: string, def?: string) {
17
  const mode = (getArg("mode", "warm") as "warm" | "cold");
18
  const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
19
  const cacheDir = getArg("cache-dir", path.resolve(".bench-cache/default"))!;
 
20
 
21
  // Point library cache to a dedicated directory for controllable cold/warm behavior
22
  env.cacheDir = cacheDir;
@@ -35,14 +36,29 @@ function percentile(values: number[], q: number) {
35
 
36
  async function benchOnce() {
37
  const t0 = performance.now();
38
- const pipe = await pipeline(task, modelId, {});
 
 
39
  const t1 = performance.now();
40
 
41
  const t2 = performance.now();
42
  await pipe("The quick brown fox jumps over the lazy dog.");
43
  const t3 = performance.now();
44
 
45
- return { load_ms: +(t1 - t0).toFixed(1), first_infer_ms: +(t3 - t2).toFixed(1) };
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
 
48
  async function main() {
@@ -50,21 +66,26 @@ async function main() {
50
  console.log(`Task : ${task}`);
51
  console.log(`Mode : ${mode}`);
52
  console.log(`Repeats: ${repeats}`);
 
53
  console.log(`Cache : ${cacheDir}`);
54
 
55
  const loads: number[] = [];
56
  const firsts: number[] = [];
 
57
 
58
  if (mode === "warm") {
59
  // Fresh cache dir, prefetch once (not measured), then measure N times
60
  ensureEmptyDir(cacheDir);
61
- const warm = await pipeline(task, modelId, {});
 
 
62
  await warm("warmup");
63
 
64
  for (let i = 0; i < repeats; i++) {
65
  const r = await benchOnce();
66
  loads.push(r.load_ms);
67
  firsts.push(r.first_infer_ms);
 
68
  }
69
  } else {
70
  // cold: delete cache dir before each measured run
@@ -73,10 +94,11 @@ async function main() {
73
  const r = await benchOnce();
74
  loads.push(r.load_ms);
75
  firsts.push(r.first_infer_ms);
 
76
  }
77
  }
78
 
79
- const result = {
80
  platform: "node",
81
  runtime: `node-${process.versions.node}`,
82
  model: modelId,
@@ -86,9 +108,11 @@ async function main() {
86
  cacheDir,
87
  metrics: {
88
  load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
89
- first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts }
 
90
  }
91
  };
 
92
 
93
  console.log(JSON.stringify(result, null, 2));
94
  }
 
17
  const mode = (getArg("mode", "warm") as "warm" | "cold");
18
  const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
19
  const cacheDir = getArg("cache-dir", path.resolve(".bench-cache/default"))!;
20
+ const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.
21
 
22
  // Point library cache to a dedicated directory for controllable cold/warm behavior
23
  env.cacheDir = cacheDir;
 
36
 
37
  async function benchOnce() {
38
  const t0 = performance.now();
39
+ const options: any = {};
40
+ if (dtype) options.dtype = dtype;
41
+ const pipe = await pipeline(task, modelId, options);
42
  const t1 = performance.now();
43
 
44
  const t2 = performance.now();
45
  await pipe("The quick brown fox jumps over the lazy dog.");
46
  const t3 = performance.now();
47
 
48
+ // Run additional inferences to measure subsequent performance
49
+ const subsequentTimes: number[] = [];
50
+ for (let i = 0; i < 3; i++) {
51
+ const t4 = performance.now();
52
+ await pipe("The quick brown fox jumps over the lazy dog.");
53
+ const t5 = performance.now();
54
+ subsequentTimes.push(+(t5 - t4).toFixed(1));
55
+ }
56
+
57
+ return {
58
+ load_ms: +(t1 - t0).toFixed(1),
59
+ first_infer_ms: +(t3 - t2).toFixed(1),
60
+ subsequent_infer_ms: subsequentTimes
61
+ };
62
  }
63
 
64
  async function main() {
 
66
  console.log(`Task : ${task}`);
67
  console.log(`Mode : ${mode}`);
68
  console.log(`Repeats: ${repeats}`);
69
+ console.log(`DType : ${dtype || 'auto'}`);
70
  console.log(`Cache : ${cacheDir}`);
71
 
72
  const loads: number[] = [];
73
  const firsts: number[] = [];
74
+ const subsequents: number[] = [];
75
 
76
  if (mode === "warm") {
77
  // Fresh cache dir, prefetch once (not measured), then measure N times
78
  ensureEmptyDir(cacheDir);
79
+ const warmOptions: any = {};
80
+ if (dtype) warmOptions.dtype = dtype;
81
+ const warm = await pipeline(task, modelId, warmOptions);
82
  await warm("warmup");
83
 
84
  for (let i = 0; i < repeats; i++) {
85
  const r = await benchOnce();
86
  loads.push(r.load_ms);
87
  firsts.push(r.first_infer_ms);
88
+ subsequents.push(...r.subsequent_infer_ms);
89
  }
90
  } else {
91
  // cold: delete cache dir before each measured run
 
94
  const r = await benchOnce();
95
  loads.push(r.load_ms);
96
  firsts.push(r.first_infer_ms);
97
+ subsequents.push(...r.subsequent_infer_ms);
98
  }
99
  }
100
 
101
+ const result: any = {
102
  platform: "node",
103
  runtime: `node-${process.versions.node}`,
104
  model: modelId,
 
108
  cacheDir,
109
  metrics: {
110
  load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
111
+ first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts },
112
+ subsequent_infer_ms: { p50: +percentile(subsequents, 0.5).toFixed(1), p90: +percentile(subsequents, 0.9).toFixed(1), raw: subsequents }
113
  }
114
  };
115
+ if (dtype) result.dtype = dtype;
116
 
117
  console.log(JSON.stringify(result, null, 2));
118
  }
bench-web/src/cli.ts CHANGED
@@ -15,6 +15,7 @@ function getArg(name: string, def?: string) {
15
  const mode = getArg("mode", "warm") as "warm" | "cold";
16
  const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
17
  const device = getArg("device", "webgpu") as "webgpu" | "wasm";
 
18
  const browserType = getArg("browser", "chromium") as "chromium" | "firefox" | "webkit";
19
  const headed = getArg("headed") === "true";
20
 
@@ -24,6 +25,7 @@ async function main() {
24
  console.log(`Mode : ${mode}`);
25
  console.log(`Repeats : ${repeats}`);
26
  console.log(`Device : ${device}`);
 
27
  console.log(`Browser : ${browserType}`);
28
  console.log(`Headed : ${headed}`);
29
 
@@ -84,10 +86,25 @@ async function main() {
84
 
85
  console.log("\nStarting benchmark...");
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  // Use the exposed CLI function from main.ts
88
- const result = await page.evaluate(({ modelId, task, mode, repeats, device }) => {
89
- return (window as any).runBenchmarkCLI({ modelId, task, mode, repeats, device });
90
- }, { modelId, task, mode, repeats, device });
91
 
92
  console.log("\n" + JSON.stringify(result, null, 2));
93
 
 
15
  const mode = getArg("mode", "warm") as "warm" | "cold";
16
  const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
17
  const device = getArg("device", "webgpu") as "webgpu" | "wasm";
18
+ const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.
19
  const browserType = getArg("browser", "chromium") as "chromium" | "firefox" | "webkit";
20
  const headed = getArg("headed") === "true";
21
 
 
25
  console.log(`Mode : ${mode}`);
26
  console.log(`Repeats : ${repeats}`);
27
  console.log(`Device : ${device}`);
28
+ console.log(`DType : ${dtype || 'auto'}`);
29
  console.log(`Browser : ${browserType}`);
30
  console.log(`Headed : ${headed}`);
31
 
 
86
 
87
  console.log("\nStarting benchmark...");
88
 
89
+ // Check WebGPU availability if using webgpu device
90
+ if (device === "webgpu") {
91
+ const gpuAvailable = await page.evaluate(() => {
92
+ return 'gpu' in navigator;
93
+ });
94
+
95
+ if (!gpuAvailable) {
96
+ console.error("\n❌ WebGPU is not available in this browser!");
97
+ console.error("Make sure to use --enable-unsafe-webgpu flag for Chromium.");
98
+ throw new Error("WebGPU not available");
99
+ }
100
+
101
+ console.log("✓ WebGPU is available");
102
+ }
103
+
104
  // Use the exposed CLI function from main.ts
105
+ const result = await page.evaluate(({ modelId, task, mode, repeats, device, dtype }) => {
106
+ return (window as any).runBenchmarkCLI({ modelId, task, mode, repeats, device, dtype });
107
+ }, { modelId, task, mode, repeats, device, dtype });
108
 
109
  console.log("\n" + JSON.stringify(result, null, 2));
110
 
bench-web/src/main.ts CHANGED
@@ -36,34 +36,53 @@ async function clearCaches({ clearSession = false }: { clearSession?: boolean }
36
  if (clearSession) sessionStorage.clear();
37
  } catch { }
38
  }
39
- async function benchOnce(modelId: string, task: string, device: string) {
40
  const t0 = now();
41
- const pipe = await pipeline(task, modelId, { device });
 
 
42
  const t1 = now();
43
  const t2 = now();
44
  await pipe("The quick brown fox jumps over the lazy dog.");
45
  const t3 = now();
46
- return { load_ms: +(t1 - t0).toFixed(1), first_infer_ms: +(t3 - t2).toFixed(1) };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
- async function runMany(modelId: string, task: string, repeats: number, device: string) {
49
  const loads: number[] = [];
50
  const firsts: number[] = [];
 
51
  for (let i = 0; i < repeats; i++) {
52
- const r = await benchOnce(modelId, task, device);
53
  loads.push(r.load_ms);
54
  firsts.push(r.first_infer_ms);
 
55
  }
56
  return {
57
  load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
58
  first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts },
 
59
  };
60
  }
61
- async function runCold(modelId: string, task: string, repeats: number, device: string) {
62
  statusEl.textContent = "clearing caches (cold)...";
63
  await clearCaches();
64
  statusEl.textContent = "running (cold)...";
65
- const metrics = await runMany(modelId, task, repeats, device);
66
- return {
67
  platform: "browser",
68
  runtime: navigator.userAgent,
69
  mode: "cold",
@@ -74,14 +93,18 @@ async function runCold(modelId: string, task: string, repeats: number, device: s
74
  metrics,
75
  notes: "Only the 1st iteration is strictly cold in a single page session."
76
  };
 
 
77
  }
78
- async function runWarmDirect(modelId: string, task: string, repeats: number, device: string) {
79
  statusEl.textContent = "prefetching (warmup) ...";
80
- const p = await pipeline(task, modelId, { device });
 
 
81
  await p("warmup");
82
  statusEl.textContent = "running (warm)...";
83
- const metrics = await runMany(modelId, task, repeats, device);
84
- return {
85
  platform: "browser",
86
  runtime: navigator.userAgent,
87
  mode: "warm",
@@ -91,19 +114,23 @@ async function runWarmDirect(modelId: string, task: string, repeats: number, dev
91
  device,
92
  metrics
93
  };
 
 
94
  }
95
- async function runWarm(modelId: string, task: string, repeats: number, device: string) {
96
  const flag = sessionStorage.getItem("__warm_ready__");
97
  if (!flag) {
98
  statusEl.textContent = "prefetching (warmup) ...";
99
- const p = await pipeline(task, modelId, { device });
 
 
100
  await p("warmup");
101
- sessionStorage.setItem("__warm_ready__", JSON.stringify({ modelId, task, repeats, device }));
102
  location.reload();
103
  return null;
104
  } else {
105
  sessionStorage.removeItem("__warm_ready__");
106
- return await runWarmDirect(modelId, task, repeats, device);
107
  }
108
  }
109
  async function run() {
@@ -133,11 +160,11 @@ btn.addEventListener("click", () => {
133
  });
134
 
135
  // Expose for CLI use
136
- (window as any).runBenchmarkCLI = async function (params: { modelId: string, task: string, mode: string, repeats: number, device: string }) {
137
  if (params.mode === "cold") {
138
- return await runCold(params.modelId, params.task, params.repeats, params.device);
139
  } else {
140
  // For warm, use the direct function that skips reload logic
141
- return await runWarmDirect(params.modelId, params.task, params.repeats, params.device);
142
  }
143
  };
 
36
  if (clearSession) sessionStorage.clear();
37
  } catch { }
38
  }
39
+ async function benchOnce(modelId: string, task: string, device: string, dtype?: string) {
40
  const t0 = now();
41
+ const options: any = { device };
42
+ if (dtype) options.dtype = dtype;
43
+ const pipe = await pipeline(task, modelId, options);
44
  const t1 = now();
45
  const t2 = now();
46
  await pipe("The quick brown fox jumps over the lazy dog.");
47
  const t3 = now();
48
+
49
+ // Run additional inferences to measure subsequent performance
50
+ const subsequentTimes: number[] = [];
51
+ for (let i = 0; i < 3; i++) {
52
+ const t4 = now();
53
+ await pipe("The quick brown fox jumps over the lazy dog.");
54
+ const t5 = now();
55
+ subsequentTimes.push(+(t5 - t4).toFixed(1));
56
+ }
57
+
58
+ return {
59
+ load_ms: +(t1 - t0).toFixed(1),
60
+ first_infer_ms: +(t3 - t2).toFixed(1),
61
+ subsequent_infer_ms: subsequentTimes
62
+ };
63
  }
64
+ async function runMany(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
65
  const loads: number[] = [];
66
  const firsts: number[] = [];
67
+ const subsequents: number[] = [];
68
  for (let i = 0; i < repeats; i++) {
69
+ const r = await benchOnce(modelId, task, device, dtype);
70
  loads.push(r.load_ms);
71
  firsts.push(r.first_infer_ms);
72
+ subsequents.push(...r.subsequent_infer_ms);
73
  }
74
  return {
75
  load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
76
  first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts },
77
+ subsequent_infer_ms: { p50: +percentile(subsequents, 0.5).toFixed(1), p90: +percentile(subsequents, 0.9).toFixed(1), raw: subsequents },
78
  };
79
  }
80
+ async function runCold(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
81
  statusEl.textContent = "clearing caches (cold)...";
82
  await clearCaches();
83
  statusEl.textContent = "running (cold)...";
84
+ const metrics = await runMany(modelId, task, repeats, device, dtype);
85
+ const result: any = {
86
  platform: "browser",
87
  runtime: navigator.userAgent,
88
  mode: "cold",
 
93
  metrics,
94
  notes: "Only the 1st iteration is strictly cold in a single page session."
95
  };
96
+ if (dtype) result.dtype = dtype;
97
+ return result;
98
  }
99
+ async function runWarmDirect(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
100
  statusEl.textContent = "prefetching (warmup) ...";
101
+ const options: any = { device };
102
+ if (dtype) options.dtype = dtype;
103
+ const p = await pipeline(task, modelId, options);
104
  await p("warmup");
105
  statusEl.textContent = "running (warm)...";
106
+ const metrics = await runMany(modelId, task, repeats, device, dtype);
107
+ const result: any = {
108
  platform: "browser",
109
  runtime: navigator.userAgent,
110
  mode: "warm",
 
114
  device,
115
  metrics
116
  };
117
+ if (dtype) result.dtype = dtype;
118
+ return result;
119
  }
120
+ async function runWarm(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
121
  const flag = sessionStorage.getItem("__warm_ready__");
122
  if (!flag) {
123
  statusEl.textContent = "prefetching (warmup) ...";
124
+ const options: any = { device };
125
+ if (dtype) options.dtype = dtype;
126
+ const p = await pipeline(task, modelId, options);
127
  await p("warmup");
128
+ sessionStorage.setItem("__warm_ready__", JSON.stringify({ modelId, task, repeats, device, dtype }));
129
  location.reload();
130
  return null;
131
  } else {
132
  sessionStorage.removeItem("__warm_ready__");
133
+ return await runWarmDirect(modelId, task, repeats, device, dtype);
134
  }
135
  }
136
  async function run() {
 
160
  });
161
 
162
  // Expose for CLI use
163
+ (window as any).runBenchmarkCLI = async function (params: { modelId: string, task: string, mode: string, repeats: number, device: string, dtype?: string }) {
164
  if (params.mode === "cold") {
165
+ return await runCold(params.modelId, params.task, params.repeats, params.device, params.dtype);
166
  } else {
167
  // For warm, use the direct function that skips reload logic
168
+ return await runWarmDirect(params.modelId, params.task, params.repeats, params.device, params.dtype);
169
  }
170
  };