Spaces:

whitphx
/

transformersjs-performance-leaderboard-backend

Runtime error

App Files Community

whitphx HF Staff committed on Oct 6

Commit

fd2180b

1 Parent(s): 29e20e0

Specify dtype param

Browse files

Files changed (4) hide show

.claude/settings.local.json +10 -1
bench-node/src/index.ts +29 -5
bench-web/src/cli.ts +20 -3
bench-web/src/main.ts +46 -19

.claude/settings.local.json CHANGED Viewed

@@ -6,7 +6,16 @@
       "Bash(npm run bench:cli:*)",
       "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device wasm)",
       "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device webgpu)",
-      "Bash(timeout 180 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode cold --repeats 2 --device wasm)"
     ],
     "deny": [],
     "ask": []

       "Bash(npm run bench:cli:*)",
       "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device wasm)",
       "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device webgpu)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode cold --repeats 2 --device wasm)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu)",
+      "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype fp32)",
+      "Bash(cd:*)",
+      "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype q8)",
+      "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype q8)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype fp32)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype fp32)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype q8)"
     ],
     "deny": [],
     "ask": []

bench-node/src/index.ts CHANGED Viewed

@@ -17,6 +17,7 @@ function getArg(name: string, def?: string) {
 const mode = (getArg("mode", "warm") as "warm" | "cold");
 const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
 const cacheDir = getArg("cache-dir", path.resolve(".bench-cache/default"))!;
 // Point library cache to a dedicated directory for controllable cold/warm behavior
 env.cacheDir = cacheDir;
@@ -35,14 +36,29 @@ function percentile(values: number[], q: number) {
 async function benchOnce() {
   const t0 = performance.now();
-  const pipe = await pipeline(task, modelId, {});
   const t1 = performance.now();
   const t2 = performance.now();
   await pipe("The quick brown fox jumps over the lazy dog.");
   const t3 = performance.now();
-  return { load_ms: +(t1 - t0).toFixed(1), first_infer_ms: +(t3 - t2).toFixed(1) };
 }
 async function main() {
@@ -50,21 +66,26 @@ async function main() {
   console.log(`Task   : ${task}`);
   console.log(`Mode   : ${mode}`);
   console.log(`Repeats: ${repeats}`);
   console.log(`Cache  : ${cacheDir}`);
   const loads: number[] = [];
   const firsts: number[] = [];
   if (mode === "warm") {
     // Fresh cache dir, prefetch once (not measured), then measure N times
     ensureEmptyDir(cacheDir);
-    const warm = await pipeline(task, modelId, {});
     await warm("warmup");
     for (let i = 0; i < repeats; i++) {
       const r = await benchOnce();
       loads.push(r.load_ms);
       firsts.push(r.first_infer_ms);
     }
   } else {
     // cold: delete cache dir before each measured run
@@ -73,10 +94,11 @@ async function main() {
       const r = await benchOnce();
       loads.push(r.load_ms);
       firsts.push(r.first_infer_ms);
     }
   }
-  const result = {
     platform: "node",
     runtime: `node-${process.versions.node}`,
     model: modelId,
@@ -86,9 +108,11 @@ async function main() {
     cacheDir,
     metrics: {
       load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
-      first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts }
     }
   };
   console.log(JSON.stringify(result, null, 2));
 }

 const mode = (getArg("mode", "warm") as "warm" | "cold");
 const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
 const cacheDir = getArg("cache-dir", path.resolve(".bench-cache/default"))!;
+const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.
 // Point library cache to a dedicated directory for controllable cold/warm behavior
 env.cacheDir = cacheDir;
 async function benchOnce() {
   const t0 = performance.now();
+  const options: any = {};
+  if (dtype) options.dtype = dtype;
+  const pipe = await pipeline(task, modelId, options);
   const t1 = performance.now();
   const t2 = performance.now();
   await pipe("The quick brown fox jumps over the lazy dog.");
   const t3 = performance.now();
+  // Run additional inferences to measure subsequent performance
+  const subsequentTimes: number[] = [];
+  for (let i = 0; i < 3; i++) {
+    const t4 = performance.now();
+    await pipe("The quick brown fox jumps over the lazy dog.");
+    const t5 = performance.now();
+    subsequentTimes.push(+(t5 - t4).toFixed(1));
+  }
+  return {
+    load_ms: +(t1 - t0).toFixed(1),
+    first_infer_ms: +(t3 - t2).toFixed(1),
+    subsequent_infer_ms: subsequentTimes
+  };
 }
 async function main() {
   console.log(`Task   : ${task}`);
   console.log(`Mode   : ${mode}`);
   console.log(`Repeats: ${repeats}`);
+  console.log(`DType  : ${dtype || 'auto'}`);
   console.log(`Cache  : ${cacheDir}`);
   const loads: number[] = [];
   const firsts: number[] = [];
+  const subsequents: number[] = [];
   if (mode === "warm") {
     // Fresh cache dir, prefetch once (not measured), then measure N times
     ensureEmptyDir(cacheDir);
+    const warmOptions: any = {};
+    if (dtype) warmOptions.dtype = dtype;
+    const warm = await pipeline(task, modelId, warmOptions);
     await warm("warmup");
     for (let i = 0; i < repeats; i++) {
       const r = await benchOnce();
       loads.push(r.load_ms);
       firsts.push(r.first_infer_ms);
+      subsequents.push(...r.subsequent_infer_ms);
     }
   } else {
     // cold: delete cache dir before each measured run
       const r = await benchOnce();
       loads.push(r.load_ms);
       firsts.push(r.first_infer_ms);
+      subsequents.push(...r.subsequent_infer_ms);
     }
   }
+  const result: any = {
     platform: "node",
     runtime: `node-${process.versions.node}`,
     model: modelId,
     cacheDir,
     metrics: {
       load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
+      first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts },
+      subsequent_infer_ms: { p50: +percentile(subsequents, 0.5).toFixed(1), p90: +percentile(subsequents, 0.9).toFixed(1), raw: subsequents }
     }
   };
+  if (dtype) result.dtype = dtype;
   console.log(JSON.stringify(result, null, 2));
 }

bench-web/src/cli.ts CHANGED Viewed

@@ -15,6 +15,7 @@ function getArg(name: string, def?: string) {
 const mode = getArg("mode", "warm") as "warm" | "cold";
 const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
 const device = getArg("device", "webgpu") as "webgpu" | "wasm";
 const browserType = getArg("browser", "chromium") as "chromium" | "firefox" | "webkit";
 const headed = getArg("headed") === "true";
@@ -24,6 +25,7 @@ async function main() {
   console.log(`Mode    : ${mode}`);
   console.log(`Repeats : ${repeats}`);
   console.log(`Device  : ${device}`);
   console.log(`Browser : ${browserType}`);
   console.log(`Headed  : ${headed}`);
@@ -84,10 +86,25 @@ async function main() {
     console.log("\nStarting benchmark...");
     // Use the exposed CLI function from main.ts
-    const result = await page.evaluate(({ modelId, task, mode, repeats, device }) => {
-      return (window as any).runBenchmarkCLI({ modelId, task, mode, repeats, device });
-    }, { modelId, task, mode, repeats, device });
     console.log("\n" + JSON.stringify(result, null, 2));

 const mode = getArg("mode", "warm") as "warm" | "cold";
 const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
 const device = getArg("device", "webgpu") as "webgpu" | "wasm";
+const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.
 const browserType = getArg("browser", "chromium") as "chromium" | "firefox" | "webkit";
 const headed = getArg("headed") === "true";
   console.log(`Mode    : ${mode}`);
   console.log(`Repeats : ${repeats}`);
   console.log(`Device  : ${device}`);
+  console.log(`DType   : ${dtype || 'auto'}`);
   console.log(`Browser : ${browserType}`);
   console.log(`Headed  : ${headed}`);
     console.log("\nStarting benchmark...");
+    // Check WebGPU availability if using webgpu device
+    if (device === "webgpu") {
+      const gpuAvailable = await page.evaluate(() => {
+        return 'gpu' in navigator;
+      });
+      if (!gpuAvailable) {
+        console.error("\n❌ WebGPU is not available in this browser!");
+        console.error("Make sure to use --enable-unsafe-webgpu flag for Chromium.");
+        throw new Error("WebGPU not available");
+      }
+      console.log("✓ WebGPU is available");
+    }
     // Use the exposed CLI function from main.ts
+    const result = await page.evaluate(({ modelId, task, mode, repeats, device, dtype }) => {
+      return (window as any).runBenchmarkCLI({ modelId, task, mode, repeats, device, dtype });
+    }, { modelId, task, mode, repeats, device, dtype });
     console.log("\n" + JSON.stringify(result, null, 2));

bench-web/src/main.ts CHANGED Viewed

@@ -36,34 +36,53 @@ async function clearCaches({ clearSession = false }: { clearSession?: boolean }
     if (clearSession) sessionStorage.clear();
   } catch { }
 }
-async function benchOnce(modelId: string, task: string, device: string) {
   const t0 = now();
-  const pipe = await pipeline(task, modelId, { device });
   const t1 = now();
   const t2 = now();
   await pipe("The quick brown fox jumps over the lazy dog.");
   const t3 = now();
-  return { load_ms: +(t1 - t0).toFixed(1), first_infer_ms: +(t3 - t2).toFixed(1) };
 }
-async function runMany(modelId: string, task: string, repeats: number, device: string) {
   const loads: number[] = [];
   const firsts: number[] = [];
   for (let i = 0; i < repeats; i++) {
-    const r = await benchOnce(modelId, task, device);
     loads.push(r.load_ms);
     firsts.push(r.first_infer_ms);
   }
   return {
     load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
     first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts },
   };
 }
-async function runCold(modelId: string, task: string, repeats: number, device: string) {
   statusEl.textContent = "clearing caches (cold)...";
   await clearCaches();
   statusEl.textContent = "running (cold)...";
-  const metrics = await runMany(modelId, task, repeats, device);
-  return {
     platform: "browser",
     runtime: navigator.userAgent,
     mode: "cold",
@@ -74,14 +93,18 @@ async function runCold(modelId: string, task: string, repeats: number, device: s
     metrics,
     notes: "Only the 1st iteration is strictly cold in a single page session."
   };
 }
-async function runWarmDirect(modelId: string, task: string, repeats: number, device: string) {
   statusEl.textContent = "prefetching (warmup) ...";
-  const p = await pipeline(task, modelId, { device });
   await p("warmup");
   statusEl.textContent = "running (warm)...";
-  const metrics = await runMany(modelId, task, repeats, device);
-  return {
     platform: "browser",
     runtime: navigator.userAgent,
     mode: "warm",
@@ -91,19 +114,23 @@ async function runWarmDirect(modelId: string, task: string, repeats: number, dev
     device,
     metrics
   };
 }
-async function runWarm(modelId: string, task: string, repeats: number, device: string) {
   const flag = sessionStorage.getItem("__warm_ready__");
   if (!flag) {
     statusEl.textContent = "prefetching (warmup) ...";
-    const p = await pipeline(task, modelId, { device });
     await p("warmup");
-    sessionStorage.setItem("__warm_ready__", JSON.stringify({ modelId, task, repeats, device }));
     location.reload();
     return null;
   } else {
     sessionStorage.removeItem("__warm_ready__");
-    return await runWarmDirect(modelId, task, repeats, device);
   }
 }
 async function run() {
@@ -133,11 +160,11 @@ btn.addEventListener("click", () => {
 });
 // Expose for CLI use
-(window as any).runBenchmarkCLI = async function (params: { modelId: string, task: string, mode: string, repeats: number, device: string }) {
   if (params.mode === "cold") {
-    return await runCold(params.modelId, params.task, params.repeats, params.device);
   } else {
     // For warm, use the direct function that skips reload logic
-    return await runWarmDirect(params.modelId, params.task, params.repeats, params.device);
   }
 };

     if (clearSession) sessionStorage.clear();
   } catch { }
 }
+async function benchOnce(modelId: string, task: string, device: string, dtype?: string) {
   const t0 = now();
+  const options: any = { device };
+  if (dtype) options.dtype = dtype;
+  const pipe = await pipeline(task, modelId, options);
   const t1 = now();
   const t2 = now();
   await pipe("The quick brown fox jumps over the lazy dog.");
   const t3 = now();
+  // Run additional inferences to measure subsequent performance
+  const subsequentTimes: number[] = [];
+  for (let i = 0; i < 3; i++) {
+    const t4 = now();
+    await pipe("The quick brown fox jumps over the lazy dog.");
+    const t5 = now();
+    subsequentTimes.push(+(t5 - t4).toFixed(1));
+  }
+  return {
+    load_ms: +(t1 - t0).toFixed(1),
+    first_infer_ms: +(t3 - t2).toFixed(1),
+    subsequent_infer_ms: subsequentTimes
+  };
 }
+async function runMany(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
   const loads: number[] = [];
   const firsts: number[] = [];
+  const subsequents: number[] = [];
   for (let i = 0; i < repeats; i++) {
+    const r = await benchOnce(modelId, task, device, dtype);
     loads.push(r.load_ms);
     firsts.push(r.first_infer_ms);
+    subsequents.push(...r.subsequent_infer_ms);
   }
   return {
     load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
     first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts },
+    subsequent_infer_ms: { p50: +percentile(subsequents, 0.5).toFixed(1), p90: +percentile(subsequents, 0.9).toFixed(1), raw: subsequents },
   };
 }
+async function runCold(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
   statusEl.textContent = "clearing caches (cold)...";
   await clearCaches();
   statusEl.textContent = "running (cold)...";
+  const metrics = await runMany(modelId, task, repeats, device, dtype);
+  const result: any = {
     platform: "browser",
     runtime: navigator.userAgent,
     mode: "cold",
     metrics,
     notes: "Only the 1st iteration is strictly cold in a single page session."
   };
+  if (dtype) result.dtype = dtype;
+  return result;
 }
+async function runWarmDirect(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
   statusEl.textContent = "prefetching (warmup) ...";
+  const options: any = { device };
+  if (dtype) options.dtype = dtype;
+  const p = await pipeline(task, modelId, options);
   await p("warmup");
   statusEl.textContent = "running (warm)...";
+  const metrics = await runMany(modelId, task, repeats, device, dtype);
+  const result: any = {
     platform: "browser",
     runtime: navigator.userAgent,
     mode: "warm",
     device,
     metrics
   };
+  if (dtype) result.dtype = dtype;
+  return result;
 }
+async function runWarm(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
   const flag = sessionStorage.getItem("__warm_ready__");
   if (!flag) {
     statusEl.textContent = "prefetching (warmup) ...";
+    const options: any = { device };
+    if (dtype) options.dtype = dtype;
+    const p = await pipeline(task, modelId, options);
     await p("warmup");
+    sessionStorage.setItem("__warm_ready__", JSON.stringify({ modelId, task, repeats, device, dtype }));
     location.reload();
     return null;
   } else {
     sessionStorage.removeItem("__warm_ready__");
+    return await runWarmDirect(modelId, task, repeats, device, dtype);
   }
 }
 async function run() {
 });
 // Expose for CLI use
+(window as any).runBenchmarkCLI = async function (params: { modelId: string, task: string, mode: string, repeats: number, device: string, dtype?: string }) {
   if (params.mode === "cold") {
+    return await runCold(params.modelId, params.task, params.repeats, params.device, params.dtype);
   } else {
     // For warm, use the direct function that skips reload logic
+    return await runWarmDirect(params.modelId, params.task, params.repeats, params.device, params.dtype);
   }
 };