Specify dtype param
Files changed:
- .claude/settings.local.json  +10 -1
- bench-node/src/index.ts      +29 -5
- bench-web/src/cli.ts         +20 -3
- bench-web/src/main.ts        +46 -19
.claude/settings.local.json

@@ -6,7 +6,16 @@
       "Bash(npm run bench:cli:*)",
       "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device wasm)",
       "Bash(timeout 120 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode warm --repeats 2 --device webgpu)",
-      "Bash(timeout 180 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode cold --repeats 2 --device wasm)"
+      "Bash(timeout 180 npm run bench:cli -- Xenova/all-MiniLM-L6-v2 feature-extraction --mode cold --repeats 2 --device wasm)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu)",
+      "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype fp32)",
+      "Bash(cd:*)",
+      "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype q8)",
+      "Bash(timeout 120 npm run bench:cli -- Xenova/distilbert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype q8)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype fp32)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device webgpu --dtype fp32)",
+      "Bash(timeout 180 npm run bench:cli -- Xenova/bert-base-uncased feature-extraction --mode warm --repeats 2 --device wasm --dtype q8)"
     ],
     "deny": [],
     "ask": []
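The new allowlist entries pass a --dtype flag through to the benchmark CLIs. For orientation, here is a hypothetical sketch of the getArg() helper those CLIs use to pick the flag up; the signature appears in the hunk headers below, but the body is not part of this diff and is assumed:

function getArg(name: string, def?: string): string | undefined {
  // Scan argv for "--name value" pairs, e.g. "--dtype q8".
  const argv = process.argv.slice(2);
  const i = argv.indexOf(`--${name}`);
  if (i !== -1 && i + 1 < argv.length) return argv[i + 1];
  return def; // fall back to the default, if any
}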
bench-node/src/index.ts

@@ -17,6 +17,7 @@ function getArg(name: string, def?: string) {
 const mode = (getArg("mode", "warm") as "warm" | "cold");
 const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
 const cacheDir = getArg("cache-dir", path.resolve(".bench-cache/default"))!;
+const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.
 
 // Point library cache to a dedicated directory for controllable cold/warm behavior
 env.cacheDir = cacheDir;
@@ -35,14 +36,29 @@ function percentile(values: number[], q: number) {
 
 async function benchOnce() {
   const t0 = performance.now();
-  const pipe = await pipeline(task, modelId);
+  const options: any = {};
+  if (dtype) options.dtype = dtype;
+  const pipe = await pipeline(task, modelId, options);
   const t1 = performance.now();
 
   const t2 = performance.now();
   await pipe("The quick brown fox jumps over the lazy dog.");
   const t3 = performance.now();
 
-  return { load_ms: +(t1 - t0).toFixed(1), first_infer_ms: +(t3 - t2).toFixed(1) };
+  // Run additional inferences to measure subsequent performance
+  const subsequentTimes: number[] = [];
+  for (let i = 0; i < 3; i++) {
+    const t4 = performance.now();
+    await pipe("The quick brown fox jumps over the lazy dog.");
+    const t5 = performance.now();
+    subsequentTimes.push(+(t5 - t4).toFixed(1));
+  }
+
+  return {
+    load_ms: +(t1 - t0).toFixed(1),
+    first_infer_ms: +(t3 - t2).toFixed(1),
+    subsequent_infer_ms: subsequentTimes
+  };
 }
 
 async function main() {
@@ -50,21 +66,26 @@ async function main() {
   console.log(`Task : ${task}`);
   console.log(`Mode : ${mode}`);
   console.log(`Repeats: ${repeats}`);
+  console.log(`DType : ${dtype || 'auto'}`);
   console.log(`Cache : ${cacheDir}`);
 
   const loads: number[] = [];
   const firsts: number[] = [];
+  const subsequents: number[] = [];
 
   if (mode === "warm") {
     // Fresh cache dir, prefetch once (not measured), then measure N times
     ensureEmptyDir(cacheDir);
-    const warm = await pipeline(task, modelId);
+    const warmOptions: any = {};
+    if (dtype) warmOptions.dtype = dtype;
+    const warm = await pipeline(task, modelId, warmOptions);
     await warm("warmup");
 
     for (let i = 0; i < repeats; i++) {
       const r = await benchOnce();
       loads.push(r.load_ms);
       firsts.push(r.first_infer_ms);
+      subsequents.push(...r.subsequent_infer_ms);
     }
   } else {
     // cold: delete cache dir before each measured run
@@ -73,10 +94,11 @@ async function main() {
       const r = await benchOnce();
       loads.push(r.load_ms);
       firsts.push(r.first_infer_ms);
+      subsequents.push(...r.subsequent_infer_ms);
     }
   }
 
-  const result = {
+  const result: any = {
     platform: "node",
     runtime: `node-${process.versions.node}`,
     model: modelId,
@@ -86,9 +108,11 @@ async function main() {
     cacheDir,
     metrics: {
       load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
-      first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts }
+      first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts },
+      subsequent_infer_ms: { p50: +percentile(subsequents, 0.5).toFixed(1), p90: +percentile(subsequents, 0.9).toFixed(1), raw: subsequents }
    }
  };
+  if (dtype) result.dtype = dtype;
 
   console.log(JSON.stringify(result, null, 2));
 }
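For context, dtype is the transformers.js model-loading option that selects weight precision/quantization. A minimal standalone sketch, assuming the repo uses transformers.js v3 (@huggingface/transformers); on v2 (@xenova/transformers) there is no dtype option and quantization is toggled with a boolean quantized flag instead:

import { pipeline } from "@huggingface/transformers";

// Load the 8-bit quantized ONNX weights; omitting dtype lets the
// library choose its default for the model. (Top-level await, ESM.)
const extractor = await pipeline("feature-extraction", "Xenova/all-MiniLM-L6-v2", {
  dtype: "q8",
});
const out = await extractor("The quick brown fox jumps over the lazy dog.");
console.log(out.dims); // e.g. [1, <num_tokens>, 384] token-level embeddings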
bench-web/src/cli.ts

@@ -15,6 +15,7 @@ function getArg(name: string, def?: string) {
 const mode = getArg("mode", "warm") as "warm" | "cold";
 const repeats = Math.max(1, parseInt(getArg("repeats", "3") || "3", 10));
 const device = getArg("device", "webgpu") as "webgpu" | "wasm";
+const dtype = getArg("dtype"); // optional: fp32, fp16, q8, q4, etc.
 const browserType = getArg("browser", "chromium") as "chromium" | "firefox" | "webkit";
 const headed = getArg("headed") === "true";
 
@@ -24,6 +25,7 @@ async function main() {
   console.log(`Mode : ${mode}`);
   console.log(`Repeats : ${repeats}`);
   console.log(`Device : ${device}`);
+  console.log(`DType : ${dtype || 'auto'}`);
   console.log(`Browser : ${browserType}`);
   console.log(`Headed : ${headed}`);
 
@@ -84,10 +86,25 @@ async function main() {
 
   console.log("\nStarting benchmark...");
 
+  // Check WebGPU availability if using webgpu device
+  if (device === "webgpu") {
+    const gpuAvailable = await page.evaluate(() => {
+      return 'gpu' in navigator;
+    });
+
+    if (!gpuAvailable) {
+      console.error("\n❌ WebGPU is not available in this browser!");
+      console.error("Make sure to use --enable-unsafe-webgpu flag for Chromium.");
+      throw new Error("WebGPU not available");
+    }
+
+    console.log("✓ WebGPU is available");
+  }
+
   // Use the exposed CLI function from main.ts
-  const result = await page.evaluate(({ modelId, task, mode, repeats, device }) => {
-    return (window as any).runBenchmarkCLI({ modelId, task, mode, repeats, device });
-  }, { modelId, task, mode, repeats, device });
+  const result = await page.evaluate(({ modelId, task, mode, repeats, device, dtype }) => {
+    return (window as any).runBenchmarkCLI({ modelId, task, mode, repeats, device, dtype });
+  }, { modelId, task, mode, repeats, device, dtype });
 
   console.log("\n" + JSON.stringify(result, null, 2));
 
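The WebGPU probe added above only verifies that navigator.gpu exists. A sketch of a stricter probe, assuming the Playwright Chromium instance is launched roughly as in this repo (the launch flag matches the one named in the error message), that also requests an adapter, since the API object can exist while no adapter is usable:

import { chromium } from "playwright";

async function probeWebGPU(): Promise<boolean> {
  const browser = await chromium.launch({ args: ["--enable-unsafe-webgpu"] });
  const page = await browser.newPage();
  const ok = await page.evaluate(async () => {
    if (!("gpu" in navigator)) return false;
    // requestAdapter() may resolve to null even when navigator.gpu exists.
    const adapter = await (navigator as any).gpu.requestAdapter();
    return adapter !== null;
  });
  await browser.close();
  return ok;
}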
bench-web/src/main.ts

@@ -36,34 +36,53 @@ async function clearCaches({ clearSession = false }: { clearSession?: boolean }
     if (clearSession) sessionStorage.clear();
   } catch { }
 }
-async function benchOnce(modelId: string, task: string, device: string) {
+async function benchOnce(modelId: string, task: string, device: string, dtype?: string) {
   const t0 = now();
-  const pipe = await pipeline(task, modelId, { device });
+  const options: any = { device };
+  if (dtype) options.dtype = dtype;
+  const pipe = await pipeline(task, modelId, options);
   const t1 = now();
   const t2 = now();
   await pipe("The quick brown fox jumps over the lazy dog.");
   const t3 = now();
-  return { load_ms: +(t1 - t0).toFixed(1), first_infer_ms: +(t3 - t2).toFixed(1) };
+
+  // Run additional inferences to measure subsequent performance
+  const subsequentTimes: number[] = [];
+  for (let i = 0; i < 3; i++) {
+    const t4 = now();
+    await pipe("The quick brown fox jumps over the lazy dog.");
+    const t5 = now();
+    subsequentTimes.push(+(t5 - t4).toFixed(1));
+  }
+
+  return {
+    load_ms: +(t1 - t0).toFixed(1),
+    first_infer_ms: +(t3 - t2).toFixed(1),
+    subsequent_infer_ms: subsequentTimes
+  };
 }
-async function runMany(modelId: string, task: string, repeats: number, device: string) {
+async function runMany(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
   const loads: number[] = [];
   const firsts: number[] = [];
+  const subsequents: number[] = [];
   for (let i = 0; i < repeats; i++) {
-    const r = await benchOnce(modelId, task, device);
+    const r = await benchOnce(modelId, task, device, dtype);
     loads.push(r.load_ms);
     firsts.push(r.first_infer_ms);
+    subsequents.push(...r.subsequent_infer_ms);
   }
   return {
     load_ms: { p50: +percentile(loads, 0.5).toFixed(1), p90: +percentile(loads, 0.9).toFixed(1), raw: loads },
     first_infer_ms: { p50: +percentile(firsts, 0.5).toFixed(1), p90: +percentile(firsts, 0.9).toFixed(1), raw: firsts },
+    subsequent_infer_ms: { p50: +percentile(subsequents, 0.5).toFixed(1), p90: +percentile(subsequents, 0.9).toFixed(1), raw: subsequents },
   };
 }
-async function runCold(modelId: string, task: string, repeats: number, device: string) {
+async function runCold(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
   statusEl.textContent = "clearing caches (cold)...";
   await clearCaches();
   statusEl.textContent = "running (cold)...";
-  const metrics = await runMany(modelId, task, repeats, device);
-  return {
+  const metrics = await runMany(modelId, task, repeats, device, dtype);
+  const result: any = {
     platform: "browser",
     runtime: navigator.userAgent,
     mode: "cold",
@@ -74,14 +93,18 @@ async function runCold(modelId: string, task: string, repeats: number, device: s
     metrics,
     notes: "Only the 1st iteration is strictly cold in a single page session."
   };
+  if (dtype) result.dtype = dtype;
+  return result;
 }
-async function runWarmDirect(modelId: string, task: string, repeats: number, device: string) {
+async function runWarmDirect(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
   statusEl.textContent = "prefetching (warmup) ...";
-  const p = await pipeline(task, modelId, { device });
+  const options: any = { device };
+  if (dtype) options.dtype = dtype;
+  const p = await pipeline(task, modelId, options);
   await p("warmup");
   statusEl.textContent = "running (warm)...";
-  const metrics = await runMany(modelId, task, repeats, device);
-  return {
+  const metrics = await runMany(modelId, task, repeats, device, dtype);
+  const result: any = {
     platform: "browser",
     runtime: navigator.userAgent,
     mode: "warm",
@@ -91,19 +114,23 @@ async function runWarmDirect(modelId: string, task: string, repeats: number, dev
     device,
     metrics
   };
+  if (dtype) result.dtype = dtype;
+  return result;
 }
-async function runWarm(modelId: string, task: string, repeats: number, device: string) {
+async function runWarm(modelId: string, task: string, repeats: number, device: string, dtype?: string) {
   const flag = sessionStorage.getItem("__warm_ready__");
   if (!flag) {
     statusEl.textContent = "prefetching (warmup) ...";
-    const p = await pipeline(task, modelId, { device });
+    const options: any = { device };
+    if (dtype) options.dtype = dtype;
+    const p = await pipeline(task, modelId, options);
     await p("warmup");
-    sessionStorage.setItem("__warm_ready__", JSON.stringify({ modelId, task, repeats, device }));
+    sessionStorage.setItem("__warm_ready__", JSON.stringify({ modelId, task, repeats, device, dtype }));
     location.reload();
     return null;
   } else {
     sessionStorage.removeItem("__warm_ready__");
-    return await runWarmDirect(modelId, task, repeats, device);
+    return await runWarmDirect(modelId, task, repeats, device, dtype);
   }
 }
 async function run() {
@@ -133,11 +160,11 @@ btn.addEventListener("click", () => {
 });
 
 // Expose for CLI use
-(window as any).runBenchmarkCLI = async function (params: { modelId: string, task: string, mode: string, repeats: number, device: string }) {
+(window as any).runBenchmarkCLI = async function (params: { modelId: string, task: string, mode: string, repeats: number, device: string, dtype?: string }) {
   if (params.mode === "cold") {
-    return await runCold(params.modelId, params.task, params.repeats, params.device);
+    return await runCold(params.modelId, params.task, params.repeats, params.device, params.dtype);
   } else {
     // For warm, use the direct function that skips reload logic
-    return await runWarmDirect(params.modelId, params.task, params.repeats, params.device);
+    return await runWarmDirect(params.modelId, params.task, params.repeats, params.device, params.dtype);
  }
 };
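All three metric groups funnel through the percentile() helper, which this diff references but does not show. A minimal sketch consistent with how it is called (linear interpolation between closest ranks; the repo's actual implementation may differ):

function percentile(values: number[], q: number): number {
  if (values.length === 0) return NaN;
  const sorted = [...values].sort((a, b) => a - b);
  const pos = (sorted.length - 1) * q; // fractional rank for quantile q in [0, 1]
  const lo = Math.floor(pos);
  const hi = Math.ceil(pos);
  if (lo === hi) return sorted[lo];
  return sorted[lo] + (sorted[hi] - sorted[lo]) * (pos - lo);
}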