upload result to HF dataset
- bench/.env.example +13 -0
- bench/bench/.gitignore +1 -0
- bench/docs/hf-dataset-integration.md +215 -0
- bench/package-lock.json +106 -0
- bench/package.json +2 -0
- bench/src/server/hf-dataset.ts +121 -0
- bench/src/server/index.ts +41 -1
bench/.env.example ADDED
@@ -0,0 +1,13 @@
+# Hugging Face Dataset Configuration
+# Repository to push benchmark results to (format: username/dataset-name)
+HF_DATASET_REPO=whitphx/transformersjs-performance-leaderboard-results-dev
+
+# Hugging Face API Token (required for pushing to datasets)
+# Get your token from: https://huggingface.co/settings/tokens
+HF_TOKEN=your_hf_token_here
+
+# Local benchmark results directory
+BENCHMARK_RESULTS_DIR=./benchmark-results
+
+# Server configuration
+PORT=7860
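These variables are read by the server through `dotenv` (added as a dependency in this commit); the server itself only checks that `HF_DATASET_REPO` and `HF_TOKEN` are present. A minimal sketch of loading and validating them with `zod` (already a dependency); the schema below is illustrative, not the server's actual validation:

```ts
import { config as dotenvConfig } from "dotenv";
import { z } from "zod";

// Load .env into process.env (variables already set by the environment win).
dotenvConfig();

// Illustrative schema; not part of the commit.
const EnvSchema = z.object({
  HF_DATASET_REPO: z.string().optional(),            // username/dataset-name
  HF_TOKEN: z.string().optional(),                    // write-scoped HF token
  BENCHMARK_RESULTS_DIR: z.string().default("./benchmark-results"),
  PORT: z.coerce.number().default(7860),
});

const env = EnvSchema.parse(process.env);
console.log(`Results dir: ${env.BENCHMARK_RESULTS_DIR}, port: ${env.PORT}`);
```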
bench/bench/.gitignore ADDED
@@ -0,0 +1 @@
+.env
bench/docs/hf-dataset-integration.md ADDED
@@ -0,0 +1,215 @@
+# Hugging Face Dataset Integration
+
+The benchmark server can automatically upload results to a Hugging Face Dataset repository for centralized storage and sharing.
+
+## Features
+
+- **Automatic Upload**: Results are automatically pushed to the HF Dataset when benchmarks complete
+- **File Structure Preservation**: Uses the same path structure: `{task}/{org}/{model}/{params}.json`
+- **JSON Format**: Results are stored as JSON (not JSONL) for better Dataset compatibility
+- **Overwrite Strategy**: Each configuration gets a single file that is overwritten with the latest result
+- **Error Tracking**: Failed benchmarks are also uploaded to track issues
+
+## Setup
+
+### 1. Create a Hugging Face Dataset
+
+1. Go to https://huggingface.co/new-dataset
+2. Create a new dataset (e.g., `username/transformersjs-benchmark-results`)
+3. Keep it public or private based on your needs
+
+### 2. Get Your HF Token
+
+1. Go to https://huggingface.co/settings/tokens
+2. Create a new token with `write` permissions
+3. Copy the token
+
+### 3. Configure Environment Variables
+
+Create or update the `.env` file in the `bench` directory:
+
+```bash
+# Hugging Face Dataset Configuration
+HF_DATASET_REPO=whitphx/transformersjs-performance-leaderboard-results-dev
+HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
+# Optional: Local storage directory
+BENCHMARK_RESULTS_DIR=./benchmark-results
+
+# Optional: Server port
+PORT=7860
+```
+
+**Important**: Never commit `.env` to git. It's already in `.gitignore`.
+
+## Usage
+
+Once configured, the server will automatically upload results:
+
+```bash
+# Start the server
+npm run server
+
+# You should see:
+# 🤗 HF Dataset upload enabled: username/transformersjs-benchmark-results
+```
+
+When benchmarks complete, you'll see:
+
+```
+✅ Completed: abc-123 in 5.2s
+✅ Benchmark abc-123 saved to file
+✅ Uploaded to HF Dataset: feature-extraction/Xenova/all-MiniLM-L6-v2/node_warm_cpu_fp32_b1.json
+```
+
+## File Structure in HF Dataset
+
+The dataset will have the same structure as local storage:
+
+```
+feature-extraction/
+├── Xenova/
+│   ├── all-MiniLM-L6-v2/
+│   │   ├── node_warm_cpu_fp32_b1.json
+│   │   ├── node_warm_webgpu_fp16_b1.json
+│   │   └── web_warm_wasm_b1_chromium.json
+│   └── distilbert-base-uncased/
+│       └── node_warm_cpu_fp32_b1.json
+text-classification/
+└── Xenova/
+    └── distilbert-base-uncased/
+        └── node_warm_cpu_fp32_b1.json
+```
+
+## JSON Format
+
+Each file contains a single benchmark result (not multiple runs):
+
+```json
+{
+  "id": "abc-123-456",
+  "platform": "node",
+  "modelId": "Xenova/all-MiniLM-L6-v2",
+  "task": "feature-extraction",
+  "mode": "warm",
+  "repeats": 3,
+  "dtype": "fp32",
+  "batchSize": 1,
+  "device": "cpu",
+  "timestamp": 1234567890,
+  "status": "completed",
+  "result": {
+    "metrics": { ... },
+    "environment": { ... }
+  }
+}
+```
+
+## Behavior
+
+### Overwriting Results
+
+- Each benchmark configuration maps to a single file
+- New results **overwrite** the existing file
+- Only the **latest** result is kept per configuration
+- This ensures the dataset always has current data
+
+### Local vs Remote Storage
+
+- **Local (JSONL)**: Keeps the history of all runs (append-only)
+- **Remote (JSON)**: Keeps only the latest result (overwrite)
+
+This dual approach allows:
+- Local: Full history for analysis
+- Remote: Clean, current results for leaderboards
+
+### Failed Benchmarks
+
+Failed benchmarks are also uploaded to track:
+- Which models/configs have issues
+- Error types (memory errors, etc.)
+- Environmental context
+
+Example failed result:
+
+```json
+{
+  "id": "def-456-789",
+  "status": "failed",
+  "error": "Benchmark failed with code 1: ...",
+  "result": {
+    "error": {
+      "type": "memory_error",
+      "message": "Aborted(). Build with -sASSERTIONS for more info.",
+      "stage": "load"
+    },
+    "environment": { ... }
+  }
+}
+```
+
+## Git Commits
+
+Each upload creates a git commit in the dataset with a message like:
+
+```
+Update benchmark: Xenova/all-MiniLM-L6-v2 (node/feature-extraction)
+
+Benchmark ID: abc-123-456
+Status: completed
+Timestamp: 2025-10-13T06:48:57.481Z
+```
+
+## Disabling Upload
+
+To disable HF Dataset upload:
+
+1. Remove `HF_TOKEN` from `.env`, or
+2. Remove both `HF_DATASET_REPO` and `HF_TOKEN`
+
+The server will show:
+
+```
+🤗 HF Dataset upload disabled (set HF_DATASET_REPO and HF_TOKEN to enable)
+```
+
+## Error Handling
+
+If the HF upload fails:
+- The error is logged but doesn't fail the benchmark
+- Local storage still succeeds
+- You can retry manually or fix the configuration
+
+Example error:
+
+```
+❌ Failed to upload benchmark abc-123 to HF Dataset: Authentication failed
+```
+
+## API Endpoint (Future)
+
+Currently, uploads happen automatically. In the future, we could add:
+
+```bash
+# Manually trigger upload of a specific result
+POST /api/benchmark/:id/upload
+
+# Re-upload all local results to HF Dataset
+POST /api/benchmarks/sync
+```
+
+## Development vs Production
+
+Use different dataset repositories for development and production:
+
+**Development** (`.env`):
+```bash
+HF_DATASET_REPO=whitphx/transformersjs-performance-leaderboard-results-dev
+```
+
+**Production** (deployed environment):
+```bash
+HF_DATASET_REPO=whitphx/transformersjs-performance-leaderboard-results
+```
+
+This allows testing without polluting production data.
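Because each configuration maps to a single JSON file at a stable path, a consumer (for example a leaderboard UI) can read the latest result for a configuration directly over HTTP. A minimal sketch, assuming the example repository and file path shown in the document above (Node 18+ with global `fetch`); a private dataset would additionally need an `Authorization: Bearer <HF_TOKEN>` header:

```ts
// Fetch the latest result for one configuration straight from the dataset.
// Repo and path are the examples from the doc; adjust to your own dataset.
const repo = "whitphx/transformersjs-performance-leaderboard-results-dev";
const path = "feature-extraction/Xenova/all-MiniLM-L6-v2/node_warm_cpu_fp32_b1.json";

const res = await fetch(`https://huggingface.co/datasets/${repo}/resolve/main/${path}`);
if (!res.ok) {
  throw new Error(`Fetch failed: ${res.status} ${res.statusText}`);
}
const benchmark = await res.json();
console.log(benchmark.status, benchmark.result?.metrics);
```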
bench/package-lock.json CHANGED
@@ -9,7 +9,9 @@
       "version": "0.0.2",
       "dependencies": {
         "@hono/node-server": "^1.19.5",
+        "@huggingface/hub": "^2.6.12",
         "@huggingface/transformers": "^3.7.4",
+        "dotenv": "^17.2.3",
         "hono": "^4.9.10",
         "zod": "^4.1.11"
       },
@@ -485,6 +487,24 @@
         "hono": "^4"
       }
     },
+    "node_modules/@huggingface/hub": {
+      "version": "2.6.12",
+      "resolved": "https://registry.npmjs.org/@huggingface/hub/-/hub-2.6.12.tgz",
+      "integrity": "sha512-/AZN2LAtrt4B8S83/Ru4wMorHY4NPwIYXo60SkuD6c/Mr135t1UqffD1vqcqtDYVf0hhLiyVmm1LMU1CXi8iKQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@huggingface/tasks": "^0.19.50"
+      },
+      "bin": {
+        "hfjs": "dist/cli.js"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "cli-progress": "^3.12.0"
+      }
+    },
     "node_modules/@huggingface/jinja": {
       "version": "0.5.1",
       "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.1.tgz",
@@ -494,6 +514,12 @@
         "node": ">=18"
       }
     },
+    "node_modules/@huggingface/tasks": {
+      "version": "0.19.50",
+      "resolved": "https://registry.npmjs.org/@huggingface/tasks/-/tasks-0.19.50.tgz",
+      "integrity": "sha512-kHrfiDsJttkuwpdp7PgFiFHaK9rj+COJTIZ+221gk9vdF4B5QySW7hQT4aOGkwwZP+4qbXGhjMNGg/bxOq+LwA==",
+      "license": "MIT"
+    },
     "node_modules/@huggingface/transformers": {
       "version": "3.7.5",
       "resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-3.7.5.tgz",
@@ -1349,6 +1375,16 @@
         "undici-types": "~7.14.0"
       }
     },
+    "node_modules/ansi-regex": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
+      "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/boolean": {
       "version": "3.2.0",
       "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz",
@@ -1365,6 +1401,19 @@
         "node": ">=18"
       }
     },
+    "node_modules/cli-progress": {
+      "version": "3.12.0",
+      "resolved": "https://registry.npmjs.org/cli-progress/-/cli-progress-3.12.0.tgz",
+      "integrity": "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "string-width": "^4.2.3"
+      },
+      "engines": {
+        "node": ">=4"
+      }
+    },
     "node_modules/define-data-property": {
       "version": "1.1.4",
       "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
@@ -1414,6 +1463,25 @@
       "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==",
       "license": "MIT"
     },
+    "node_modules/dotenv": {
+      "version": "17.2.3",
+      "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.2.3.tgz",
+      "integrity": "sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==",
+      "license": "BSD-2-Clause",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://dotenvx.com"
+      }
+    },
+    "node_modules/emoji-regex": {
+      "version": "8.0.0",
+      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
+      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
+      "license": "MIT",
+      "optional": true
+    },
     "node_modules/es-define-property": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
@@ -1616,6 +1684,16 @@
         "node": ">=16.9.0"
       }
     },
+    "node_modules/is-fullwidth-code-point": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
+      "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/json-stringify-safe": {
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
@@ -2003,6 +2081,34 @@
       "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==",
      "license": "BSD-3-Clause"
     },
+    "node_modules/string-width": {
+      "version": "4.2.3",
+      "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
+      "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "emoji-regex": "^8.0.0",
+        "is-fullwidth-code-point": "^3.0.0",
+        "strip-ansi": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/strip-ansi": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
+      "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "ansi-regex": "^5.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/tar": {
       "version": "7.5.1",
       "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.1.tgz",
bench/package.json CHANGED
@@ -16,7 +16,9 @@
   },
   "dependencies": {
     "@hono/node-server": "^1.19.5",
+    "@huggingface/hub": "^2.6.12",
     "@huggingface/transformers": "^3.7.4",
+    "dotenv": "^17.2.3",
     "hono": "^4.9.10",
     "zod": "^4.1.11"
   },
bench/src/server/hf-dataset.ts ADDED
@@ -0,0 +1,121 @@
+/**
+ * Hugging Face Dataset Integration
+ *
+ * Uploads benchmark results to a Hugging Face Dataset repository.
+ * - Preserves file path structure: {task}/{org}/{model}/{params}.json
+ * - Uses JSON format (not JSONL)
+ * - Overwrites existing files instead of appending
+ */
+
+import { uploadFile, listFiles } from "@huggingface/hub";
+import { generateBenchmarkPath, type BenchmarkSettings } from "../core/benchmark-id.js";
+import type { QueuedBenchmark } from "./queue.js";
+
+export interface HFDatasetConfig {
+  repo: string;
+  token: string;
+}
+
+export class HFDatasetUploader {
+  private config: HFDatasetConfig | null = null;
+
+  constructor(config?: HFDatasetConfig) {
+    if (config && config.repo && config.token) {
+      this.config = config;
+    }
+  }
+
+  /**
+   * Check if HF Dataset upload is enabled
+   */
+  isEnabled(): boolean {
+    return this.config !== null;
+  }
+
+  /**
+   * Get the HF Dataset file path for a benchmark
+   */
+  private getHFFilePath(benchmark: QueuedBenchmark): string {
+    const settings: BenchmarkSettings = {
+      platform: benchmark.platform,
+      modelId: benchmark.modelId,
+      task: benchmark.task,
+      mode: benchmark.mode,
+      device: benchmark.device,
+      dtype: benchmark.dtype,
+      batchSize: benchmark.batchSize,
+      browser: benchmark.browser,
+      headed: benchmark.headed,
+    };
+
+    const { fullPath } = generateBenchmarkPath(settings);
+    // Replace .jsonl extension with .json
+    return fullPath.replace(/\.jsonl$/, ".json");
+  }
+
+  /**
+   * Upload a benchmark result to HF Dataset
+   * Overwrites the file if it already exists
+   */
+  async uploadResult(benchmark: QueuedBenchmark): Promise<void> {
+    if (!this.config) {
+      throw new Error("HF Dataset upload is not configured");
+    }
+
+    const filePath = this.getHFFilePath(benchmark);
+
+    // Convert benchmark to JSON string
+    const content = JSON.stringify(benchmark, null, 2);
+    const blob = new Blob([content], { type: "application/json" });
+
+    try {
+      // Upload file to HF Dataset (overwrites if exists)
+      await uploadFile({
+        repo: {
+          type: "dataset",
+          name: this.config.repo,
+        },
+        credentials: { accessToken: this.config.token },
+        file: {
+          path: filePath,
+          content: blob,
+        },
+        commitTitle: `Update benchmark: ${benchmark.modelId} (${benchmark.platform}/${benchmark.task})`,
+        commitDescription: `Benchmark ID: ${benchmark.id}\nStatus: ${benchmark.status}\nTimestamp: ${new Date(benchmark.timestamp).toISOString()}`,
+      });
+
+      console.log(`✅ Uploaded to HF Dataset: ${filePath}`);
+    } catch (error: any) {
+      console.error(`❌ Failed to upload to HF Dataset: ${filePath}`, error.message);
+      throw error;
+    }
+  }
+
+  /**
+   * List all files in the HF Dataset
+   */
+  async listAllFiles(): Promise<string[]> {
+    if (!this.config) {
+      throw new Error("HF Dataset upload is not configured");
+    }
+
+    try {
+      const files = [];
+      for await (const file of listFiles({
+        repo: {
+          type: "dataset",
+          name: this.config.repo,
+        },
+        credentials: { accessToken: this.config.token },
+      })) {
+        if (file.path.endsWith(".json")) {
+          files.push(file.path);
+        }
+      }
+      return files;
+    } catch (error: any) {
+      console.error("❌ Failed to list files from HF Dataset", error.message);
+      throw error;
+    }
+  }
+}
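A minimal usage sketch of this class outside the server, assuming a `QueuedBenchmark`-shaped object; the exact type lives in `./queue.js` and is not shown in this diff, so the field values below are illustrative:

```ts
import { HFDatasetUploader } from "./hf-dataset.js";

// Illustrative only: the real QueuedBenchmark type is defined in ./queue.js and
// may contain more fields than shown here, hence the cast.
const benchmark = {
  id: "abc-123-456",
  platform: "node",
  modelId: "Xenova/all-MiniLM-L6-v2",
  task: "feature-extraction",
  mode: "warm",
  device: "cpu",
  dtype: "fp32",
  batchSize: 1,
  timestamp: Date.now(),
  status: "completed",
  result: { metrics: {}, environment: {} },
} as any;

const uploader = new HFDatasetUploader({
  repo: process.env.HF_DATASET_REPO ?? "",
  token: process.env.HF_TOKEN ?? "",
});

if (uploader.isEnabled()) {
  // Overwrites {task}/{org}/{model}/{params}.json in the dataset repo.
  await uploader.uploadResult(benchmark);
  console.log(await uploader.listAllFiles());
}
```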
bench/src/server/index.ts CHANGED
@@ -3,17 +3,38 @@ import { cors } from "hono/cors";
 import { serve } from "@hono/node-server";
 import { BenchmarkQueue, BenchmarkRequest } from "./queue.js";
 import { BenchmarkStorage } from "./storage.js";
+import { HFDatasetUploader } from "./hf-dataset.js";
 import { randomUUID } from "crypto";
 import { z } from "zod";
+import { config as dotenvConfig } from "dotenv";
+
+// Load environment variables
+dotenvConfig();
 
 const app = new Hono();
 const queue = new BenchmarkQueue();
 const storage = new BenchmarkStorage();
 
+// Initialize HF Dataset uploader if configured
+const hfUploader = new HFDatasetUploader(
+  process.env.HF_DATASET_REPO && process.env.HF_TOKEN
+    ? {
+        repo: process.env.HF_DATASET_REPO,
+        token: process.env.HF_TOKEN,
+      }
+    : undefined
+);
+
+if (hfUploader.isEnabled()) {
+  console.log(`🤗 HF Dataset upload enabled: ${process.env.HF_DATASET_REPO}`);
+} else {
+  console.log("🤗 HF Dataset upload disabled (set HF_DATASET_REPO and HF_TOKEN to enable)");
+}
+
 // Enable CORS for development
 app.use("/*", cors());
 
-// Store completed benchmarks to file
+// Store completed benchmarks to file and upload to HF Dataset
 queue.on("completed", async (benchmark) => {
   try {
     await storage.appendResult(benchmark);
@@ -21,6 +42,16 @@ queue.on("completed", async (benchmark) => {
   } catch (error) {
     console.error(`❌ Failed to save benchmark ${benchmark.id}:`, error);
   }
+
+  // Upload to HF Dataset if enabled
+  if (hfUploader.isEnabled()) {
+    try {
+      await hfUploader.uploadResult(benchmark);
+    } catch (error) {
+      console.error(`❌ Failed to upload benchmark ${benchmark.id} to HF Dataset:`, error);
+      // Don't fail the whole operation if HF upload fails
+    }
+  }
 });
 
 queue.on("failed", async (benchmark) => {
@@ -30,6 +61,15 @@ queue.on("failed", async (benchmark) => {
   } catch (error) {
     console.error(`❌ Failed to save failed benchmark ${benchmark.id}:`, error);
   }
+
+  // Also upload failed benchmarks to HF Dataset if enabled (for tracking failures)
+  if (hfUploader.isEnabled()) {
+    try {
+      await hfUploader.uploadResult(benchmark);
+    } catch (error) {
+      console.error(`❌ Failed to upload failed benchmark ${benchmark.id} to HF Dataset:`, error);
+    }
+  }
 });
 
 // Log queue events
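The documentation above lists `POST /api/benchmarks/sync` as a possible future endpoint. A rough sketch of what that could look like on this Hono app, assuming a hypothetical `storage.getAllResults()` helper that is not part of this diff; only `hfUploader.isEnabled()` and `uploadResult()` are taken from the code above:

```ts
// Hypothetical sketch of the future sync endpoint described in the docs.
// storage.getAllResults(): Promise<QueuedBenchmark[]> is assumed, not real.
app.post("/api/benchmarks/sync", async (c) => {
  if (!hfUploader.isEnabled()) {
    return c.json({ error: "HF Dataset upload is not configured" }, 400);
  }

  const results = await storage.getAllResults(); // hypothetical helper
  let uploaded = 0;
  const failures: string[] = [];

  for (const benchmark of results) {
    try {
      await hfUploader.uploadResult(benchmark); // overwrites the per-config JSON file
      uploaded++;
    } catch {
      failures.push(benchmark.id);
    }
  }

  return c.json({ uploaded, failed: failures });
});
```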