jayminban committed on
Commit
3b622a6
·
1 Parent(s): 8c5861a

Gahahahahaha

Browse files
Files changed (2) hide show
  1. index.html +1258 -2
  2. style.css +0 -28
index.html CHANGED
@@ -34,11 +34,12 @@
34
  /* ====== Layout ====== */
35
  .wrap {
36
  max-width: 1200px;
37
- margin: 32px auto 48px;
38
  padding: 0 16px;
39
  }
 
40
  .title {
41
- margin: 0 0 12px;
42
  font-size: 22px;
43
  font-weight: 800;
44
  letter-spacing: .2px;
@@ -113,9 +114,327 @@
113
  /* Make links subtle if you add them later */
114
  a { color: var(--accent); text-decoration: none; }
115
  a:hover { text-decoration: underline; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  </style>
 
 
 
 
 
 
 
 
 
117
  </head>
118
  <body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  <div class="wrap">
120
  <h1 class="title">Overall Rank (Average Rank)</h1>
121
 
@@ -422,6 +741,943 @@
422
  <td>0.3799</td>
423
  <td>41</td>
424
  </tr>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  </tbody>
426
  </table>
427
 
 
34
  /* ====== Layout ====== */
35
  .wrap {
36
  max-width: 1200px;
37
+ margin: 15px auto 48px;
38
  padding: 0 16px;
39
  }
40
+
41
  .title {
42
+ margin: 0 0 25px;
43
  font-size: 22px;
44
  font-weight: 800;
45
  letter-spacing: .2px;
 
114
  /* Make links subtle if you add them later */
115
  a { color: var(--accent); text-decoration: none; }
116
  a:hover { text-decoration: underline; }
117
+
118
+
119
+ /* ===== Container aligns with your table width ===== */
120
+ .lb-container, .lb-divider {
121
+ max-width: 1200px;
122
+ margin: 0 auto;
123
+ padding: 0 12px;
124
+ }
125
+
126
+ /* ----- Hero ----- */
127
+ .lb-hero { margin: 28px 0 10px; }
128
+ .lb-hero h1 {
129
+ margin: 0 0 6px;
130
+ font-size: 30px;
131
+ font-weight: 800;
132
+ letter-spacing: .2px;
133
+ }
134
+
135
+ .lb-tagline {
136
+ margin: 10px 0 2px;
137
+ max-width: 860px;
138
+ text-align: left; /* left-align instead of center */
139
+ color: rgba(255, 255, 255, 0.85);
140
+ font-size: 15px;
141
+ line-height: 1;
142
+ }
143
+
144
+ .lb-tagline p {
145
+ margin: 4px 0;
146
+ }
147
+
148
+
149
+ .lb-meta {
150
+ margin-top: 10px;
151
+ display: flex; align-items: center; gap: 8px; flex-wrap: wrap;
152
+ }
153
+ .badge {
154
+ font-size: 12px;
155
+ padding: 4px 8px;
156
+ border-radius: 999px;
157
+ border: 1px solid var(--border, rgba(255,255,255,.08));
158
+ background: var(--panel, rgba(255,255,255,.03));
159
+ }
160
+ .meta-spacer { flex: 1 1 auto; }
161
+ .updated { font-size: 12px; opacity: .75; }
162
+
163
+ /* ----- Cards grid ----- */
164
+ .card-grid {
165
+ display: grid;
166
+ gap: 12px;
167
+ grid-template-columns: repeat(3, 1fr);
168
+ margin: 16px 0 12px;
169
+ }
170
+ @media (max-width: 980px) { .card-grid { grid-template-columns: 1fr; } }
171
+
172
+ .card {
173
+ background: var(--panel, rgba(255,255,255,.03));
174
+ border: 1px solid var(--border, rgba(255,255,255,.08));
175
+ border-radius: 14px;
176
+ padding: 14px;
177
+ box-shadow: var(--shadow, 0 2px 12px rgba(0,0,0,.25));
178
+ }
179
+ .card h3 { margin: 0 0 2px; font-size: 16px; }
180
+ .muted { opacity: .75; font-size: 13px; margin: 0 0 8px; }
181
+
182
+ /* chips */
183
+ .chips { display: flex; flex-wrap: wrap; gap: 8px; }
184
+ .chip {
185
+ font-size: 12px;
186
+ padding: 4px 8px;
187
+ border-radius: 8px;
188
+ background: rgba(255,255,255,.06);
189
+ border: 1px solid rgba(255,255,255,.08);
190
+ }
191
+
192
+ /* ----- Definitions card ----- */
193
+ .card-defs h3 { margin: 2px 0 8px; }
194
+ .defs { display: grid; grid-template-columns: 220px 1fr; gap: 8px 16px; }
195
+ @media (max-width: 800px) { .defs { grid-template-columns: 1fr; } }
196
+ .defs dt { font-weight: 700; }
197
+ .defs dd { margin: 0; opacity: .92; }
198
+ .defs code {
199
+ background: rgba(255,255,255,.06);
200
+ padding: 1px 4px; border-radius: 4px;
201
+ }
202
+
203
+ /* ----- Links row ----- */
204
+ .link-row {
205
+ display: flex; flex-wrap: wrap; gap: 10px;
206
+ margin: 12px 0 18px;
207
+ }
208
+ .btn {
209
+ display: inline-block;
210
+ padding: 8px 12px;
211
+ border-radius: 10px;
212
+ border: 1px solid var(--border, rgba(255,255,255,.10));
213
+ background: linear-gradient(180deg, rgba(255,255,255,.06), rgba(255,255,255,.02));
214
+ color: var(--accent, #8ab4ff); text-decoration: none;
215
+ font-weight: 600; font-size: 14px;
216
+ }
217
+ .btn:hover { text-decoration: underline; }
218
+ .btn.ghost { background: transparent; }
219
+
220
+ /* ----- Divider ----- */
221
+ .lb-divider {
222
+ height: 1px; border: none;
223
+ background: linear-gradient(to right, transparent, rgba(255,255,255,.12), transparent);
224
+ margin: 18px 0 14px;
225
+ }
226
+
227
+ .card h3 {
228
+ margin-bottom: 10px;
229
+ }
230
+
231
+ .equation-svg {
232
+ display: flex; /* flex container */
233
+ align-items: center; /* vertically aligns with bullets */
234
+ margin: 1.5em 2em 1em; /* top/bottom spacing, tighter */
235
+ }
236
+
237
+ .equation-svg img {
238
+ max-width: 100%;
239
+ height: auto;
240
+ display: block;
241
+ }
242
+
243
+ /* Doc-style content card */
244
+ .card-doc {
245
+ margin-top: 16px;
246
+ }
247
+
248
+ .card-doc h3 {
249
+ margin: 4px 0 10px;
250
+ font-size: 18px;
251
+ font-weight: 700;
252
+ }
253
+
254
+ .card-doc .lead {
255
+ margin: 6px 0 12px;
256
+ opacity: .92;
257
+ line-height: 1.6;
258
+ }
259
+
260
+ .contents-list {
261
+ margin: 0 0 12px 0;
262
+ padding-left: 22px; /* neat bullets */
263
+ }
264
+
265
+ .contents-list li {
266
+ margin: 6px 0;
267
+ line-height: 1.6;
268
+ }
269
+
270
+ .ref-note {
271
+ margin-top: 12px;
272
+ padding-top: 10px;
273
+ border-top: 1px solid rgba(255,255,255,.08);
274
+ opacity: .9;
275
+ }
276
+
277
+ .ref-note a {
278
+ color: var(--accent, #8ab4ff);
279
+ text-decoration: underline;
280
+ }
281
+ .ref-note a:hover { text-decoration: none; }
282
+
283
  </style>
284
+
285
+ <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
286
+ <script id="MathJax-script" async
287
+ src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
288
+
289
+
290
+
291
+
292
+
293
  </head>
294
  <body>
295
+
296
+
297
+
298
+
299
+ <!-- ===== Polished Info Header & Sections ===== -->
300
+ <section class="lb-container">
301
+
302
+ <!-- Title + meta badges -->
303
+ <header class="lb-hero">
304
+ <h1>41 Open-Source LLMs Evaluated Locally on 19 Benchmarks ⚡</h1>
305
+ <p class="lb-tagline">
306
+ Evaluations run with the lm-evaluation-harness library on a local workstation.
307
+ </p>
308
+ <p class="lb-tagline">Benchmarks are grouped into three categories, with the corresponding tasks and metrics listed below.
309
+ </p>
310
+ </header>
311
+
312
+
313
+
314
+ <!-- Three cards: task groups -->
315
+ <div class="card-grid">
316
+ <article class="card">
317
+ <h3>Reasoning & Math</h3>
318
+
319
+ <div class="chips">
320
+ <span class="chip">gsm8k(exact_match,strict-match)</span>
321
+ <span class="chip">bbh(exact_match,get-answer)</span>
322
+ <span class="chip">arc_challenge(acc_norm,none)</span>
323
+ <span class="chip">anli_r1(acc,none)</span>
324
+ <span class="chip">anli_r2(acc,none)</span>
325
+ <span class="chip">anli_r3(acc,none)</span>
326
+ <span class="chip">gpqa_main_zeroshot(acc_norm,none)</span>
327
+ </div>
328
+ </article>
329
+
330
+ <article class="card">
331
+ <h3>Commonsense & Natural Language Inference</h3>
332
+
333
+ <div class="chips">
334
+ <span class="chip">hellaswag(acc_norm,none)</span>
335
+ <span class="chip">piqa(acc_norm,none)</span>
336
+ <span class="chip">winogrande(acc,none)</span>
337
+ <span class="chip">boolq(acc,none)</span>
338
+ <span class="chip">openbookqa(acc_norm,none)</span>
339
+ <span class="chip">sciq(acc_norm,none)</span>
340
+ <span class="chip">qnli(acc,none)</span>
341
+ </div>
342
+ </article>
343
+
344
+ <article class="card">
345
+ <h3>Knowledge & Reading</h3>
346
+
347
+ <div class="chips">
348
+ <span class="chip">mmlu(acc,none)</span>
349
+ <span class="chip">nq_open(exact_match,remove_whitespace)</span>
350
+ <span class="chip">drop(f1,none)</span>
351
+ <span class="chip">truthfulqa_mc1(acc,none)</span>
352
+ <span class="chip">truthfulqa_mc2(acc,none)</span>
353
+ <span class="chip">triviaqa(exact_match,remove_whitespace)</span>
354
+ </div>
355
+ </article>
356
+ </div>
357
+
358
+ <!-- Table field definitions -->
359
+ <section class="card card-defs">
360
+ <h3>Table Fields</h3>
361
+ <dl class="defs">
362
+ <dt>Model Name</dt>
363
+ <dd>Listed as <code>Company_ModelName</code>. If quantized, <code>(8bit)</code> is appended.</dd>
364
+
365
+ <dt>Total Time</dt>
366
+ <dd>Wall-clock time for the full evaluation run per model.</dd>
367
+
368
+ <dt>GPU Util Time</dt>
369
+ <dd>
370
+ Equivalent RTX 5090 GPU time at 100% utilization. GPU utilization was logged once per minute,
371
+ and the effective utilization time was calculated using the following equation:
372
+
373
+ <div class="equation-svg">
374
+ <img src="equation.svg" alt="T_util-100% = Σ (u_i / 100) Δt" />
375
+ </div>
376
+ <ul>
377
+ <li><span class="muted">interval:</span> 1 minute</li>
378
+ <li><span class="muted">u<sub>i</sub>:</span> GPU utilization (%) during interval i</li>
379
+ <li><span class="muted">Δt:</span> duration of interval i (s)</li>
380
+ </ul>
381
+ </dd>
382
+
383
+ <dt>Mean Score</dt>
384
+ <dd>Arithmetic mean across the 19 tasks (0–1; higher is better). Unweighted.</dd>
385
+
386
+ <dt>Overall Rank</dt>
387
+ <dd>Rank by Mean Score (1 = best).</dd>
388
+ </dl>
389
+ </section>
390
+
391
+ <section class="card card-doc">
392
+ <h3>Repository Contents</h3>
393
+
394
+
395
+ <ul class="contents-list">
396
+ <li>
397
+ <strong>Scripts</strong> — scripts for running benchmarks and collecting metrics/logs.
398
+ </li>
399
+ <li>
400
+ <strong>Notebooks</strong> — Jupyter notebook for table generation and post-processing.
401
+ </li>
402
+ <li>
403
+ <strong>Results (CSV / Excel)</strong> — tables for each ranking category, overall rankings, and a master table with all scores and fields.
404
+ </li>
405
+ <li>
406
+ <strong>Raw JSON</strong> — per-run outputs from <code>lm-evaluation-harness</code>.
407
+ </li>
408
+ <li>
409
+ <strong>Stdout logs</strong> — per-run console logs.
410
+ </li>
411
+ <li>
412
+ <strong>GPU utilization logs</strong> — minute-level utilization log used to compute <em>GPU Util Time</em>.
413
+ </li>
414
+ </ul>
415
+
416
+ <div class="ref-note">
417
+ Benchmarks were referenced from
418
+ <a href="https://github.com/leobeeson/llm_benchmarks" target="_blank" rel="noopener">
419
+ leobeeson/llm_benchmarks
420
+ </a>.
421
+ Detailed descriptions of each benchmark can be found in that repository.
422
+ </div>
423
+ </section>
424
+
425
+ <hr class="lb-divider">
426
+
427
+ </section>
428
+
429
+ <script>
430
+ // optional: if a #last-updated element exists, stamp it with the date the page is loaded (YYYY-MM-DD, UTC) — this is load time, not build time
431
+ const el = document.getElementById('last-updated');
432
+ if (el) el.textContent = new Date().toISOString().slice(0,10);
433
+ </script>
434
+
435
+
436
+
437
+
438
  <div class="wrap">
439
  <h1 class="title">Overall Rank (Average Rank)</h1>
440
 
 
741
  <td>0.3799</td>
742
  <td>41</td>
743
  </tr>
744
+ </tbody>
745
+ </table>
746
+
747
+ </div>
748
+ </div>
749
+ </div>
750
+
751
+ <div class="wrap">
752
+ <h1 class="title">Reasoning & Math (Average Rank)</h1>
753
+
754
+ <div class="table-card">
755
+ <!-- 🔽 Paste your raw HTML table inside this div -->
756
+ <div id="table-slot">
757
+ <!-- Example: your pandas to_html dump goes here -->
758
+ <!-- <table> ... </table> -->
759
+ <table border="1" class="dataframe">
760
+ <thead>
761
+ <tr style="text-align: right;">
762
+ <th>Model Name</th>
763
+ <th>Total Time</th>
764
+ <th>GPU Util Time</th>
765
+ <th>Reasoning &amp; Math Mean Score</th>
766
+ <th>Reasoning &amp; Math Avg. Rank</th>
767
+ </tr>
768
+ </thead>
769
+ <tbody>
770
+ <tr>
771
+ <td>google_gemma-3-12b-it</td>
772
+ <td>15h 45m</td>
773
+ <td>14h 8m</td>
774
+ <td>0.6266</td>
775
+ <td>1</td>
776
+ </tr>
777
+ <tr>
778
+ <td>Qwen_Qwen3-8B</td>
779
+ <td>15h 31m</td>
780
+ <td>13h 44m</td>
781
+ <td>0.6214</td>
782
+ <td>2</td>
783
+ </tr>
784
+ <tr>
785
+ <td>Qwen_Qwen3-14B (8bit)</td>
786
+ <td>29h 45m</td>
787
+ <td>17h 29m</td>
788
+ <td>0.5860</td>
789
+ <td>3</td>
790
+ </tr>
791
+ <tr>
792
+ <td>Qwen_Qwen3-4B</td>
793
+ <td>5h 51m</td>
794
+ <td>5h 3m</td>
795
+ <td>0.5712</td>
796
+ <td>4</td>
797
+ </tr>
798
+ <tr>
799
+ <td>Qwen_Qwen2.5-7B-Instruct</td>
800
+ <td>9h 36m</td>
801
+ <td>8h 33m</td>
802
+ <td>0.5541</td>
803
+ <td>5</td>
804
+ </tr>
805
+ <tr>
806
+ <td>openchat_openchat-3.6-8b-20240522</td>
807
+ <td>7h 51m</td>
808
+ <td>6h 59m</td>
809
+ <td>0.5505</td>
810
+ <td>6</td>
811
+ </tr>
812
+ <tr>
813
+ <td>Qwen_Qwen2.5-14B-Instruct (8bit)</td>
814
+ <td>52h 44m</td>
815
+ <td>29h 32m</td>
816
+ <td>0.5488</td>
817
+ <td>7</td>
818
+ </tr>
819
+ <tr>
820
+ <td>mistralai_Ministral-8B-Instruct-2410</td>
821
+ <td>10h 46m</td>
822
+ <td>9h 27m</td>
823
+ <td>0.5446</td>
824
+ <td>8</td>
825
+ </tr>
826
+ <tr>
827
+ <td>01-ai_Yi-1.5-9B-Chat</td>
828
+ <td>13h 54m</td>
829
+ <td>12h 15m</td>
830
+ <td>0.5399</td>
831
+ <td>9</td>
832
+ </tr>
833
+ <tr>
834
+ <td>deepseek-ai_DeepSeek-R1-0528-Qwen3-8B</td>
835
+ <td>17h 57m</td>
836
+ <td>15h 30m</td>
837
+ <td>0.5387</td>
838
+ <td>10</td>
839
+ </tr>
840
+ <tr>
841
+ <td>google_gemma-3-4b-it</td>
842
+ <td>4h 51m</td>
843
+ <td>3h 50m</td>
844
+ <td>0.5374</td>
845
+ <td>11</td>
846
+ </tr>
847
+ <tr>
848
+ <td>meta-llama_Llama-3.1-8B-Instruct</td>
849
+ <td>12h 19m</td>
850
+ <td>10h 52m</td>
851
+ <td>0.5366</td>
852
+ <td>12</td>
853
+ </tr>
854
+ <tr>
855
+ <td>meta-llama_Meta-Llama-3-8B-Instruct</td>
856
+ <td>6h 30m</td>
857
+ <td>5h 46m</td>
858
+ <td>0.5286</td>
859
+ <td>13</td>
860
+ </tr>
861
+ <tr>
862
+ <td>Qwen_Qwen2-7B-Instruct</td>
863
+ <td>11h 30m</td>
864
+ <td>10h 11m</td>
865
+ <td>0.5285</td>
866
+ <td>14</td>
867
+ </tr>
868
+ <tr>
869
+ <td>Qwen_Qwen2.5-7B-Instruct-1M</td>
870
+ <td>11h 17m</td>
871
+ <td>10h 10m</td>
872
+ <td>0.5245</td>
873
+ <td>15</td>
874
+ </tr>
875
+ <tr>
876
+ <td>01-ai_Yi-1.5-9B</td>
877
+ <td>11h 43m</td>
878
+ <td>10h 26m</td>
879
+ <td>0.5206</td>
880
+ <td>16</td>
881
+ </tr>
882
+ <tr>
883
+ <td>NousResearch_Hermes-2-Pro-Mistral-7B</td>
884
+ <td>8h 27m</td>
885
+ <td>7h 28m</td>
886
+ <td>0.5184</td>
887
+ <td>17</td>
888
+ </tr>
889
+ <tr>
890
+ <td>Qwen_Qwen2.5-Math-7B</td>
891
+ <td>27h 21m</td>
892
+ <td>24h 38m</td>
893
+ <td>0.5010</td>
894
+ <td>18</td>
895
+ </tr>
896
+ <tr>
897
+ <td>01-ai_Yi-1.5-6B-Chat</td>
898
+ <td>8h 4m</td>
899
+ <td>7h 1m</td>
900
+ <td>0.5006</td>
901
+ <td>19</td>
902
+ </tr>
903
+ <tr>
904
+ <td>Qwen_Qwen2.5-Math-7B-Instruct</td>
905
+ <td>5h 37m</td>
906
+ <td>4h 57m</td>
907
+ <td>0.4997</td>
908
+ <td>20</td>
909
+ </tr>
910
+ <tr>
911
+ <td>deepseek-ai_DeepSeek-R1-Distill-Qwen-7B</td>
912
+ <td>6h 28m</td>
913
+ <td>5h 43m</td>
914
+ <td>0.4841</td>
915
+ <td>21</td>
916
+ </tr>
917
+ <tr>
918
+ <td>mistralai_Mistral-7B-Instruct-v0.3</td>
919
+ <td>8h 38m</td>
920
+ <td>7h 41m</td>
921
+ <td>0.4704</td>
922
+ <td>22</td>
923
+ </tr>
924
+ <tr>
925
+ <td>meta-llama_Llama-3.2-3B-Instruct</td>
926
+ <td>7h 12m</td>
927
+ <td>5h 57m</td>
928
+ <td>0.4688</td>
929
+ <td>23</td>
930
+ </tr>
931
+ <tr>
932
+ <td>01-ai_Yi-1.5-6B</td>
933
+ <td>4h 28m</td>
934
+ <td>3h 54m</td>
935
+ <td>0.4495</td>
936
+ <td>24</td>
937
+ </tr>
938
+ <tr>
939
+ <td>Qwen_Qwen3-1.7B</td>
940
+ <td>4h 25m</td>
941
+ <td>3h 36m</td>
942
+ <td>0.4493</td>
943
+ <td>25</td>
944
+ </tr>
945
+ <tr>
946
+ <td>deepseek-ai_DeepSeek-R1-Distill-Llama-8B</td>
947
+ <td>11h 46m</td>
948
+ <td>10h 36m</td>
949
+ <td>0.4469</td>
950
+ <td>26</td>
951
+ </tr>
952
+ <tr>
953
+ <td>deepseek-ai_deepseek-llm-7b-chat</td>
954
+ <td>10h 6m</td>
955
+ <td>9h 8m</td>
956
+ <td>0.4244</td>
957
+ <td>27</td>
958
+ </tr>
959
+ <tr>
960
+ <td>meta-llama_Llama-2-13b-chat-hf</td>
961
+ <td>17h 8m</td>
962
+ <td>15h 37m</td>
963
+ <td>0.4143</td>
964
+ <td>28</td>
965
+ </tr>
966
+ <tr>
967
+ <td>Qwen_Qwen2.5-Math-1.5B-Instruct</td>
968
+ <td>3h 25m</td>
969
+ <td>2h 39m</td>
970
+ <td>0.4085</td>
971
+ <td>29</td>
972
+ </tr>
973
+ <tr>
974
+ <td>deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B</td>
975
+ <td>3h 40m</td>
976
+ <td>2h 52m</td>
977
+ <td>0.4009</td>
978
+ <td>30</td>
979
+ </tr>
980
+ <tr>
981
+ <td>Qwen_Qwen2.5-1.5B-Instruct</td>
982
+ <td>3h 20m</td>
983
+ <td>2h 36m</td>
984
+ <td>0.3874</td>
985
+ <td>31</td>
986
+ </tr>
987
+ <tr>
988
+ <td>Qwen_Qwen2.5-3B-Instruct</td>
989
+ <td>7h 48m</td>
990
+ <td>6h 30m</td>
991
+ <td>0.3823</td>
992
+ <td>32</td>
993
+ </tr>
994
+ <tr>
995
+ <td>meta-llama_Llama-2-13b-hf</td>
996
+ <td>19h 21m</td>
997
+ <td>17h 38m</td>
998
+ <td>0.3719</td>
999
+ <td>33</td>
1000
+ </tr>
1001
+ <tr>
1002
+ <td>deepseek-ai_deepseek-math-7b-rl</td>
1003
+ <td>8h 2m</td>
1004
+ <td>7h 12m</td>
1005
+ <td>0.3702</td>
1006
+ <td>34</td>
1007
+ </tr>
1008
+ <tr>
1009
+ <td>meta-llama_Llama-2-7b-chat-hf</td>
1010
+ <td>6h 57m</td>
1011
+ <td>6h 7m</td>
1012
+ <td>0.3674</td>
1013
+ <td>35</td>
1014
+ </tr>
1015
+ <tr>
1016
+ <td>Qwen_Qwen3-0.6B</td>
1017
+ <td>3h 45m</td>
1018
+ <td>2h 53m</td>
1019
+ <td>0.3494</td>
1020
+ <td>36</td>
1021
+ </tr>
1022
+ <tr>
1023
+ <td>meta-llama_Llama-3.2-1B-Instruct</td>
1024
+ <td>3h 30m</td>
1025
+ <td>2h 35m</td>
1026
+ <td>0.3450</td>
1027
+ <td>37</td>
1028
+ </tr>
1029
+ <tr>
1030
+ <td>deepseek-ai_deepseek-llm-7b-base</td>
1031
+ <td>7h 11m</td>
1032
+ <td>6h 26m</td>
1033
+ <td>0.3377</td>
1034
+ <td>38</td>
1035
+ </tr>
1036
+ <tr>
1037
+ <td>meta-llama_Llama-2-7b-hf</td>
1038
+ <td>5h 42m</td>
1039
+ <td>4h 59m</td>
1040
+ <td>0.3361</td>
1041
+ <td>39</td>
1042
+ </tr>
1043
+ <tr>
1044
+ <td>google_gemma-3-1b-it</td>
1045
+ <td>6h 50m</td>
1046
+ <td>4h 52m</td>
1047
+ <td>0.3312</td>
1048
+ <td>40</td>
1049
+ </tr>
1050
+ <tr>
1051
+ <td>Qwen_Qwen2.5-0.5B-Instruct</td>
1052
+ <td>2h 34m</td>
1053
+ <td>1h 48m</td>
1054
+ <td>0.2914</td>
1055
+ <td>41</td>
1056
+ </tr>
1057
+ </tbody>
1058
+ </table>
1059
+
1060
+ </div>
1061
+ </div>
1062
+ </div>
1063
+ <div class="wrap">
1064
+ <h1 class="title">Commonsense & NLI (Average Rank)</h1>
1065
+
1066
+ <div class="table-card">
1067
+ <!-- 🔽 Paste your raw HTML table inside this div -->
1068
+ <div id="table-slot">
1069
+ <!-- Example: your pandas to_html dump goes here -->
1070
+ <!-- <table> ... </table> -->
1071
+ <table border="1" class="dataframe">
1072
+ <thead>
1073
+ <tr style="text-align: right;">
1074
+ <th>Model Name</th>
1075
+ <th>Total Time</th>
1076
+ <th>GPU Util Time</th>
1077
+ <th>Commonsense &amp; NLI Mean Score</th>
1078
+ <th>Commonsense &amp; NLI Avg. Rank</th>
1079
+ </tr>
1080
+ </thead>
1081
+ <tbody>
1082
+ <tr>
1083
+ <td>Qwen_Qwen2.5-14B-Instruct (8bit)</td>
1084
+ <td>52h 44m</td>
1085
+ <td>29h 32m</td>
1086
+ <td>0.7941</td>
1087
+ <td>1</td>
1088
+ </tr>
1089
+ <tr>
1090
+ <td>Qwen_Qwen3-14B (8bit)</td>
1091
+ <td>29h 45m</td>
1092
+ <td>17h 29m</td>
1093
+ <td>0.7807</td>
1094
+ <td>2</td>
1095
+ </tr>
1096
+ <tr>
1097
+ <td>google_gemma-3-12b-it</td>
1098
+ <td>15h 45m</td>
1099
+ <td>14h 8m</td>
1100
+ <td>0.7737</td>
1101
+ <td>3</td>
1102
+ </tr>
1103
+ <tr>
1104
+ <td>Qwen_Qwen2.5-7B-Instruct</td>
1105
+ <td>9h 36m</td>
1106
+ <td>8h 33m</td>
1107
+ <td>0.7730</td>
1108
+ <td>4</td>
1109
+ </tr>
1110
+ <tr>
1111
+ <td>openchat_openchat-3.6-8b-20240522</td>
1112
+ <td>7h 51m</td>
1113
+ <td>6h 59m</td>
1114
+ <td>0.7726</td>
1115
+ <td>5</td>
1116
+ </tr>
1117
+ <tr>
1118
+ <td>01-ai_Yi-1.5-9B-Chat</td>
1119
+ <td>13h 54m</td>
1120
+ <td>12h 15m</td>
1121
+ <td>0.7691</td>
1122
+ <td>6</td>
1123
+ </tr>
1124
+ <tr>
1125
+ <td>Qwen_Qwen2.5-7B-Instruct-1M</td>
1126
+ <td>11h 17m</td>
1127
+ <td>10h 10m</td>
1128
+ <td>0.7564</td>
1129
+ <td>7</td>
1130
+ </tr>
1131
+ <tr>
1132
+ <td>Qwen_Qwen3-8B</td>
1133
+ <td>15h 31m</td>
1134
+ <td>13h 44m</td>
1135
+ <td>0.7468</td>
1136
+ <td>8</td>
1137
+ </tr>
1138
+ <tr>
1139
+ <td>mistralai_Mistral-7B-Instruct-v0.3</td>
1140
+ <td>8h 38m</td>
1141
+ <td>7h 41m</td>
1142
+ <td>0.7403</td>
1143
+ <td>9</td>
1144
+ </tr>
1145
+ <tr>
1146
+ <td>01-ai_Yi-1.5-6B-Chat</td>
1147
+ <td>8h 4m</td>
1148
+ <td>7h 1m</td>
1149
+ <td>0.7374</td>
1150
+ <td>10</td>
1151
+ </tr>
1152
+ <tr>
1153
+ <td>Qwen_Qwen2.5-3B-Instruct</td>
1154
+ <td>7h 48m</td>
1155
+ <td>6h 30m</td>
1156
+ <td>0.7367</td>
1157
+ <td>11</td>
1158
+ </tr>
1159
+ <tr>
1160
+ <td>mistralai_Ministral-8B-Instruct-2410</td>
1161
+ <td>10h 46m</td>
1162
+ <td>9h 27m</td>
1163
+ <td>0.7328</td>
1164
+ <td>12</td>
1165
+ </tr>
1166
+ <tr>
1167
+ <td>NousResearch_Hermes-2-Pro-Mistral-7B</td>
1168
+ <td>8h 27m</td>
1169
+ <td>7h 28m</td>
1170
+ <td>0.7284</td>
1171
+ <td>13</td>
1172
+ </tr>
1173
+ <tr>
1174
+ <td>Qwen_Qwen2-7B-Instruct</td>
1175
+ <td>11h 30m</td>
1176
+ <td>10h 11m</td>
1177
+ <td>0.7274</td>
1178
+ <td>14</td>
1179
+ </tr>
1180
+ <tr>
1181
+ <td>01-ai_Yi-1.5-9B</td>
1182
+ <td>11h 43m</td>
1183
+ <td>10h 26m</td>
1184
+ <td>0.7266</td>
1185
+ <td>15</td>
1186
+ </tr>
1187
+ <tr>
1188
+ <td>Qwen_Qwen3-4B</td>
1189
+ <td>5h 51m</td>
1190
+ <td>5h 3m</td>
1191
+ <td>0.7266</td>
1192
+ <td>16</td>
1193
+ </tr>
1194
+ <tr>
1195
+ <td>meta-llama_Llama-3.1-8B-Instruct</td>
1196
+ <td>12h 19m</td>
1197
+ <td>10h 52m</td>
1198
+ <td>0.7249</td>
1199
+ <td>17</td>
1200
+ </tr>
1201
+ <tr>
1202
+ <td>01-ai_Yi-1.5-6B</td>
1203
+ <td>4h 28m</td>
1204
+ <td>3h 54m</td>
1205
+ <td>0.7199</td>
1206
+ <td>18</td>
1207
+ </tr>
1208
+ <tr>
1209
+ <td>google_gemma-3-4b-it</td>
1210
+ <td>4h 51m</td>
1211
+ <td>3h 50m</td>
1212
+ <td>0.7167</td>
1213
+ <td>19</td>
1214
+ </tr>
1215
+ <tr>
1216
+ <td>meta-llama_Llama-2-13b-hf</td>
1217
+ <td>19h 21m</td>
1218
+ <td>17h 38m</td>
1219
+ <td>0.7157</td>
1220
+ <td>20</td>
1221
+ </tr>
1222
+ <tr>
1223
+ <td>meta-llama_Llama-2-13b-chat-hf</td>
1224
+ <td>17h 8m</td>
1225
+ <td>15h 37m</td>
1226
+ <td>0.7153</td>
1227
+ <td>21</td>
1228
+ </tr>
1229
+ <tr>
1230
+ <td>meta-llama_Meta-Llama-3-8B-Instruct</td>
1231
+ <td>6h 30m</td>
1232
+ <td>5h 46m</td>
1233
+ <td>0.7147</td>
1234
+ <td>22</td>
1235
+ </tr>
1236
+ <tr>
1237
+ <td>deepseek-ai_DeepSeek-R1-0528-Qwen3-8B</td>
1238
+ <td>17h 57m</td>
1239
+ <td>15h 30m</td>
1240
+ <td>0.7094</td>
1241
+ <td>23</td>
1242
+ </tr>
1243
+ <tr>
1244
+ <td>deepseek-ai_deepseek-llm-7b-chat</td>
1245
+ <td>10h 6m</td>
1246
+ <td>9h 8m</td>
1247
+ <td>0.7090</td>
1248
+ <td>24</td>
1249
+ </tr>
1250
+ <tr>
1251
+ <td>meta-llama_Llama-2-7b-chat-hf</td>
1252
+ <td>6h 57m</td>
1253
+ <td>6h 7m</td>
1254
+ <td>0.6978</td>
1255
+ <td>25</td>
1256
+ </tr>
1257
+ <tr>
1258
+ <td>meta-llama_Llama-2-7b-hf</td>
1259
+ <td>5h 42m</td>
1260
+ <td>4h 59m</td>
1261
+ <td>0.6956</td>
1262
+ <td>26</td>
1263
+ </tr>
1264
+ <tr>
1265
+ <td>deepseek-ai_DeepSeek-R1-Distill-Llama-8B</td>
1266
+ <td>11h 46m</td>
1267
+ <td>10h 36m</td>
1268
+ <td>0.6928</td>
1269
+ <td>27</td>
1270
+ </tr>
1271
+ <tr>
1272
+ <td>deepseek-ai_deepseek-llm-7b-base</td>
1273
+ <td>7h 11m</td>
1274
+ <td>6h 26m</td>
1275
+ <td>0.6886</td>
1276
+ <td>28</td>
1277
+ </tr>
1278
+ <tr>
1279
+ <td>Qwen_Qwen2.5-1.5B-Instruct</td>
1280
+ <td>3h 20m</td>
1281
+ <td>2h 36m</td>
1282
+ <td>0.6803</td>
1283
+ <td>29</td>
1284
+ </tr>
1285
+ <tr>
1286
+ <td>meta-llama_Llama-3.2-3B-Instruct</td>
1287
+ <td>7h 12m</td>
1288
+ <td>5h 57m</td>
1289
+ <td>0.6788</td>
1290
+ <td>30</td>
1291
+ </tr>
1292
+ <tr>
1293
+ <td>deepseek-ai_deepseek-math-7b-rl</td>
1294
+ <td>8h 2m</td>
1295
+ <td>7h 12m</td>
1296
+ <td>0.6711</td>
1297
+ <td>31</td>
1298
+ </tr>
1299
+ <tr>
1300
+ <td>Qwen_Qwen2.5-Math-7B</td>
1301
+ <td>27h 21m</td>
1302
+ <td>24h 38m</td>
1303
+ <td>0.6587</td>
1304
+ <td>32</td>
1305
+ </tr>
1306
+ <tr>
1307
+ <td>Qwen_Qwen3-1.7B</td>
1308
+ <td>4h 25m</td>
1309
+ <td>3h 36m</td>
1310
+ <td>0.6442</td>
1311
+ <td>33</td>
1312
+ </tr>
1313
+ <tr>
1314
+ <td>deepseek-ai_DeepSeek-R1-Distill-Qwen-7B</td>
1315
+ <td>6h 28m</td>
1316
+ <td>5h 43m</td>
1317
+ <td>0.6422</td>
1318
+ <td>34</td>
1319
+ </tr>
1320
+ <tr>
1321
+ <td>google_gemma-3-1b-it</td>
1322
+ <td>6h 50m</td>
1323
+ <td>4h 52m</td>
1324
+ <td>0.6267</td>
1325
+ <td>35</td>
1326
+ </tr>
1327
+ <tr>
1328
+ <td>meta-llama_Llama-3.2-1B-Instruct</td>
1329
+ <td>3h 30m</td>
1330
+ <td>2h 35m</td>
1331
+ <td>0.6264</td>
1332
+ <td>36</td>
1333
+ </tr>
1334
+ <tr>
1335
+ <td>Qwen_Qwen2.5-Math-7B-Instruct</td>
1336
+ <td>5h 37m</td>
1337
+ <td>4h 57m</td>
1338
+ <td>0.6184</td>
1339
+ <td>37</td>
1340
+ </tr>
1341
+ <tr>
1342
+ <td>Qwen_Qwen2.5-0.5B-Instruct</td>
1343
+ <td>2h 34m</td>
1344
+ <td>1h 48m</td>
1345
+ <td>0.6039</td>
1346
+ <td>38</td>
1347
+ </tr>
1348
+ <tr>
1349
+ <td>deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B</td>
1350
+ <td>3h 40m</td>
1351
+ <td>2h 52m</td>
1352
+ <td>0.5703</td>
1353
+ <td>39</td>
1354
+ </tr>
1355
+ <tr>
1356
+ <td>Qwen_Qwen3-0.6B</td>
1357
+ <td>3h 45m</td>
1358
+ <td>2h 53m</td>
1359
+ <td>0.5696</td>
1360
+ <td>40</td>
1361
+ </tr>
1362
+ <tr>
1363
+ <td>Qwen_Qwen2.5-Math-1.5B-Instruct</td>
1364
+ <td>3h 25m</td>
1365
+ <td>2h 39m</td>
1366
+ <td>0.5181</td>
1367
+ <td>41</td>
1368
+ </tr>
1369
+ </tbody>
1370
+ </table>
1371
+
1372
+ </div>
1373
+ </div>
1374
+ </div>
1375
+ <div class="wrap">
1376
+ <h1 class="title">Knowledge & Reading (Average Rank)</h1>
1377
+
1378
+ <div class="table-card">
1379
+ <!-- 🔽 Paste your raw HTML table inside this div -->
1380
+ <div id="table-slot">
1381
+ <!-- Example: your pandas to_html dump goes here -->
1382
+ <!-- <table> ... </table> -->
1383
+ <table border="1" class="dataframe">
1384
+ <thead>
1385
+ <tr style="text-align: right;">
1386
+ <th>Model Name</th>
1387
+ <th>Total Time</th>
1388
+ <th>GPU Util Time</th>
1389
+ <th>Knowledge &amp; Reading Mean Score</th>
1390
+ <th>Knowledge &amp; Reading Avg. Rank</th>
1391
+ </tr>
1392
+ </thead>
1393
+ <tbody>
1394
+ <tr>
1395
+ <td>01-ai_Yi-1.5-9B</td>
1396
+ <td>11h 43m</td>
1397
+ <td>10h 26m</td>
1398
+ <td>0.4369</td>
1399
+ <td>1</td>
1400
+ </tr>
1401
+ <tr>
1402
+ <td>openchat_openchat-3.6-8b-20240522</td>
1403
+ <td>7h 51m</td>
1404
+ <td>6h 59m</td>
1405
+ <td>0.4136</td>
1406
+ <td>2</td>
1407
+ </tr>
1408
+ <tr>
1409
+ <td>meta-llama_Llama-3.1-8B-Instruct</td>
1410
+ <td>12h 19m</td>
1411
+ <td>10h 52m</td>
1412
+ <td>0.4127</td>
1413
+ <td>3</td>
1414
+ </tr>
1415
+ <tr>
1416
+ <td>01-ai_Yi-1.5-6B</td>
1417
+ <td>4h 28m</td>
1418
+ <td>3h 54m</td>
1419
+ <td>0.4063</td>
1420
+ <td>4</td>
1421
+ </tr>
1422
+ <tr>
1423
+ <td>mistralai_Mistral-7B-Instruct-v0.3</td>
1424
+ <td>8h 38m</td>
1425
+ <td>7h 41m</td>
1426
+ <td>0.4045</td>
1427
+ <td>5</td>
1428
+ </tr>
1429
+ <tr>
1430
+ <td>Qwen_Qwen2.5-7B-Instruct-1M</td>
1431
+ <td>11h 17m</td>
1432
+ <td>10h 10m</td>
1433
+ <td>0.3963</td>
1434
+ <td>6</td>
1435
+ </tr>
1436
+ <tr>
1437
+ <td>Qwen_Qwen3-14B (8bit)</td>
1438
+ <td>29h 45m</td>
1439
+ <td>17h 29m</td>
1440
+ <td>0.3926</td>
1441
+ <td>7</td>
1442
+ </tr>
1443
+ <tr>
1444
+ <td>meta-llama_Meta-Llama-3-8B-Instruct</td>
1445
+ <td>6h 30m</td>
1446
+ <td>5h 46m</td>
1447
+ <td>0.3923</td>
1448
+ <td>8</td>
1449
+ </tr>
1450
+ <tr>
1451
+ <td>Qwen_Qwen2.5-7B-Instruct</td>
1452
+ <td>9h 36m</td>
1453
+ <td>8h 33m</td>
1454
+ <td>0.3810</td>
1455
+ <td>9</td>
1456
+ </tr>
1457
+ <tr>
1458
+ <td>google_gemma-3-12b-it</td>
1459
+ <td>15h 45m</td>
1460
+ <td>14h 8m</td>
1461
+ <td>0.3791</td>
1462
+ <td>10</td>
1463
+ </tr>
1464
+ <tr>
1465
+ <td>NousResearch_Hermes-2-Pro-Mistral-7B</td>
1466
+ <td>8h 27m</td>
1467
+ <td>7h 28m</td>
1468
+ <td>0.3719</td>
1469
+ <td>11</td>
1470
+ </tr>
1471
+ <tr>
1472
+ <td>mistralai_Ministral-8B-Instruct-2410</td>
1473
+ <td>10h 46m</td>
1474
+ <td>9h 27m</td>
1475
+ <td>0.3683</td>
1476
+ <td>12</td>
1477
+ </tr>
1478
+ <tr>
1479
+ <td>Qwen_Qwen2.5-14B-Instruct (8bit)</td>
1480
+ <td>52h 44m</td>
1481
+ <td>29h 32m</td>
1482
+ <td>0.3581</td>
1483
+ <td>13</td>
1484
+ </tr>
1485
+ <tr>
1486
+ <td>Qwen_Qwen3-8B</td>
1487
+ <td>15h 31m</td>
1488
+ <td>13h 44m</td>
1489
+ <td>0.3566</td>
1490
+ <td>14</td>
1491
+ </tr>
1492
+ <tr>
1493
+ <td>01-ai_Yi-1.5-9B-Chat</td>
1494
+ <td>13h 54m</td>
1495
+ <td>12h 15m</td>
1496
+ <td>0.3467</td>
1497
+ <td>15</td>
1498
+ </tr>
1499
+ <tr>
1500
+ <td>meta-llama_Llama-3.2-3B-Instruct</td>
1501
+ <td>7h 12m</td>
1502
+ <td>5h 57m</td>
1503
+ <td>0.3438</td>
1504
+ <td>16</td>
1505
+ </tr>
1506
+ <tr>
1507
+ <td>Qwen_Qwen2.5-3B-Instruct</td>
1508
+ <td>7h 48m</td>
1509
+ <td>6h 30m</td>
1510
+ <td>0.3406</td>
1511
+ <td>17</td>
1512
+ </tr>
1513
+ <tr>
1514
+ <td>meta-llama_Llama-2-13b-hf</td>
1515
+ <td>19h 21m</td>
1516
+ <td>17h 38m</td>
1517
+ <td>0.3374</td>
1518
+ <td>18</td>
1519
+ </tr>
1520
+ <tr>
1521
+ <td>01-ai_Yi-1.5-6B-Chat</td>
1522
+ <td>8h 4m</td>
1523
+ <td>7h 1m</td>
1524
+ <td>0.3339</td>
1525
+ <td>19</td>
1526
+ </tr>
1527
+ <tr>
1528
+ <td>google_gemma-3-4b-it</td>
1529
+ <td>4h 51m</td>
1530
+ <td>3h 50m</td>
1531
+ <td>0.3261</td>
1532
+ <td>20</td>
1533
+ </tr>
1534
+ <tr>
1535
+ <td>Qwen_Qwen3-4B</td>
1536
+ <td>5h 51m</td>
1537
+ <td>5h 3m</td>
1538
+ <td>0.3226</td>
1539
+ <td>21</td>
1540
+ </tr>
1541
+ <tr>
1542
+ <td>meta-llama_Llama-2-7b-hf</td>
1543
+ <td>5h 42m</td>
1544
+ <td>4h 59m</td>
1545
+ <td>0.3018</td>
1546
+ <td>22</td>
1547
+ </tr>
1548
+ <tr>
1549
+ <td>deepseek-ai_deepseek-llm-7b-chat</td>
1550
+ <td>10h 6m</td>
1551
+ <td>9h 8m</td>
1552
+ <td>0.3007</td>
1553
+ <td>23</td>
1554
+ </tr>
1555
+ <tr>
1556
+ <td>Qwen_Qwen2-7B-Instruct</td>
1557
+ <td>11h 30m</td>
1558
+ <td>10h 11m</td>
1559
+ <td>0.2919</td>
1560
+ <td>24</td>
1561
+ </tr>
1562
+ <tr>
1563
+ <td>Qwen_Qwen2.5-1.5B-Instruct</td>
1564
+ <td>3h 20m</td>
1565
+ <td>2h 36m</td>
1566
+ <td>0.2903</td>
1567
+ <td>25</td>
1568
+ </tr>
1569
+ <tr>
1570
+ <td>meta-llama_Llama-2-13b-chat-hf</td>
1571
+ <td>17h 8m</td>
1572
+ <td>15h 37m</td>
1573
+ <td>0.2864</td>
1574
+ <td>26</td>
1575
+ </tr>
1576
+ <tr>
1577
+ <td>deepseek-ai_deepseek-llm-7b-base</td>
1578
+ <td>7h 11m</td>
1579
+ <td>6h 26m</td>
1580
+ <td>0.2864</td>
1581
+ <td>27</td>
1582
+ </tr>
1583
+ <tr>
1584
+ <td>deepseek-ai_DeepSeek-R1-0528-Qwen3-8B</td>
1585
+ <td>17h 57m</td>
1586
+ <td>15h 30m</td>
1587
+ <td>0.2834</td>
1588
+ <td>28</td>
1589
+ </tr>
1590
+ <tr>
1591
+ <td>Qwen_Qwen2.5-Math-7B</td>
1592
+ <td>27h 21m</td>
1593
+ <td>24h 38m</td>
1594
+ <td>0.2827</td>
1595
+ <td>29</td>
1596
+ </tr>
1597
+ <tr>
1598
+ <td>deepseek-ai_DeepSeek-R1-Distill-Llama-8B</td>
1599
+ <td>11h 46m</td>
1600
+ <td>10h 36m</td>
1601
+ <td>0.2805</td>
1602
+ <td>30</td>
1603
+ </tr>
1604
+ <tr>
1605
+ <td>meta-llama_Llama-3.2-1B-Instruct</td>
1606
+ <td>3h 30m</td>
1607
+ <td>2h 35m</td>
1608
+ <td>0.2731</td>
1609
+ <td>31</td>
1610
+ </tr>
1611
+ <tr>
1612
+ <td>meta-llama_Llama-2-7b-chat-hf</td>
1613
+ <td>6h 57m</td>
1614
+ <td>6h 7m</td>
1615
+ <td>0.2656</td>
1616
+ <td>32</td>
1617
+ </tr>
1618
+ <tr>
1619
+ <td>deepseek-ai_deepseek-math-7b-rl</td>
1620
+ <td>8h 2m</td>
1621
+ <td>7h 12m</td>
1622
+ <td>0.2581</td>
1623
+ <td>33</td>
1624
+ </tr>
1625
+ <tr>
1626
+ <td>Qwen_Qwen3-1.7B</td>
1627
+ <td>4h 25m</td>
1628
+ <td>3h 36m</td>
1629
+ <td>0.2567</td>
1630
+ <td>34</td>
1631
+ </tr>
1632
+ <tr>
1633
+ <td>deepseek-ai_DeepSeek-R1-Distill-Qwen-7B</td>
1634
+ <td>6h 28m</td>
1635
+ <td>5h 43m</td>
1636
+ <td>0.2340</td>
1637
+ <td>35</td>
1638
+ </tr>
1639
+ <tr>
1640
+ <td>Qwen_Qwen2.5-Math-7B-Instruct</td>
1641
+ <td>5h 37m</td>
1642
+ <td>4h 57m</td>
1643
+ <td>0.2276</td>
1644
+ <td>36</td>
1645
+ </tr>
1646
+ <tr>
1647
+ <td>Qwen_Qwen2.5-0.5B-Instruct</td>
1648
+ <td>2h 34m</td>
1649
+ <td>1h 48m</td>
1650
+ <td>0.2218</td>
1651
+ <td>37</td>
1652
+ </tr>
1653
+ <tr>
1654
+ <td>google_gemma-3-1b-it</td>
1655
+ <td>6h 50m</td>
1656
+ <td>4h 52m</td>
1657
+ <td>0.2202</td>
1658
+ <td>38</td>
1659
+ </tr>
1660
+ <tr>
1661
+ <td>Qwen_Qwen3-0.6B</td>
1662
+ <td>3h 45m</td>
1663
+ <td>2h 53m</td>
1664
+ <td>0.2000</td>
1665
+ <td>39</td>
1666
+ </tr>
1667
+ <tr>
1668
+ <td>Qwen_Qwen2.5-Math-1.5B-Instruct</td>
1669
+ <td>3h 25m</td>
1670
+ <td>2h 39m</td>
1671
+ <td>0.1983</td>
1672
+ <td>40</td>
1673
+ </tr>
1674
+ <tr>
1675
+ <td>deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B</td>
1676
+ <td>3h 40m</td>
1677
+ <td>2h 52m</td>
1678
+ <td>0.1954</td>
1679
+ <td>41</td>
1680
+ </tr>
1681
  </tbody>
1682
  </table>
1683
 
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }