Update index.html
Browse files- index.html +6 -52
index.html
CHANGED
|
@@ -36,52 +36,6 @@
|
|
| 36 |
</p>
|
| 37 |
</div>
|
| 38 |
</section>
|
| 39 |
-
|
| 40 |
-
<section class="section">
|
| 41 |
-
<div class="container">
|
| 42 |
-
<h2 class="title is-3">📊 Results</h2>
|
| 43 |
-
<div class="highlight-box">
|
| 44 |
-
<p><strong>✔️ Accuracy</strong></p>
|
| 45 |
-
<ul>
|
| 46 |
-
<li>Spearman’s ρ > 0.87 with human ground truth</li>
|
| 47 |
-
</ul>
|
| 48 |
-
</div>
|
| 49 |
-
<div class="highlight-box">
|
| 50 |
-
<p><strong>📈 Downstream LLM Training Impact</strong></p>
|
| 51 |
-
<ul>
|
| 52 |
-
<li>+7.2% benchmark performance improvement</li>
|
| 53 |
-
<li>+4.8% token retention compared to FineWeb2 heuristic filter</li>
|
| 54 |
-
<li>Reliable thresholding with 0.6 and 0.7 quantiles</li>
|
| 55 |
-
</ul>
|
| 56 |
-
</div>
|
| 57 |
-
<div class="highlight-box">
|
| 58 |
-
<p><strong>⚡ Annotation Speed</strong></p>
|
| 59 |
-
<ul>
|
| 60 |
-
<li>~11,000 docs/min (on A100 GPU, avg. 690 tokens per doc)</li>
|
| 61 |
-
</ul>
|
| 62 |
-
</div>
|
| 63 |
-
</div>
|
| 64 |
-
</section>
|
| 65 |
-
|
| 66 |
-
<section class="section">
|
| 67 |
-
<div class="container">
|
| 68 |
-
<h2 class="title is-3">📁 Available Artifacts</h2>
|
| 69 |
-
<div class="highlight-box">
|
| 70 |
-
<ul>
|
| 71 |
-
<li>📄 Ground truth annotations in <strong>35 languages</strong></li>
|
| 72 |
-
<li>🧠 Synthetic LLM-annotated dataset (<strong>14M+ documents</strong>)</li>
|
| 73 |
-
<li>🪶 Lightweight annotation models:
|
| 74 |
-
<ul>
|
| 75 |
-
<li>JQL-Gemma</li>
|
| 76 |
-
<li>JQL-Mistral</li>
|
| 77 |
-
<li>JQL-Llama</li>
|
| 78 |
-
</ul>
|
| 79 |
-
</li>
|
| 80 |
-
<li>🛠️ Training & inference scripts <em>(coming soon)</em></li>
|
| 81 |
-
</ul>
|
| 82 |
-
</div>
|
| 83 |
-
</div>
|
| 84 |
-
</section>
|
| 85 |
|
| 86 |
<section class="section">
|
| 87 |
<div class="container content">
|
|
@@ -104,15 +58,15 @@
|
|
| 104 |
<div class="container content">
|
| 105 |
<h2 class="title is-3">📊 Results</h2>
|
| 106 |
<ul>
|
| 107 |
-
<li><strong
|
| 108 |
-
<li><strong
|
| 109 |
<ul>
|
| 110 |
<li>+7.2% benchmark performance improvement</li>
|
| 111 |
<li>+4.8% token retention vs. FineWeb2 heuristic filter</li>
|
| 112 |
<li>Effective threshold strategies: 0.6 and 0.7 quantile</li>
|
| 113 |
</ul>
|
| 114 |
</li>
|
| 115 |
-
<li><strong
|
| 116 |
</ul>
|
| 117 |
</div>
|
| 118 |
</section>
|
|
@@ -121,9 +75,9 @@
|
|
| 121 |
<div class="container content">
|
| 122 |
<h2 class="title is-3">📁 Available Artifacts</h2>
|
| 123 |
<ul>
|
| 124 |
-
<li
|
| 125 |
-
<li
|
| 126 |
-
<li
|
| 127 |
<ul>
|
| 128 |
<li>JQL-Gemma</li>
|
| 129 |
<li>JQL-Mistral</li>
|
|
|
|
| 36 |
</p>
|
| 37 |
</div>
|
| 38 |
</section>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
<section class="section">
|
| 41 |
<div class="container content">
|
|
|
|
| 58 |
<div class="container content">
|
| 59 |
<h2 class="title is-3">📊 Results</h2>
|
| 60 |
<ul>
|
| 61 |
+
<li><strong>✔️ Accuracy:</strong> Spearman’s ρ &gt; 0.87 with human ground truth</li>
|
| 62 |
+
<li><strong>📈 Downstream LLM Training:</strong>
|
| 63 |
<ul>
|
| 64 |
<li>+7.2% benchmark performance improvement</li>
|
| 65 |
<li>+4.8% token retention vs. FineWeb2 heuristic filter</li>
|
| 66 |
<li>Effective threshold strategies: 0.6 and 0.7 quantile</li>
|
| 67 |
</ul>
|
| 68 |
</li>
|
| 69 |
+
<li><strong>⚡ Annotation Speed:</strong> ~11,000 docs/min (A100 GPU, avg. 690 tokens)</li>
|
| 70 |
</ul>
|
| 71 |
</div>
|
| 72 |
</section>
|
|
|
|
| 75 |
<div class="container content">
|
| 76 |
<h2 class="title is-3">📁 Available Artifacts</h2>
|
| 77 |
<ul>
|
| 78 |
+
<li>📄 Ground truth annotations in 35 languages</li>
|
| 79 |
+
<li>🧠 Synthetic LLM-annotated dataset (14M+ documents)</li>
|
| 80 |
+
<li>🪶 Lightweight annotation models:
|
| 81 |
<ul>
|
| 82 |
<li>JQL-Gemma</li>
|
| 83 |
<li>JQL-Mistral</li>
|