Update index.html
Browse files- index.html +12 -5
index.html
CHANGED
|
@@ -15,6 +15,7 @@
|
|
| 15 |
.hero.is-primary { background-color: #f9d5e5; }
|
| 16 |
.subtitle img { max-width: 100%; height: auto; }
|
| 17 |
.section-title { margin-top: 2em; }
|
|
|
|
| 18 |
</style>
|
| 19 |
</head>
|
| 20 |
<body>
|
|
@@ -62,7 +63,12 @@
|
|
| 62 |
<span class="author-block"><sup>5</sup>Computer Science Department, TU Darmstadt,</span>
|
| 63 |
<span class="author-block"><sup>6</sup>AI Sweden</span>
|
| 64 |
</div>
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
<div class="column has-text-centered">
|
| 67 |
<span class="link-block">
|
| 68 |
<a href="https://arxiv.org/abs/2505.22232" target="_blank"
|
|
@@ -154,6 +160,7 @@
|
|
| 154 |
<li>Benchmark performance improvement over FineWeb2</li>
|
| 155 |
<li>Higher document retention vs. FineWeb2 heuristic filter</li>
|
| 156 |
<li>Effective dynamic threshold strategies: Trade-off document quality for quantity</li>
|
|
|
|
| 157 |
</ul>
|
| 158 |
</li>
|
| 159 |
<li><strong>⚡ Annotation Speed:</strong> ~11,000 docs/min (A100 GPU, avg. 690 tokens)</li>
|
|
@@ -169,14 +176,14 @@
|
|
| 169 |
<li><a href="https://huggingface.co/datasets/Jackal-AI/JQL-LLM-Edu-Annotations" target="_blank">🧠 Synthetic LLM-annotated dataset (14M+ documents)</a></li>
|
| 170 |
<li><a href="https://huggingface.co/Jackal-AI/JQL-Edu-Heads" target="_blank">🪶 Lightweight annotation models</a>:
|
| 171 |
<ul>
|
| 172 |
-
<li>JQL-Gemma</li>
|
| 173 |
-
<li>JQL-Mistral</li>
|
| 174 |
-
<li>JQL-Llama</li>
|
| 175 |
</ul>
|
| 176 |
</li>
|
| 177 |
<li>🛠️ Training & inference scripts</li>
|
| 178 |
<ul>
|
| 179 |
-
<li><a href="https://
|
| 180 |
<li>More coming soon</li>
|
| 181 |
</ul>
|
| 182 |
<li>🗄️ Large-scale dataset coming soon</li>
|
|
|
|
| 15 |
.hero.is-primary { background-color: #f9d5e5; }
|
| 16 |
.subtitle img { max-width: 100%; height: auto; }
|
| 17 |
.section-title { margin-top: 2em; }
|
| 18 |
+
.contact-info { margin-top: 1em; } /* Adds top spacing to the contact line that follows the affiliations block */
|
| 19 |
</style>
|
| 20 |
</head>
|
| 21 |
<body>
|
|
|
|
| 63 |
<span class="author-block"><sup>5</sup>Computer Science Department, TU Darmstadt,</span>
|
| 64 |
<span class="author-block"><sup>6</sup>AI Sweden</span>
|
| 65 |
</div>
|
| 66 |
+
<div class="is-size-5 contact-info has-text-centered">
|
| 67 |
+
<span class="icon">
|
| 68 |
+
<i class="fas fa-envelope" aria-hidden="true"></i><!-- decorative icon: hidden from assistive tech, the adjacent text already says "Contact:" -->
|
| 69 |
+
</span>
|
| 70 |
+
<span>Contact: <a href="mailto:mehdi.ali@iais.fraunhofer.de">mehdi.ali@iais.fraunhofer.de</a>, <a href="mailto:brack@cs.tu-darmstadt.de">brack@cs.tu-darmstadt.de</a></span><!-- addresses are mailto links so they can be activated directly -->
|
| 71 |
+
</div>
|
| 72 |
<div class="column has-text-centered">
|
| 73 |
<span class="link-block">
|
| 74 |
<a href="https://arxiv.org/abs/2505.22232" target="_blank"
|
|
|
|
| 160 |
<li>Benchmark performance improvement over FineWeb2</li>
|
| 161 |
<li>Higher document retention vs. FineWeb2 heuristic filter</li>
|
| 162 |
<li>Effective dynamic threshold strategies: Trade-off document quality for quantity</li>
|
| 163 |
+
<li>Generalizes to unseen languages</li>
|
| 164 |
</ul>
|
| 165 |
</li>
|
| 166 |
<li><strong>⚡ Annotation Speed:</strong> ~11,000 docs/min (A100 GPU, avg. 690 tokens)</li>
|
|
|
|
| 176 |
<li><a href="https://huggingface.co/datasets/Jackal-AI/JQL-LLM-Edu-Annotations" target="_blank">🧠 Synthetic LLM-annotated dataset (14M+ documents)</a></li>
|
| 177 |
<li><a href="https://huggingface.co/Jackal-AI/JQL-Edu-Heads" target="_blank">🪶 Lightweight annotation models</a>:
|
| 178 |
<ul>
|
| 179 |
+
<li>JQL-Edu-Gemma</li>
|
| 180 |
+
<li>JQL-Edu-Mistral</li>
|
| 181 |
+
<li>JQL-Edu-Llama</li>
|
| 182 |
</ul>
|
| 183 |
</li>
|
| 184 |
<li>🛠️ Training &amp; inference scripts <!-- sub-list is nested inside this item; a ul may only contain li children, so the item closes after the sub-list -->
|
| 185 |
<ul>
|
| 186 |
+
<li><a href="https://github.com/JQL-AI/JQL-Annotation-Pipeline" target="_blank">Web Corpus Annotation</a></li>
|
| 187 |
<li>More coming soon</li>
|
| 188 |
</ul></li>
|
| 189 |
<li>🗄️ Large-scale dataset coming soon</li>
|