RuQualBench / index.html
kristaller486's picture
add google/gemma-3n-E4B-it (AI Studio)
4c74347 verified
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>RuQualBench Leaderboard </title>
<link rel="stylesheet" href="style.css">
<style>
  /* Reserve a fixed-size box for the sort icon so header labels do not jump around */
  th .sort-toggle {
    position: relative;
    display: inline-flex;
    justify-content: center;
    align-items: center;
    vertical-align: middle;
    width: 16px;
    height: 16px;
    margin-left: 6px;
    line-height: 0;
  }

  /* Both arrows are pinned to the same corner of the button; only their opacity is animated */
  th .sort-toggle svg {
    position: absolute;
    left: 0;
    top: 0;
    height: 14px;
    width: 14px;
    transition: opacity 0.15s ease;
    /* fill (currentColor) is already set inline on each svg, so it is not repeated here */
  }

  /* Keep header labels on a single line and nudge the toggle button down slightly for vertical alignment */
  thead th {
    white-space: nowrap;
  }

  thead th .sort-toggle {
    top: 1px;
  }
</style>
</head>
<body>
<div class="container">
<header>
<h1>RuQualBench Leaderboard <span class="frog">🐸</span></h1>
<p class="subtitle">Бенчмарк качества русского языка для LLM</p>
<div class="view-toggle" role="group" aria-label="Переключение вида">
<button class="toggle-btn is-active" data-mode="table" type="button">Таблица</button>
<button class="toggle-btn" data-mode="cards" type="button">Карточки</button>
</div>
</header>
<div class="leaderboard">
<div class="table-wrap">
<table>
<thead>
  <!--
    Sortable column headers.
    data-col is the zero-based cell index and data-type ("num" or "str") the comparison
    mode; both are read by the sorting script when a .sort-toggle button is clicked.
    The arrow icons are decorative (the button carries the accessible name), so they are
    hidden from assistive technology.
  -->
  <tr>
    <th class="sticky-0" data-col="0" data-type="num" scope="col">#
      <button class="sort-toggle" type="button" aria-label="Сортировать" title="Сортировать" data-state="off" style="border:none;background:none;cursor:pointer;margin-left:6px;display:inline-flex;align-items:center;vertical-align:middle;padding:0;">
        <svg class="icon-up" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 9.97H11.03V18.89L13.04 18.92V9.97H17.03L12.03 4.97Z" /></svg>
        <svg class="icon-down" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 13.92H11.03V5L13.04 4.97V13.92H17.03L12.03 18.92Z" /></svg>
      </button>
    </th>
    <th class="sticky-1" data-col="1" data-type="str" scope="col">Модель
      <button class="sort-toggle" type="button" aria-label="Сортировать" title="Сортировать" data-state="off" style="border:none;background:none;cursor:pointer;margin-left:6px;display:inline-flex;align-items:center;vertical-align:middle;padding:0;">
        <svg class="icon-up" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 9.97H11.03V18.89L13.04 18.92V9.97H17.03L12.03 4.97Z" /></svg>
        <svg class="icon-down" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 13.92H11.03V5L13.04 4.97V13.92H17.03L12.03 18.92Z" /></svg>
      </button>
    </th>
    <th data-col="2" data-type="num" scope="col">Критичные/1000
      <button class="sort-toggle" type="button" aria-label="Сортировать" title="Сортировать" data-state="off" style="border:none;background:none;cursor:pointer;margin-left:6px;display:inline-flex;align-items:center;vertical-align:middle;padding:0;">
        <svg class="icon-up" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 9.97H11.03V18.89L13.04 18.92V9.97H17.03L12.03 4.97Z" /></svg>
        <svg class="icon-down" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 13.92H11.03V5L13.04 4.97V13.92H17.03L12.03 18.92Z" /></svg>
      </button>
    </th>
    <th data-col="3" data-type="num" scope="col">Обычные/1000
      <button class="sort-toggle" type="button" aria-label="Сортировать" title="Сортировать" data-state="off" style="border:none;background:none;cursor:pointer;margin-left:6px;display:inline-flex;align-items:center;vertical-align:middle;padding:0;">
        <svg class="icon-up" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 9.97H11.03V18.89L13.04 18.92V9.97H17.03L12.03 4.97Z" /></svg>
        <svg class="icon-down" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 13.92H11.03V5L13.04 4.97V13.92H17.03L12.03 18.92Z" /></svg>
      </button>
    </th>
    <th data-col="4" data-type="num" scope="col">Доп./1000
      <button class="sort-toggle" type="button" aria-label="Сортировать" title="Сортировать" data-state="off" style="border:none;background:none;cursor:pointer;margin-left:6px;display:inline-flex;align-items:center;vertical-align:middle;padding:0;">
        <svg class="icon-up" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 9.97H11.03V18.89L13.04 18.92V9.97H17.03L12.03 4.97Z" /></svg>
        <svg class="icon-down" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 13.92H11.03V5L13.04 4.97V13.92H17.03L12.03 18.92Z" /></svg>
      </button>
    </th>
    <th class="score-col" data-col="5" data-type="num" scope="col">Нормировано ошибок
      <button class="sort-toggle" type="button" aria-label="Сортировать" title="Сортировать" data-state="off" style="border:none;background:none;cursor:pointer;margin-left:6px;display:inline-flex;align-items:center;vertical-align:middle;padding:0;">
        <svg class="icon-up" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 9.97H11.03V18.89L13.04 18.92V9.97H17.03L12.03 4.97Z" /></svg>
        <svg class="icon-down" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 13.92H11.03V5L13.04 4.97V13.92H17.03L12.03 18.92Z" /></svg>
      </button>
    </th>
    <th data-col="6" data-type="num" scope="col">Всего токенов
      <button class="sort-toggle" type="button" aria-label="Сортировать" title="Сортировать" data-state="off" style="border:none;background:none;cursor:pointer;margin-left:6px;display:inline-flex;align-items:center;vertical-align:middle;padding:0;">
        <svg class="icon-up" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 9.97H11.03V18.89L13.04 18.92V9.97H17.03L12.03 4.97Z" /></svg>
        <svg class="icon-down" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true" focusable="false" style="width:14px;height:14px;display:none;fill:currentColor;"><path d="M7.03 13.92H11.03V5L13.04 4.97V13.92H17.03L12.03 18.92Z" /></svg>
      </button>
    </th>
  </tr>
</thead>
<tbody>
<tr data-model="openrouter/anthropic/claude-sonnet-4.5">
<td class="rank mono sticky-0" data-label="#">#1</td>
<td class="model-name sticky-1" data-label="Модель">Claude Sonnet 4.5</td>
<td class="num mono" data-label="Критичные/1000">
0.07 ± 0.01
</td>
<td class="num mono" data-label="Обычные/1000">
0.18 ± 0.02
</td>
<td class="num mono" data-label="Доп./1000">
0.12 ± 0.01
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 91.2037037037037%"></div>
</div>
<span class="score-value">
0.38 ± 0.04
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">121,783</td>
</tr>
<tr data-model="openrouter/openai/gpt-4o">
<td class="rank mono sticky-0" data-label="#">#2</td>
<td class="model-name sticky-1" data-label="Модель">GPT-4o</td>
<td class="num mono" data-label="Критичные/1000">
0.03 ± 0.01
</td>
<td class="num mono" data-label="Обычные/1000">
0.21 ± 0.05
</td>
<td class="num mono" data-label="Доп./1000">
0.34 ± 0.08
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 89.81481481481481%"></div>
</div>
<span class="score-value">
0.44 ± 0.04
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">55,547</td>
</tr>
<tr data-model="litellm_proxy/gemini-2.5-flash">
<td class="rank mono sticky-0" data-label="#">#3</td>
<td class="model-name sticky-1" data-label="Модель">Gemini 2.5 Flash (GA)</td>
<td class="num mono" data-label="Критичные/1000">
0.08 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.21 ± 0.03
</td>
<td class="num mono" data-label="Доп./1000">
0.24 ± 0.01
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 88.88888888888889%"></div>
</div>
<span class="score-value">
0.48 ± 0.03
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">151,942</td>
</tr>
<tr data-model="openrouter/minimax/minimax-01">
<td class="rank mono sticky-0" data-label="#">#4</td>
<td class="model-name sticky-1" data-label="Модель">MiniMaxAI/MiniMax-Text-01 (Minimax API)</td>
<td class="num mono" data-label="Критичные/1000">
0.04 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.21 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.46 ± 0.02
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 88.19444444444444%"></div>
</div>
<span class="score-value">
0.51 ± 0.07
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">72,187</td>
</tr>
<tr data-model="openai/Vikhrmodels/Vistral-24B-Instruct">
<td class="rank mono sticky-0" data-label="#">#5</td>
<td class="model-name sticky-1" data-label="Модель">Vikhrmodels/Vistral-24B-Instruct (SGLang)</td>
<td class="num mono" data-label="Критичные/1000">
0.08 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.31 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.12 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 87.73148148148148%"></div>
</div>
<span class="score-value">
0.53 ± 0.02
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">85,900</td>
</tr>
<tr data-model="openai/unsloth/gemma-3-27b-it">
<td class="rank mono sticky-0" data-label="#">#6</td>
<td class="model-name sticky-1" data-label="Модель">Gemma-3-27b-it (SGLang)</td>
<td class="num mono" data-label="Критичные/1000">
0.08 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.28 ± 0.02
</td>
<td class="num mono" data-label="Доп./1000">
0.31 ± 0.05
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 86.3425925925926%"></div>
</div>
<span class="score-value">
0.59 ± 0.06
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">101,336</td>
</tr>
<tr data-model="openrouter/qwen/qwen3-235b-a22b-2507">
<td class="rank mono sticky-0" data-label="#">#7</td>
<td class="model-name sticky-1" data-label="Модель">Qwen/Qwen3-235B-A22B-Instruct-2507 (Vertex AI API)</td>
<td class="num mono" data-label="Критичные/1000">
0.09 ± 0.01
</td>
<td class="num mono" data-label="Обычные/1000">
0.33 ± 0.06
</td>
<td class="num mono" data-label="Доп./1000">
0.17 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 86.11111111111111%"></div>
</div>
<span class="score-value">
0.60 ± 0.07
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">133,057</td>
</tr>
<tr data-model="openai/mistralai/Mistral-Small-3.2-24B-Instruct-2506">
<td class="rank mono sticky-0" data-label="#">#8</td>
<td class="model-name sticky-1" data-label="Модель">Mistral-Small-3.2-24B-Instruct-2506 (vllm)</td>
<td class="num mono" data-label="Критичные/1000">
0.10 ± 0.03
</td>
<td class="num mono" data-label="Обычные/1000">
0.41 ± 0.02
</td>
<td class="num mono" data-label="Доп./1000">
0.17 ± 0.02
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 84.02777777777779%"></div>
</div>
<span class="score-value">
0.69 ± 0.05
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">74,469</td>
</tr>
<tr data-model="openrouter/deepseek/deepseek-chat">
<td class="rank mono sticky-0" data-label="#">#9</td>
<td class="model-name sticky-1" data-label="Модель">DeepSeek V3 (Novita API)</td>
<td class="num mono" data-label="Критичные/1000">
0.15
</td>
<td class="num mono" data-label="Обычные/1000">
0.34 ± 0.03
</td>
<td class="num mono" data-label="Доп./1000">
0.12 ± 0.02
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 83.7962962962963%"></div>
</div>
<span class="score-value">
0.70 ± 0.02
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">75,606</td>
</tr>
<tr data-model="openai/RefalMachine/RuadaptQwen3-32B-Instruct">
<td class="rank mono sticky-0" data-label="#">#10</td>
<td class="model-name sticky-1" data-label="Модель">RefalMachine/RuadaptQwen3-32B-Instruct (SGLang)</td>
<td class="num mono" data-label="Критичные/1000">
0.09 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.39 ± 0.05
</td>
<td class="num mono" data-label="Доп./1000">
0.26 ± 0.06
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 83.56481481481481%"></div>
</div>
<span class="score-value">
0.71 ± 0.12
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">163,424</td>
</tr>
<tr data-model="openai/yandex/YandexGPT-5-Lite-8B-instruct">
<td class="rank mono sticky-0" data-label="#">#11</td>
<td class="model-name sticky-1" data-label="Модель">yandex/YandexGPT-5-Lite-8B-instruct (SGLang)</td>
<td class="num mono" data-label="Критичные/1000">
0.09
</td>
<td class="num mono" data-label="Обычные/1000">
0.26 ± 0.03
</td>
<td class="num mono" data-label="Доп./1000">
0.69 ± 0.04
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 81.94444444444444%"></div>
</div>
<span class="score-value">
0.78 ± 0.05
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">47,060</td>
</tr>
<tr data-model="openrouter/anthropic/claude-haiku-4.5">
<td class="rank mono sticky-0" data-label="#">#12</td>
<td class="model-name sticky-1" data-label="Модель">Claude Haiku 4.5</td>
<td class="num mono" data-label="Критичные/1000">
0.12 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.47 ± 0.03
</td>
<td class="num mono" data-label="Доп./1000">
0.25 ± 0.06
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 80.55555555555556%"></div>
</div>
<span class="score-value">
0.84 ± 0.02
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">114,212</td>
</tr>
<tr data-model="openai/Qwen/Qwen3-VL-32B-Instruct">
<td class="rank mono sticky-0" data-label="#">#13</td>
<td class="model-name sticky-1" data-label="Модель">Qwen3-VL-32B-Instruct (SGLang)</td>
<td class="num mono" data-label="Критичные/1000">
0.12 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.52 ± 0.02
</td>
<td class="num mono" data-label="Доп./1000">
0.27 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 79.39814814814815%"></div>
</div>
<span class="score-value">
0.89 ± 0.07
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">147,626</td>
</tr>
<tr data-model="openai/AvitoTech/avibe">
<td class="rank mono sticky-0" data-label="#">#14</td>
<td class="model-name sticky-1" data-label="Модель">AvitoTech/avibe</td>
<td class="num mono" data-label="Критичные/1000">
0.06
</td>
<td class="num mono" data-label="Обычные/1000">
0.65 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.44 ± 0.05
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 77.08333333333334%"></div>
</div>
<span class="score-value">
0.99 ± 0.06
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">88,360</td>
</tr>
<tr data-model="openai/ai-sage/GigaChat-20B-A3B-instruct-v1.5-bf16">
<td class="rank mono sticky-0" data-label="#">#15</td>
<td class="model-name sticky-1" data-label="Модель">GigaChat-20B-A3B-instruct-v1.5 (SGLang)</td>
<td class="num mono" data-label="Критичные/1000">
0.06 ± 0.01
</td>
<td class="num mono" data-label="Обычные/1000">
0.40 ± 0.02
</td>
<td class="num mono" data-label="Доп./1000">
1.03 ± 0.06
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 75.92592592592592%"></div>
</div>
<span class="score-value">
1.04 ± 0.01
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">74,836</td>
</tr>
<tr data-model="litellm_proxy/deepseek-v3">
<td class="rank mono sticky-0" data-label="#">#16</td>
<td class="model-name sticky-1" data-label="Модель">Deepseek V3.2-Exp (Deepseek API)</td>
<td class="num mono" data-label="Критичные/1000">
0.26 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.47 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.24 ± 0.05
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 74.53703703703704%"></div>
</div>
<span class="score-value">
1.10 ± 0.03
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">135,889</td>
</tr>
<tr data-model="litellm_proxy/tmp-gemma-3n-e4b-it">
<td class="rank mono sticky-0" data-label="#">#17</td>
<td class="model-name sticky-1" data-label="Модель">google/gemma-3n-E4B-it (AI Studio)</td>
<td class="num mono" data-label="Критичные/1000">
0.18 ± 0.01
</td>
<td class="num mono" data-label="Обычные/1000">
0.50 ± 0.02
</td>
<td class="num mono" data-label="Доп./1000">
0.48 ± 0.07
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 74.30555555555556%"></div>
</div>
<span class="score-value">
1.11
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">99,646</td>
</tr>
<tr data-model="openrouter/qwen/qwen3-next-80b-a3b-instruct">
<td class="rank mono sticky-0" data-label="#">#18</td>
<td class="model-name sticky-1" data-label="Модель">Qwen3-Next-80B-A3B-Instruct (Alibaba API)</td>
<td class="num mono" data-label="Критичные/1000">
0.19 ± 0.02
</td>
<td class="num mono" data-label="Обычные/1000">
0.73 ± 0.03
</td>
<td class="num mono" data-label="Доп./1000">
0.23 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 71.75925925925925%"></div>
</div>
<span class="score-value">
1.22 ± 0.02
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">175,847</td>
</tr>
<tr data-model="openrouter/baidu/ernie-4.5-300b-a47b">
<td class="rank mono sticky-0" data-label="#">#19</td>
<td class="model-name sticky-1" data-label="Модель">baidu/ERNIE-4.5-300B-A47B-PT (Novita API)</td>
<td class="num mono" data-label="Критичные/1000">
0.11 ± 0.01
</td>
<td class="num mono" data-label="Обычные/1000">
0.74 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.52 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 71.75925925925925%"></div>
</div>
<span class="score-value">
1.22 ± 0.05
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">95,684</td>
</tr>
<tr data-model="openai/Qwen/Qwen3-32B">
<td class="rank mono sticky-0" data-label="#">#20</td>
<td class="model-name sticky-1" data-label="Модель">Qwen3-32B (SGLang, without reasoning)</td>
<td class="num mono" data-label="Критичные/1000">
0.13 ± 0.03
</td>
<td class="num mono" data-label="Обычные/1000">
0.84 ± 0.03
</td>
<td class="num mono" data-label="Доп./1000">
0.29 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 71.06481481481481%"></div>
</div>
<span class="score-value">
1.25 ± 0.04
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">99,705</td>
</tr>
<tr data-model="openai/t-tech/T-pro-it-2.0">
<td class="rank mono sticky-0" data-label="#">#21</td>
<td class="model-name sticky-1" data-label="Модель">t-tech/T-pro-it-2.0 (SGLang, without reasoning)</td>
<td class="num mono" data-label="Критичные/1000">
0.26 ± 0.05
</td>
<td class="num mono" data-label="Обычные/1000">
0.68 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.15 ± 0.02
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 70.60185185185186%"></div>
</div>
<span class="score-value">
1.27 ± 0.11
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">132,071</td>
</tr>
<tr data-model="openai/tiiuae/Falcon-H1-34B-Instruct">
<td class="rank mono sticky-0" data-label="#">#22</td>
<td class="model-name sticky-1" data-label="Модель">tiiuae/Falcon-H1-34B-Instruct (vllm)</td>
<td class="num mono" data-label="Критичные/1000">
0.22 ± 0.04
</td>
<td class="num mono" data-label="Обычные/1000">
0.77 ± 0.06
</td>
<td class="num mono" data-label="Доп./1000">
0.42 ± 0.09
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 66.89814814814815%"></div>
</div>
<span class="score-value">
1.43 ± 0.10
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">60,048</td>
</tr>
<tr data-model="openrouter/qwen/qwen3-235b-a22b-2507">
<td class="rank mono sticky-0" data-label="#">#23</td>
<td class="model-name sticky-1" data-label="Модель">Qwen3-235B-A22B-2507-Instruct (Alibaba API)</td>
<td class="num mono" data-label="Критичные/1000">
0.46 ± 0.19
</td>
<td class="num mono" data-label="Обычные/1000">
0.56 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.16 ± 0.01
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 64.12037037037037%"></div>
</div>
<span class="score-value">
1.55 ± 0.35
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">137,855</td>
</tr>
<tr data-model="openrouter/qwen/qwen3-vl-8b-instruct">
<td class="rank mono sticky-0" data-label="#">#24</td>
<td class="model-name sticky-1" data-label="Модель">Qwen3-VL-8B-Instruct (Alibaba API, presence_penalty=2)</td>
<td class="num mono" data-label="Критичные/1000">
0.24 ± 0.03
</td>
<td class="num mono" data-label="Обычные/1000">
1.02 ± 0.05
</td>
<td class="num mono" data-label="Доп./1000">
0.32 ± 0.01
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 61.57407407407408%"></div>
</div>
<span class="score-value">
1.66 ± 0.11
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">165,307</td>
</tr>
<tr data-model="openrouter/moonshotai/kimi-k2-0905">
<td class="rank mono sticky-0" data-label="#">#25</td>
<td class="model-name sticky-1" data-label="Модель">moonshotai/Kimi-K2-Instruct-0905 (Novita API)</td>
<td class="num mono" data-label="Критичные/1000">
0.39 ± 0.10
</td>
<td class="num mono" data-label="Обычные/1000">
0.83 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.26 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 59.95370370370371%"></div>
</div>
<span class="score-value">
1.73 ± 0.16
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">100,380</td>
</tr>
<tr data-model="openrouter/z-ai/glm-4.6">
<td class="rank mono sticky-0" data-label="#">#26</td>
<td class="model-name sticky-1" data-label="Модель">GLM-4.6 (Z.ai API)</td>
<td class="num mono" data-label="Критичные/1000">
0.49 ± 0.07
</td>
<td class="num mono" data-label="Обычные/1000">
0.71 ± 0.07
</td>
<td class="num mono" data-label="Доп./1000">
0.17 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 58.796296296296305%"></div>
</div>
<span class="score-value">
1.78 ± 0.17
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">151,775</td>
</tr>
<tr data-model="openrouter/openai/gpt-5">
<td class="rank mono sticky-0" data-label="#">#27</td>
<td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: minimal)</td>
<td class="num mono" data-label="Критичные/1000">
0.30 ± 0.05
</td>
<td class="num mono" data-label="Обычные/1000">
1.09 ± 0.04
</td>
<td class="num mono" data-label="Доп./1000">
0.28 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 57.407407407407405%"></div>
</div>
<span class="score-value">
1.84 ± 0.05
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">163,827</td>
</tr>
<tr data-model="openrouter/openai/gpt-5">
<td class="rank mono sticky-0" data-label="#">#28</td>
<td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: low)</td>
<td class="num mono" data-label="Критичные/1000">
0.25 ± 0.01
</td>
<td class="num mono" data-label="Обычные/1000">
1.40 ± 0.13
</td>
<td class="num mono" data-label="Доп./1000">
0.40 ± 0.01
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 51.620370370370374%"></div>
</div>
<span class="score-value">
2.09 ± 0.12
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">165,854</td>
</tr>
<tr data-model="openai/nvidia/NVIDIA-Nemotron-Nano-12B-v2">
<td class="rank mono sticky-0" data-label="#">#29</td>
<td class="model-name sticky-1" data-label="Модель">nvidia/NVIDIA-Nemotron-Nano-12B-v2 (vllm, reasoning=false)</td>
<td class="num mono" data-label="Критичные/1000">
0.38 ± 0.06
</td>
<td class="num mono" data-label="Обычные/1000">
1.18 ± 0.10
</td>
<td class="num mono" data-label="Доп./1000">
0.45 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 49.768518518518526%"></div>
</div>
<span class="score-value">
2.17 ± 0.22
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">86,045</td>
</tr>
<tr data-model="openrouter/openai/gpt-oss-120b">
<td class="rank mono sticky-0" data-label="#">#30</td>
<td class="model-name sticky-1" data-label="Модель">GPT-OSS-120B (Vertex AI API)</td>
<td class="num mono" data-label="Критичные/1000">
0.37 ± 0.05
</td>
<td class="num mono" data-label="Обычные/1000">
1.36 ± 0.08
</td>
<td class="num mono" data-label="Доп./1000">
0.40 ± 0.01
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 46.99074074074075%"></div>
</div>
<span class="score-value">
2.29 ± 0.17
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">183,215</td>
</tr>
<tr data-model="openrouter/mistralai/mistral-nemo">
<td class="rank mono sticky-0" data-label="#">#31</td>
<td class="model-name sticky-1" data-label="Модель">Mistral-Nemo (Mistral API)</td>
<td class="num mono" data-label="Критичные/1000">
0.51 ± 0.07
</td>
<td class="num mono" data-label="Обычные/1000">
1.17 ± 0.09
</td>
<td class="num mono" data-label="Доп./1000">
1.15 ± 0.12
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 35.879629629629626%"></div>
</div>
<span class="score-value">
2.77 ± 0.24
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">53,243</td>
</tr>
<tr data-model="openrouter/minimax/minimax-m2:free">
<td class="rank mono sticky-0" data-label="#">#32</td>
<td class="model-name sticky-1" data-label="Модель">MiniMaxAI/MiniMax-M2 (Minimax API)</td>
<td class="num mono" data-label="Критичные/1000">
0.46 ± 0.04
</td>
<td class="num mono" data-label="Обычные/1000">
1.90 ± 0.05
</td>
<td class="num mono" data-label="Доп./1000">
0.41 ± 0.03
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 30.09259259259259%"></div>
</div>
<span class="score-value">
3.02 ± 0.09
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">239,780</td>
</tr>
<tr data-model="openrouter/minimax/minimax-m2:free">
<td class="rank mono sticky-0" data-label="#">#33</td>
<td class="model-name sticky-1" data-label="Модель">MiniMaxAI/MiniMax-M2 (Minimax API, recommend params)</td>
<td class="num mono" data-label="Критичные/1000">
0.83 ± 0.15
</td>
<td class="num mono" data-label="Обычные/1000">
2.43 ± 0.01
</td>
<td class="num mono" data-label="Доп./1000">
0.48 ± 0.02
</td>
<td data-label="Нормировано ошибок">
<div class="score-cell">
<div class="progress-bar">
<div class="progress-fill" style="width: 0.0%"></div>
</div>
<span class="score-value">
4.32 ± 0.30
</span>
</div>
</td>
<td class="num mono" data-label="Всего токенов">203,067</td>
</tr>
</tbody>
</table>
</div>
</div>
<div class="legend">
<p>Метрики показывают ошибки русского языка, подсчитанные на 1000 токенов ответа: </p>
<p>"Критичные" - грубые нарушения (проблемы в согласовании слов, вставки на других языках, наиболее неестественные придуманные слова);</p>
<p>"Обычные" - заметные ошибки (кальки, согласование и т.п.);</p>
<p>"Дополнительные" - малозначимые нарушения, почти не влияющие на читаемость.</p>
<p>
Значения нормированы на 1000 токенов o200k_base текста; "±" - стандартная ошибка (SE) по серии прогонов, если запусков было несколько.
Чем меньше "Нормировано ошибок", тем лучше.
</p>
<p>
"Нормировано ошибок" - взвешенная метрика: критичные ошибки умножаются на 2, обычные - на 1, дополнительные - на 0.5; сумма нормируется на 1000 токенов ответа.
</p>
<p>Результаты для Gemini 2.5 Flash Lite могут быть завышенными. Для тестирования используется temperature=0, если не указано иное. Recommend params - параметры из карточки модели.</p>
</div>
<p class="info-text">Если хотите, чтобы я добавил ту или иную модель в лидерборд - не стесняйтесь открыть issue/pull request на Github.</p>
<p class="info-text">
Обновлено: 2025-10-30 11:50:26 | Всего моделей: 33 | <a href="https://github.com/kristaller486/RuQualBench">GitHub</a> | <a href="https://t.me/krists">Telegram</a>
</p>
</div>
<script>
// Easter egg: clicking the frog next to the page title swaps the heading text
// between "RuQualBench" and "RuКваlBench".
document.addEventListener('DOMContentLoaded', function() {
  const h1 = document.querySelector('h1');
  const frogSpan = document.querySelector('.frog');
  // Without both elements there is nothing to toggle; leave quietly instead of throwing.
  if (!h1 || !frogSpan) return;

  let isKva = false;
  frogSpan.style.cursor = 'pointer';
  frogSpan.addEventListener('click', function() {
    // Only a text node is rewritten: assigning textContent on an element child
    // would wipe that element's own children.
    const textNode = Array.from(h1.childNodes).find(function(node) {
      return node.nodeType === Node.TEXT_NODE;
    });
    if (!textNode) return;
    isKva = !isKva;
    textNode.textContent = isKva ? 'RuКваlBench Leaderboard ' : 'RuQualBench Leaderboard ';
  });
});
</script>
<script>
document.addEventListener('DOMContentLoaded', function() {
const table = document.querySelector('.leaderboard table');
if (!table || !table.tHead || !table.tBodies.length) return;
const thead = table.tHead;
const tbody = table.tBodies[0];
// Зафиксируем исходный порядок строк, чтобы можно было вернуть "выключено"
Array.from(tbody.querySelectorAll('tr')).forEach((r, i) => r.dataset.initIndex = i);
/**
 * Extract the leading number from a cell's text.
 *
 * Everything after the first "±" is discarded, then every character other than
 * digits, "." and "-" is removed before parsing (so "121,783" becomes 121783
 * and "#12" becomes 12).
 *
 * @param {string} text Raw cell text.
 * @returns {number} The parsed value, or NaN for empty or unparsable input.
 */
function parseNumeric(text) {
  if (!text) return NaN;
  const [valuePart] = text.split('±');
  const digitsOnly = valuePart.replace(/[^0-9\.\-]/g, '');
  return parseFloat(digitsOnly);
}
/**
 * Read the sortable value of one cell of a body row.
 *
 * @param {HTMLTableRowElement} row  Body row to read from.
 * @param {number} col               Zero-based index into row.children.
 * @param {string} type              'num' parses the cell via parseNumeric; any other
 *                                   value yields the trimmed, lowercased cell text.
 * @returns {number|string} NaN (numeric) or '' (text) when the cell does not exist.
 */
function getCellValue(row, col, type) {
  const td = row.children[col];
  if (type !== 'num') {
    return td ? td.textContent.trim().toLowerCase() : '';
  }
  if (!td) return NaN;
  // Score cells wrap their number in .score-value next to a progress bar. Looking the
  // span up per cell (instead of by a fixed column index) keeps this working if the
  // columns are reordered; cells without the span fall back to their full text.
  const scoreValue = td.querySelector('.score-value');
  return parseNumeric(scoreValue ? scoreValue.textContent : td.textContent);
}
/**
 * Reorder the body rows for the given column and sort state, then renumber the "#" column.
 *
 * @param {number} col    Zero-based cell index to sort by.
 * @param {string} type   'num' compares parsed numbers; any other value compares
 *                        lowercased text using the 'ru' locale.
 * @param {string} state  'off' restores the order recorded in data-init-index,
 *                        'asc' sorts ascending, any other value sorts descending.
 */
function resort(col, type, state) {
  const rows = Array.from(tbody.querySelectorAll('tr'));
  if (state === 'off') {
    rows.sort((a, b) => parseInt(a.dataset.initIndex, 10) - parseInt(b.dataset.initIndex, 10));
  } else {
    const dir = state === 'asc' ? 1 : -1;
    rows.sort((r1, r2) => {
      const v1 = getCellValue(r1, col, type);
      const v2 = getCellValue(r2, col, type);
      if (type === 'num') {
        // Unparsable cells always sink to the bottom. The direction is applied only
        // to real numbers, so a descending sort does not lift NaN rows to the top.
        const missing1 = Number.isNaN(v1);
        const missing2 = Number.isNaN(v2);
        if (missing1 && missing2) return 0;
        if (missing1) return 1;
        if (missing2) return -1;
        return (v1 - v2) * dir;
      }
      return v1.localeCompare(v2, 'ru') * dir;
    });
  }
  // Re-appending an attached row moves it, so this rewrites the DOM order in place.
  rows.forEach(r => tbody.appendChild(r));
  // The "#" cell is rewritten to the row's current position, so the numbers follow
  // the displayed order rather than staying attached to a model.
  rows.forEach((r, idx) => {
    const rankCell = r.children[0];
    if (rankCell) rankCell.textContent = '#' + (idx + 1);
  });
}
/**
 * Sync a header cell's arrow icons and its aria-sort attribute with th.dataset.state.
 *
 * State 'asc' shows the up arrow, 'desc' the down arrow, and 'off' (also the default
 * when no state is set) a faded down arrow as an inactive hint. Header cells without
 * a .sort-toggle button are left untouched.
 *
 * @param {HTMLTableCellElement} th Header cell to update.
 */
function updateThIcon(th) {
  const state = th.dataset.state || 'off';
  const btn = th.querySelector('.sort-toggle');
  if (!btn) return;

  // Expose the sort direction to assistive technology; the icons alone are purely visual.
  if (state === 'asc') {
    th.setAttribute('aria-sort', 'ascending');
  } else if (state === 'desc') {
    th.setAttribute('aria-sort', 'descending');
  } else {
    th.removeAttribute('aria-sort');
  }

  const up = btn.querySelector('.icon-up');
  const down = btn.querySelector('.icon-down');
  // Icons are never removed from layout; only opacity changes, so the header does not shift.
  if (up) {
    up.style.display = 'block';
    up.style.opacity = (state === 'asc') ? '1' : '0';
  }
  if (down) {
    down.style.display = 'block';
    // In the "off" state the down arrow is shown faded as an inactive hint.
    down.style.opacity = (state === 'off') ? '0.35' : (state === 'desc' ? '1' : '0');
  }
}
/**
 * Reset every sortable header cell except the given one to the "off" state
 * and refresh its icons.
 *
 * @param {HTMLTableCellElement} activeTh Header cell that keeps its current state.
 */
function clearOtherStates(activeTh) {
  for (const headerCell of thead.querySelectorAll('th[data-col]')) {
    if (headerCell === activeTh) continue;
    headerCell.dataset.state = 'off';
    updateThIcon(headerCell);
  }
}
// Initialise every sortable header cell as "off" and wire up its toggle button.
// Each click advances that column through off -> desc -> asc -> off.
for (const th of thead.querySelectorAll('th[data-col]')) {
  th.dataset.state = 'off';
  updateThIcon(th);
  const btn = th.querySelector('.sort-toggle');
  if (!btn) continue;
  btn.addEventListener('click', () => {
    let next;
    switch (th.dataset.state || 'off') {
      case 'off':
        next = 'desc';
        break;
      case 'desc':
        next = 'asc';
        break;
      default:
        next = 'off';
    }
    // Only one column is active at a time.
    clearOtherStates(th);
    th.dataset.state = next;
    updateThIcon(th);
    resort(parseInt(th.dataset.col, 10), th.dataset.type || 'str', next);
  });
}
});
</script>
<script>
// Table / cards view switcher. "cards" mode is expressed as the cards-mode class on <body>.
document.addEventListener('DOMContentLoaded', function(){
  const root = document.body;
  const btns = document.querySelectorAll('.view-toggle .toggle-btn');

  // Apply a view mode and mark the matching button as active. Any mode other than
  // 'cards' falls back to the table view.
  function setMode(mode){
    root.classList.toggle('cards-mode', mode === 'cards');
    btns.forEach(b => {
      const active = b.dataset.mode === mode;
      b.classList.toggle('is-active', active);
      // Mirror the visual state for assistive technology: the class alone is not announced.
      b.setAttribute('aria-pressed', active ? 'true' : 'false');
    });
  }

  // Narrow viewports start in cards mode; matchMedia is feature-checked before use.
  const preferCards = window.matchMedia && window.matchMedia('(max-width: 640px)').matches;
  setMode(preferCards ? 'cards' : 'table');
  btns.forEach(b => b.addEventListener('click', () => setMode(b.dataset.mode)));
});
</script>
</body>
</html>