kristaller486 committed on
Commit
cb8f2c6
·
verified ·
1 Parent(s): 2338099

add tiiuae/Falcon-H1-34B-Instruct, add nvidia/NVIDIA-Nemotron-Nano-12B-v2

Browse files
Files changed (1) hide show
  1. index.html +80 -8
index.html CHANGED
@@ -634,8 +634,44 @@
634
  <td class="num mono" data-label="Всего токенов">132,071</td>
635
  </tr>
636
 
637
- <tr data-model="openrouter/qwen/qwen3-235b-a22b-2507">
638
  <td class="rank mono sticky-0" data-label="#">#16</td>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
  <td class="model-name sticky-1" data-label="Модель">Qwen3-235B-A22B-2507-Instruct (Alibaba API)</td>
640
  <td class="num mono" data-label="Критичные/1000">
641
 
@@ -671,7 +707,7 @@
671
  </tr>
672
 
673
  <tr data-model="openrouter/moonshotai/kimi-k2-0905">
674
- <td class="rank mono sticky-0" data-label="#">#17</td>
675
  <td class="model-name sticky-1" data-label="Модель">moonshotai/Kimi-K2-Instruct-0905 (Novita API)</td>
676
  <td class="num mono" data-label="Критичные/1000">
677
 
@@ -707,7 +743,7 @@
707
  </tr>
708
 
709
  <tr data-model="openrouter/z-ai/glm-4.6">
710
- <td class="rank mono sticky-0" data-label="#">#18</td>
711
  <td class="model-name sticky-1" data-label="Модель">GLM-4.6 (Z.ai API)</td>
712
  <td class="num mono" data-label="Критичные/1000">
713
 
@@ -743,7 +779,7 @@
743
  </tr>
744
 
745
  <tr data-model="openrouter/openai/gpt-5">
746
- <td class="rank mono sticky-0" data-label="#">#19</td>
747
  <td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: minimal)</td>
748
  <td class="num mono" data-label="Критичные/1000">
749
 
@@ -779,7 +815,7 @@
779
  </tr>
780
 
781
  <tr data-model="openrouter/openai/gpt-5">
782
- <td class="rank mono sticky-0" data-label="#">#20</td>
783
  <td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: low)</td>
784
  <td class="num mono" data-label="Критичные/1000">
785
 
@@ -814,8 +850,44 @@
814
  <td class="num mono" data-label="Всего токенов">165,854</td>
815
  </tr>
816
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
817
  <tr data-model="openrouter/openai/gpt-oss-120b">
818
- <td class="rank mono sticky-0" data-label="#">#21</td>
819
  <td class="model-name sticky-1" data-label="Модель">GPT-OSS-120B (Vertex AI API)</td>
820
  <td class="num mono" data-label="Критичные/1000">
821
 
@@ -851,7 +923,7 @@
851
  </tr>
852
 
853
  <tr data-model="openrouter/mistralai/mistral-nemo">
854
- <td class="rank mono sticky-0" data-label="#">#22</td>
855
  <td class="model-name sticky-1" data-label="Модель">Mistral-Nemo (Mistral API)</td>
856
  <td class="num mono" data-label="Критичные/1000">
857
 
@@ -909,7 +981,7 @@
909
  </div>
910
  <p class="info-text">Если хотите, чтобы я добавил ту или иную модель в лидерборд - не стесняйтесь открыть issue/pull request на Github.</p>
911
  <p class="info-text">
912
- Обновлено: 2025-10-22 16:35:03 | Всего моделей: 22 | <a href="https://github.com/kristaller486/RuQualBench">GitHub</a> | <a href="https://t.me/krists">Telegram</a>
913
  </p>
914
  </div>
915
  <script>
 
634
  <td class="num mono" data-label="Всего токенов">132,071</td>
635
  </tr>
636
 
637
+ <tr data-model="openai/tiiuae/Falcon-H1-34B-Instruct">
638
  <td class="rank mono sticky-0" data-label="#">#16</td>
639
+ <td class="model-name sticky-1" data-label="Модель">tiiuae/Falcon-H1-34B-Instruct (vllm)</td>
640
+ <td class="num mono" data-label="Критичные/1000">
641
+
642
+ 0.22 ± 0.04
643
+
644
+ </td>
645
+ <td class="num mono" data-label="Обычные/1000">
646
+
647
+ 0.77 ± 0.06
648
+
649
+ </td>
650
+ <td class="num mono" data-label="Доп./1000">
651
+
652
+ 0.42 ± 0.09
653
+
654
+ </td>
655
+ <td data-label="Нормировано ошибок">
656
+ <div class="score-cell">
657
+ <div class="progress-bar">
658
+
659
+
660
+
661
+ <div class="progress-fill" style="width: 48.375451263537904%"></div>
662
+ </div>
663
+ <span class="score-value">
664
+
665
+ 1.43 ± 0.10
666
+
667
+ </span>
668
+ </div>
669
+ </td>
670
+ <td class="num mono" data-label="Всего токенов">60,048</td>
671
+ </tr>
672
+
673
+ <tr data-model="openrouter/qwen/qwen3-235b-a22b-2507">
674
+ <td class="rank mono sticky-0" data-label="#">#17</td>
675
  <td class="model-name sticky-1" data-label="Модель">Qwen3-235B-A22B-2507-Instruct (Alibaba API)</td>
676
  <td class="num mono" data-label="Критичные/1000">
677
 
 
707
  </tr>
708
 
709
  <tr data-model="openrouter/moonshotai/kimi-k2-0905">
710
+ <td class="rank mono sticky-0" data-label="#">#18</td>
711
  <td class="model-name sticky-1" data-label="Модель">moonshotai/Kimi-K2-Instruct-0905 (Novita API)</td>
712
  <td class="num mono" data-label="Критичные/1000">
713
 
 
743
  </tr>
744
 
745
  <tr data-model="openrouter/z-ai/glm-4.6">
746
+ <td class="rank mono sticky-0" data-label="#">#19</td>
747
  <td class="model-name sticky-1" data-label="Модель">GLM-4.6 (Z.ai API)</td>
748
  <td class="num mono" data-label="Критичные/1000">
749
 
 
779
  </tr>
780
 
781
  <tr data-model="openrouter/openai/gpt-5">
782
+ <td class="rank mono sticky-0" data-label="#">#20</td>
783
  <td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: minimal)</td>
784
  <td class="num mono" data-label="Критичные/1000">
785
 
 
815
  </tr>
816
 
817
  <tr data-model="openrouter/openai/gpt-5">
818
+ <td class="rank mono sticky-0" data-label="#">#21</td>
819
  <td class="model-name sticky-1" data-label="Модель">GPT-5 (reasoning: low)</td>
820
  <td class="num mono" data-label="Критичные/1000">
821
 
 
850
  <td class="num mono" data-label="Всего токенов">165,854</td>
851
  </tr>
852
 
853
+ <tr data-model="openai/nvidia/NVIDIA-Nemotron-Nano-12B-v2">
854
+ <td class="rank mono sticky-0" data-label="#">#22</td>
855
+ <td class="model-name sticky-1" data-label="Модель">nvidia/NVIDIA-Nemotron-Nano-12B-v2 (vllm, reasoning=false)</td>
856
+ <td class="num mono" data-label="Критичные/1000">
857
+
858
+ 0.38 ± 0.06
859
+
860
+ </td>
861
+ <td class="num mono" data-label="Обычные/1000">
862
+
863
+ 1.18 ± 0.10
864
+
865
+ </td>
866
+ <td class="num mono" data-label="Доп./1000">
867
+
868
+ 0.45 ± 0.03
869
+
870
+ </td>
871
+ <td data-label="Нормировано ошибок">
872
+ <div class="score-cell">
873
+ <div class="progress-bar">
874
+
875
+
876
+
877
+ <div class="progress-fill" style="width: 21.660649819494594%"></div>
878
+ </div>
879
+ <span class="score-value">
880
+
881
+ 2.17 ± 0.22
882
+
883
+ </span>
884
+ </div>
885
+ </td>
886
+ <td class="num mono" data-label="Всего токенов">86,045</td>
887
+ </tr>
888
+
889
  <tr data-model="openrouter/openai/gpt-oss-120b">
890
+ <td class="rank mono sticky-0" data-label="#">#23</td>
891
  <td class="model-name sticky-1" data-label="Модель">GPT-OSS-120B (Vertex AI API)</td>
892
  <td class="num mono" data-label="Критичные/1000">
893
 
 
923
  </tr>
924
 
925
  <tr data-model="openrouter/mistralai/mistral-nemo">
926
+ <td class="rank mono sticky-0" data-label="#">#24</td>
927
  <td class="model-name sticky-1" data-label="Модель">Mistral-Nemo (Mistral API)</td>
928
  <td class="num mono" data-label="Критичные/1000">
929
 
 
981
  </div>
982
  <p class="info-text">Если хотите, чтобы я добавил ту или иную модель в лидерборд - не стесняйтесь открыть issue/pull request на Github.</p>
983
  <p class="info-text">
984
+ Обновлено: 2025-10-23 17:11:18 | Всего моделей: 24 | <a href="https://github.com/kristaller486/RuQualBench">GitHub</a> | <a href="https://t.me/krists">Telegram</a>
985
  </p>
986
  </div>
987
  <script>