Commit
·
7ce5aba
1
Parent(s):
2139f5c
add the missing models
Browse files
app.py
CHANGED
|
@@ -77,6 +77,20 @@ def filter_and_search_models(
|
|
| 77 |
architecture_mask |= filtered_df["Model Name"].str.contains(
|
| 78 |
"google", case=False, na=False
|
| 79 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
filtered_df = filtered_df[architecture_mask]
|
| 82 |
|
|
@@ -136,6 +150,10 @@ def create_html_table(df):
|
|
| 136 |
row_class = "qwen-row"
|
| 137 |
elif "google" in model_name:
|
| 138 |
row_class = "google-row"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
html += f'<tr class="{row_class}">'
|
| 141 |
for i, col in enumerate(df.columns):
|
|
@@ -149,7 +167,14 @@ def create_html_table(df):
|
|
| 149 |
else: # Score columns
|
| 150 |
cell_class = "score-cell"
|
| 151 |
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
html += "</tr>"
|
| 154 |
html += "</tbody>"
|
| 155 |
html += "</table>"
|
|
@@ -205,8 +230,10 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
| 205 |
("🐧 Qwen", "qwen"),
|
| 206 |
("🦙 Llama", "llama"),
|
| 207 |
("🔷 Gemma", "google"),
|
|
|
|
|
|
|
| 208 |
],
|
| 209 |
-
value=["llama", "deepseek", "qwen", "google"],
|
| 210 |
label="",
|
| 211 |
elem_classes="architecture-filter",
|
| 212 |
container=False,
|
|
@@ -232,7 +259,7 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
| 232 |
"",
|
| 233 |
["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
|
| 234 |
"Combined Score",
|
| 235 |
-
["llama", "deepseek", "qwen", "google"],
|
| 236 |
)
|
| 237 |
),
|
| 238 |
elem_id="leaderboard-table",
|
|
@@ -382,6 +409,23 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
| 382 |
word-wrap: break-word;
|
| 383 |
}
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
.size-cell {
|
| 386 |
text-align: center;
|
| 387 |
font-weight: 500;
|
|
@@ -428,6 +472,22 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
| 428 |
background-color: #ffe6f0;
|
| 429 |
}
|
| 430 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
.size-filter {
|
| 432 |
margin-top: 10px;
|
| 433 |
}
|
|
@@ -569,6 +629,34 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
| 569 |
box-shadow: 0 2px 4px rgba(236, 72, 153, 0.3) !important;
|
| 570 |
}
|
| 571 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
/* Search and Filter Section Styling */
|
| 573 |
.search-input input {
|
| 574 |
border: 2px solid #e9ecef !important;
|
|
@@ -652,6 +740,117 @@ with gr.Blocks(title="FACTS Grounding Leaderboard", theme=gr.themes.Base()) as a
|
|
| 652 |
.size-filter {
|
| 653 |
margin-top: 10px;
|
| 654 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 655 |
"""
|
| 656 |
|
| 657 |
# Launch the app
|
|
|
|
| 77 |
architecture_mask |= filtered_df["Model Name"].str.contains(
|
| 78 |
"google", case=False, na=False
|
| 79 |
)
|
| 80 |
+
elif arch == "mistral":
|
| 81 |
+
architecture_mask |= filtered_df["Model Name"].str.contains(
|
| 82 |
+
"mistralai", case=False, na=False
|
| 83 |
+
)
|
| 84 |
+
elif arch == "others":
|
| 85 |
+
# Include models that don't match any of the main categories
|
| 86 |
+
others_mask = ~(
|
| 87 |
+
filtered_df["Model Name"].str.contains("meta-llama", case=False, na=False) |
|
| 88 |
+
filtered_df["Model Name"].str.contains("deepseek", case=False, na=False) |
|
| 89 |
+
filtered_df["Model Name"].str.contains("Qwen", case=False, na=False) |
|
| 90 |
+
filtered_df["Model Name"].str.contains("google", case=False, na=False) |
|
| 91 |
+
filtered_df["Model Name"].str.contains("mistralai", case=False, na=False)
|
| 92 |
+
)
|
| 93 |
+
architecture_mask |= others_mask
|
| 94 |
|
| 95 |
filtered_df = filtered_df[architecture_mask]
|
| 96 |
|
|
|
|
| 150 |
row_class = "qwen-row"
|
| 151 |
elif "google" in model_name:
|
| 152 |
row_class = "google-row"
|
| 153 |
+
elif "mistralai" in model_name:
|
| 154 |
+
row_class = "mistral-row"
|
| 155 |
+
else:
|
| 156 |
+
row_class = "others-row"
|
| 157 |
|
| 158 |
html += f'<tr class="{row_class}">'
|
| 159 |
for i, col in enumerate(df.columns):
|
|
|
|
| 167 |
else: # Score columns
|
| 168 |
cell_class = "score-cell"
|
| 169 |
|
| 170 |
+
# Create Hugging Face link for model name
|
| 171 |
+
if col == "Model Name":
|
| 172 |
+
hf_url = f"https://huggingface.co/{model_name}"
|
| 173 |
+
cell_content = f'<a href="{hf_url}" target="_blank" class="model-link">{model_name}</a>'
|
| 174 |
+
else:
|
| 175 |
+
cell_content = str(row[col])
|
| 176 |
+
|
| 177 |
+
html += f'<td class="{cell_class}">{cell_content}</td>'
|
| 178 |
html += "</tr>"
|
| 179 |
html += "</tbody>"
|
| 180 |
html += "</table>"
|
|
|
|
| 230 |
("🐧 Qwen", "qwen"),
|
| 231 |
("🦙 Llama", "llama"),
|
| 232 |
("🔷 Gemma", "google"),
|
| 233 |
+
("🌟 Mistral", "mistral"),
|
| 234 |
+
("🔧 Others", "others"),
|
| 235 |
],
|
| 236 |
+
value=["llama", "deepseek", "qwen", "google", "mistral", "others"],
|
| 237 |
label="",
|
| 238 |
elem_classes="architecture-filter",
|
| 239 |
container=False,
|
|
|
|
| 259 |
"",
|
| 260 |
["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B"],
|
| 261 |
"Combined Score",
|
| 262 |
+
["llama", "deepseek", "qwen", "google", "mistral", "others"],
|
| 263 |
)
|
| 264 |
),
|
| 265 |
elem_id="leaderboard-table",
|
|
|
|
| 409 |
word-wrap: break-word;
|
| 410 |
}
|
| 411 |
|
| 412 |
+
.model-link {
|
| 413 |
+
color: #0066cc !important;
|
| 414 |
+
text-decoration: none !important;
|
| 415 |
+
font-weight: 500 !important;
|
| 416 |
+
transition: all 0.2s ease !important;
|
| 417 |
+
border-bottom: 1px solid transparent !important;
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
.model-link:hover {
|
| 421 |
+
color: #0052a3 !important;
|
| 422 |
+
border-bottom: 1px solid #0066cc !important;
|
| 423 |
+
background-color: rgba(0, 102, 204, 0.05) !important;
|
| 424 |
+
padding: 2px 4px !important;
|
| 425 |
+
border-radius: 4px !important;
|
| 426 |
+
margin: -2px -4px !important;
|
| 427 |
+
}
|
| 428 |
+
|
| 429 |
.size-cell {
|
| 430 |
text-align: center;
|
| 431 |
font-weight: 500;
|
|
|
|
| 472 |
background-color: #ffe6f0;
|
| 473 |
}
|
| 474 |
|
| 475 |
+
.mistral-row {
|
| 476 |
+
background-color: #faf5ff;
|
| 477 |
+
}
|
| 478 |
+
|
| 479 |
+
.mistral-row:hover {
|
| 480 |
+
background-color: #f3e8ff;
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
+
.others-row {
|
| 484 |
+
background-color: #f8fafc;
|
| 485 |
+
}
|
| 486 |
+
|
| 487 |
+
.others-row:hover {
|
| 488 |
+
background-color: #f1f5f9;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
.size-filter {
|
| 492 |
margin-top: 10px;
|
| 493 |
}
|
|
|
|
| 629 |
box-shadow: 0 2px 4px rgba(236, 72, 153, 0.3) !important;
|
| 630 |
}
|
| 631 |
|
| 632 |
+
/* Mistral styling */
|
| 633 |
+
.architecture-filter label:nth-child(5) {
|
| 634 |
+
background: #faf5ff !important;
|
| 635 |
+
border-color: #d8b4fe !important;
|
| 636 |
+
color: #7c3aed !important;
|
| 637 |
+
}
|
| 638 |
+
|
| 639 |
+
.architecture-filter label:nth-child(5):has(input[type="checkbox"]:checked) {
|
| 640 |
+
background: #8b5cf6 !important;
|
| 641 |
+
border-color: #8b5cf6 !important;
|
| 642 |
+
color: white !important;
|
| 643 |
+
box-shadow: 0 2px 4px rgba(139, 92, 246, 0.3) !important;
|
| 644 |
+
}
|
| 645 |
+
|
| 646 |
+
/* Others styling */
|
| 647 |
+
.architecture-filter label:nth-child(6) {
|
| 648 |
+
background: #f8fafc !important;
|
| 649 |
+
border-color: #cbd5e1 !important;
|
| 650 |
+
color: #475569 !important;
|
| 651 |
+
}
|
| 652 |
+
|
| 653 |
+
.architecture-filter label:nth-child(6):has(input[type="checkbox"]:checked) {
|
| 654 |
+
background: #64748b !important;
|
| 655 |
+
border-color: #64748b !important;
|
| 656 |
+
color: white !important;
|
| 657 |
+
box-shadow: 0 2px 4px rgba(100, 116, 139, 0.3) !important;
|
| 658 |
+
}
|
| 659 |
+
|
| 660 |
/* Search and Filter Section Styling */
|
| 661 |
.search-input input {
|
| 662 |
border: 2px solid #e9ecef !important;
|
|
|
|
| 740 |
.size-filter {
|
| 741 |
margin-top: 10px;
|
| 742 |
}
|
| 743 |
+
|
| 744 |
+
/* Dark Mode Specific Styles */
|
| 745 |
+
@media (prefers-color-scheme: dark) {
|
| 746 |
+
.leaderboard-table {
|
| 747 |
+
background: #1f2937 !important;
|
| 748 |
+
color: #f9fafb !important;
|
| 749 |
+
}
|
| 750 |
+
|
| 751 |
+
.leaderboard-table th {
|
| 752 |
+
background-color: #374151 !important;
|
| 753 |
+
color: #f9fafb !important;
|
| 754 |
+
border-bottom: 2px solid #4b5563 !important;
|
| 755 |
+
}
|
| 756 |
+
|
| 757 |
+
.leaderboard-table td {
|
| 758 |
+
color: #f9fafb !important;
|
| 759 |
+
border-bottom: 1px solid #374151 !important;
|
| 760 |
+
}
|
| 761 |
+
|
| 762 |
+
.leaderboard-table tbody tr:hover {
|
| 763 |
+
background-color: #374151 !important;
|
| 764 |
+
}
|
| 765 |
+
|
| 766 |
+
.rank-cell {
|
| 767 |
+
background-color: #374151 !important;
|
| 768 |
+
color: #f9fafb !important;
|
| 769 |
+
}
|
| 770 |
+
|
| 771 |
+
.model-cell {
|
| 772 |
+
color: #f9fafb !important;
|
| 773 |
+
}
|
| 774 |
+
|
| 775 |
+
.size-cell {
|
| 776 |
+
color: #d1d5db !important;
|
| 777 |
+
}
|
| 778 |
+
|
| 779 |
+
.score-cell {
|
| 780 |
+
color: #f9fafb !important;
|
| 781 |
+
}
|
| 782 |
+
|
| 783 |
+
/* Dark mode row colors with better contrast */
|
| 784 |
+
.llama-row {
|
| 785 |
+
background-color: rgba(245, 158, 11, 0.1) !important;
|
| 786 |
+
}
|
| 787 |
+
|
| 788 |
+
.llama-row:hover {
|
| 789 |
+
background-color: rgba(245, 158, 11, 0.2) !important;
|
| 790 |
+
}
|
| 791 |
+
|
| 792 |
+
.deepseek-row {
|
| 793 |
+
background-color: rgba(59, 130, 246, 0.1) !important;
|
| 794 |
+
}
|
| 795 |
+
|
| 796 |
+
.deepseek-row:hover {
|
| 797 |
+
background-color: rgba(59, 130, 246, 0.2) !important;
|
| 798 |
+
}
|
| 799 |
+
|
| 800 |
+
.qwen-row {
|
| 801 |
+
background-color: rgba(34, 197, 94, 0.1) !important;
|
| 802 |
+
}
|
| 803 |
+
|
| 804 |
+
.qwen-row:hover {
|
| 805 |
+
background-color: rgba(34, 197, 94, 0.2) !important;
|
| 806 |
+
}
|
| 807 |
+
|
| 808 |
+
.google-row {
|
| 809 |
+
background-color: rgba(236, 72, 153, 0.1) !important; /* base tint stays below the 0.2 :hover tint so hovering is visible, like the other rows */
|
| 810 |
+
}
|
| 811 |
+
|
| 812 |
+
.google-row:hover {
|
| 813 |
+
background-color: rgba(236, 72, 153, 0.2) !important;
|
| 814 |
+
}
|
| 815 |
+
|
| 816 |
+
.mistral-row {
|
| 817 |
+
background-color: rgba(139, 92, 246, 0.1) !important;
|
| 818 |
+
}
|
| 819 |
+
|
| 820 |
+
.mistral-row:hover {
|
| 821 |
+
background-color: rgba(139, 92, 246, 0.2) !important;
|
| 822 |
+
}
|
| 823 |
+
|
| 824 |
+
.others-row {
|
| 825 |
+
background-color: rgba(107, 114, 128, 0.1) !important;
|
| 826 |
+
}
|
| 827 |
+
|
| 828 |
+
.others-row:hover {
|
| 829 |
+
background-color: rgba(107, 114, 128, 0.2) !important;
|
| 830 |
+
}
|
| 831 |
+
|
| 832 |
+
.leaderboard-container {
|
| 833 |
+
border: 1px solid #4b5563 !important;
|
| 834 |
+
}
|
| 835 |
+
|
| 836 |
+
.model-cell {
|
| 837 |
+
color: #f9fafb !important;
|
| 838 |
+
}
|
| 839 |
+
|
| 840 |
+
.model-link {
|
| 841 |
+
color: #60a5fa !important;
|
| 842 |
+
}
|
| 843 |
+
|
| 844 |
+
.model-link:hover {
|
| 845 |
+
color: #93c5fd !important;
|
| 846 |
+
border-bottom: 1px solid #60a5fa !important;
|
| 847 |
+
background-color: rgba(96, 165, 250, 0.1) !important;
|
| 848 |
+
}
|
| 849 |
+
|
| 850 |
+
.size-cell {
|
| 851 |
+
color: #d1d5db !important;
|
| 852 |
+
}
|
| 853 |
+
}
|
| 854 |
"""
|
| 855 |
|
| 856 |
# Launch the app
|