Spaces:
Paused
Paused
Amber Tanaka
committed on
Wrangling Table Headers (#10)
Browse files- assets/api.svg +3 -0
- assets/circle-dark.svg +3 -0
- assets/circle-light.svg +3 -0
- assets/diamond-dark.svg +3 -0
- assets/diamond-light.svg +3 -0
- assets/open-source.svg +3 -0
- assets/open-weights.svg +3 -0
- assets/star-dark.svg +3 -0
- assets/star-light.svg +3 -0
- assets/ui.svg +3 -0
- content.py +52 -1
- ui_components.py +160 -36
assets/api.svg
ADDED
|
|
assets/circle-dark.svg
ADDED
|
|
assets/circle-light.svg
ADDED
|
|
assets/diamond-dark.svg
ADDED
|
|
assets/diamond-light.svg
ADDED
|
|
assets/open-source.svg
ADDED
|
|
assets/open-weights.svg
ADDED
|
|
assets/star-dark.svg
ADDED
|
|
assets/star-light.svg
ADDED
|
|
assets/ui.svg
ADDED
|
|
content.py
CHANGED
|
@@ -100,7 +100,11 @@ CITATION_BUTTON_TEXT = r"""@article{asta-bench,
|
|
| 100 |
primaryClass={cs.AI},
|
| 101 |
secondaryClass={cs.CL}
|
| 102 |
}"""
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
def format_error(msg):
|
| 106 |
return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"
|
|
@@ -202,6 +206,8 @@ nav.svelte-ti537g.svelte-ti537g {
|
|
| 202 |
}
|
| 203 |
#leaderboard-accordion .label-wrap {
|
| 204 |
font-size: 1.4rem !important;
|
|
|
|
|
|
|
| 205 |
}
|
| 206 |
.dark #leaderboard-accordion .label-wrap {
|
| 207 |
color: #0FCB8C !important;
|
|
@@ -236,4 +242,49 @@ nav.svelte-ti537g.svelte-ti537g {
|
|
| 236 |
.sub-nav-link-button:hover {
|
| 237 |
text-decoration: underline;
|
| 238 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
"""
|
|
|
|
| 100 |
primaryClass={cs.AI},
|
| 101 |
secondaryClass={cs.CL}
|
| 102 |
}"""
|
| 103 |
+
# legend_tooltips = {
|
| 104 |
+
# "pareto": "The Pareto frontier represents optimal agents where you cannot improve score without increasing cost.",
|
| 105 |
+
# "openness": "Describes the accessibility of the agent's core model (e.g., Open, Closed, API).",
|
| 106 |
+
# "tooling": "Describes the tools an agent uses (e.g., Standard, Custom)."
|
| 107 |
+
# }
|
| 108 |
|
| 109 |
def format_error(msg):
|
| 110 |
return f"<p style='color: red; font-size: 20px; text-align: center;'>{msg}</p>"
|
|
|
|
| 206 |
}
|
| 207 |
#leaderboard-accordion .label-wrap {
|
| 208 |
font-size: 1.4rem !important;
|
| 209 |
+
z-index: 10 !important;
|
| 210 |
+
position: relative !important;
|
| 211 |
}
|
| 212 |
.dark #leaderboard-accordion .label-wrap {
|
| 213 |
color: #0FCB8C !important;
|
|
|
|
| 242 |
.sub-nav-link-button:hover {
|
| 243 |
text-decoration: underline;
|
| 244 |
}
|
| 245 |
+
.wrap-header-df th span{
|
| 246 |
+
white-space: normal !important;
|
| 247 |
+
word-break: normal !important;
|
| 248 |
+
overflow-wrap: break-word !important;
|
| 249 |
+
line-height: 1.2 !important;
|
| 250 |
+
vertical-align: top !important;
|
| 251 |
+
font-size: 12px !important;
|
| 252 |
+
|
| 253 |
+
}
|
| 254 |
+
.wrap-header-df th {
|
| 255 |
+
height: auto !important;
|
| 256 |
+
}
|
| 257 |
+
.wrap-header-df .cell-wrap img {
|
| 258 |
+
width: 16px;
|
| 259 |
+
height: 16px;
|
| 260 |
+
vertical-align: middle;
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
/* By default, hide BOTH theme-aware icons inside a DataFrame cell */
|
| 264 |
+
.wrap-header-df .cell-wrap .light-mode-icon,
|
| 265 |
+
.wrap-header-df .cell-wrap .dark-mode-icon {
|
| 266 |
+
display: none !important;
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
/* Light Theme Rule: Show the light-mode icon */
|
| 270 |
+
html:not(.dark) .wrap-header-df .cell-wrap .light-mode-icon {
|
| 271 |
+
display: inline-block !important;
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
/* Dark Theme Rule: Show the dark-mode icon */
|
| 275 |
+
.dark .wrap-header-df .cell-wrap .dark-mode-icon {
|
| 276 |
+
display: inline-block !important;
|
| 277 |
+
}
|
| 278 |
+
#legend-markdown img {
|
| 279 |
+
width: 16px;
|
| 280 |
+
height: 16px;
|
| 281 |
+
vertical-align: middle;
|
| 282 |
+
}
|
| 283 |
+
html:not(.dark) #legend-markdown .light-mode-icon,
|
| 284 |
+
.dark #legend-markdown .dark-mode-icon {
|
| 285 |
+
display: inline-block;
|
| 286 |
+
}
|
| 287 |
+
#legend-markdown .light-mode-icon, #legend-markdown .dark-mode-icon {
|
| 288 |
+
display: none;
|
| 289 |
+
}
|
| 290 |
"""
|
ui_components.py
CHANGED
|
@@ -3,6 +3,7 @@ import pandas as pd
|
|
| 3 |
import plotly.graph_objects as go
|
| 4 |
import os
|
| 5 |
import re
|
|
|
|
| 6 |
|
| 7 |
from agenteval.leaderboard.view import LeaderboardViewer
|
| 8 |
from huggingface_hub import HfApi
|
|
@@ -50,24 +51,132 @@ MAX_UPLOAD_BYTES = 100 * 1024**2
|
|
| 50 |
AGENTEVAL_MANIFEST_NAME = "agenteval.json"
|
| 51 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
# Global variables
|
| 54 |
-
|
| 55 |
-
"Closed":
|
| 56 |
-
"API Available": '🟠',
|
| 57 |
-
"Open Source": '🟢',
|
| 58 |
-
"Open Source + Open Weights": '🔵'
|
| 59 |
}
|
| 60 |
-
|
| 61 |
-
"Standard": "
|
| 62 |
-
"Custom with Standard Search": "
|
| 63 |
-
"Fully Custom": "
|
| 64 |
}
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# --- Global State for Viewers (simple caching) ---
|
| 73 |
CACHED_VIEWERS = {}
|
|
@@ -154,14 +263,9 @@ def create_leaderboard_display(
|
|
| 154 |
lambda row: '📈' if row['id'] in pareto_agent_names else '',
|
| 155 |
axis=1
|
| 156 |
)
|
| 157 |
-
# Create mapping for Openness
|
| 158 |
-
|
| 159 |
-
df_view['
|
| 160 |
-
|
| 161 |
-
# For this column, we'll use .apply() to handle the "Other" case cleanly.
|
| 162 |
-
df_view['Agent Tooling'] = df_view['Agent Tooling'].apply(
|
| 163 |
-
lambda ctrl: control_emoji_map.get(ctrl, f"{ctrl}" if pd.notna(ctrl) else "")
|
| 164 |
-
)
|
| 165 |
|
| 166 |
|
| 167 |
# Format cost columns
|
|
@@ -185,7 +289,22 @@ def create_leaderboard_display(
|
|
| 185 |
df_view = df_view.drop(columns=columns_to_drop, errors='ignore')
|
| 186 |
|
| 187 |
df_headers = df_view.columns.tolist()
|
| 188 |
-
df_datatypes = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
|
| 190 |
plot_component = gr.Plot(
|
| 191 |
value=scatter_plot,
|
|
@@ -195,18 +314,19 @@ def create_leaderboard_display(
|
|
| 195 |
|
| 196 |
# Put table and key into an accordion
|
| 197 |
with gr.Accordion("Details", open=True, elem_id="leaderboard-accordion"):
|
|
|
|
| 198 |
dataframe_component = gr.DataFrame(
|
| 199 |
headers=df_headers,
|
| 200 |
value=df_view,
|
| 201 |
datatype=df_datatypes,
|
| 202 |
interactive=False,
|
| 203 |
wrap=True,
|
| 204 |
-
column_widths=[30, 30, 30,
|
|
|
|
| 205 |
)
|
| 206 |
-
gr.Markdown(value=legend_markdown, elem_id="legend-markdown")
|
| 207 |
|
| 208 |
# Return the components so they can be referenced elsewhere.
|
| 209 |
-
return plot_component, dataframe_component
|
| 210 |
|
| 211 |
def get_full_leaderboard_data(split: str) -> tuple[pd.DataFrame, dict]:
|
| 212 |
"""
|
|
@@ -339,13 +459,8 @@ def create_benchmark_details_display(
|
|
| 339 |
axis=1
|
| 340 |
)
|
| 341 |
|
| 342 |
-
|
| 343 |
-
benchmark_table_df['
|
| 344 |
-
|
| 345 |
-
# For this column, we'll use .apply() to handle the "Other" case cleanly.
|
| 346 |
-
benchmark_table_df['Agent Tooling'] = benchmark_table_df['Agent Tooling'].apply(
|
| 347 |
-
lambda ctrl: control_emoji_map.get(ctrl, f"{ctrl}" if pd.notna(ctrl) else "")
|
| 348 |
-
)
|
| 349 |
|
| 350 |
# Calculate and add "Benchmark Attempted" column
|
| 351 |
def check_benchmark_status(row):
|
|
@@ -389,7 +504,14 @@ def create_benchmark_details_display(
|
|
| 389 |
}, inplace=True)
|
| 390 |
# Ensure the 'Logs' column is formatted correctly
|
| 391 |
df_headers = benchmark_table_df.columns.tolist()
|
| 392 |
-
df_datatypes = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
# Create the scatter plot using the full data for context, but plotting benchmark metrics
|
| 395 |
# This shows all agents on the same axis for better comparison.
|
|
@@ -403,12 +525,14 @@ def create_benchmark_details_display(
|
|
| 403 |
gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
|
| 404 |
# Put table and key into an accordion
|
| 405 |
with gr.Accordion("Details", open=True, elem_id="leaderboard-accordion"):
|
|
|
|
| 406 |
gr.DataFrame(
|
| 407 |
headers=df_headers,
|
| 408 |
value=benchmark_table_df,
|
| 409 |
datatype=df_datatypes,
|
| 410 |
interactive=False,
|
| 411 |
wrap=True,
|
|
|
|
| 412 |
)
|
| 413 |
-
|
| 414 |
|
|
|
|
| 3 |
import plotly.graph_objects as go
|
| 4 |
import os
|
| 5 |
import re
|
| 6 |
+
import base64
|
| 7 |
|
| 8 |
from agenteval.leaderboard.view import LeaderboardViewer
|
| 9 |
from huggingface_hub import HfApi
|
|
|
|
| 51 |
AGENTEVAL_MANIFEST_NAME = "agenteval.json"
|
| 52 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
| 53 |
|
| 54 |
+
# Module-level cache mapping an SVG file path to its Base64 data URI, so each
# asset is read from disk at most once per process.
SVG_DATA_URI_CACHE: dict[str, str] = {}


def get_svg_as_data_uri(file_path: str) -> str:
    """Return the SVG at *file_path* encoded as a Base64 ``data:`` URI.

    Successful results are memoised in ``SVG_DATA_URI_CACHE``; failures are
    not cached, so a later call retries the read.

    Args:
        file_path: Path of the SVG file to embed.

    Returns:
        A ``data:image/svg+xml;base64,...`` string, or ``""`` when the file
        cannot be read (a warning is printed in that case).
    """
    # Serve from the cache when this path has already been encoded.
    cached = SVG_DATA_URI_CACHE.get(file_path)
    if cached is not None:
        return cached

    try:
        # Binary mode: the bytes are Base64-encoded as-is, no text decoding.
        with open(file_path, "rb") as svg_file:
            raw_bytes = svg_file.read()
    except FileNotFoundError:
        print(f"Warning: SVG file not found at '{file_path}'")
        return ""
    except OSError as err:
        # Any other read failure (directory, permissions, ...) is handled
        # the same best-effort way instead of propagating to the caller.
        print(f"Warning: could not read SVG file '{file_path}': {err}")
        return ""

    encoded_string = base64.b64encode(raw_bytes).decode('utf-8')
    data_uri = f"data:image/svg+xml;base64,{encoded_string}"

    # Store in cache for future use.
    SVG_DATA_URI_CACHE[file_path] = data_uri
    return data_uri
|
| 79 |
+
|
| 80 |
+
def create_svg_html(value, svg_map):
    """Build the text-free ``<img>`` HTML that represents *value* as an icon.

    Args:
        value: The cell value to look up (e.g. an openness or tooling label).
        svg_map: Maps each known value either to a single SVG path (``str``)
            or to a ``{"light": path, "dark": path}`` dict of theme variants.

    Returns:
        An HTML string with one ``<img>`` (single icon) or up to two
        ``<img>`` tags carrying the ``light-mode-icon`` / ``dark-mode-icon``
        classes (themed icon). Returns ``""`` when *value* is missing, is not
        in *svg_map*, maps to an unexpected type, or when the icon could not
        be loaded (``get_svg_as_data_uri`` returned an empty string).
    """
    # Local import keeps this block self-contained; only `escape` is needed.
    from html import escape

    # Unknown or missing values render as a blank cell.
    if pd.isna(value) or value not in svg_map:
        return ""

    path_info = svg_map[value]
    # The label ends up inside HTML attributes, so it must be escaped.
    label = escape(str(value), quote=True)

    # Light/dark-aware icons (like Tooling).
    if isinstance(path_info, dict):
        # NOTE(review): the icon shown in the light theme is read from the
        # 'dark' key and vice versa -- presumably the "-dark" asset is the
        # dark-coloured glyph meant for light backgrounds; confirm.
        light_theme_icon_uri = get_svg_as_data_uri(path_info['dark'])
        dark_theme_icon_uri = get_svg_as_data_uri(path_info['light'])
        themed_icons = (
            (light_theme_icon_uri, "light-mode-icon"),
            (dark_theme_icon_uri, "dark-mode-icon"),
        )
        # Both icons are emitted back to back with no text between them.
        # An icon whose URI is empty is skipped rather than emitted as a
        # broken <img src="">.
        return "".join(
            f'<img src="{uri}" class="{css_class}" alt="{label}" title="{label}">'
            for uri, css_class in themed_icons
            if uri
        )

    # Single icons that don't change with theme (like Openness).
    if isinstance(path_info, str):
        src = get_svg_as_data_uri(path_info)
        if not src:
            return ""
        return f'<img src="{src}" style="width: 16px; height: 16px; vertical-align: middle;" alt="{label}" title="{label}">'

    # Fallback in case of an unexpected data type.
    return ""
|
| 109 |
+
|
| 110 |
# Global variables
|
| 111 |
+
# Maps each "Openness" label to the single SVG asset that represents it.
OPENNESS_SVG_MAP = {
    "Closed": "assets/ui.svg",
    "API Available": "assets/api.svg",
    "Open Source": "assets/open-source.svg",
    "Open Source + Open Weights": "assets/open-weights.svg",
}
# Maps each "Agent Tooling" label to its pair of theme-specific SVG assets.
TOOLING_SVG_MAP = {
    "Standard": {"light": "assets/star-light.svg", "dark": "assets/star-dark.svg"},
    "Custom with Standard Search": {"light": "assets/diamond-light.svg", "dark": "assets/diamond-dark.svg"},
    "Fully Custom": {"light": "assets/circle-light.svg", "dark": "assets/circle-dark.svg"},
}


def _openness_legend_item(name, path):
    """Return the legend entry (icon followed by label) for one openness value."""
    uri = get_svg_as_data_uri(path)
    # Each item is its own flexbox container so icon and text stay aligned.
    return (
        f'<div style="display: flex; align-items: center; white-space: nowrap;">'
        f'<img src="{uri}" alt="{name}" title="{name}" style="width:16px; height:16px; margin-right: 4px; flex-shrink: 0;">'
        f'<span>{name}</span>'
        f'</div>'
    )


def _tooling_legend_item(name, paths):
    """Return the legend entry (theme-swapping icon plus label) for one tooling value."""
    # NOTE(review): the light-theme icon is read from the 'dark' key and vice
    # versa -- presumably the "-dark" asset is the dark-coloured glyph meant
    # for light backgrounds; confirm.
    light_theme_icon_uri = get_svg_as_data_uri(paths['dark'])
    dark_theme_icon_uri = get_svg_as_data_uri(paths['light'])

    # The two swapping icons are stacked with absolute positioning.
    img1 = f'<img src="{light_theme_icon_uri}" class="light-mode-icon" alt="{name}" title="{name}" style="position: absolute; top: 0; left: 0;">'
    img2 = f'<img src="{dark_theme_icon_uri}" class="dark-mode-icon" alt="{name}" title="{name}" style="position: absolute; top: 0; left: 0;">'

    # Their container needs a defined size and relative positioning.
    icon_container = f'<div style="width: 16px; height: 16px; position: relative; flex-shrink: 0;">{img1}{img2}</div>'

    # This item is also a flexbox container.
    return (
        f'<div style="display: flex; align-items: center; white-space: nowrap;">'
        f'{icon_container}'
        f'<span style="margin-left: 4px;">{name}</span>'
        f'</div>'
    )


# HTML for the "Openness" legend items.
openness_html_items = [
    _openness_legend_item(name, path) for name, path in OPENNESS_SVG_MAP.items()
]
openness_html = " ".join(openness_html_items)

# HTML for the "Tooling" legend items.
tooling_html_items = [
    _tooling_legend_item(name, paths) for name, paths in TOOLING_SVG_MAP.items()
]
tooling_html = " ".join(tooling_html_items)


# Final legend: three side-by-side sections (Pareto, Openness, Tooling).
legend_markdown = f"""
<div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap: 24px; font-size: 14px; padding-bottom: 8px;">

    <div> <!-- Container for the Pareto section -->
        <b>Pareto</b>
        <div style="padding-top: 4px;"><span>📈 On frontier</span></div>
    </div>

    <div> <!-- Container for the Openness section -->
        <b>Agent Openness</b>
        <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 16px; margin-top: 4px;">{openness_html}</div>
    </div>

    <div> <!-- Container for the Tooling section -->
        <b>Agent Tooling</b>
        <div style="display: flex; flex-wrap: wrap; align-items: center; gap: 16px; margin-top: 4px;">{tooling_html}</div>
    </div>

</div>
"""
|
| 180 |
|
| 181 |
# --- Global State for Viewers (simple caching) ---
|
| 182 |
CACHED_VIEWERS = {}
|
|
|
|
| 263 |
lambda row: '📈' if row['id'] in pareto_agent_names else '',
|
| 264 |
axis=1
|
| 265 |
)
|
| 266 |
+
# Create mapping for Openness / tooling
|
| 267 |
+
df_view['Openness'] = df_view['Openness'].apply(lambda x: create_svg_html(x, OPENNESS_SVG_MAP))
|
| 268 |
+
df_view['Agent Tooling'] = df_view['Agent Tooling'].apply(lambda x: create_svg_html(x, TOOLING_SVG_MAP))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
|
| 271 |
# Format cost columns
|
|
|
|
| 289 |
df_view = df_view.drop(columns=columns_to_drop, errors='ignore')
|
| 290 |
|
| 291 |
df_headers = df_view.columns.tolist()
|
| 292 |
+
df_datatypes = []
|
| 293 |
+
for col in df_headers:
|
| 294 |
+
if col in ["Logs", "Agent"] or "Cost" in col or "Score" in col:
|
| 295 |
+
df_datatypes.append("markdown")
|
| 296 |
+
elif col in ["Openness", "Agent Tooling"]:
|
| 297 |
+
df_datatypes.append("html")
|
| 298 |
+
else:
|
| 299 |
+
df_datatypes.append("str")
|
| 300 |
+
|
| 301 |
+
header_rename_map = {
|
| 302 |
+
"Pareto": "",
|
| 303 |
+
"Openness": "",
|
| 304 |
+
"Agent Tooling": ""
|
| 305 |
+
}
|
| 306 |
+
# Blank out the header text of the icon-only columns for display.
|
| 307 |
+
df_view = df_view.rename(columns=header_rename_map)
|
| 308 |
|
| 309 |
plot_component = gr.Plot(
|
| 310 |
value=scatter_plot,
|
|
|
|
| 314 |
|
| 315 |
# Put table and key into an accordion
|
| 316 |
with gr.Accordion("Details", open=True, elem_id="leaderboard-accordion"):
|
| 317 |
+
gr.HTML(value=legend_markdown, elem_id="legend-markdown")
|
| 318 |
dataframe_component = gr.DataFrame(
|
| 319 |
headers=df_headers,
|
| 320 |
value=df_view,
|
| 321 |
datatype=df_datatypes,
|
| 322 |
interactive=False,
|
| 323 |
wrap=True,
|
| 324 |
+
column_widths=[30, 30, 30, 250],
|
| 325 |
+
elem_classes=["wrap-header-df"]
|
| 326 |
)
|
|
|
|
| 327 |
|
| 328 |
# Return the components so they can be referenced elsewhere.
|
| 329 |
+
return plot_component, dataframe_component
|
| 330 |
|
| 331 |
def get_full_leaderboard_data(split: str) -> tuple[pd.DataFrame, dict]:
|
| 332 |
"""
|
|
|
|
| 459 |
axis=1
|
| 460 |
)
|
| 461 |
|
| 462 |
+
benchmark_table_df['Openness'] = benchmark_table_df['Openness'].apply(lambda x: create_svg_html(x, OPENNESS_SVG_MAP))
|
| 463 |
+
benchmark_table_df['Agent Tooling'] = benchmark_table_df['Agent Tooling'].apply(lambda x: create_svg_html(x, TOOLING_SVG_MAP))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
|
| 465 |
# Calculate and add "Benchmark Attempted" column
|
| 466 |
def check_benchmark_status(row):
|
|
|
|
| 504 |
}, inplace=True)
|
| 505 |
# Ensure the 'Logs' column is formatted correctly
|
| 506 |
df_headers = benchmark_table_df.columns.tolist()
|
| 507 |
+
df_datatypes = []
|
| 508 |
+
for col in df_headers:
|
| 509 |
+
if "Logs" in col or "Cost" in col or "Score" in col:
|
| 510 |
+
df_datatypes.append("markdown")
|
| 511 |
+
elif col in ["Openness", "Agent Tooling"]:
|
| 512 |
+
df_datatypes.append("html")
|
| 513 |
+
else:
|
| 514 |
+
df_datatypes.append("str")
|
| 515 |
|
| 516 |
# Create the scatter plot using the full data for context, but plotting benchmark metrics
|
| 517 |
# This shows all agents on the same axis for better comparison.
|
|
|
|
| 525 |
gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
|
| 526 |
# Put table and key into an accordion
|
| 527 |
with gr.Accordion("Details", open=True, elem_id="leaderboard-accordion"):
|
| 528 |
+
gr.HTML(value=legend_markdown, elem_id="legend-markdown")
|
| 529 |
gr.DataFrame(
|
| 530 |
headers=df_headers,
|
| 531 |
value=benchmark_table_df,
|
| 532 |
datatype=df_datatypes,
|
| 533 |
interactive=False,
|
| 534 |
wrap=True,
|
| 535 |
+
elem_classes=["wrap-header-df"]
|
| 536 |
)
|
| 537 |
+
|
| 538 |
|