Spaces:
Paused
Paused
Amber Tanaka
committed on
Update table legend to use new names + styling (#66)
Browse files
- aliases.py +4 -4
- content.py +2 -0
- leaderboard_transformer.py +2 -2
- ui_components.py +14 -14
aliases.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
|
| 4 |
CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
|
| 5 |
|
|
@@ -10,8 +10,8 @@ CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully Custom"
|
|
| 10 |
|
| 11 |
|
| 12 |
OPENNESS_ALIASES = {
|
| 13 |
-
|
| 14 |
-
|
| 15 |
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
|
| 16 |
CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"}
|
| 17 |
}
|
|
|
|
| 1 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS = "Open Source + Open Weights"
|
| 2 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS = "Open Source"
|
| 3 |
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
|
| 4 |
CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"
|
| 5 |
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
OPENNESS_ALIASES = {
|
| 13 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {"Open source & open weights"},
|
| 14 |
+
CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {"Open source & closed weights"},
|
| 15 |
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
|
| 16 |
CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"}
|
| 17 |
}
|
content.py
CHANGED
|
@@ -490,6 +490,8 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
|
|
| 490 |
}
|
| 491 |
.benchmark-title{
|
| 492 |
color: var(--color-primary-pink);
|
|
|
|
|
|
|
| 493 |
}
|
| 494 |
.dark .benchmark-title{
|
| 495 |
color: var(--color-primary-green);
|
|
|
|
| 490 |
}
|
| 491 |
.benchmark-title{
|
| 492 |
color: var(--color-primary-pink);
|
| 493 |
+
margin-top: 50px;
|
| 494 |
+
font-size: 20px;
|
| 495 |
}
|
| 496 |
.dark .benchmark-title{
|
| 497 |
color: var(--color-primary-green);
|
leaderboard_transformer.py
CHANGED
|
@@ -344,8 +344,8 @@ def _plot_scatter_plotly(
|
|
| 344 |
# These include aliases for openness categories,
|
| 345 |
# so multiple names might correspond to the same color.
|
| 346 |
color_map = {
|
| 347 |
-
aliases.
|
| 348 |
-
aliases.
|
| 349 |
aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "yellow",
|
| 350 |
aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "white",
|
| 351 |
}
|
|
|
|
| 344 |
# These include aliases for openness categories,
|
| 345 |
# so multiple names might correspond to the same color.
|
| 346 |
color_map = {
|
| 347 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: "deeppink",
|
| 348 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: "coral",
|
| 349 |
aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "yellow",
|
| 350 |
aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "white",
|
| 351 |
}
|
ui_components.py
CHANGED
|
@@ -40,12 +40,12 @@ AGENTEVAL_MANIFEST_NAME = "agenteval.json"
|
|
| 40 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
| 41 |
# Global variables
|
| 42 |
COMBINED_ICON_MAP = {
|
| 43 |
-
aliases.
|
| 44 |
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-ow-standard.svg",
|
| 45 |
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-ow-equivalent.svg",
|
| 46 |
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-ow-custom.svg",
|
| 47 |
},
|
| 48 |
-
aliases.
|
| 49 |
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-standard.svg",
|
| 50 |
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-equivalent.svg",
|
| 51 |
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-custom.svg",
|
|
@@ -77,13 +77,13 @@ for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
|
|
| 77 |
|
| 78 |
OPENNESS_SVG_MAP = {
|
| 79 |
"Open Source + Open Weights": "assets/os-ow-legend.svg",
|
| 80 |
-
"Open Source": "assets/os-legend.svg",
|
| 81 |
"API Available": "assets/api-legend.svg",
|
| 82 |
-
"Closed": "assets/c-legend.svg",
|
| 83 |
}
|
| 84 |
TOOLING_SVG_MAP = {
|
| 85 |
"Standard": "assets/standard-legend.svg",
|
| 86 |
-
"Custom
|
| 87 |
"Fully Custom": "assets/custom-legend.svg",
|
| 88 |
}
|
| 89 |
|
|
@@ -142,9 +142,9 @@ def build_openness_tooltip_content() -> str:
|
|
| 142 |
"""
|
| 143 |
descriptions = {
|
| 144 |
"Open Source + Open Weights": "Both code and ML models are open",
|
| 145 |
-
"Open Source": "Code is open but uses an ML model with closed-weights",
|
| 146 |
"API Available": "No access to code; API access only",
|
| 147 |
-
"Closed": "No access to code or API; UI access only",
|
| 148 |
}
|
| 149 |
html_items = []
|
| 150 |
for name, path in OPENNESS_SVG_MAP.items():
|
|
@@ -186,7 +186,7 @@ def build_tooling_tooltip_content() -> str:
|
|
| 186 |
"""Generates the inner HTML for the Agent Tooling tooltip card."""
|
| 187 |
descriptions = {
|
| 188 |
"Standard": "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
|
| 189 |
-
"Custom
|
| 190 |
"Fully Custom": "Uses tools beyond constraints of Standard or Custom interface",
|
| 191 |
}
|
| 192 |
custom_interface_sub_list = """
|
|
@@ -201,7 +201,7 @@ def build_tooling_tooltip_content() -> str:
|
|
| 201 |
desc = descriptions.get(name, "")
|
| 202 |
|
| 203 |
# Check if this is the special case that needs a sub-list
|
| 204 |
-
sub_list_html = custom_interface_sub_list if name == "Custom
|
| 205 |
|
| 206 |
html_items.append(f"""
|
| 207 |
<div class="tooltip-legend-item">
|
|
@@ -301,7 +301,7 @@ def create_legend_markdown(which_table: str) -> str:
|
|
| 301 |
descriptions_tooltip_content = build_descriptions_tooltip_content(which_table)
|
| 302 |
trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
|
| 303 |
legend_markdown = f"""
|
| 304 |
-
<div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap:
|
| 305 |
|
| 306 |
<div> <!-- Container for the Pareto section -->
|
| 307 |
<b>Pareto</b>
|
|
@@ -310,8 +310,8 @@ def create_legend_markdown(which_table: str) -> str:
|
|
| 310 |
<span class="tooltip-card">{pareto_tooltip_content}</span>
|
| 311 |
</span>
|
| 312 |
<div style="margin-top: 8px; display: flex; align-items: center; gap: 6px;">
|
| 313 |
-
<img src="{trophy_uri}" alt="On
|
| 314 |
-
<span>On
|
| 315 |
</div>
|
| 316 |
</div>
|
| 317 |
|
|
@@ -325,7 +325,7 @@ def create_legend_markdown(which_table: str) -> str:
|
|
| 325 |
<div class="tooltip-items-container">{openness_tooltip_content}</div>
|
| 326 |
</span>
|
| 327 |
</span>
|
| 328 |
-
<div style="display: flex; flex-wrap: wrap; align-items: center; gap:
|
| 329 |
</div>
|
| 330 |
|
| 331 |
<div> <!-- Container for the Tooling section -->
|
|
@@ -338,7 +338,7 @@ def create_legend_markdown(which_table: str) -> str:
|
|
| 338 |
<div class="tooltip-items-container">{tooling_tooltip_content}</div>
|
| 339 |
</span>
|
| 340 |
</span>
|
| 341 |
-
<div style="display: flex; flex-wrap: wrap; align-items: center; gap:
|
| 342 |
</div>
|
| 343 |
|
| 344 |
<div><!-- Container for the Column Descriptions section -->
|
|
|
|
| 40 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
| 41 |
# Global variables
|
| 42 |
COMBINED_ICON_MAP = {
|
| 43 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_OPEN_WEIGHTS: {
|
| 44 |
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-ow-standard.svg",
|
| 45 |
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-ow-equivalent.svg",
|
| 46 |
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-ow-custom.svg",
|
| 47 |
},
|
| 48 |
+
aliases.CANONICAL_OPENNESS_OPEN_SOURCE_CLOSED_WEIGHTS: {
|
| 49 |
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-standard.svg",
|
| 50 |
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-equivalent.svg",
|
| 51 |
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-custom.svg",
|
|
|
|
| 77 |
|
| 78 |
OPENNESS_SVG_MAP = {
|
| 79 |
"Open Source + Open Weights": "assets/os-ow-legend.svg",
|
| 80 |
+
"Open Source + Closed Weights": "assets/os-legend.svg",
|
| 81 |
"API Available": "assets/api-legend.svg",
|
| 82 |
+
"Closed Source & UI only": "assets/c-legend.svg",
|
| 83 |
}
|
| 84 |
TOOLING_SVG_MAP = {
|
| 85 |
"Standard": "assets/standard-legend.svg",
|
| 86 |
+
"Custom Interface": "assets/equivalent-legend.svg",
|
| 87 |
"Fully Custom": "assets/custom-legend.svg",
|
| 88 |
}
|
| 89 |
|
|
|
|
| 142 |
"""
|
| 143 |
descriptions = {
|
| 144 |
"Open Source + Open Weights": "Both code and ML models are open",
|
| 145 |
+
"Open Source + Closed Weights": "Code is open but uses an ML model with closed-weights",
|
| 146 |
"API Available": "No access to code; API access only",
|
| 147 |
+
"Closed Source + UI Only": "No access to code or API; UI access only",
|
| 148 |
}
|
| 149 |
html_items = []
|
| 150 |
for name, path in OPENNESS_SVG_MAP.items():
|
|
|
|
| 186 |
"""Generates the inner HTML for the Agent Tooling tooltip card."""
|
| 187 |
descriptions = {
|
| 188 |
"Standard": "Uses only predefined tools from the evaluation environment (as defined in Inspect's state.tools).",
|
| 189 |
+
"Custom Interface": "Custom tools for accessing an equivalent underlying environment:",
|
| 190 |
"Fully Custom": "Uses tools beyond constraints of Standard or Custom interface",
|
| 191 |
}
|
| 192 |
custom_interface_sub_list = """
|
|
|
|
| 201 |
desc = descriptions.get(name, "")
|
| 202 |
|
| 203 |
# Check if this is the special case that needs a sub-list
|
| 204 |
+
sub_list_html = custom_interface_sub_list if name == "Custom Interface" else ""
|
| 205 |
|
| 206 |
html_items.append(f"""
|
| 207 |
<div class="tooltip-legend-item">
|
|
|
|
| 301 |
descriptions_tooltip_content = build_descriptions_tooltip_content(which_table)
|
| 302 |
trophy_uri = get_svg_as_data_uri("assets/trophy.svg")
|
| 303 |
legend_markdown = f"""
|
| 304 |
+
<div style="display: flex; flex-wrap: wrap; align-items: flex-start; gap: 0px; font-size: 13px; padding-bottom: 8px;">
|
| 305 |
|
| 306 |
<div> <!-- Container for the Pareto section -->
|
| 307 |
<b>Pareto</b>
|
|
|
|
| 310 |
<span class="tooltip-card">{pareto_tooltip_content}</span>
|
| 311 |
</span>
|
| 312 |
<div style="margin-top: 8px; display: flex; align-items: center; gap: 6px;">
|
| 313 |
+
<img src="{trophy_uri}" alt="On Frontier" style="width: 25px; height: 25px;">
|
| 314 |
+
<span>On Frontier</span>
|
| 315 |
</div>
|
| 316 |
</div>
|
| 317 |
|
|
|
|
| 325 |
<div class="tooltip-items-container">{openness_tooltip_content}</div>
|
| 326 |
</span>
|
| 327 |
</span>
|
| 328 |
+
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 6px; margin-top: 8px;">{openness_html}</div>
|
| 329 |
</div>
|
| 330 |
|
| 331 |
<div> <!-- Container for the Tooling section -->
|
|
|
|
| 338 |
<div class="tooltip-items-container">{tooling_tooltip_content}</div>
|
| 339 |
</span>
|
| 340 |
</span>
|
| 341 |
+
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 6px; margin-top: 8px;">{tooling_html}</div>
|
| 342 |
</div>
|
| 343 |
|
| 344 |
<div><!-- Container for the Column Descriptions section -->
|