Spaces:
Paused
Paused
File size: 4,221 Bytes
ae05bbd 11de2f8 7b52df4 11de2f8 64716c3 ae05bbd 11de2f8 02a4349 11de2f8 ac15cf4 11de2f8 02a4349 11de2f8 ae05bbd 11de2f8 ac15cf4 11de2f8 ae05bbd 11de2f8 ae05bbd 11de2f8 ae05bbd 11de2f8 ae05bbd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import gradio as gr
import pandas as pd
# Import our UI factories and the data loader
from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data, create_sub_navigation_bar
def _render_split_results(split_df, tag_map, category_name, split_name, intro_markdown):
    """Render the contents of one results tab for a single data split.

    When ``split_df`` is non-empty this emits the intro blurb, then the
    aggregate leaderboard display, then the per-benchmark details display.
    Otherwise it emits a "No data available" notice.

    Args:
        split_df: Leaderboard data for the split; only ``.empty`` is read here
            before the frame is handed to the display factories.
        tag_map: Tag map for the split, forwarded to the display factories.
        category_name: Category whose results are shown.
        split_name: ``"test"`` or ``"validation"``.
        intro_markdown: Markdown text shown above the leaderboard.
    """
    if split_df.empty:
        gr.Markdown(f"No data available for {split_name} split.")
        return

    gr.Markdown(intro_markdown)
    create_leaderboard_display(
        full_df=split_df,
        tag_map=tag_map,
        category_name=category_name,
        split_name=split_name,
    )
    create_benchmark_details_display(
        full_df=split_df,
        tag_map=tag_map,
        category_name=category_name,
        validation=(split_name == "validation"),
    )


def build_category_page(CATEGORY_NAME, PAGE_DESCRIPTION):
    """Build the Gradio layout for one category's leaderboard page.

    The page consists of a header, two sub-navigation bars (one per split,
    with the validation one created hidden), the page description, and a tab
    group with a "Test Set" tab followed by a "Validation Set" tab.
    Selecting a tab runs client-side JavaScript that shows the matching
    sub-navigation container and hides the other one.

    Must be called inside an active Gradio layout context, because components
    and event listeners are created as a side effect.

    Args:
        CATEGORY_NAME: Category name; interpolated into the header and passed
            to the navigation and display factories.
        PAGE_DESCRIPTION: Markdown text rendered below the navigation bars.

    Returns:
        A ``(validation_nav_container, test_nav_container)`` tuple holding the
        two ``gr.Column`` wrappers around the sub-navigation bars.
    """
    with gr.Column(elem_id="page-content-wrapper"):
        gr.HTML(f'<h2>AstaBench {CATEGORY_NAME} Leaderboard <span style="font-weight: normal; color: inherit;">(Aggregate)</span></h2>', elem_id="main-header")

        # Load each split once; the frames and tag maps are reused by both the
        # navigation bars and the tab contents below.
        validation_df, validation_tag_map = get_full_leaderboard_data("validation")
        test_df, test_tag_map = get_full_leaderboard_data("test")

        # The validation nav starts hidden; the tab-select handlers toggle it.
        with gr.Column(elem_id="validation_nav_container", visible=False) as validation_nav_container:
            create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME, validation=True)
        with gr.Column(elem_id="test_nav_container", visible=True) as test_nav_container:
            create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)

        gr.Markdown(PAGE_DESCRIPTION, elem_id="category-intro")

        # --- This page has two main sections: Test and Validation ---
        with gr.Tabs():
            with gr.Tab("Results: Test Set") as test_tab:
                _render_split_results(
                    test_df,
                    test_tag_map,
                    CATEGORY_NAME,
                    "test",
                    "**Test Set** results are reserved for final assessment. This helps ensure that the agent generalizes well to unseen problems.",
                )
            with gr.Tab("Results: Validation Set") as validation_tab:
                _render_split_results(
                    validation_df,
                    validation_tag_map,
                    CATEGORY_NAME,
                    "validation",
                    "**Validation Set** results are used during development to tune and compare agents before final testing.",
                )

        # JavaScript to show the VALIDATION nav and hide the TEST nav.
        # Lookups are null-guarded so a missing container cannot abort the
        # handler before the resize event is dispatched.
        show_validation_js = """
        () => {
            const validationNav = document.getElementById('validation_nav_container');
            const testNav = document.getElementById('test_nav_container');
            if (validationNav) { validationNav.style.display = 'block'; }
            if (testNav) { testNav.style.display = 'none'; }
            setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
        }
        """
        # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the
        # plots: the resize dispatch was previously missing from this handler
        # even though the validation handler performed it.
        show_test_js = """
        () => {
            const validationNav = document.getElementById('validation_nav_container');
            const testNav = document.getElementById('test_nav_container');
            if (validationNav) { validationNav.style.display = 'none'; }
            if (testNav) { testNav.style.display = 'block'; }
            setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
        }
        """

        # Assign the pure JS functions to the select events. No Python `fn` is needed.
        validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
        test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)

    return validation_nav_container, test_nav_container