File size: 4,221 Bytes
ae05bbd
 
 
 
 
 
 
11de2f8
7b52df4
11de2f8
 
 
64716c3
ae05bbd
11de2f8
 
 
 
 
 
 
 
02a4349
11de2f8
 
 
 
 
 
 
 
 
ac15cf4
 
11de2f8
 
 
 
 
 
02a4349
11de2f8
 
 
 
 
 
 
ae05bbd
11de2f8
 
 
 
ac15cf4
 
11de2f8
 
 
ae05bbd
 
11de2f8
 
 
 
 
 
 
ae05bbd
11de2f8
 
 
 
 
 
 
ae05bbd
11de2f8
 
 
ae05bbd
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gradio as gr
import pandas as pd

# Import our UI factories and the data loader
from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data, create_sub_navigation_bar

def _nav_toggle_js(show_id, hide_id):
    """Return a JS arrow function that shows one element and hides another.

    Args:
        show_id: DOM id of the element whose ``display`` is set to ``block``.
        hide_id: DOM id of the element whose ``display`` is set to ``none``.

    Returns:
        JavaScript source for a zero-argument arrow function. After toggling,
        it dispatches a window ``resize`` event on the next tick.
    """
    # Each lookup is guarded: getElementById returns null for a missing id,
    # and an unguarded `.style` access would throw and skip the other toggle.
    return f"""
        () => {{
            const toShow = document.getElementById('{show_id}');
            const toHide = document.getElementById('{hide_id}');
            if (toShow) {{ toShow.style.display = 'block'; }}
            if (toHide) {{ toHide.style.display = 'none'; }}
            setTimeout(() => {{ window.dispatchEvent(new Event('resize')) }}, 0);
        }}
        """


def _render_split_results(split_df: pd.DataFrame, tag_map, category_name, split_name, intro_md):
    """Render the leaderboard and benchmark details for one data split.

    Must be called inside an active Gradio layout context (e.g. a ``gr.Tab``).

    Args:
        split_df: Leaderboard data for the split; only ``.empty`` is read here.
        tag_map: Tag mapping returned alongside ``split_df`` by
            ``get_full_leaderboard_data``; passed through unchanged.
        category_name: Category forwarded to the display factories.
        split_name: ``"test"`` or ``"validation"``.
        intro_md: Markdown shown above the leaderboard when data is available.
    """
    if split_df.empty:
        gr.Markdown(f"No data available for {split_name} split.")
        return

    gr.Markdown(intro_md)
    # Main category display for this split.
    create_leaderboard_display(
        full_df=split_df,
        tag_map=tag_map,
        category_name=category_name,
        split_name=split_name,
    )
    # Detailed per-benchmark breakdown for this split.
    create_benchmark_details_display(
        full_df=split_df,
        tag_map=tag_map,
        category_name=category_name,
        validation=(split_name == "validation"),
    )


def build_category_page(CATEGORY_NAME, PAGE_DESCRIPTION):
    """Build the Gradio layout for one category leaderboard page.

    The page contains a header, two sub-navigation bars (one per split, only
    the test one visible initially), an intro Markdown block, and two tabs
    with the test and validation results. Selecting a tab swaps which
    sub-navigation container is displayed via client-side JavaScript.

    Args:
        CATEGORY_NAME: Category label interpolated into the page header and
            forwarded to the UI factories (presumably a string).
        PAGE_DESCRIPTION: Markdown text rendered under the navigation bars.

    Returns:
        A ``(validation_nav_container, test_nav_container)`` tuple of the two
        ``gr.Column`` components that wrap the sub-navigation bars.
    """
    with gr.Column(elem_id="page-content-wrapper"):
        gr.HTML(f'<h2>AstaBench {CATEGORY_NAME} Leaderboard <span style="font-weight: normal; color: inherit;">(Aggregate)</span></h2>', elem_id="main-header")

        # Load each split once; the same data feeds the nav bar and the tab.
        validation_df, validation_tag_map = get_full_leaderboard_data("validation")
        test_df, test_tag_map = get_full_leaderboard_data("test")

        with gr.Column(elem_id="validation_nav_container", visible=False) as validation_nav_container:
            create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME, validation=True)

        with gr.Column(elem_id="test_nav_container", visible=True) as test_nav_container:
            create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)

        gr.Markdown(PAGE_DESCRIPTION, elem_id="category-intro")

        # --- This page has two main sections: Test and Validation ---
        with gr.Tabs():
            with gr.Tab("Results: Test Set") as test_tab:
                _render_split_results(
                    test_df,
                    test_tag_map,
                    CATEGORY_NAME,
                    "test",
                    "**Test Set** results are reserved for final assessment. This helps ensure that the agent generalizes well to unseen problems.",
                )
            with gr.Tab("Results: Validation Set") as validation_tab:
                _render_split_results(
                    validation_df,
                    validation_tag_map,
                    CATEGORY_NAME,
                    "validation",
                    "**Validation Set** results are used during development to tune and compare agents before final testing.",
                )

        # Both handlers show their own nav, hide the other one, and dispatch
        # a resize event so plots in the newly shown tab can re-layout.
        show_validation_js = _nav_toggle_js("validation_nav_container", "test_nav_container")
        show_test_js = _nav_toggle_js("test_nav_container", "validation_nav_container")

        # Assign the pure JS functions to the select events. No Python `fn` is needed.
        validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
        test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)

    return validation_nav_container, test_nav_container