MEGA-Bench

Running

App Files Community

cccjc committed on Mar 23

Commit

14394ad

1 Parent(s): 12d161a

add a flag to disable single-image results in display

Browse files

Files changed (2) hide show

app.py +44 -27
constants.py +3 -0

app.py CHANGED Viewed

@@ -18,7 +18,8 @@ with open(table_css_file, "r") as f:
 # Initialize data loaders
 default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
-si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")
 with gr.Blocks() as block:
     # Add a style element that we'll update
@@ -44,18 +45,20 @@ with gr.Blocks() as block:
                 TABLE_INTRODUCTION
             )
-            with gr.Row():
-                table_selector = gr.Radio(
-                    choices=["Default", "Single Image"],
-                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
-                    value="Default"
-                )
             # Define different captions for each table
             default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
             single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
             caption_component = gr.Markdown(
                 value=default_caption,
                 elem_classes="table-caption",
@@ -86,7 +89,8 @@ with gr.Blocks() as block:
             )
             def update_table_and_caption(table_type, super_group, model_group):
-                if table_type == "Default":
                     headers, data = default_loader.get_leaderboard_data(super_group, model_group)
                     caption = default_caption
                 else:  # Single-image
@@ -106,7 +110,8 @@ with gr.Blocks() as block:
                 ]
             def update_selectors(table_type):
-                loader = default_loader if table_type == "Default" else si_loader
                 return [
                     gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
                     gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
@@ -114,29 +119,41 @@ with gr.Blocks() as block:
             refresh_button = gr.Button("Refresh")
-            # Update click and change handlers to include caption updates
-            refresh_button.click(
-                fn=update_table_and_caption,
-                inputs=[table_selector, super_group_selector, model_group_selector],
-                outputs=[data_component, caption_component, css_style]
-            )
             super_group_selector.change(
                 fn=update_table_and_caption,
-                inputs=[table_selector, super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )
             model_group_selector.change(
                 fn=update_table_and_caption,
-                inputs=[table_selector, super_group_selector, model_group_selector],
-                outputs=[data_component, caption_component, css_style]
-            )
-            table_selector.change(
-                fn=update_selectors,
-                inputs=[table_selector],
-                outputs=[super_group_selector, model_group_selector]
-            ).then(
-                fn=update_table_and_caption,
-                inputs=[table_selector, super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )

 # Initialize data loaders
 default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
+# Initialize single image loader only if enabled
+si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI") if ENABLE_SINGLE_IMAGE_TABLE else None
 with gr.Blocks() as block:
     # Add a style element that we'll update
                 TABLE_INTRODUCTION
             )
             # Define different captions for each table
             default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
             single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
+            with gr.Row():
+                # Only show table selector if single image table is enabled
+                if ENABLE_SINGLE_IMAGE_TABLE:
+                    table_selector = gr.Radio(
+                        choices=["Default", "Single Image"],
+                        label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
+                        value="Default"
+                    )
             caption_component = gr.Markdown(
                 value=default_caption,
                 elem_classes="table-caption",
             )
             def update_table_and_caption(table_type, super_group, model_group):
+                # If single image is disabled, always use default table
+                if not ENABLE_SINGLE_IMAGE_TABLE or table_type == "Default":
                     headers, data = default_loader.get_leaderboard_data(super_group, model_group)
                     caption = default_caption
                 else:  # Single-image
                 ]
             def update_selectors(table_type):
+                # If single image is disabled, always use default loader
+                loader = default_loader if not ENABLE_SINGLE_IMAGE_TABLE or table_type == "Default" else si_loader
                 return [
                     gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
                     gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
             refresh_button = gr.Button("Refresh")
+            # Set up different handlers based on whether single image table is enabled
+            if ENABLE_SINGLE_IMAGE_TABLE:
+                refresh_button.click(
+                    fn=update_table_and_caption,
+                    inputs=[table_selector, super_group_selector, model_group_selector],
+                    outputs=[data_component, caption_component, css_style]
+                )
+                table_selector.change(
+                    fn=update_selectors,
+                    inputs=[table_selector],
+                    outputs=[super_group_selector, model_group_selector]
+                ).then(
+                    fn=update_table_and_caption,
+                    inputs=[table_selector, super_group_selector, model_group_selector],
+                    outputs=[data_component, caption_component, css_style]
+                )
+            else:
+                # Simplified handlers when single image is disabled
+                refresh_button.click(
+                    fn=lambda super_group, model_group: update_table_and_caption("Default", super_group, model_group),
+                    inputs=[super_group_selector, model_group_selector],
+                    outputs=[data_component, caption_component, css_style]
+                )
+            # These handlers are needed in both cases
             super_group_selector.change(
                 fn=update_table_and_caption,
+                inputs=[table_selector if ENABLE_SINGLE_IMAGE_TABLE else gr.State("Default"), super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )
             model_group_selector.change(
                 fn=update_table_and_caption,
+                inputs=[table_selector if ENABLE_SINGLE_IMAGE_TABLE else gr.State("Default"), super_group_selector, model_group_selector],
                 outputs=[data_component, caption_component, css_style]
             )

constants.py CHANGED Viewed

@@ -2,6 +2,9 @@ import os
 HF_TOKEN = os.environ.get("HF_TOKEN")
 LEADERBOARD_INTRODUCTION = """# MEGA-Bench Leaderboard
 ## 🚀 Introduction

 HF_TOKEN = os.environ.get("HF_TOKEN")
+# Global configuration flag to control whether the "Single Image" table option should be displayed
+ENABLE_SINGLE_IMAGE_TABLE = False  # Set to True to enable, False to disable
 LEADERBOARD_INTRODUCTION = """# MEGA-Bench Leaderboard
 ## 🚀 Introduction