Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	add a flag to disable single-image results in display
Browse files
- app.py +44 -27
- constants.py +3 -0
    	
        app.py
    CHANGED
    
    | @@ -18,7 +18,8 @@ with open(table_css_file, "r") as f: | |
| 18 |  | 
| 19 | 
             
            # Initialize data loaders
         | 
| 20 | 
             
            default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
         | 
| 21 | 
            -
             | 
|  | |
| 22 |  | 
| 23 | 
             
            with gr.Blocks() as block:
         | 
| 24 | 
             
                # Add a style element that we'll update
         | 
| @@ -44,18 +45,20 @@ with gr.Blocks() as block: | |
| 44 | 
             
                            TABLE_INTRODUCTION
         | 
| 45 | 
             
                        )
         | 
| 46 |  | 
| 47 | 
            -
                        with gr.Row():
         | 
| 48 | 
            -
                            table_selector = gr.Radio(
         | 
| 49 | 
            -
                                choices=["Default", "Single Image"],
         | 
| 50 | 
            -
                                label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
         | 
| 51 | 
            -
                                value="Default"
         | 
| 52 | 
            -
                            )
         | 
| 53 | 
            -
             | 
| 54 | 
             
                        # Define different captions for each table
         | 
| 55 | 
             
                        default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
         | 
| 56 |  | 
| 57 | 
             
                        single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
         | 
| 58 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 59 | 
             
                        caption_component = gr.Markdown(
         | 
| 60 | 
             
                            value=default_caption,
         | 
| 61 | 
             
                            elem_classes="table-caption",
         | 
| @@ -86,7 +89,8 @@ with gr.Blocks() as block: | |
| 86 | 
             
                        )
         | 
| 87 |  | 
| 88 | 
             
                        def update_table_and_caption(table_type, super_group, model_group):
         | 
| 89 | 
            -
                             | 
|  | |
| 90 | 
             
                                headers, data = default_loader.get_leaderboard_data(super_group, model_group)
         | 
| 91 | 
             
                                caption = default_caption
         | 
| 92 | 
             
                            else:  # Single-image
         | 
| @@ -106,7 +110,8 @@ with gr.Blocks() as block: | |
| 106 | 
             
                            ]
         | 
| 107 |  | 
| 108 | 
             
                        def update_selectors(table_type):
         | 
| 109 | 
            -
                             | 
|  | |
| 110 | 
             
                            return [
         | 
| 111 | 
             
                                gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
         | 
| 112 | 
             
                                gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
         | 
| @@ -114,29 +119,41 @@ with gr.Blocks() as block: | |
| 114 |  | 
| 115 | 
             
                        refresh_button = gr.Button("Refresh")
         | 
| 116 |  | 
| 117 | 
            -
                        #  | 
| 118 | 
            -
                         | 
| 119 | 
            -
                             | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 123 | 
             
                        super_group_selector.change(
         | 
| 124 | 
             
                            fn=update_table_and_caption, 
         | 
| 125 | 
            -
                            inputs=[table_selector, super_group_selector, model_group_selector], 
         | 
| 126 | 
             
                            outputs=[data_component, caption_component, css_style]
         | 
| 127 | 
             
                        )
         | 
|  | |
| 128 | 
             
                        model_group_selector.change(
         | 
| 129 | 
             
                            fn=update_table_and_caption, 
         | 
| 130 | 
            -
                            inputs=[table_selector, super_group_selector, model_group_selector], 
         | 
| 131 | 
            -
                            outputs=[data_component, caption_component, css_style]
         | 
| 132 | 
            -
                        )
         | 
| 133 | 
            -
                        table_selector.change(
         | 
| 134 | 
            -
                            fn=update_selectors,
         | 
| 135 | 
            -
                            inputs=[table_selector],
         | 
| 136 | 
            -
                            outputs=[super_group_selector, model_group_selector]
         | 
| 137 | 
            -
                        ).then(
         | 
| 138 | 
            -
                            fn=update_table_and_caption,
         | 
| 139 | 
            -
                            inputs=[table_selector, super_group_selector, model_group_selector],
         | 
| 140 | 
             
                            outputs=[data_component, caption_component, css_style]
         | 
| 141 | 
             
                        )
         | 
| 142 |  | 
|  | |
| 18 |  | 
| 19 | 
             
            # Initialize data loaders
         | 
| 20 | 
             
            default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
         | 
| 21 | 
            +
            # Initialize single image loader only if enabled
         | 
| 22 | 
            +
            si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI") if ENABLE_SINGLE_IMAGE_TABLE else None
         | 
| 23 |  | 
| 24 | 
             
            with gr.Blocks() as block:
         | 
| 25 | 
             
                # Add a style element that we'll update
         | 
|  | |
| 45 | 
             
                            TABLE_INTRODUCTION
         | 
| 46 | 
             
                        )
         | 
| 47 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 48 | 
             
                        # Define different captions for each table
         | 
| 49 | 
             
                        default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
         | 
| 50 |  | 
| 51 | 
             
                        single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
         | 
| 52 |  | 
| 53 | 
            +
                        with gr.Row():
         | 
| 54 | 
            +
                            # Only show table selector if single image table is enabled
         | 
| 55 | 
            +
                            if ENABLE_SINGLE_IMAGE_TABLE:
         | 
| 56 | 
            +
                                table_selector = gr.Radio(
         | 
| 57 | 
            +
                                    choices=["Default", "Single Image"],
         | 
| 58 | 
            +
                                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
         | 
| 59 | 
            +
                                    value="Default"
         | 
| 60 | 
            +
                                )
         | 
| 61 | 
            +
             | 
| 62 | 
             
                        caption_component = gr.Markdown(
         | 
| 63 | 
             
                            value=default_caption,
         | 
| 64 | 
             
                            elem_classes="table-caption",
         | 
|  | |
| 89 | 
             
                        )
         | 
| 90 |  | 
| 91 | 
             
                        def update_table_and_caption(table_type, super_group, model_group):
         | 
| 92 | 
            +
                            # If single image is disabled, always use default table
         | 
| 93 | 
            +
                            if not ENABLE_SINGLE_IMAGE_TABLE or table_type == "Default":
         | 
| 94 | 
             
                                headers, data = default_loader.get_leaderboard_data(super_group, model_group)
         | 
| 95 | 
             
                                caption = default_caption
         | 
| 96 | 
             
                            else:  # Single-image
         | 
|  | |
| 110 | 
             
                            ]
         | 
| 111 |  | 
| 112 | 
             
                        def update_selectors(table_type):
         | 
| 113 | 
            +
                            # If single image is disabled, always use default loader
         | 
| 114 | 
            +
                            loader = default_loader if not ENABLE_SINGLE_IMAGE_TABLE or table_type == "Default" else si_loader
         | 
| 115 | 
             
                            return [
         | 
| 116 | 
             
                                gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
         | 
| 117 | 
             
                                gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
         | 
|  | |
| 119 |  | 
| 120 | 
             
                        refresh_button = gr.Button("Refresh")
         | 
| 121 |  | 
| 122 | 
            +
                        # Set up different handlers based on whether single image table is enabled
         | 
| 123 | 
            +
                        if ENABLE_SINGLE_IMAGE_TABLE:
         | 
| 124 | 
            +
                            refresh_button.click(
         | 
| 125 | 
            +
                                fn=update_table_and_caption, 
         | 
| 126 | 
            +
                                inputs=[table_selector, super_group_selector, model_group_selector], 
         | 
| 127 | 
            +
                                outputs=[data_component, caption_component, css_style]
         | 
| 128 | 
            +
                            )
         | 
| 129 | 
            +
                            
         | 
| 130 | 
            +
                            table_selector.change(
         | 
| 131 | 
            +
                                fn=update_selectors,
         | 
| 132 | 
            +
                                inputs=[table_selector],
         | 
| 133 | 
            +
                                outputs=[super_group_selector, model_group_selector]
         | 
| 134 | 
            +
                            ).then(
         | 
| 135 | 
            +
                                fn=update_table_and_caption,
         | 
| 136 | 
            +
                                inputs=[table_selector, super_group_selector, model_group_selector],
         | 
| 137 | 
            +
                                outputs=[data_component, caption_component, css_style]
         | 
| 138 | 
            +
                            )
         | 
| 139 | 
            +
                        else:
         | 
| 140 | 
            +
                            # Simplified handlers when single image is disabled
         | 
| 141 | 
            +
                            refresh_button.click(
         | 
| 142 | 
            +
                                fn=lambda super_group, model_group: update_table_and_caption("Default", super_group, model_group), 
         | 
| 143 | 
            +
                                inputs=[super_group_selector, model_group_selector], 
         | 
| 144 | 
            +
                                outputs=[data_component, caption_component, css_style]
         | 
| 145 | 
            +
                            )
         | 
| 146 | 
            +
                        
         | 
| 147 | 
            +
                        # These handlers are needed in both cases
         | 
| 148 | 
             
                        super_group_selector.change(
         | 
| 149 | 
             
                            fn=update_table_and_caption, 
         | 
| 150 | 
            +
                            inputs=[table_selector if ENABLE_SINGLE_IMAGE_TABLE else gr.State("Default"), super_group_selector, model_group_selector], 
         | 
| 151 | 
             
                            outputs=[data_component, caption_component, css_style]
         | 
| 152 | 
             
                        )
         | 
| 153 | 
            +
                        
         | 
| 154 | 
             
                        model_group_selector.change(
         | 
| 155 | 
             
                            fn=update_table_and_caption, 
         | 
| 156 | 
            +
                            inputs=[table_selector if ENABLE_SINGLE_IMAGE_TABLE else gr.State("Default"), super_group_selector, model_group_selector], 
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 157 | 
             
                            outputs=[data_component, caption_component, css_style]
         | 
| 158 | 
             
                        )
         | 
| 159 |  | 
    	
        constants.py
    CHANGED
    
    | @@ -2,6 +2,9 @@ import os | |
| 2 |  | 
| 3 | 
             
            HF_TOKEN = os.environ.get("HF_TOKEN")
         | 
| 4 |  | 
|  | |
|  | |
|  | |
| 5 | 
             
            LEADERBOARD_INTRODUCTION = """# MEGA-Bench Leaderboard
         | 
| 6 |  | 
| 7 | 
             
            ## 🚀 Introduction
         | 
|  | |
| 2 |  | 
| 3 | 
             
            HF_TOKEN = os.environ.get("HF_TOKEN")
         | 
| 4 |  | 
| 5 | 
            +
            # Global configuration flag to control whether the "Single Image" table option should be displayed
         | 
| 6 | 
            +
            ENABLE_SINGLE_IMAGE_TABLE = False  # Set to True to enable, False to disable
         | 
| 7 | 
            +
             | 
| 8 | 
             
            LEADERBOARD_INTRODUCTION = """# MEGA-Bench Leaderboard
         | 
| 9 |  | 
| 10 | 
             
            ## 🚀 Introduction
         | 
