Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	Add model json file upload button
Browse files
Models can be uploaded temporarily in JSON form to see their scores. The uploaded JSON is not stored anywhere.
    	
        app.py
    CHANGED
    
    | @@ -342,6 +342,46 @@ def load_models_from_json(directory): | |
| 342 | 
             
            # scorecard_template = load_scorecard_templates('scorecard_templates')
         | 
| 343 | 
             
            models = load_models_from_json('model_data')
         | 
| 344 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 345 | 
             
            def create_source_html(sources):
         | 
| 346 | 
             
                if not sources:
         | 
| 347 | 
             
                    return ""
         | 
| @@ -825,13 +865,31 @@ with gr.Blocks(css=css) as demo: | |
| 825 | 
             
                    tab_selection = gr.Radio(["Detailed Scorecard", "Category Analysis", "Leaderboard"], 
         | 
| 826 | 
             
                                           label="Select Tab", value="Detailed Scorecard")
         | 
| 827 |  | 
| 828 | 
            -
                 | 
| 829 | 
            -
             | 
| 830 | 
            -
                     | 
| 831 | 
            -
                     | 
| 832 | 
            -
             | 
| 833 | 
            -
             | 
| 834 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 835 | 
             
                    model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
         | 
| 836 | 
             
                                                    label="Select AI Systems for Comparison",
         | 
| 837 | 
             
                                                    value=[],
         | 
| @@ -936,7 +994,7 @@ with gr.Blocks(css=css) as demo: | |
| 936 | 
             
                                                icon = "✗"
         | 
| 937 | 
             
                                                category_no += 1
         | 
| 938 | 
             
                                                total_no += 1
         | 
| 939 | 
            -
             | 
| 940 | 
             
                                        card_content += f"<div class='question-item {style_class}'>{icon} {question}</div>"
         | 
| 941 | 
             
                                    card_content += "</div></details>"
         | 
| 942 |  | 
| @@ -981,8 +1039,12 @@ with gr.Blocks(css=css) as demo: | |
| 981 | 
             
                        "leaderboard": False,
         | 
| 982 | 
             
                        "category_chart": False,
         | 
| 983 | 
             
                        "detailed_scorecard": False,
         | 
|  | |
|  | |
| 984 | 
             
                        "model_chooser": False,
         | 
| 985 | 
            -
                        "model_multi_chooser": False
         | 
|  | |
|  | |
| 986 | 
             
                    }
         | 
| 987 |  | 
| 988 | 
             
                    # Initialize outputs with None
         | 
| @@ -997,17 +1059,31 @@ with gr.Blocks(css=css) as demo: | |
| 997 | 
             
                    # Update visibility based on selected tab
         | 
| 998 | 
             
                    if tab == "Leaderboard":
         | 
| 999 | 
             
                        component_states["leaderboard"] = True
         | 
|  | |
| 1000 | 
             
                        outputs["leaderboard"] = create_leaderboard(selected_categories)
         | 
| 1001 |  | 
| 1002 | 
             
                    elif tab == "Category Analysis":
         | 
| 1003 | 
             
                        component_states["category_chart"] = True
         | 
|  | |
|  | |
|  | |
|  | |
| 1004 | 
             
                        component_states["model_multi_chooser"] = True
         | 
|  | |
|  | |
|  | |
|  | |
| 1005 | 
             
                        if selected_models:  # Only update chart if models are selected
         | 
| 1006 | 
             
                            outputs["category_chart"] = create_category_chart(selected_models, selected_categories)
         | 
| 1007 |  | 
| 1008 | 
             
                    elif tab == "Detailed Scorecard":
         | 
| 1009 | 
             
                        component_states["detailed_scorecard"] = True
         | 
|  | |
|  | |
|  | |
| 1010 | 
             
                        component_states["model_chooser"] = True
         | 
|  | |
|  | |
| 1011 | 
             
                        if selected_model:
         | 
| 1012 | 
             
                            scorecard_updates = update_detailed_scorecard(selected_model, selected_categories)
         | 
| 1013 | 
             
                            outputs["model_metadata"] = scorecard_updates[0]
         | 
| @@ -1019,7 +1095,11 @@ with gr.Blocks(css=css) as demo: | |
| 1019 | 
             
                        gr.update(visible=component_states["leaderboard"]),
         | 
| 1020 | 
             
                        gr.update(visible=component_states["category_chart"]),
         | 
| 1021 | 
             
                        gr.update(visible=component_states["detailed_scorecard"]),
         | 
|  | |
|  | |
| 1022 | 
             
                        gr.update(visible=component_states["model_chooser"]),
         | 
|  | |
|  | |
| 1023 | 
             
                        gr.update(visible=component_states["model_multi_chooser"]),
         | 
| 1024 | 
             
                        outputs["leaderboard"] if outputs["leaderboard"] is not None else gr.update(),
         | 
| 1025 | 
             
                        outputs["category_chart"] if outputs["category_chart"] is not None else gr.update(),
         | 
| @@ -1028,17 +1108,66 @@ with gr.Blocks(css=css) as demo: | |
| 1028 | 
             
                        outputs["total_score"] if outputs["total_score"] is not None else gr.update()
         | 
| 1029 | 
             
                    ]
         | 
| 1030 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1031 | 
             
                # Set up event handlers
         | 
| 1032 | 
             
                for component in [tab_selection, model_chooser, model_multi_chooser, category_filter]:
         | 
| 1033 | 
             
                    component.change(
         | 
| 1034 | 
             
                        fn=update_dashboard,
         | 
| 1035 | 
             
                        inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
         | 
| 1036 | 
             
                        outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
         | 
| 1037 | 
            -
                                model_chooser, model_multi_chooser,
         | 
| 1038 | 
             
                                leaderboard_output, category_chart, model_metadata,
         | 
| 1039 | 
             
                                all_category_cards, total_score]
         | 
| 1040 | 
             
                    )
         | 
| 1041 |  | 
| 1042 | 
             
            # Launch the app
         | 
| 1043 | 
             
            if __name__ == "__main__":
         | 
| 1044 | 
            -
                demo.launch(ssr_mode=False)
         | 
|  | 
|  | |
| 342 | 
             
            # scorecard_template = load_scorecard_templates('scorecard_templates')
         | 
| 343 | 
             
            models = load_models_from_json('model_data')
         | 
| 344 |  | 
| 345 | 
            +
            def handle_file_upload(uploaded_file):
                """Load an uploaded model JSON file and register it in ``models``.

                Args:
                    uploaded_file: Path of the uploaded file, or ``None`` when
                        nothing was uploaded (the upload widget is configured
                        with ``type="filepath"``).

                Returns:
                    A 3-tuple ``(model_chooser_update, model_multi_chooser_update,
                    status_message)``. On success the first update carries the new
                    choices and selects the uploaded model; on failure both updates
                    are no-ops and the message describes the error.
                """
                # NOTE(review): ``models`` is module-level state, so an uploaded
                # entry presumably stays visible to every session until the process
                # restarts -- confirm this matches the "not stored anywhere" promise.
                global models
                if uploaded_file is None:
                    return gr.update(), gr.update(), ""

                try:
                    # JSON is UTF-8 by specification; do not depend on the locale default.
                    with open(uploaded_file, 'r', encoding='utf-8') as f:
                        new_model_data = json.load(f)

                    # Validate the shape up front so a malformed payload yields a
                    # readable message instead of an incidental AttributeError.
                    if not isinstance(new_model_data, dict):
                        raise ValueError("top-level JSON value must be an object")
                    metadata = new_model_data.get('metadata', {})
                    if not isinstance(metadata, dict):
                        raise ValueError("'metadata' must be a JSON object")
                except (OSError, ValueError) as e:
                    # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
                    return (
                        gr.update(),  # Don't change model_chooser
                        gr.update(),  # Don't change model_multi_chooser
                        f"Error loading file: {e}"
                    )

                # The "(Uploaded)" suffix keeps the entry distinct from a bundled
                # model that has the same name.
                original_model_name = metadata.get('Name', 'Unknown Model')
                display_model_name = f"{original_model_name} (Uploaded)"
                models[display_model_name] = new_model_data

                # The single-select dropdown has a leading empty choice; the
                # multi-select one does not.
                model_choices = [""] + list(models.keys())
                return (
                    gr.update(choices=model_choices, value=display_model_name),  # Update model_chooser
                    gr.update(choices=list(models.keys())),  # Update model_multi_chooser
                    f"Successfully loaded model: {display_model_name}"
                )
         | 
| 379 | 
            +
             | 
| 380 | 
            +
            def download_template():
                """Return the relative path of the JSON file served as the upload template."""
                return "model_data/gemma-scorecard-json.json"
         | 
| 384 | 
            +
             | 
| 385 | 
             
            def create_source_html(sources):
         | 
| 386 | 
             
                if not sources:
         | 
| 387 | 
             
                    return ""
         | 
|  | |
| 865 | 
             
                    tab_selection = gr.Radio(["Detailed Scorecard", "Category Analysis", "Leaderboard"], 
         | 
| 866 | 
             
                                           label="Select Tab", value="Detailed Scorecard")
         | 
| 867 |  | 
| 868 | 
            +
                # Model Selection Card (for Detailed Scorecard tab)
         | 
| 869 | 
            +
                with gr.Group(elem_classes="model-selection-card") as model_selection_card:
         | 
| 870 | 
            +
                    gr.HTML("<div class='card-title'>🤖 AI System Selection</div>")
         | 
| 871 | 
            +
                    with gr.Row(elem_classes="model-selection-row"):
         | 
| 872 | 
            +
                        # Make model_chooser visible by default since Detailed Scorecard is first tab
         | 
| 873 | 
            +
                        # Set "StarCoder2" as the default selected model
         | 
| 874 | 
            +
                        model_chooser = gr.Dropdown(choices=[""] + list(models.keys()),
         | 
| 875 | 
            +
                                                  label="Select AI System for Details", 
         | 
| 876 | 
            +
                                                  value="StarCoder2",
         | 
| 877 | 
            +
                                                  interactive=True, visible=True)
         | 
| 878 | 
            +
                        file_upload = gr.File(
         | 
| 879 | 
            +
                            label="📁 Upload Model JSON",
         | 
| 880 | 
            +
                            file_types=[".json"],
         | 
| 881 | 
            +
                            type="filepath",
         | 
| 882 | 
            +
                            elem_id="file_upload",
         | 
| 883 | 
            +
                            visible=True
         | 
| 884 | 
            +
                        )
         | 
| 885 | 
            +
                        download_button = gr.DownloadButton(
         | 
| 886 | 
            +
                            label="Download Template",
         | 
| 887 | 
            +
                            value="model_data/gemma-scorecard-json.json",
         | 
| 888 | 
            +
                            visible=False  # Hidden by default - only shown in Detailed Scorecard tab
         | 
| 889 | 
            +
                        )
         | 
| 890 | 
            +
                
         | 
| 891 | 
            +
                # Separate Multi-Model Selector (for Category Analysis tab only)
         | 
| 892 | 
            +
                with gr.Group(elem_classes="multi-model-selection-card") as multi_model_selection_card:
         | 
| 893 | 
             
                    model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
         | 
| 894 | 
             
                                                    label="Select AI Systems for Comparison",
         | 
| 895 | 
             
                                                    value=[],
         | 
|  | |
| 994 | 
             
                                                icon = "✗"
         | 
| 995 | 
             
                                                category_no += 1
         | 
| 996 | 
             
                                                total_no += 1
         | 
| 997 | 
            +
                                    
         | 
| 998 | 
             
                                        card_content += f"<div class='question-item {style_class}'>{icon} {question}</div>"
         | 
| 999 | 
             
                                    card_content += "</div></details>"
         | 
| 1000 |  | 
|  | |
| 1039 | 
             
                        "leaderboard": False,
         | 
| 1040 | 
             
                        "category_chart": False,
         | 
| 1041 | 
             
                        "detailed_scorecard": False,
         | 
| 1042 | 
            +
                        "model_selection_card": False,
         | 
| 1043 | 
            +
                        "multi_model_selection_card": False,
         | 
| 1044 | 
             
                        "model_chooser": False,
         | 
| 1045 | 
            +
                        "model_multi_chooser": False,
         | 
| 1046 | 
            +
                        "file_upload": False,
         | 
| 1047 | 
            +
                        "download_button": False
         | 
| 1048 | 
             
                    }
         | 
| 1049 |  | 
| 1050 | 
             
                    # Initialize outputs with None
         | 
|  | |
| 1059 | 
             
                    # Update visibility based on selected tab
         | 
| 1060 | 
             
                    if tab == "Leaderboard":
         | 
| 1061 | 
             
                        component_states["leaderboard"] = True
         | 
| 1062 | 
            +
                        component_states["model_selection_card"] = False
         | 
| 1063 | 
             
                        outputs["leaderboard"] = create_leaderboard(selected_categories)
         | 
| 1064 |  | 
| 1065 | 
             
                    elif tab == "Category Analysis":
         | 
| 1066 | 
             
                        component_states["category_chart"] = True
         | 
| 1067 | 
            +
                        # Hide the main model selection card completely
         | 
| 1068 | 
            +
                        component_states["model_selection_card"] = False
         | 
| 1069 | 
            +
                        # Show only the multi-model selection card
         | 
| 1070 | 
            +
                        component_states["multi_model_selection_card"] = True
         | 
| 1071 | 
             
                        component_states["model_multi_chooser"] = True
         | 
| 1072 | 
            +
                        # Explicitly hide other components for category analysis
         | 
| 1073 | 
            +
                        component_states["model_chooser"] = False
         | 
| 1074 | 
            +
                        component_states["file_upload"] = False
         | 
| 1075 | 
            +
                        component_states["download_button"] = False
         | 
| 1076 | 
             
                        if selected_models:  # Only update chart if models are selected
         | 
| 1077 | 
             
                            outputs["category_chart"] = create_category_chart(selected_models, selected_categories)
         | 
| 1078 |  | 
| 1079 | 
             
                    elif tab == "Detailed Scorecard":
         | 
| 1080 | 
             
                        component_states["detailed_scorecard"] = True
         | 
| 1081 | 
            +
                        component_states["model_selection_card"] = True
         | 
| 1082 | 
            +
                        component_states["multi_model_selection_card"] = False
         | 
| 1083 | 
            +
                        # Show all components for detailed scorecard
         | 
| 1084 | 
             
                        component_states["model_chooser"] = True
         | 
| 1085 | 
            +
                        component_states["file_upload"] = True
         | 
| 1086 | 
            +
                        component_states["download_button"] = True
         | 
| 1087 | 
             
                        if selected_model:
         | 
| 1088 | 
             
                            scorecard_updates = update_detailed_scorecard(selected_model, selected_categories)
         | 
| 1089 | 
             
                            outputs["model_metadata"] = scorecard_updates[0]
         | 
|  | |
| 1095 | 
             
                        gr.update(visible=component_states["leaderboard"]),
         | 
| 1096 | 
             
                        gr.update(visible=component_states["category_chart"]),
         | 
| 1097 | 
             
                        gr.update(visible=component_states["detailed_scorecard"]),
         | 
| 1098 | 
            +
                        gr.update(visible=component_states["model_selection_card"]),
         | 
| 1099 | 
            +
                        gr.update(visible=component_states["multi_model_selection_card"]),
         | 
| 1100 | 
             
                        gr.update(visible=component_states["model_chooser"]),
         | 
| 1101 | 
            +
                        gr.update(visible=component_states["file_upload"]),
         | 
| 1102 | 
            +
                        gr.update(visible=component_states["download_button"]),
         | 
| 1103 | 
             
                        gr.update(visible=component_states["model_multi_chooser"]),
         | 
| 1104 | 
             
                        outputs["leaderboard"] if outputs["leaderboard"] is not None else gr.update(),
         | 
| 1105 | 
             
                        outputs["category_chart"] if outputs["category_chart"] is not None else gr.update(),
         | 
|  | |
| 1108 | 
             
                        outputs["total_score"] if outputs["total_score"] is not None else gr.update()
         | 
| 1109 | 
             
                    ]
         | 
| 1110 |  | 
| 1111 | 
            +
                # Set up file upload handler
         | 
| 1112 | 
            +
                def handle_upload_and_update(uploaded_file):
                    """Register an uploaded model and refresh the Detailed Scorecard view.

                    Args:
                        uploaded_file: Path of the uploaded JSON file, or ``None``.

                    Returns:
                        A 10-tuple matching the ``outputs`` of ``file_upload.upload``:
                        the two dropdown updates, the three tab-visibility updates,
                        and the five content outputs (leaderboard_output,
                        category_chart, model_metadata, all_category_cards,
                        total_score). When the upload did not select a model, the
                        last eight entries are no-op updates.
                    """
                    model_chooser_update, model_multi_chooser_update, _ = handle_file_upload(uploaded_file)

                    # A successful upload sets 'value' on the model_chooser update.
                    selected_model = model_chooser_update.get('value')
                    if not selected_model:
                        # Upload failed or nothing was uploaded: leave the dashboard untouched.
                        return (
                            model_chooser_update,
                            model_multi_chooser_update,
                            *(gr.update() for _unused in range(8)),
                        )

                    dashboard_result = update_dashboard("Detailed Scorecard", [], selected_model, category_choices)

                    # update_dashboard returns the three tab-visibility updates first
                    # and the five content outputs last; the entries in between are
                    # visibility updates for the selection cards and their widgets.
                    # Slice from both ends rather than using fixed middle indices:
                    # the previous [8]..[12] indexing was off by one and sent the
                    # model_multi_chooser visibility update to leaderboard_output,
                    # shifting every content value onto the wrong component.
                    return (
                        model_chooser_update,
                        model_multi_chooser_update,
                        *dashboard_result[:3],   # leaderboard / category analysis / detailed scorecard tabs
                        *dashboard_result[-5:],  # leaderboard_output .. total_score
                    )
         | 
| 1149 | 
            +
                
         | 
| 1150 | 
            +
                file_upload.upload(
         | 
| 1151 | 
            +
                    fn=handle_upload_and_update,
         | 
| 1152 | 
            +
                    inputs=[file_upload],
         | 
| 1153 | 
            +
                    outputs=[model_chooser, model_multi_chooser,
         | 
| 1154 | 
            +
                            leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
         | 
| 1155 | 
            +
                            leaderboard_output, category_chart, model_metadata,
         | 
| 1156 | 
            +
                            all_category_cards, total_score]
         | 
| 1157 | 
            +
                )
         | 
| 1158 | 
            +
                
         | 
| 1159 | 
             
                # Set up event handlers
         | 
| 1160 | 
             
                for component in [tab_selection, model_chooser, model_multi_chooser, category_filter]:
         | 
| 1161 | 
             
                    component.change(
         | 
| 1162 | 
             
                        fn=update_dashboard,
         | 
| 1163 | 
             
                        inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
         | 
| 1164 | 
             
                        outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
         | 
| 1165 | 
            +
                                model_selection_card, multi_model_selection_card, model_chooser, file_upload, download_button, model_multi_chooser,
         | 
| 1166 | 
             
                                leaderboard_output, category_chart, model_metadata,
         | 
| 1167 | 
             
                                all_category_cards, total_score]
         | 
| 1168 | 
             
                    )
         | 
| 1169 |  | 
| 1170 | 
             
            # Launch the app
         | 
| 1171 | 
             
            if __name__ == "__main__":
         | 
| 1172 | 
            +
                demo.launch(ssr_mode=False)
         | 
| 1173 | 
            +
                
         | 
 
			

