Eval_Cards

Sleeping

App Files Files Community

evijit HF Staff committed on Dec 9, 2024

Commit

9959b2f

verified ·

1 Parent(s): c3e6774

Update app.py

Browse files

Files changed (1) hide show

app.py +219 -64

app.py CHANGED Viewed

@@ -348,34 +348,82 @@ def create_source_html(sources):
     html += "</div>"
     return html
-def create_leaderboard():
     scores = []
     for model, data in models.items():
         total_score = 0
         total_questions = 0
-        for category in data['scores'].values():
             for section in category.values():
                 if section['status'] != 'N/A':
                     questions = section.get('questions', {})
-                    total_score += sum(1 for q in questions.values() if q)
-                    total_questions += len(questions)
         score_percentage = (total_score / total_questions * 100) if total_questions > 0 else 0
-        scores.append((model, score_percentage))
-    df = pd.DataFrame(scores, columns=['Model', 'Score Percentage'])
-    df = df.sort_values('Score Percentage', ascending=False).reset_index(drop=True)
-    html = "<div class='card leaderboard-card'>"
-    html += "<div class='card-title'>AI Model Social Impact Leaderboard</div>"
-    html += "<table class='leaderboard-table'>"
-    html += "<tr><th>Rank</th><th>Model</th><th>Score Percentage</th></tr>"
-    for i, (_, row) in enumerate(df.iterrows(), 1):
-        html += f"<tr><td>{i}</td><td>{row['Model']}</td><td>{row['Score Percentage']:.2f}%</td></tr>"
-    html += "</table></div>"
-    return html
 def create_category_chart(selected_models, selected_categories):
     if not selected_models:
@@ -1070,6 +1118,98 @@ css = """
 .dark .completion-bar-container.na .completion-bar {
     background-color: #666;
 }
 """
 first_model = next(iter(models.values()))
@@ -1080,7 +1220,7 @@ with gr.Blocks(css=css) as demo:
     with gr.Row():
         tab_selection = gr.Radio(["Leaderboard", "Category Analysis", "Detailed Scorecard"],
-                                label="Select Tab", value="Leaderboard")
     with gr.Row():
         model_chooser = gr.Dropdown(choices=[""] + list(models.keys()),
@@ -1088,15 +1228,25 @@ with gr.Blocks(css=css) as demo:
                                   value="",
                                   interactive=True, visible=False)
         model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
-                                        label="Select Models for Comparison",
-                                        multiselect=True, interactive=True, visible=False)
-        category_filter = gr.CheckboxGroup(choices=category_choices,
-                                         label="Filter Categories",
-                                         value=category_choices,
-                                         visible=False)
     with gr.Column(visible=True) as leaderboard_tab:
-        leaderboard_output = gr.HTML()
     with gr.Column(visible=False) as category_analysis_tab:
         category_chart = gr.Plot()
@@ -1106,55 +1256,60 @@ with gr.Blocks(css=css) as demo:
         all_category_cards = gr.HTML()
         total_score = gr.Markdown()
-    # Initialize the dashboard with the leaderboard
-    leaderboard_output.value = create_leaderboard()
     def update_dashboard(tab, selected_models, selected_model, selected_categories):
-            leaderboard_visibility = gr.update(visible=False)
-            category_chart_visibility = gr.update(visible=False)
-            detailed_scorecard_visibility = gr.update(visible=False)
-            model_chooser_visibility = gr.update(visible=False)
-            model_multi_chooser_visibility = gr.update(visible=False)
-            category_filter_visibility = gr.update(visible=False)
-            if tab == "Leaderboard":
-                leaderboard_visibility = gr.update(visible=True)
-                leaderboard_html = create_leaderboard()
-                return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
-                        model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
-                        gr.update(value=leaderboard_html), gr.update(), gr.update(), gr.update(), gr.update()]
-            elif tab == "Category Analysis":
-                category_chart_visibility = gr.update(visible=True)
-                model_multi_chooser_visibility = gr.update(visible=True)
-                category_filter_visibility = gr.update(visible=True)
-                category_plot = create_category_chart(selected_models or [], selected_categories)
-                return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
-                        model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
-                        gr.update(), gr.update(value=category_plot), gr.update(), gr.update(), gr.update()]
-            elif tab == "Detailed Scorecard":
-                detailed_scorecard_visibility = gr.update(visible=True)
-                model_chooser_visibility = gr.update(visible=True)
-                category_filter_visibility = gr.update(visible=True)
-                if selected_model:
-                    scorecard_updates = update_detailed_scorecard(selected_model, selected_categories)
-                else:
-                    scorecard_updates = [
-                        gr.update(value="Please select a model to view details.", visible=True),
-                        gr.update(visible=False),
-                        gr.update(visible=False)
-                    ]
-                return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
-                        model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
-                        gr.update(), gr.update()] + scorecard_updates
     # Set up event handlers
     tab_selection.change(
         fn=update_dashboard,
         inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
         outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
-                model_chooser, model_multi_chooser, category_filter,
                 leaderboard_output, category_chart, model_metadata,
                 all_category_cards, total_score]
     )
@@ -1181,7 +1336,7 @@ with gr.Blocks(css=css) as demo:
         fn=update_dashboard,
         inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
         outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
-                model_chooser, model_multi_chooser, category_filter,
                 leaderboard_output, category_chart, model_metadata,
                 all_category_cards, total_score]
     )

     html += "</div>"
     return html
def create_leaderboard(selected_categories):
    """Build the leaderboard table of per-model completion rates.

    For each entry in the module-level ``models`` mapping, counts the truthy
    answers in every section whose ``status`` is not ``'N/A'``, derives a
    completion percentage per category and overall, and returns the result
    as a table sorted by the overall rate (highest first).

    Args:
        selected_categories: Container of full category names (the keys of
            ``category_map`` below) whose columns should be included.
            ``None`` is treated as "no category columns".

    Returns:
        A ``pandas.DataFrame`` with the columns ``Rank``, ``Model``, ``Type``,
        ``Overall Completion Rate`` and one column per selected category.
        Percentage columns hold strings such as ``"42.0%"``. When ``models``
        is empty the frame has the same columns and no rows.
    """
    # Built once, outside the per-model loop: it is needed after the loop to
    # pick the columns to format, so it must exist even when `models` is empty.
    category_map = {
        '1. Bias, Stereotypes, and Representational Harms Evaluation': '⚖️ Bias and Fairness',
        '2. Cultural Values and Sensitive Content Evaluation': '🌍 Cultural Values',
        '3. Disparate Performance Evaluation': '📊 Disparate Performance',
        '4. Environmental Costs and Carbon Emissions Evaluation': '🌱 Environmental Impact',
        '5. Privacy and Data Protection Evaluation': '🔒 Privacy',
        '6. Financial Costs Evaluation': '💰 Financial Costs',
        '7. Data and Content Moderation Labor Evaluation': '👥 Labor Practices',
    }

    # A missing selection behaves like an empty one instead of raising on `in`.
    selected = selected_categories if selected_categories is not None else ()
    shown_categories = [
        (full_name, display_name)
        for full_name, display_name in category_map.items()
        if full_name in selected
    ]
    percent_columns = ['Overall Completion Rate'] + [
        display_name for _, display_name in shown_categories
    ]
    columns = ['Model', 'Type'] + percent_columns

    scores = []
    for model, data in models.items():
        total_score = 0
        total_questions = 0
        score_by_category = {}

        for category_name, category in data['scores'].items():
            category_score = 0
            category_total = 0
            for section in category.values():
                if section['status'] != 'N/A':
                    questions = section.get('questions', {})
                    category_score += sum(1 for q in questions.values() if q)
                    category_total += len(questions)
            # Categories without any counted question contribute nothing and
            # later show up as 0 via the .get() default.
            if category_total > 0:
                score_by_category[category_name] = (category_score / category_total) * 100
                total_score += category_score
                total_questions += category_total

        score_percentage = (total_score / total_questions * 100) if total_questions > 0 else 0

        model_entry = {
            'Model': model,
            'Type': data['metadata'].get('Type', 'Unknown'),
            'Overall Completion Rate': score_percentage,
        }
        for full_name, display_name in shown_categories:
            model_entry[display_name] = score_by_category.get(full_name, 0)
        scores.append(model_entry)

    # Explicit columns keep the schema stable (and sortable) with zero rows.
    df = pd.DataFrame(scores, columns=columns)

    # Sort on the numeric values first; formatting to text happens afterwards
    # so that e.g. "9.0%" cannot outrank "85.0%" lexicographically.
    df = df.sort_values('Overall Completion Rate', ascending=False)
    df.insert(0, 'Rank', range(1, len(df) + 1))

    for col in percent_columns:
        df[col] = df[col].map(lambda x: f"{x:.1f}%")
    return df
+with gr.Column(visible=True) as leaderboard_tab:
+    leaderboard_output = gr.DataFrame(
+        interactive=True,  # Allow sorting
+        wrap=True
+    )
 def create_category_chart(selected_models, selected_categories):
     if not selected_models:
 .dark .completion-bar-container.na .completion-bar {
     background-color: #666;
 }
+.leaderboard-filters {
+    margin-bottom: 20px;
+    padding: 15px;
+    background-color: #f8f9fa;
+    border-radius: 8px;
+}
+.dark .leaderboard-filters {
+    background-color: #2a2a2a;
+}
+.filter-group {
+    margin-bottom: 10px;
+}
+.filter-label {
+    font-weight: 600;
+    margin-bottom: 5px;
+    display: block;
+}
+.score-column {
+    background-color: #f0f7ff;
+}
+.dark .score-column {
+    background-color: #1a2733;
+}
+.metric-header {
+    font-size: 0.9em;
+    color: #666;
+    text-align: center;
+}
+.dark .metric-header {
+    color: #aaa;
+}
+.table-container {
+    overflow-x: auto;
+}
+.leaderboard-table td {
+    white-space: nowrap;
+}
+.score-cell {
+    text-align: right;
+    padding-right: 15px !important;
+}
+.model-cell {
+    max-width: 300px;
+    overflow: hidden;
+    text-overflow: ellipsis;
+    white-space: nowrap;
+}
+.leaderboard-table {
+    width: 100%;
+    border-collapse: collapse;
+}
+.leaderboard-table th,
+.leaderboard-table td {
+    padding: 10px;
+    text-align: left;
+    border: 1px solid #e0e0e0;
+}
+.dark .leaderboard-table th,
+.dark .leaderboard-table td {
+    border-color: #444;
+}
+.leaderboard-table th {
+    background-color: #f2f2f2;
+    font-weight: bold;
+}
+.dark .leaderboard-table th {
+    background-color: #2c3e50;
+}
+.leaderboard-table tr:hover {
+    background-color: #f5f5f5;
+}
+.dark .leaderboard-table tr:hover {
+    background-color: #2d2d2d;
+}
 """
 first_model = next(iter(models.values()))
     with gr.Row():
         tab_selection = gr.Radio(["Leaderboard", "Category Analysis", "Detailed Scorecard"],
+                               label="Select Tab", value="Leaderboard")
     with gr.Row():
         model_chooser = gr.Dropdown(choices=[""] + list(models.keys()),
                                   value="",
                                   interactive=True, visible=False)
         model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
+                                        label="Select Models for Comparison",
+                                        value=[],
+                                        multiselect=True,
+                                        interactive=True,
+                                        visible=False,
+                                        info="Select one or more models")
+    # Category filter now visible for all tabs
+    category_filter = gr.CheckboxGroup(choices=category_choices,
+                                     label="Filter Categories",
+                                     value=category_choices)
     with gr.Column(visible=True) as leaderboard_tab:
+        leaderboard_output = gr.DataFrame(
+            headers=["Rank", "Model", "Type", "Overall Score"],
+            datatype=["number", "str", "str", "str"],
+            interactive=False,
+            wrap=True
+        )
     with gr.Column(visible=False) as category_analysis_tab:
         category_chart = gr.Plot()
         all_category_cards = gr.HTML()
         total_score = gr.Markdown()
+    # Initialize the dashboard
+    def init_leaderboard():
+        df = create_leaderboard(category_filter.value)
+        return df
+    leaderboard_output.value = init_leaderboard()
+    # Update handlers
     def update_dashboard(tab, selected_models, selected_model, selected_categories):
         """Compute the component updates for a tab / selection change.

         Always returns exactly ten updates, in the order of the ``outputs``
         lists this handler is registered with:

             leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
             model_chooser, model_multi_chooser,
             leaderboard_output, category_chart, model_metadata,
             all_category_cards, total_score

         Args:
             tab: Name of the selected tab ("Leaderboard", "Category Analysis"
                 or "Detailed Scorecard").
             selected_models: Models picked in the multi-select dropdown; may
                 be empty or None.
             selected_model: Model picked in the single-select dropdown; a
                 falsy value means nothing is selected.
             selected_categories: Categories ticked in the category filter.

         Returns:
             A list of ten values: five visibility updates followed by five
             content updates.
         """
         on_leaderboard = tab == "Leaderboard"
         on_category_analysis = tab == "Category Analysis"
         on_detailed_scorecard = tab == "Detailed Scorecard"

         # The category filter is no longer an output of this handler, so no
         # visibility update is produced for it (it stays visible on all tabs).
         visibility_updates = [
             gr.update(visible=on_leaderboard),         # leaderboard_tab
             gr.update(visible=on_category_analysis),   # category_analysis_tab
             gr.update(visible=on_detailed_scorecard),  # detailed_scorecard_tab
             gr.update(visible=on_detailed_scorecard),  # model_chooser
             gr.update(visible=on_category_analysis),   # model_multi_chooser
         ]

         # gr.update() with no arguments leaves a component untouched; a bare
         # None would be taken as the new value and blank the component.
         if on_leaderboard:
             df = create_leaderboard(selected_categories)
             content_updates = [gr.update(value=df), gr.update(), gr.update(),
                                gr.update(), gr.update()]
         elif on_category_analysis:
             category_plot = create_category_chart(selected_models or [], selected_categories)
             content_updates = [gr.update(), gr.update(value=category_plot), gr.update(),
                                gr.update(), gr.update()]
         elif on_detailed_scorecard:
             if selected_model:
                 scorecard_updates = list(
                     update_detailed_scorecard(selected_model, selected_categories)
                 )
             else:
                 scorecard_updates = [
                     gr.update(value="Please select a model to view details.", visible=True),
                     gr.update(visible=False),
                     gr.update(visible=False)
                 ]
             content_updates = [gr.update(), gr.update()] + scorecard_updates
         else:
             # Unknown tab: hide every panel and leave all contents as they are
             # rather than returning None and failing the event.
             content_updates = [gr.update() for _ in range(5)]

         return visibility_updates + content_updates
     # Set up event handlers
     tab_selection.change(
         fn=update_dashboard,
         inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
         outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
+                model_chooser, model_multi_chooser,
                 leaderboard_output, category_chart, model_metadata,
                 all_category_cards, total_score]
     )
         fn=update_dashboard,
         inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
         outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
+                model_chooser, model_multi_chooser,
                 leaderboard_output, category_chart, model_metadata,
                 all_category_cards, total_score]
     )