Spaces:

Writer
/

Financial_LLM_Performance_Leaderboard

Running

App Files Files Community

wassemgtk committed on Feb 18

Commit

c2c6410

verified ·

1 Parent(s): 7d6243e

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -21

app.py CHANGED Viewed

@@ -41,44 +41,57 @@ context_grounding_data = {
     "Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
 }
# Helper: parse the leading score out of a table cell
def _leading_score(cell):
    """Return the numeric score at the start of *cell* as a float.

    String cells may carry Markdown bold markers (``**``) and a trailing
    delta such as ``"0.95 (0.0)"``; only the part before ``" ("`` is parsed.
    Non-string cells are converted with ``float``. Missing cells (``None`` /
    NaN) yield NaN so they can never be selected as a maximum.
    """
    if not isinstance(cell, str):
        return np.nan if pd.isna(cell) else float(cell)
    return float(cell.replace("**", "").split(" (")[0])


# Helper: wrap a cell in Markdown bold markers exactly once
def _bold(cell):
    """Return *cell* rendered as ``**cell**`` without doubling existing markers."""
    return f"**{str(cell).replace('**', '')}**"


# Function to bold the highest score per column (excluding "Model Name")
def format_table(df):
    """Return a copy of *df* with the best score of each score column in bold.

    Only the columns named in ``score_columns`` are touched; every other
    column (including "Model Name") is copied through unchanged. All cells
    tied for the maximum are bolded. The input frame is not modified.

    Compared with the previous implementation this version
    * ignores missing cells when looking for the maximum instead of letting
      a single NaN suppress bolding for the whole column, and
    * accepts cells that are already wrapped in ``**`` so the function can be
      applied to its own output without raising ``ValueError``.
    """
    # NOTE(review): "Robustness (Δ)" is used elsewhere in this file but is not
    # listed here, so that column is never bolded — confirm this is intended.
    score_columns = {
        "Baseline", "Irrelevant Ctx", "No Ctx", "Ctx Grounding QA",
        "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance",
    }
    styled_df = df.copy()
    for col in df.columns:
        if col == "Model Name" or col not in score_columns:
            continue
        # Series.max() skips NaN, so one missing cell does not hide the winner.
        best = df[col].map(_leading_score).max()
        styled_df[col] = df[col].apply(
            lambda cell, best=best: _bold(cell) if _leading_score(cell) == best else cell
        )
    return styled_df
 # Function to calculate top 3 models based on combined score (average of numeric columns)
 def get_top_3_models(robustness_df, context_grounding_df):
     # Combine numeric columns from both datasets
     numeric_cols_robustness = ["Baseline", "Robustness (Δ)"]  # Columns with numeric or string-numeric data
     numeric_cols_context = ["Irrelevant Ctx", "No Ctx", "Ctx Grounding QA", "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance"]  # From context grounding
-    # Extract numeric values for each column in robustness_df
     robustness_scores = pd.DataFrame()
     for col in numeric_cols_robustness:
-        if any(" (" in str(x) for x in robustness_df[col]):
-            # Handle string values with deltas (e.g., "0.95 (0.0)")
-            robustness_scores[col] = robustness_df[col].apply(lambda x: float(str(x).split(" (")[0]) if " (" in str(x) else float(x))
-        else:
-            # Handle direct float values
-            robustness_scores[col] = robustness_df[col].astype(float)
-    # Extract numeric values for context_grounding_df (all are already float values)
-    context_scores = context_grounding_df[numeric_cols_context].astype(float)
     # Combine scores by averaging
     combined_scores = (robustness_scores.mean(axis=1) + context_scores.mean(axis=1)) / 2
@@ -107,11 +120,11 @@ def create_leaderboard():
     robustness_df = pd.DataFrame(robustness_data)
     context_grounding_df = pd.DataFrame(context_grounding_data)
-    # Format tables to bold highest scores
-    robustness_df = format_table(robustness_df)
-    context_grounding_df = format_table(context_grounding_df)
-    # Get top 3 winners
     winners_df = get_top_3_models(robustness_df, context_grounding_df)
     # Create Gradio interface with a nice theme
@@ -122,14 +135,14 @@ def create_leaderboard():
             with gr.Column():
                 with gr.Tab("Robustness Results"):
                     gr.DataFrame(
-                        value=robustness_df,
                         label="Robustness Results",
                         wrap=True,
                         elem_classes=["custom-table"]
                     )
                 with gr.Tab("Context Grounding Results"):
                     gr.DataFrame(
-                        value=context_grounding_df,
                         label="Context Grounding Results",
                         wrap=True,
                         elem_classes=["custom-table"]

     "Compliance": [0.76, 0.72, 0.52, 0.59, 0.63, 0.34, 0.40, 0.44, 0.43, 0.41, 0.66, 0.51, 0.49, 0.71, 0.71, 0.80, 0.67, 0.62, 0.68, 0.54, 0.46, 0.35, 0.34, 0.81]
 }
# Helper: parse the leading score out of a table cell
def _leading_score(cell):
    """Return the numeric score at the start of *cell* as a float.

    String cells may carry Markdown bold markers (``**``) and a trailing
    delta such as ``"0.95 (0.0)"``; only the part before ``" ("`` is parsed.
    Non-string cells are converted with ``float``. Missing cells (``None`` /
    NaN) yield NaN so they can never be selected as a maximum.
    """
    if not isinstance(cell, str):
        return np.nan if pd.isna(cell) else float(cell)
    return float(cell.replace("**", "").split(" (")[0])


# Helper: wrap a cell in Markdown bold markers exactly once
def _bold(cell):
    """Return *cell* rendered as ``**cell**`` without doubling existing markers."""
    return f"**{str(cell).replace('**', '')}**"


# Function to bold the highest score per column (excluding "Model Name") but keep original data for calculations
def format_table(df, original_df=None):
    """Return a copy of *df* with the best score of each score column in bold.

    Args:
        df: Frame whose columns and non-score cells are copied to the result.
        original_df: Frame the score cells are read from. Defaults to *df*.
            For every score column the result takes its cells from this frame
            (aligned on index), with the maximum wrapped in ``**``.

    Returns:
        A new DataFrame; neither input is modified. Only the columns named in
        ``score_columns`` are rewritten, and all cells tied for the maximum
        are bolded.

    Compared with the previous implementation this version
    * ignores missing cells when looking for the maximum instead of letting
      a single NaN suppress bolding for the whole column, and
    * accepts cells that are already wrapped in ``**`` so the function can be
      applied to its own output without raising ``ValueError``.
    """
    # NOTE(review): "Robustness (Δ)" is used elsewhere in this file but is not
    # listed here, so that column is never bolded — confirm this is intended.
    score_columns = {
        "Baseline", "Irrelevant Ctx", "No Ctx", "Ctx Grounding QA",
        "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance",
    }
    source = df if original_df is None else original_df
    styled_df = df.copy()
    for col in df.columns:
        if col == "Model Name" or col not in score_columns:
            continue
        # Series.max() skips NaN, so one missing cell does not hide the winner.
        best = source[col].map(_leading_score).max()
        styled_df[col] = source[col].apply(
            lambda cell, best=best: _bold(cell) if _leading_score(cell) == best else cell
        )
    return styled_df
# Function to extract numeric value from a cell (removing bold markup and deltas)
def extract_numeric(value):
    """Return the leading number contained in *value* as a float.

    Args:
        value: A number, a missing value (``None`` / NaN / ``pd.NA``), or a
            string such as ``"0.95"``, ``"**0.95**"`` or ``"0.95 (0.0)"``.

    Returns:
        The parsed float, or NaN when the value is missing or is a blank
        string.

    Raises:
        ValueError: If a non-blank string does not start with a number.
    """
    if not isinstance(value, str):
        return np.nan if pd.isna(value) else float(value)
    # Drop Markdown bold markers and surrounding whitespace before parsing.
    text = value.replace("**", "").strip()
    if not text:
        # A blank cell carries no score; report it the same way as NaN input.
        return np.nan
    # Keep only what precedes the delta; splitting on "(" alone also accepts
    # deltas written without a separating space, e.g. "0.83(0.2)".
    return float(text.split("(", 1)[0])
 # Function to calculate top 3 models based on combined score (average of numeric columns)
 def get_top_3_models(robustness_df, context_grounding_df):
     # Combine numeric columns from both datasets
     numeric_cols_robustness = ["Baseline", "Robustness (Δ)"]  # Columns with numeric or string-numeric data
     numeric_cols_context = ["Irrelevant Ctx", "No Ctx", "Ctx Grounding QA", "Ctx Grounding TG", "Ctx Grounding", "Robustness", "Compliance"]  # From context grounding
+    # Extract numeric values for each column in robustness_df, handling bold markup and deltas
     robustness_scores = pd.DataFrame()
     for col in numeric_cols_robustness:
+        robustness_scores[col] = robustness_df[col].apply(extract_numeric)
+    # Extract numeric values for context_grounding_df (all are already float values, but use extract_numeric for consistency)
+    context_scores = pd.DataFrame()
+    for col in numeric_cols_context:
+        context_scores[col] = context_grounding_df[col].apply(extract_numeric)
     # Combine scores by averaging
     combined_scores = (robustness_scores.mean(axis=1) + context_scores.mean(axis=1)) / 2
     robustness_df = pd.DataFrame(robustness_data)
     context_grounding_df = pd.DataFrame(context_grounding_data)
+    # Format tables to bold highest scores, but keep original data for calculations
+    formatted_robustness_df = format_table(robustness_df, robustness_df.copy())  # Pass original data for calculations
+    formatted_context_grounding_df = format_table(context_grounding_df, context_grounding_df.copy())  # Pass original data for calculations
+    # Get top 3 winners using the original (unformatted) DataFrames
     winners_df = get_top_3_models(robustness_df, context_grounding_df)
     # Create Gradio interface with a nice theme
             with gr.Column():
                 with gr.Tab("Robustness Results"):
                     gr.DataFrame(
+                        value=formatted_robustness_df,
                         label="Robustness Results",
                         wrap=True,
                         elem_classes=["custom-table"]
                     )
                 with gr.Tab("Context Grounding Results"):
                     gr.DataFrame(
+                        value=formatted_context_grounding_df,
                         label="Context Grounding Results",
                         wrap=True,
                         elem_classes=["custom-table"]