Spaces:

AlexTransformer
/

ernie-4.5-logprob-compare

Sleeping

App Files Files Community

AlexTransformer committed on Sep 21

Commit

b9a15d9

verified ·

1 Parent(s): 9387d94

Update app.py

Browse files

Files changed (1) hide show

app.py +194 -91

app.py CHANGED Viewed

@@ -4,7 +4,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import pandas as pd
 import math
-from plotly import graph_objects as go
 # Load model and tokenizer
 model_ids = {
@@ -29,14 +31,24 @@ def format_prob(prob):
 # Helper function to format log probability
 def format_log_prob(log_prob):
-    """Format log probability with color coding"""
     return f"{log_prob:.3f}"
 # Main function: compute token-wise log probabilities and top-k predictions
 @torch.no_grad()
 def compare_models(text, top_k=5):
     if not text.strip():
-        return None, "⚠️ Please enter some text to analyze"
     results = {}
@@ -63,6 +75,8 @@ def compare_models(text, top_k=5):
         # Generate top-k predictions for each position (up to first 20 tokens)
         topk_list = []
         confidence_list = []
         for i in range(min(20, shift_logits.shape[1])):
             topk = torch.topk(log_probs[0, i], k=top_k)
             topk_ids = topk.indices.tolist()
@@ -77,71 +91,124 @@ def compare_models(text, top_k=5):
             # Calculate confidence (probability of actual token)
             actual_token_prob = math.exp(token_log_probs[0, i].item())
             confidence_list.append(actual_token_prob)
-        # Prepare dataframe for display
-        df = pd.DataFrame({
-            "Token": tokens[:20],
-            "LogProb": [format_log_prob(float(x)) for x in token_log_probs[0][:20]],
-            "Confidence": [format_prob(x) for x in confidence_list[:20]],
-            f"Top-{top_k} Predictions": topk_list
-        })
         results[model_name] = {
-            "df": df,
-            "total_log_prob": total_log_prob,
             "tokens": tokens[:20],
-            "confidences": confidence_list[:20]
         }
-    # Create comparison dataframe
-    comparison_df = pd.DataFrame({
-        "Token": results["ERNIE-4.5-PT"]["df"]["Token"],
-        "ERNIE-4.5-PT": {
-            "LogProb": results["ERNIE-4.5-PT"]["df"]["LogProb"],
-            "Confidence": results["ERNIE-4.5-PT"]["df"]["Confidence"],
-            "Top-k": results["ERNIE-4.5-PT"]["df"][f"Top-{top_k} Predictions"]
-        },
-        "ERNIE-4.5-Base-PT": {
-            "LogProb": results["ERNIE-4.5-Base-PT"]["df"]["LogProb"],
-            "Confidence": results["ERNIE-4.5-Base-PT"]["df"]["Confidence"],
-            "Top-k": results["ERNIE-4.5-Base-PT"]["df"][f"Top-{top_k} Predictions"]
-        }
-    })
-    # Create visualization
-    fig = go.Figure()
-    # Add confidence bars for both models
-    fig.add_trace(go.Bar(
         name='ERNIE-4.5-PT',
         x=results["ERNIE-4.5-PT"]["tokens"],
-        y=results["ERNIE-4.5-PT"]["confidences"],
-        marker_color='royalblue'
     ))
-    fig.add_trace(go.Bar(
         name='ERNIE-4.5-Base-PT',
         x=results["ERNIE-4.5-Base-PT"]["tokens"],
-        y=results["ERNIE-4.5-Base-PT"]["confidences"],
-        marker_color='lightseagreen'
     ))
-    fig.update_layout(
-        title='Model Confidence Comparison',
         xaxis_title='Token',
         yaxis_title='Confidence (Probability)',
         barmode='group',
-        yaxis=dict(tickformat='.0%', range=[0, 1]),
         legend=dict(
             orientation="h",
             yanchor="bottom",
             y=1.02,
             xanchor="right",
             x=1
-        )
     )
-    # Create summary
     pt_logprob = results['ERNIE-4.5-PT']['total_log_prob']
     base_logprob = results['ERNIE-4.5-Base-PT']['total_log_prob']
@@ -153,34 +220,76 @@ def compare_models(text, top_k=5):
         better_model = "ERNIE-4.5-Base-PT"
         difference = base_logprob - pt_logprob
     summary = (
         f"📊 **Model Comparison Summary**\n\n"
         f"**Total Log Probability**:\n"
         f"- ERNIE-4.5-PT: {pt_logprob:.3f}\n"
         f"- ERNIE-4.5-Base-PT: {base_logprob:.3f}\n\n"
         f"🏆 **Higher Confidence Model**: {better_model}\n"
-        f"Difference: {difference:.3f} ({'+' if better_model == 'ERNIE-4.5-PT' else '-'}{difference:.3f})\n\n"
         f"**What this means**:\n"
         f"- Log probability closer to 0 (less negative) indicates higher model confidence\n"
         f"- The {better_model} model is more confident in predicting your input text\n"
-        f"- Confidence per token is shown in the table and chart below"
     )
-    return comparison_df, summary, fig
 # Create custom CSS for better styling
 css = """
 .main-container {
-    max-width: 1200px;
     margin: 0 auto;
 }
 .dataframe-container {
     margin: 20px 0;
 }
-.confidence-chart {
-    margin: 20px 0;
-    height: 400px;
-}
 .summary-box {
     background-color: #f8f9fa;
     border-left: 4px solid #4285f4;
@@ -188,26 +297,12 @@ css = """
     border-radius: 4px;
     margin: 20px 0;
 }
-.model-header {
-    font-weight: bold;
-    color: #1a73e8;
-    margin-top: 10px;
-}
-.token-cell {
-    font-family: monospace;
-    background-color: #f1f3f4;
-    padding: 4px 8px;
-    border-radius: 3px;
-}
-.confidence-high {
-    color: #0f9d58;
-    font-weight: bold;
-}
-.confidence-medium {
-    color: #f4b400;
-}
-.confidence-low {
-    color: #db4437;
 }
 """
@@ -217,13 +312,7 @@ with gr.Blocks(css=css, title="ERNIE Model Comparison Tool") as demo:
         """
         # 🔍 ERNIE 4.5 Model Comparison Tool
-        Compare how different ERNIE models process your text with detailed token-level analysis.
-        ## What this tool shows:
-        - **Token Log Probability**: How confident the model is in predicting each token (closer to 0 is better)
-        - **Confidence**: Probability percentage for each token prediction
-        - **Top-k Predictions**: What other tokens the model considered likely
-        - **Visual Comparison**: Bar chart showing confidence differences between models
         """
     )
@@ -233,7 +322,7 @@ with gr.Blocks(css=css, title="ERNIE Model Comparison Tool") as demo:
                 lines=3,
                 placeholder="Enter text to analyze (e.g., 'Hello, World!')",
                 label="Input Text",
-                value="Hello, World!"
             )
         with gr.Column(scale=1):
             top_k = gr.Slider(
@@ -245,7 +334,7 @@ with gr.Blocks(css=css, title="ERNIE Model Comparison Tool") as demo:
             )
     with gr.Row():
-        compare_btn = gr.Button("Compare Models", variant="primary")
     with gr.Row():
         with gr.Column():
@@ -256,18 +345,32 @@ with gr.Blocks(css=css, title="ERNIE Model Comparison Tool") as demo:
     with gr.Row():
         with gr.Column():
-            comparison_table = gr.Dataframe(
-                label="Token-Level Analysis",
-                elem_classes=["dataframe-container"],
-                interactive=False,
-                wrap=True
             )
     with gr.Row():
         with gr.Column():
             confidence_chart = gr.Plot(
-                label="Model Confidence Comparison",
-                elem_classes=["confidence-chart"]
             )
     # Examples section
@@ -287,10 +390,10 @@ with gr.Blocks(css=css, title="ERNIE Model Comparison Tool") as demo:
         """
         ## How to Interpret Results
-        1. **Log Probability**: Negative values where closer to 0 means higher model confidence
-        2. **Confidence**: Percentage showing how certain the model was about each token
-        3. **Top-k Predictions**: Alternative tokens the model considered likely
-        4. **Visual Chart**: Bar heights represent model confidence for each token
         **Model Differences**:
         - **ERNIE-4.5-PT**: Instruction-tuned model, better at following complex instructions
@@ -302,7 +405,7 @@ with gr.Blocks(css=css, title="ERNIE Model Comparison Tool") as demo:
     compare_btn.click(
         fn=compare_models,
         inputs=[input_text, top_k],
-        outputs=[comparison_table, summary_box, confidence_chart]
     )
 if __name__ == "__main__":

 import gradio as gr
 import pandas as pd
 import math
+import plotly.graph_objects as go
+import plotly.express as px
+from plotly.subplots import make_subplots
 # Load model and tokenizer
 model_ids = {
 # Helper function to format log probability
 def format_log_prob(log_prob):
+    """Format log probability"""
     return f"{log_prob:.3f}"
+# Helper function to get confidence level
def get_confidence_level(prob):
    """Classify a token probability into a confidence bucket.

    The buckets match the legend shown in the summary text:
    High (>80%), Medium (50-80%), Low (<50%).

    Args:
        prob: Probability of the actual token, expected in [0, 1].

    Returns:
        A ``(level, indicator)`` tuple, where ``level`` is one of
        ``"High"``, ``"Medium"`` or ``"Low"`` and ``indicator`` is the
        matching coloured-circle emoji.
    """
    if prob > 0.8:
        return "High", "🟢"
    # Inclusive lower bound: exactly 50% belongs to the Medium band
    # (50-80%), because the Low band is documented as strictly below 50%.
    if prob >= 0.5:
        return "Medium", "🟡"
    return "Low", "🔴"
 # Main function: compute token-wise log probabilities and top-k predictions
 @torch.no_grad()
 def compare_models(text, top_k=5):
     if not text.strip():
+        return None, "⚠️ Please enter some text to analyze", None
     results = {}
         # Generate top-k predictions for each position (up to first 20 tokens)
         topk_list = []
         confidence_list = []
+        confidence_indicators = []
         for i in range(min(20, shift_logits.shape[1])):
             topk = torch.topk(log_probs[0, i], k=top_k)
             topk_ids = topk.indices.tolist()
             # Calculate confidence (probability of actual token)
             actual_token_prob = math.exp(token_log_probs[0, i].item())
             confidence_list.append(actual_token_prob)
+            # Get confidence level and indicator
+            level, indicator = get_confidence_level(actual_token_prob)
+            confidence_indicators.append(indicator)
+        # Store results for this model
         results[model_name] = {
             "tokens": tokens[:20],
+            "log_probs": [format_log_prob(float(x)) for x in token_log_probs[0][:20]],
+            "confidences": [format_prob(x) for x in confidence_list[:20]],
+            "levels": confidence_indicators[:20],
+            "topk_predictions": topk_list,
+            "total_log_prob": total_log_prob,
+            "confidence_values": confidence_list[:20]  # Keep raw values for plotting
         }
+    # Create a properly structured dataframe
+    df_data = {"Token": results["ERNIE-4.5-PT"]["tokens"]}
+    # Add columns for each model
+    for model_name in ["ERNIE-4.5-PT", "ERNIE-4.5-Base-PT"]:
+        df_data[f"{model_name} LogProb"] = results[model_name]["log_probs"]
+        df_data[f"{model_name} Confidence"] = results[model_name]["confidences"]
+        df_data[f"{model_name} Level"] = results[model_name]["levels"]
+        df_data[f"{model_name} Top-{top_k}"] = results[model_name]["topk_predictions"]
+    # Create the dataframe
+    comparison_df = pd.DataFrame(df_data)
+    # Create visualizations
+    # 1. Token-level confidence comparison
+    fig_confidence = go.Figure()
+    # Add bars for both models
+    fig_confidence.add_trace(go.Bar(
         name='ERNIE-4.5-PT',
         x=results["ERNIE-4.5-PT"]["tokens"],
+        y=results["ERNIE-4.5-PT"]["confidence_values"],
+        marker_color='royalblue',
+        text=[format_prob(x) for x in results["ERNIE-4.5-PT"]["confidence_values"]],
+        textposition='auto',
+        textfont=dict(size=10)
     ))
+    fig_confidence.add_trace(go.Bar(
         name='ERNIE-4.5-Base-PT',
         x=results["ERNIE-4.5-Base-PT"]["tokens"],
+        y=results["ERNIE-4.5-Base-PT"]["confidence_values"],
+        marker_color='lightseagreen',
+        text=[format_prob(x) for x in results["ERNIE-4.5-Base-PT"]["confidence_values"]],
+        textposition='auto',
+        textfont=dict(size=10)
     ))
+    fig_confidence.update_layout(
+        title='Token-Level Confidence Comparison',
         xaxis_title='Token',
         yaxis_title='Confidence (Probability)',
         barmode='group',
+        yaxis=dict(tickformat='.0%', range=[0, 1.05]),
         legend=dict(
             orientation="h",
             yanchor="bottom",
             y=1.02,
             xanchor="right",
             x=1
+        ),
+        height=500
     )
+    # 2. Log probability trend comparison
+    fig_logprob = go.Figure()
+    # Convert log probabilities back to float for plotting
+    pt_logprobs = [float(x) for x in results["ERNIE-4.5-PT"]["log_probs"]]
+    base_logprobs = [float(x) for x in results["ERNIE-4.5-Base-PT"]["log_probs"]]
+    fig_logprob.add_trace(go.Scatter(
+        name='ERNIE-4.5-PT',
+        x=results["ERNIE-4.5-PT"]["tokens"],
+        y=pt_logprobs,
+        mode='lines+markers',
+        line=dict(color='royalblue', width=3),
+        marker=dict(size=8),
+        text=[f"LogProb: {x}<br>Token: {t}" for x, t in zip(pt_logprobs, results["ERNIE-4.5-PT"]["tokens"])],
+        hoverinfo='text'
+    ))
+    fig_logprob.add_trace(go.Scatter(
+        name='ERNIE-4.5-Base-PT',
+        x=results["ERNIE-4.5-Base-PT"]["tokens"],
+        y=base_logprobs,
+        mode='lines+markers',
+        line=dict(color='lightseagreen', width=3),
+        marker=dict(size=8),
+        text=[f"LogProb: {x}<br>Token: {t}" for x, t in zip(base_logprobs, results["ERNIE-4.5-Base-PT"]["tokens"])],
+        hoverinfo='text'
+    ))
+    # Add a horizontal line at y=0 for reference
+    fig_logprob.add_hline(y=0, line_dash="dash", line_color="red", annotation_text="Zero Reference")
+    fig_logprob.update_layout(
+        title='Token-Level Log Probability Trend',
+        xaxis_title='Token',
+        yaxis_title='Log Probability',
+        hovermode='closest',
+        legend=dict(
+            orientation="h",
+            yanchor="bottom",
+            y=1.02,
+            xanchor="right",
+            x=1
+        ),
+        height=400
+    )
+    # 3. Model summary comparison
     pt_logprob = results['ERNIE-4.5-PT']['total_log_prob']
     base_logprob = results['ERNIE-4.5-Base-PT']['total_log_prob']
         better_model = "ERNIE-4.5-Base-PT"
         difference = base_logprob - pt_logprob
+    # Calculate average confidence for each model
+    pt_avg_conf = sum(results['ERNIE-4.5-PT']['confidence_values']) / len(results['ERNIE-4.5-PT']['confidence_values'])
+    base_avg_conf = sum(results['ERNIE-4.5-Base-PT']['confidence_values']) / len(results['ERNIE-4.5-Base-PT']['confidence_values'])
+    # Create summary chart
+    fig_summary = go.Figure()
+    fig_summary.add_trace(go.Bar(
+        name='Total Log Probability',
+        x=['ERNIE-4.5-PT', 'ERNIE-4.5-Base-PT'],
+        y=[pt_logprob, base_logprob],
+        marker_color=['royalblue', 'lightseagreen'],
+        text=[f"{pt_logprob:.3f}", f"{base_logprob:.3f}"],
+        textposition='auto',
+        textfont=dict(size=14)
+    ))
+    fig_summary.update_layout(
+        title='Model Summary Comparison',
+        yaxis_title='Total Log Probability',
+        xaxis_title='Model',
+        height=300,
+        showlegend=False
+    )
+    # Add annotation for the better model
+    fig_summary.add_annotation(
+        x=0 if better_model == "ERNIE-4.5-PT" else 1,
+        y=max(pt_logprob, base_logprob) + 0.5,
+        text=f"🏆 {better_model}",
+        showarrow=True,
+        arrowhead=1,
+        ax=0,
+        ay=-30,
+        font=dict(size=16, color="green")
+    )
+    # Create summary text
     summary = (
         f"📊 **Model Comparison Summary**\n\n"
         f"**Total Log Probability**:\n"
         f"- ERNIE-4.5-PT: {pt_logprob:.3f}\n"
         f"- ERNIE-4.5-Base-PT: {base_logprob:.3f}\n\n"
+        f"**Average Confidence**:\n"
+        f"- ERNIE-4.5-PT: {format_prob(pt_avg_conf)}\n"
+        f"- ERNIE-4.5-Base-PT: {format_prob(base_avg_conf)}\n\n"
         f"🏆 **Higher Confidence Model**: {better_model}\n"
+        f"Difference: {difference:.3f}\n\n"
         f"**What this means**:\n"
         f"- Log probability closer to 0 (less negative) indicates higher model confidence\n"
         f"- The {better_model} model is more confident in predicting your input text\n"
+        f"- Confidence indicators: 🟢 High (>80%), 🟡 Medium (50-80%), 🔴 Low (<50%)\n\n"
+        f"**Interpretation Guide**:\n"
+        f"- **LogProb**: How confident the model is in predicting each token (closer to 0 is better)\n"
+        f"- **Confidence**: Probability percentage for each token prediction\n"
+        f"- **Level**: Visual indicator of confidence (🟢🟡🔴)\n"
+        f"- **Top-k**: What other tokens the model considered likely"
     )
+    return comparison_df, summary, fig_confidence, fig_logprob, fig_summary
 # Create custom CSS for better styling
 css = """
 .main-container {
+    max-width: 1400px;
     margin: 0 auto;
 }
 .dataframe-container {
     margin: 20px 0;
 }
 .summary-box {
     background-color: #f8f9fa;
     border-left: 4px solid #4285f4;
     border-radius: 4px;
     margin: 20px 0;
 }
+.chart-container {
+    margin: 20px 0;
+    border: 1px solid #e0e0e0;
+    border-radius: 8px;
+    padding: 15px;
+    background-color: #ffffff;
 }
 """
         """
         # 🔍 ERNIE 4.5 Model Comparison Tool
+        Compare how different ERNIE models process your text with detailed token-level analysis and visualizations.
         """
     )
                 lines=3,
                 placeholder="Enter text to analyze (e.g., 'Hello, World!')",
                 label="Input Text",
+                value="What is the meaning of life?"
             )
         with gr.Column(scale=1):
             top_k = gr.Slider(
             )
     with gr.Row():
+        compare_btn = gr.Button("Compare Models", variant="primary", size="lg")
     with gr.Row():
         with gr.Column():
     with gr.Row():
         with gr.Column():
+            summary_chart = gr.Plot(
+                label="Model Summary",
+                elem_classes=["chart-container"]
             )
     with gr.Row():
         with gr.Column():
             confidence_chart = gr.Plot(
+                label="Token-Level Confidence Comparison",
+                elem_classes=["chart-container"]
+            )
+    with gr.Row():
+        with gr.Column():
+            logprob_chart = gr.Plot(
+                label="Token-Level Log Probability Trend",
+                elem_classes=["chart-container"]
+            )
+    with gr.Row():
+        with gr.Column():
+            comparison_table = gr.Dataframe(
+                label="Token-Level Analysis",
+                elem_classes=["dataframe-container"],
+                interactive=False,
+                wrap=True
             )
     # Examples section
         """
         ## How to Interpret Results
+        1. **Model Summary Chart**: Shows which model has higher overall confidence for your input text
+        2. **Token-Level Confidence Chart**: Compares how confident each model is for each token in your text
+        3. **Log Probability Trend Chart**: Shows how log probability changes across tokens (closer to 0 is better)
+        4. **Token-Level Analysis Table**: Detailed breakdown of predictions for each token
         **Model Differences**:
         - **ERNIE-4.5-PT**: Instruction-tuned model, better at following complex instructions
     compare_btn.click(
         fn=compare_models,
         inputs=[input_text, top_k],
+        outputs=[comparison_table, summary_box, confidence_chart, logprob_chart, summary_chart]
     )
 if __name__ == "__main__":