Spaces:

XAI
/

VLMsAreBlind-ResultsReview

Running

App Files Files Community

taesiri committed on Jul 24, 2024

Commit

d18a9b2

1 Parent(s): 589beac

update

Browse files

Files changed (1) hide show

app.py +116 -0

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import gradio as gr
+import pandas as pd
+from datasets import load_dataset
+df_final = pd.read_pickle("./df_final.pkl")  # results table (columns used below: task, custom_id, content_raw, gt, is_correct); NOTE(review): unpickling can execute arbitrary code, so this file must stay trusted
+dataset = load_dataset("XAI/vlmsareblind")  # dataset whose "valid" split supplies the image and prompt shown for each result row
+def show_row(row_index, selected_task):
+    task_df = df_final[df_final["task"] == selected_task]
+    row = task_df.iloc[int(row_index)]
+    custom_id = int(row["custom_id"])
+    image = dataset["valid"][custom_id]["image"]
+    prompt = dataset["valid"][custom_id]["prompt"]
+    model_output = row["content_raw"]
+    ground_truth = row["gt"]
+    task = row["task"]
+    is_correct = row["is_correct"]
+    return image, prompt, model_output, ground_truth, task, is_correct
+def update_slider(selected_task):
+    task_df = df_final[df_final["task"] == selected_task]
+    return gr.Slider(
+        minimum=0,
+        maximum=len(task_df) - 1,
+        step=1,
+        label=f"Select Row Index (0-{len(task_df) - 1})",
+        value=0,
+    )
+# Create accuracy breakdown dataframe
+accuracy_breakdown = (
+    df_final.groupby("task")["is_correct"]
+    .mean()
+    .sort_values(ascending=False)
+    .mul(100)
+    .apply(lambda x: f"{x:.2f}")
+    .reset_index()
+)
+accuracy_breakdown.columns = ["Task", "Accuracy (%)"]
+# Create the Gradio interface
+with gr.Blocks() as app:
+    gr.Markdown("# VLMs Are Blind Results Review (GPT-4o-mini)")
+    gr.HTML(
+        """
+        <p style="text-align: center;">
+            This is a review of results from the GPT-4 model on the VLMs Are Blind dataset.
+            <br>
+            <a href="https://vlmsareblind.github.io/" target="_blank">Project Website</a> |
+            <a href="https://arxiv.org/abs/2407.06581" target="_blank">arXiv Paper</a>
+        </p>
+    """
+    )
+    with gr.Row():
+        task_dropdown = gr.Dropdown(
+            choices=df_final["task"].unique().tolist(),
+            label="Select Task",
+            value=df_final["task"].unique()[0],
+        )
+        row_selector = gr.Slider(
+            minimum=0,
+            maximum=len(df_final[df_final["task"] == df_final["task"].unique()[0]]) - 1,
+            step=1,
+            label=f"Select Row Index (0-{len(df_final[df_final['task'] == df_final['task'].unique()[0]]) - 1})",
+            value=0,
+        )
+    with gr.Row():
+        with gr.Column(scale=2):
+            image_output = gr.Image(label="Image", type="pil")
+        with gr.Column(scale=3):
+            prompt_output = gr.Textbox(label="Prompt", lines=3)
+            model_output = gr.Textbox(label="Model Output", lines=2)
+            ground_truth = gr.Textbox(label="Ground Truth", lines=2)
+            task = gr.Textbox(label="Task")
+            is_correct = gr.Checkbox(label="Is Correct")
+    gr.Markdown("## Accuracy Breakdown by Task")
+    gr.DataFrame(accuracy_breakdown)
+    task_dropdown.change(update_slider, inputs=task_dropdown, outputs=row_selector)
+    task_dropdown.change(
+        show_row,
+        inputs=[gr.Slider(value=0, visible=False), task_dropdown],
+        outputs=[
+            image_output,
+            prompt_output,
+            model_output,
+            ground_truth,
+            task,
+            is_correct,
+        ],
+    )
+    row_selector.change(
+        show_row,
+        inputs=[row_selector, task_dropdown],
+        outputs=[
+            image_output,
+            prompt_output,
+            model_output,
+            ground_truth,
+            task,
+            is_correct,
+        ],
+    )
+# Launch the app
+app.launch()