Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| from datasets import load_dataset | |
| from PIL import Image, ImageOps | |
# Locations of the two data sources this review app is built on.
RESULTS_PICKLE_PATH = "./df_final.pkl"
DATASET_REPO_ID = "XAI/vlmsareblind"

# Results table loaded from a local pickle. The rest of this file reads its
# "task", "custom_id", "content_raw", "gt" and "is_correct" columns.
df_final = pd.read_pickle(RESULTS_PICKLE_PATH)

# Source dataset; its "valid" split is indexed by "custom_id" to fetch the
# image and prompt that belong to a results row.
dataset = load_dataset(DATASET_REPO_ID)
def show_row(row_index, selected_task):
    """Return the display fields for one result row of the selected task.

    Args:
        row_index: Zero-based position of the row among the rows of
            ``df_final`` whose ``task`` equals ``selected_task``. It is
            converted with ``int()`` before the positional lookup.
        selected_task: Task name used to filter ``df_final``.

    Returns:
        A 6-tuple ``(padded_image, prompt, model_output, ground_truth, task,
        is_correct)``. The image and prompt come from the ``"valid"`` split
        of ``dataset`` at the row's ``custom_id``; the remaining values come
        from the row's ``content_raw``, ``gt``, ``task`` and ``is_correct``
        columns.

    Raises:
        IndexError: If ``row_index`` is out of bounds for the filtered rows.
    """
    task_df = df_final[df_final["task"] == selected_task]
    row = task_df.iloc[int(row_index)]

    # Fetch the dataset record once. Indexing the split builds the whole
    # example (image included), so looking it up separately for "image" and
    # "prompt" would repeat that work on every call.
    sample = dataset["valid"][int(row["custom_id"])]
    image = sample["image"]

    # Add white padding: half the image width on the left and right, half the
    # image height on the top and bottom.
    width, height = image.size
    image_with_padding = ImageOps.expand(
        image,
        border=(width // 2, height // 2),
        fill="white",
    )

    return (
        image_with_padding,
        sample["prompt"],
        row["content_raw"],
        row["gt"],
        row["task"],
        row["is_correct"],
    )
def update_slider(selected_task):
    """Build the row-index slider for the given task.

    Args:
        selected_task: Task name used to filter ``df_final``.

    Returns:
        A ``gr.Slider`` ranging from 0 to the last row position of the
        selected task (step 1), with its value reset to 0 and the range
        repeated in its label.
    """
    matches_task = df_final["task"] == selected_task
    last_index = len(df_final[matches_task]) - 1
    return gr.Slider(
        label=f"Select Row Index (0-{last_index})",
        minimum=0,
        maximum=last_index,
        value=0,
        step=1,
    )
# Build the per-task accuracy table displayed in the interface.
# Step 1: mean of "is_correct" within each task.
_task_accuracy = df_final.groupby("task")["is_correct"].mean()
# Step 2: best-performing task first.
_ranked_accuracy = _task_accuracy.sort_values(ascending=False)
# Step 3: express as a percentage, formatted as a two-decimal string.
_percent_strings = (_ranked_accuracy * 100).apply(lambda pct: f"{pct:.2f}")
# Step 4: turn the task index into a regular column and label both columns.
accuracy_breakdown = _percent_strings.reset_index()
accuracy_breakdown.columns = ["Task", "Accuracy (%)"]
# Create the Gradio interface.
# NOTE(review): the nesting of components inside the rows/columns below was
# reconstructed from a listing that had lost its indentation -- confirm the
# layout against the running app.
with gr.Blocks() as app:
    gr.Markdown("# BlindTest Results Review (GPT-4o mini)")
    gr.HTML(
        """
        <p style="text-align: center;">
        This is a review of results from the GPT-4o mini model on the VLMs Are Blind dataset.
        <br>
        <a href="https://vlmsareblind.github.io/" target="_blank">Project Website</a> |
        <a href="https://arxiv.org/abs/2407.06581" target="_blank">arXiv Paper</a>
        </p>
        """
    )

    # Compute the task list and the initial slider range once instead of
    # re-deriving them for every keyword argument.
    task_choices = df_final["task"].unique().tolist()
    initial_task = task_choices[0]
    initial_last_index = len(df_final[df_final["task"] == initial_task]) - 1

    with gr.Row():
        task_dropdown = gr.Dropdown(
            choices=task_choices,
            label="Select Task",
            value=initial_task,
        )
        row_selector = gr.Slider(
            minimum=0,
            maximum=initial_last_index,
            step=1,
            label=f"Select Row Index (0-{initial_last_index})",
            value=0,
        )

    with gr.Row():
        with gr.Column(scale=2):
            image_output = gr.Image(label="Image", type="pil")
        with gr.Column(scale=3):
            prompt_output = gr.Textbox(label="Prompt", lines=3)
            model_output = gr.Textbox(label="Model Output", lines=2)
            ground_truth = gr.Textbox(label="Ground Truth", lines=2)
            task = gr.Textbox(label="Task")
            is_correct = gr.Checkbox(label="Is Correct")

    gr.Markdown("## Accuracy Breakdown by Task")
    gr.DataFrame(accuracy_breakdown)

    # Components filled by show_row, in the order of its return tuple.
    detail_outputs = [
        image_output,
        prompt_output,
        model_output,
        ground_truth,
        task,
        is_correct,
    ]

    def _show_first_row(selected_task):
        """Show row 0 of the newly selected task."""
        return show_row(0, selected_task)

    # Switching task resets the slider range and shows that task's first row.
    # The row index is passed as a constant here rather than through an extra
    # hidden Slider component created only to carry the value 0.
    task_dropdown.change(update_slider, inputs=task_dropdown, outputs=row_selector)
    task_dropdown.change(
        _show_first_row,
        inputs=task_dropdown,
        outputs=detail_outputs,
    )
    row_selector.change(
        show_row,
        inputs=[row_selector, task_dropdown],
        outputs=detail_outputs,
    )

    # Populate the detail panels when the page first loads so they match the
    # dropdown and slider defaults instead of starting out empty.
    app.load(
        show_row,
        inputs=[row_selector, task_dropdown],
        outputs=detail_outputs,
    )

# Launch the app
app.launch()