Spaces:
Running
Running
UI improvement (#8)
Browse files- UI improvements (066e55e008b09de3f22f1b2ed7a8b0cccf9e5c3d)
- app.py +20 -5
- data_utils.py +1 -1
app.py
CHANGED
|
@@ -34,10 +34,12 @@ def display_model_details(model_name):
|
|
| 34 |
link = f"https://huggingface.co/{model_name}"
|
| 35 |
|
| 36 |
return f"""
|
| 37 |
-
<div style="margin-top: 10px; font-size:
|
| 38 |
-
<
|
| 39 |
-
<
|
| 40 |
-
<
|
|
|
|
|
|
|
| 41 |
</div>
|
| 42 |
"""
|
| 43 |
|
|
@@ -49,7 +51,20 @@ default_example_id = evaluation_data[0]["id"]
|
|
| 49 |
|
| 50 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 51 |
gr.Markdown("# VLMVibeEval")
|
| 52 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
mode = gr.Radio(["View model-wise responses", "Compare model responses on a specific example"], label="Mode", value="View model-wise responses")
|
| 54 |
|
| 55 |
with gr.Column(visible=True) as model_mode:
|
|
|
|
| 34 |
link = f"https://huggingface.co/{model_name}"
|
| 35 |
|
| 36 |
return f"""
|
| 37 |
+
<div style="margin-top: 10px; font-size: 14px; display: flex; gap: 12px; align-items: center; flex-wrap: wrap;">
|
| 38 |
+
<span><strong>Provider:</strong> {provider}</span>
|
| 39 |
+
<span style="color: #999;">|</span>
|
| 40 |
+
<span><strong>Size:</strong> {size}B</span>
|
| 41 |
+
<span style="color: #999;">|</span>
|
| 42 |
+
<span><strong>Link:</strong> <a href="{link}" target="_blank">{model_name}</a></span>
|
| 43 |
</div>
|
| 44 |
"""
|
| 45 |
|
|
|
|
| 51 |
|
| 52 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 53 |
gr.Markdown("# VLMVibeEval")
|
| 54 |
+
gr.Markdown(
|
| 55 |
+
"""
|
| 56 |
+
A lightweight leaderboard for evaluating Vision Language Models (VLMs) — based on vibes.
|
| 57 |
+
|
| 58 |
+
Traditional benchmarks can be misleading due to overlap with training data. Instead, we let you **vibe test** models across curated examples:
|
| 59 |
+
|
| 60 |
+
1. Predefined categories with images and prompts.
|
| 61 |
+
2. Check any model on these examples.
|
| 62 |
+
3. Explore the generations and judge for yourself.
|
| 63 |
+
|
| 64 |
+
This is not about scores — it's about *how it feels*.
|
| 65 |
+
"""
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
mode = gr.Radio(["View model-wise responses", "Compare model responses on a specific example"], label="Mode", value="View model-wise responses")
|
| 69 |
|
| 70 |
with gr.Column(visible=True) as model_mode:
|
data_utils.py
CHANGED
|
@@ -12,7 +12,7 @@ def get_evaluation_data(ds):
|
|
| 12 |
"id": ds[i]["ex_id"],
|
| 13 |
"image_thumbnail": image_to_base64(thumbnail_img),
|
| 14 |
"image_full": image_to_base64(img),
|
| 15 |
-
"image_full_url": "https://
|
| 16 |
"prompt": ds[i]["prompt"],
|
| 17 |
"category": ds[i]["category"]
|
| 18 |
})
|
|
|
|
| 12 |
"id": ds[i]["ex_id"],
|
| 13 |
"image_thumbnail": image_to_base64(thumbnail_img),
|
| 14 |
"image_full": image_to_base64(img),
|
| 15 |
+
"image_full_url": "https://visionlmsftw-vibe-testing-images.hf.space/image/" + str(i),
|
| 16 |
"prompt": ds[i]["prompt"],
|
| 17 |
"category": ds[i]["category"]
|
| 18 |
})
|