Spaces:
Running
Running
UI improvement (#8)
Browse files- UI improvements (066e55e008b09de3f22f1b2ed7a8b0cccf9e5c3d)
- app.py +20 -5
- data_utils.py +1 -1
app.py
CHANGED
|
@@ -34,10 +34,12 @@ def display_model_details(model_name):
|
|
| 34 |
link = f"https://huggingface.co/{model_name}"
|
| 35 |
|
| 36 |
return f"""
|
| 37 |
-
<div style="margin-top: 10px; font-size:
|
| 38 |
-
<
|
| 39 |
-
<
|
| 40 |
-
<
|
|
|
|
|
|
|
| 41 |
</div>
|
| 42 |
"""
|
| 43 |
|
|
@@ -49,7 +51,20 @@ default_example_id = evaluation_data[0]["id"]
|
|
| 49 |
|
| 50 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 51 |
gr.Markdown("# VLMVibeEval")
|
| 52 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
mode = gr.Radio(["View model-wise responses", "Compare model responses on a specific example"], label="Mode", value="View model-wise responses")
|
| 54 |
|
| 55 |
with gr.Column(visible=True) as model_mode:
|
|
|
|
| 34 |
link = f"https://huggingface.co/{model_name}"
|
| 35 |
|
| 36 |
return f"""
|
| 37 |
+
<div style="margin-top: 10px; font-size: 14px; display: flex; gap: 12px; align-items: center; flex-wrap: wrap;">
|
| 38 |
+
<span><strong>Provider:</strong> {provider}</span>
|
| 39 |
+
<span style="color: #999;">|</span>
|
| 40 |
+
<span><strong>Size:</strong> {size}B</span>
|
| 41 |
+
<span style="color: #999;">|</span>
|
| 42 |
+
<span><strong>Link:</strong> <a href="{link}" target="_blank">{model_name}</a></span>
|
| 43 |
</div>
|
| 44 |
"""
|
| 45 |
|
|
|
|
| 51 |
|
| 52 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 53 |
gr.Markdown("# VLMVibeEval")
|
| 54 |
+
gr.Markdown(
|
| 55 |
+
"""
|
| 56 |
+
A lightweight leaderboard for evaluating Vision Language Models (VLMs) — based on vibes.
|
| 57 |
+
|
| 58 |
+
Traditional benchmarks can be misleading due to overlap with training data. Instead, we let you **vibe test** models across curated examples:
|
| 59 |
+
|
| 60 |
+
1. Predefined categories with images and prompts.
|
| 61 |
+
2. Check any model on these examples.
|
| 62 |
+
3. Explore the generations and judge for yourself.
|
| 63 |
+
|
| 64 |
+
This is not about scores — it's about *how it feels*.
|
| 65 |
+
"""
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
mode = gr.Radio(["View model-wise responses", "Compare model responses on a specific example"], label="Mode", value="View model-wise responses")
|
| 69 |
|
| 70 |
with gr.Column(visible=True) as model_mode:
|
data_utils.py
CHANGED
|
@@ -12,7 +12,7 @@ def get_evaluation_data(ds):
|
|
| 12 |
"id": ds[i]["ex_id"],
|
| 13 |
"image_thumbnail": image_to_base64(thumbnail_img),
|
| 14 |
"image_full": image_to_base64(img),
|
| 15 |
-
"image_full_url": "https://
|
| 16 |
"prompt": ds[i]["prompt"],
|
| 17 |
"category": ds[i]["category"]
|
| 18 |
})
|
|
|
|
| 12 |
"id": ds[i]["ex_id"],
|
| 13 |
"image_thumbnail": image_to_base64(thumbnail_img),
|
| 14 |
"image_full": image_to_base64(img),
|
| 15 |
+
"image_full_url": "https://visionlmsftw-vibe-testing-images.hf.space/image/" + str(i),
|
| 16 |
"prompt": ds[i]["prompt"],
|
| 17 |
"category": ds[i]["category"]
|
| 18 |
})
|