import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
import os
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
LLM_BENCHMARKS_TEXT,
INTRODUCTION_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
BENCHMARK_COLS,
COLS,
EVAL_COLS,
EVAL_TYPES,
AutoEvalColumn,
ModelType,
fields,
WeightType,
Precision
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval
def restart_space():
API.restart_space(repo_id=REPO_ID)
### Space initialisation
try:
print(EVAL_REQUESTS_PATH)
snapshot_download(
repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
)
except Exception:
restart_space()
try:
print(EVAL_RESULTS_PATH)
snapshot_download(
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
)
except Exception:
restart_space()
# LIBERO Leaderboard Data
LIBERO_DATA = [
['<a href="https://huggingface.co/HuggingFaceVLA/smolvla_libero" target="_blank">HuggingFaceVLA/smolvla_libero</a>', "HuggingFace", "450M", 0.90, 1.0, 1.0, "--", 0.6, 0.87, "βœ… Checkpoint Available", '<a href="https://huggingface.co/papers/2506.01844" target="_blank">πŸ“„ SmolVLA Paper</a>', "smolvla_spatial.mp4"],
['<a href="https://huggingface.co/lerobot/pi0" target="_blank">lerobot/pi0</a>', "Physical Intelligence", "3.3B", 0.90, 0.86, 0.95, "--", 0.73, 0.86, "Reported Score Only", '<a href="https://huggingface.co/papers/2410.24164" target="_blank">πŸ“„ Pi0 Paper</a>', "pi0.mp4"],
]
LIBERO_COLUMNS = [
"Model",
"Organization",
"Model Size",
"Spatial",
"Object",
"Goal",
"90",
"Long",
"Average",
"Available",
"Paper",
"Video"
]
# Columns to display in the table (excluding Video and Organization columns)
LIBERO_DISPLAY_COLUMNS = [
"Model",
"Model Size",
"Spatial",
"Object",
"Goal",
"90",
"Long",
"Average",
"Available",
"Paper"
]
LIBERO_DF = pd.DataFrame(LIBERO_DATA, columns=LIBERO_COLUMNS)
def get_libero_leaderboard():
return LIBERO_DF
def get_video_by_model_and_task(model_name, task_name):
"""Get video file path for a given model and task"""
# Task-specific videos for each model (only SmolVLA has videos available)
model_task_videos = {
"SmolVLA": {
"Spatial": "smolvla_spatial.mp4",
"Object": "smolvla_object.mp4",
"Goal": "smolvla_goal.mp4",
"90": "smolvla_90.mp4",
"Long": "smolvla_long.mp4"
}
# Pi0 videos not available yet
}
# Get the video for the specific model and task
if model_name in model_task_videos and task_name in model_task_videos[model_name]:
video_file = model_task_videos[model_name][task_name]
print(f"Selected model: {model_name}, Task: {task_name}, Video file: {video_file}")
return video_file
else:
print(f"No video available for {model_name} - {task_name}")
return None
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
(
finished_eval_queue_df,
running_eval_queue_df,
pending_eval_queue_df,
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
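# NOTE: LEADERBOARD_DF, the queue dataframes, and init_leaderboard below are kept from
# the original leaderboard scaffold; the UI below renders its own static LIBERO table.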
def init_leaderboard(dataframe):
if dataframe is None or dataframe.empty:
raise ValueError("Leaderboard DataFrame is empty or None.")
print([c.type for c in fields(AutoEvalColumn)])
return Leaderboard(
value=dataframe,
datatype=[c.type for c in fields(AutoEvalColumn)],
select_columns=SelectColumns(
default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
label="Select Columns to Display:",
),
search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
filter_columns=[
ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
ColumnFilter(
AutoEvalColumn.params.name,
type="slider",
min=0.01,
max=150,
label="Select the number of parameters (B)",
),
ColumnFilter(
AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
),
],
bool_checkboxgroup_label="Hide models",
interactive=False,
)
custom_css_extended = custom_css + """
/* More specific selectors to override Gradio defaults */
.gradio-container #libero-leaderboard th,
#libero-leaderboard thead th,
#libero-leaderboard th {
font-size: 10px !important;
font-weight: bold !important;
padding: 6px 8px !important;
}
.gradio-container #libero-leaderboard td,
#libero-leaderboard tbody td,
#libero-leaderboard td {
font-size: 12px !important;
padding: 6px 8px !important;
}
#libero-leaderboard th:first-child,
#libero-leaderboard td:first-child {
min-width: 300px !important;
max-width: 400px !important;
width: 350px !important;
}
#libero-leaderboard a {
color: #0066cc !important;
text-decoration: none !important;
}
#libero-leaderboard a:hover {
text-decoration: underline !important;
}
"""
demo = gr.Blocks(css=custom_css_extended)
with demo:
gr.HTML(TITLE)
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
with gr.Tabs(elem_classes="tab-buttons") as tabs:
with gr.TabItem("πŸ† LIBERO Leaderboard", elem_id="libero-benchmark-tab-table", id=0):
# Header with image
'''
with gr.Row():
gr.Markdown(
"""
<div align="center">
<h1>πŸ† LIBERO Leaderboard</h1>
<h3>Benchmarking <b>Vision-Language-Action (VLA)</b> Policies in Simulation</h3>
<p style="font-size:16px;">Made with ❀️ by <b>HuggingFace VLA</b></p>
<img src="https://libero-project.github.io/assets/images/libero_banner.png"
alt="LIBERO Banner" style="max-width: 80%; border-radius: 12px; margin-top: 20px;">
</div>
"""
)
'''
# Full-width Leaderboard Section
with gr.Group():
gr.Markdown("### πŸ… Current Leaderboard")
# Controls and video section in same row
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### πŸ” Search & Controls")
search_box = gr.Textbox(
label="Search models",
placeholder="Type model name to search...",
interactive=True
)
# Define columns that are always shown (not selectable)
MANDATORY_COLUMNS = ["Model", "Model Size", "Paper"]
# Define columns that can be toggled
SELECTABLE_COLUMNS = [col for col in LIBERO_DISPLAY_COLUMNS if col not in MANDATORY_COLUMNS]
column_selector = gr.CheckboxGroup(
choices=SELECTABLE_COLUMNS,
value=SELECTABLE_COLUMNS,
label="Select optional columns to display",
interactive=True
)
gr.Markdown("**Always shown:** Model, Model Size, Paper")
with gr.Column(scale=1):
gr.Markdown("### πŸŽ₯ Model Video Demo")
gr.Markdown("Click on any model row in the table below to see its demo video")
video_display = gr.Video(
label="Demo video will appear here when you click on a model",
height=300,
autoplay=False,
show_label=True,
interactive=False,  # display-only; the video is set programmatically on row clicks
value=None
)
# Use a plain gr.Dataframe instead of the Leaderboard component so row-click events and manual column toggling stay simple
libero_leaderboard = gr.Dataframe(
value=get_libero_leaderboard()[LIBERO_DISPLAY_COLUMNS],
headers=LIBERO_DISPLAY_COLUMNS,
interactive=False,
wrap=True,
datatype=["html", "str", "number", "number", "number", "str", "number", "number", "str", "html"],
elem_id="libero-leaderboard",
)
# Helper text
gr.Markdown(
"""
**πŸ’‘ Tips**:
- Use the search box to find specific models
- **Click on SmolVLA scores** (Spatial, Object, Goal, 90, Long) to see task-specific demo videos above
- **Click on model names** to go directly to HuggingFace repositories
- 🎬 **Videos available**: SmolVLA task demos | **Pi0 videos**: Coming soon!
""",
elem_classes="markdown-text"
)
# Function to get datatype for a column (currently unused; kept for reference)
def get_column_datatype(column_name):
"""Return the appropriate datatype for each column"""
if column_name in ["Model", "Paper"]:
return "html" # Contains HTML links
elif column_name in ["Spatial", "Object", "Goal", "Long", "Average"]:
return "number"
elif column_name == "90":
return "str" # Can contain "--"
else:
return "str" # Default for Model Size, Available, etc.
# Function to filter and update the table - using a simpler approach
def update_table(search_term, selected_columns):
df = get_libero_leaderboard()
# Filter by search term
if search_term:
mask = df['Model'].str.contains(search_term, case=False, na=False)
df = df[mask]
# Handle column filtering by replacing hidden columns with empty strings
# This keeps the datatype array stable while hiding unwanted data
result_df = df[LIBERO_DISPLAY_COLUMNS].copy()
# Always include mandatory columns + selected optional columns
MANDATORY_COLUMNS = ["Model", "Model Size", "Paper"]
SELECTABLE_COLUMNS = [col for col in LIBERO_DISPLAY_COLUMNS if col not in MANDATORY_COLUMNS]
# Hide unselected optional columns by replacing their content with empty strings
if selected_columns is not None:
for col in SELECTABLE_COLUMNS:
if col not in selected_columns:
result_df[col] = "" # Hide the column content but keep the structure
return result_df
# Function to handle row selection and display video
def show_video(evt: gr.SelectData):
try:
print(f"Leaderboard click event: {evt}")
if hasattr(evt, 'index') and evt.index is not None:
if isinstance(evt.index, (list, tuple)) and len(evt.index) >= 2:
row_idx = evt.index[0]
col_idx = evt.index[1]
else:
row_idx = evt.index
col_idx = 0
print(f"Selected row: {row_idx}, column: {col_idx}")
# Map column indices to task names (based on LIBERO_DISPLAY_COLUMNS)
# Model, Model Size, Spatial, Object, Goal, 90, Long, Average, Available, Paper
task_mapping = {
2: "Spatial", # Spatial column
3: "Object", # Object column
4: "Goal", # Goal column
5: "90", # 90 column
6: "Long" # Long column
}
# Only show video when clicking on score columns (columns 2-6 are the LIBERO scores)
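# NOTE: row_idx refers to the currently displayed table; with the two fixed rows this
# matches LIBERO_DATA, but a search filter that hides rows would shift the mapping.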
if col_idx in task_mapping and row_idx < len(LIBERO_DATA):
# Extract model name from HTML link
model_html = LIBERO_DATA[row_idx][0]
if "smolvla" in model_html.lower():
model_name = "SmolVLA"
elif "pi0" in model_html.lower():
model_name = "Pi0"
else:
model_name = "SmolVLA" # default
task_name = task_mapping[col_idx]
print(f"Model selected: {model_name}, Task: {task_name}")
video_path = get_video_by_model_and_task(model_name, task_name)
print(f"Video path returned: {video_path}")
if video_path:
return video_path
else:
# Return None to clear the video display and show a message in console
print(f"Videos coming soon for {model_name}!")
return None
print("Click on a score column (Spatial, Object, Goal, 90, Long) to see task-specific video")
return None
except Exception as e:
print(f"Error in show_video: {e}")
return None
# Connect the controls to table updates
search_box.change(update_table, inputs=[search_box, column_selector], outputs=libero_leaderboard)
column_selector.change(update_table, inputs=[search_box, column_selector], outputs=libero_leaderboard)
# Connect the leaderboard selection to video display
libero_leaderboard.select(show_video, outputs=video_display)
with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=1):
# About LIBERO
gr.Markdown(
"""
### πŸ“– About LIBERO
LIBERO is a **benchmark suite** for evaluating **Vision-Language-Action (VLA)** models across a variety of robotics tasks.
It provides a standardized setup so researchers and developers can compare models fairly.
### πŸ”— Key Resources
πŸ“„ **LIBERO Paper**: [LIBERO: Benchmarking Knowledge Transfer for Lifelong Robot Learning](https://arxiv.org/abs/2306.03310)
πŸ’» **Original LIBERO Repository**: [Lifelong-Robot-Learning/LIBERO](https://github.com/Lifelong-Robot-Learning/LIBERO)
### πŸ“Š Evaluation Metrics
- πŸ“Š Each task suite column shows the **success rate** for that specific suite (0.0 - 1.0)
- πŸ“ **Model Size**: Parameter count (e.g., 1B, 3B)
- πŸ“ˆ **Average**: Mean success rate across the task suites with reported scores (see the sketch below)
- βœ… **Available**: Whether a trained checkpoint is released or the score is paper-reported only
- πŸ“„ **Paper**: Link to the model's research paper
- πŸŽ₯ **Video**: Click a score cell in a model's row to see a task demo video, if available
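For reference, a minimal sketch of how the Average can be reproduced, assuming it is the mean over the suites with reported scores (skipping `--` entries; numbers taken from the Pi0 row):
```python
import pandas as pd

# Per-suite success rates from the Pi0 row; None stands for the "--" entry.
suite_scores = {"Spatial": 0.90, "Object": 0.86, "Goal": 0.95, "90": None, "Long": 0.73}
average = pd.Series(suite_scores, dtype="float64").dropna().mean()
print(round(average, 2))  # 0.86, matching the Average column
```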
"""
)
# LIBERO Task Suites Description
gr.Markdown(
"""
### πŸ“‹ LIBERO Task Suites
LIBERO includes five task suites, each with different focuses:
- 🧭 **LIBERO-Spatial** (`libero_spatial`) – tasks that require reasoning about spatial relations
- 🎯 **LIBERO-Object** (`libero_object`) – tasks centered on manipulating different objects
- 🏁 **LIBERO-Goal** (`libero_goal`) – goal-conditioned tasks where the robot must adapt to changing targets
- ⚑ **LIBERO-90** (`libero_90`) – 90 short-horizon tasks from the LIBERO-100 collection
- πŸ”„ **LIBERO-Long** (`libero_10`) – 10 long-horizon tasks from the LIBERO-100 collection
"""
)
with gr.TabItem("πŸš€ How To Contribute! ", elem_id="llm-benchmark-tab-table", id=2):
# How to Contribute Section
gr.Markdown(
"""
# πŸš€ How to Contribute to LIBERO Leaderboard
To add your model to the LIBERO leaderboard, we suggest checking the docs for using LIBERO with [LeRobot](https://huggingface.co/docs/lerobot/libero).
As a quick overview, here are the steps:
**1. Train** on the LIBERO dataset:
πŸ‘‰ [HuggingFaceVLA/libero](https://huggingface.co/datasets/HuggingFaceVLA/libero) *(LeRobot-compatible preprocessed dataset)*
πŸ“ *Official dataset: [physical-intelligence/libero](https://huggingface.co/datasets/physical-intelligence/libero)*
**2. Evaluate** using `lerobot` with the following script:
```bash
#!/bin/bash
# Storage / caches
RAID=/raid/jade
export TRANSFORMERS_CACHE=$RAID/.cache/huggingface/transformers
export HF_HOME=$RAID/.cache/huggingface
export HF_DATASETS_CACHE=$RAID/.cache/huggingface/datasets
export HF_LEROBOT_HOME=$RAID/.cache/huggingface/lerobot
export WANDB_CACHE_DIR=$RAID/.cache/wandb
export TMPDIR=$RAID/.cache/tmp
mkdir -p $TMPDIR
export WANDB_MODE=offline
export TOKENIZERS_PARALLELISM=false
export MUJOCO_GL=egl
export CUDA_VISIBLE_DEVICES=2
# Configuration
POLICY_PATH="/raid/jade/models/smolvla_pipe"
TASK=libero_spatial
ENV_TYPE="libero"
BATCH_SIZE=1
N_EPISODES=1
N_ACTION_STEPS=10
# Run evaluation
python src/lerobot/scripts/eval.py \\
--policy.path="$POLICY_PATH" \\
--env.type="$ENV_TYPE" \\
--eval.batch_size="$BATCH_SIZE" \\
--eval.n_episodes="$N_EPISODES" \\
--env.task=$TASK \\
--env.max_parallel_tasks=10 \\
--policy.n_action_steps=$N_ACTION_STEPS
```
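The script above evaluates a single task suite; the leaderboard's **Average** covers all five. Below is a minimal sketch of a sweep (a hypothetical wrapper reusing the flags from the script above; `POLICY_PATH` is a placeholder):
```python
# Sketch: run the same eval command once per LIBERO suite.
# POLICY_PATH is a placeholder; carry over the remaining flags from the script above.
import subprocess

SUITES = ["libero_spatial", "libero_object", "libero_goal", "libero_90", "libero_10"]
for task in SUITES:
    subprocess.run(
        [
            "python", "src/lerobot/scripts/eval.py",
            "--policy.path=POLICY_PATH",
            "--env.type=libero",
            f"--env.task={task}",
            "--env.max_parallel_tasks=10",
        ],
        check=True,  # abort the sweep if a suite fails
    )
```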
**3. Submit your results** by opening a GitHub issue.
We'll add your model + video to the leaderboard!
### πŸ“‹ Dataset Information
When training on LIBERO tasks, make sure your dataset parquet and metadata keys follow the LeRobot convention.
The environment expects the following keys (a sanity-check sketch follows the list):
- `observation.state` β†’ 8-dim agent state
- `observation.images.image` β†’ main camera (agentview_image)
- `observation.images.image2` β†’ wrist camera (robot0_eye_in_hand_image)
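A minimal sketch for that sanity check (assuming the `LeRobotDataset` API; the exact import path may differ across `lerobot` versions):
```python
# Sketch: verify the expected observation keys in the preprocessed dataset.
# Assumes the LeRobotDataset API; adjust the import path to your lerobot version.
from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

ds = LeRobotDataset("HuggingFaceVLA/libero")
sample = ds[0]
for key in ("observation.state", "observation.images.image", "observation.images.image2"):
    assert key in sample, f"missing key: {key}"
print(sample["observation.state"].shape)  # expected: 8-dim agent state
```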
⚠️ **Important**: Cleaning the dataset upfront is more efficient than remapping keys inside the code. To avoid potential mismatches and key errors, we provide a preprocessed LIBERO dataset that is fully compatible with the current LeRobot codebase and requires no additional manipulation.
**Installation** (after following [LeRobot installation](https://huggingface.co/docs/lerobot/en/installation)):
```bash
pip install -e ".[libero]"
export MUJOCO_GL=egl # for headless servers (HPC, cloud)
```
---
""",
elem_classes="markdown-text"
)
'''
with gr.Row():
with gr.Accordion("πŸ“™ Citation", open=False):
citation_button = gr.Textbox(
value=CITATION_BUTTON_TEXT,
label=CITATION_BUTTON_LABEL,
lines=20,
elem_id="citation-button",
show_copy_button=True,
)
'''
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
demo.queue(default_concurrency_limit=40).launch()