# Hugging Face Spaces status header (page-extraction artifact): Spaces: Running
| from pathlib import Path | |
| import gradio as gr | |
| import pandas as pd | |
| from gradio_leaderboard import Leaderboard | |
# Page-level stylesheet injected into the Gradio app: constrains the logo,
# bumps the base text size, enlarges tab labels, and tightens heading spacing.
custom_css = """
.logo {
width: 300px;
height: auto;
max-width: 100%;
margin: 0 auto;
object-fit: contain;
padding-bottom: 0;
}
.text {
font-size: 16px !important;
}
.tabs button {
font-size: 20px;
}
.subtabs button {
font-size: 20px;
}
h1, h2 {
margin: 0;
padding-top: 0;
}
"""
# Work around an upstream gradio_leaderboard bug: its config validation
# raises spuriously, so the check is disabled wholesale.
Leaderboard.raise_error_if_incorrect_config = lambda self: None

# Benchmark rows live as JSON Lines next to this script in ./data.
abs_path = Path(__file__).parent / "data"
df = pd.read_json(abs_path / "text_to_image.jsonl", lines=True)

# Replace each raw provider URL with a compact, theme-aware HTML anchor
# so the leaderboard cell renders as a clickable "link".
df["URL"] = df["URL"].map(
    lambda url: f'<a target="_blank" href="{url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'
)
# Pin the identifying/text columns (plus the two headline metrics) to the
# front of the table; every remaining metric column follows in its original
# order. The original code hard-coded the same eight column names twice —
# once as the keep-list and once (in a different order) as the exclusion
# list — which was an easy way for the two to drift apart; a single list is
# used for both roles instead.
_leading_cols = [
    "URL",
    "Platform",
    "Owner",
    "Device",
    "Model",
    "Optimization",
    "Median Inference Time",
    "Price per Image",
]
df = df[_leading_cols + [col for col in df.columns if col not in _leading_cols]]

# Rank providers by image quality (GenEval score), best first.
# NOTE(review): assumes a "GenEval" column exists in the JSONL — confirm.
df = df.sort_values(by="GenEval", ascending=False)
# The first six columns (URL … Optimization) are text and rendered as
# markdown so the HTML link displays correctly; every remaining column is a
# numeric metric.
N_TEXT_COLS = 6

with gr.Blocks("ParityError/Interstellar", fill_width=True, css=custom_css) as demo:
    # Page header: logo, title and tagline.
    gr.HTML(
        """
<div style="text-align: center;">
<img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/inferbench/logo2-cropped.png" style="width: 200px; height: auto; max-width: 100%; margin: 0 auto;">
<h1>🏋️ InferBench 🏋️</h1>
<h2>A cost/quality/speed Leaderboard for Inference Providers!</h2>
</div>
"""
    )
    with gr.Tabs():
        with gr.TabItem("Text-to-Image Leaderboard"):
            # The sortable/filterable results table itself.
            Leaderboard(
                value=df,
                select_columns=df.columns.tolist(),
                datatype=["markdown"] * N_TEXT_COLS
                + ["number"] * (len(df.columns) - N_TEXT_COLS),
                filter_columns=[
                    "Platform",
                    "Owner",
                    "Device",
                    "Model",
                    "Optimization",
                ],
            )
            gr.Markdown(
                """
> **💡 Note:** Each efficiency metric and quality metric captures only one dimension of model capacity. Rankings may vary when considering other metrics.
"""
            )
        with gr.TabItem("About"):
            # Two-column About page: benchmark methodology | FLUX-juiced pitch.
            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        """
# 📊 Text-to-Image Leaderboard
This leaderboard compares the performance of different text-to-image providers.
We started with a comprehensive benchmark comparing our very own FLUX-juiced with the “FLUX.1 [dev]” endpoints offered by:
- Replicate: https://replicate.com/black-forest-labs/flux-dev
- Fal: https://fal.ai/models/fal-ai/flux/dev
- Fireworks AI: https://fireworks.ai/models/fireworks/flux-1-dev-fp8
- Together AI: https://www.together.ai/models/flux-1-dev
We also included the following non-FLUX providers:
- AWS Nova Canvas: https://aws.amazon.com/ai/generative-ai/nova/creative/
All of these inference providers offer implementations but they don’t always communicate about the optimisation methods used in the background, and most endpoint have different response times and performance measures.
For comparison purposes we used the same generation set-up for all the providers.
- 28 inference steps
- 1024×1024 resolution
- Guidance scale of 3.5
- H100 GPU (80GB)—only reported by Replicate
Although we did test with this specific Pruna configuration and hardware, the applied compression methods work with different config and hardware too!
> We published a full blog post on [the creation of our FLUX-juiced endpoint](https://www.pruna.ai/blog/flux-juiced-the-fastest-image-generation-endpoint).
"""
                    )
                with gr.Column():
                    gr.Markdown(
                        """
# 🧃 FLUX.1-dev (juiced)
FLUX.1-dev (juiced) is our optimized version of FLUX.1-dev, delivering up to **2.6x faster inference** than the official Replicate API, **without sacrificing image quality**.
Under the hood, it uses a custom combination of:
- **Graph compilation** for optimized execution paths
- **Inference-time caching** for repeated operations
We won’t go deep into the internals here, but here’s the gist:
> We combine compiler-level execution graph optimization with selective caching of heavy operations (like attention layers), allowing inference to skip redundant computations without any loss in fidelity.
These techniques are generalized and plug-and-play via the **Pruna Pro** pipeline, and can be applied to nearly any diffusion-based image model—not just FLUX. For a free but still very juicy model you can use our open source solution.
> 🧪 Try FLUX-juiced now → [replicate.com/prunaai/flux.1-juiced](https://replicate.com/prunaai/flux.1-juiced)
## Sample Images
The prompts were randomly sampled from the [parti-prompts dataset](https://github.com/google-research/parti). The reported times represent the full duration of each API call.
> **For samples, check out the [Pruna Notion page](https://pruna.notion.site/FLUX-1-dev-vs-Pruna-s-FLUX-juiced-1d270a039e5f80c6a2a3c00fc0d75ef0)**
"""
                    )
    # Footer: social links + citation.
    with gr.Accordion("🌍 Join the Pruna AI community!", open=False):
        gr.HTML(
            """
<a rel="nofollow" href="https://twitter.com/PrunaAI"><img alt="Twitter" src="https://img.shields.io/twitter/follow/PrunaAI?style=social"></a>
<a rel="nofollow" href="https://github.com/PrunaAI/pruna"><img alt="GitHub" src="https://img.shields.io/github/stars/prunaai/pruna"></a>
<a rel="nofollow" href="https://www.linkedin.com/company/93832878/admin/feed/posts/?feedType=following"><img alt="LinkedIn" src="https://img.shields.io/badge/LinkedIn-Connect-blue"></a>
<a rel="nofollow" href="https://discord.com/invite/rskEr4BZJx"><img alt="Discord" src="https://img.shields.io/badge/Discord-Join%20Us-blue?style=social&logo=discord"></a>
<a rel="nofollow" href="https://www.reddit.com/r/PrunaAI/"><img alt="Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/PrunaAI?style=social"></a>
"""
        )
    with gr.Accordion("Citation", open=True):
        gr.Markdown(
            """
```bibtex
@article{InferBench,
title={InferBench: A Leaderboard for Inference Providers},
author={PrunaAI},
year={2025},
howpublished={\\url{https://huggingface.co/spaces/PrunaAI/InferBench}}
}
```
"""
        )
# Script entry point: serve the app.
# NOTE(review): ssr_mode=False disables server-side rendering — presumably to
# sidestep SSR issues on Spaces; confirm before changing.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)