# Source: Hugging Face Space "Nymbo/Tools" (status: Running). File size: 6,553 bytes.

from __future__ import annotations

import os
import random
from typing import Annotated

import gradio as gr
from PIL import Image
from huggingface_hub import InferenceClient

from app import _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc

# Hugging Face access token, read once at import time from the HF_READ_TOKEN
# environment variable. os.getenv returns None when the variable is unset.
HF_API_TOKEN = os.getenv("HF_READ_TOKEN")

# Single source of truth for the LLM-facing tool description.
# Used both as the @autodoc summary on Generate_Image and as the
# api_description of the Gradio interface, so the two stay in sync.
TOOL_SUMMARY = (
    "Generate an image from a text prompt via Hugging Face serverless inference; "
    "tunable model/steps/guidance/size, supports negative prompt and seed; returns a PIL.Image. "
    "Return the generated media to the user in this format `![Alt text](URL)`."
)


@autodoc(
    summary=TOOL_SUMMARY,
)
def Generate_Image(
    prompt: Annotated[str, "Text description of the image to generate."],
    model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name' (e.g., black-forest-labs/FLUX.1-Krea-dev)."] = "black-forest-labs/FLUX.1-Krea-dev",
    negative_prompt: Annotated[str, "What should NOT appear in the image."] = (
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    ),
    steps: Annotated[int, "Number of denoising steps (1–100). Higher = slower, potentially higher quality."] = 35,
    cfg_scale: Annotated[float, "Classifier-free guidance scale (1–20). Higher = follow the prompt more closely."] = 7.0,
    sampler: Annotated[str, "Sampling method label (UI only). Common options: 'DPM++ 2M Karras', 'DPM++ SDE Karras', 'Euler', 'Euler a', 'Heun', 'DDIM'."] = "DPM++ 2M Karras",
    seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
    width: Annotated[int, "Output width in pixels (64–1216, multiple of 32 recommended)."] = 1024,
    height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
) -> Image.Image:
    # Generate one image for `prompt` with `model_id` through the Hugging Face
    # InferenceClient, trying the providers "auto", "replicate" and "fal-ai" in
    # that order and returning the first successful result.
    #
    # Returns: the image object produced by `client.text_to_image`.
    # Raises:  gr.Error when the prompt is empty/blank, or when every provider
    #          fails (the message is derived from the last provider error, which
    #          is attached as the exception cause).
    #
    # `sampler` is accepted for the UI but is not forwarded to the inference call.
    #
    # NOTE(review): documented with comments instead of a docstring because
    # @autodoc is handed the summary and presumably manages __doc__ itself;
    # confirm before adding a docstring here.
    _log_call_start(
        "Generate_Image",
        prompt=_truncate_for_log(prompt, 200),
        model_id=model_id,
        steps=steps,
        cfg_scale=cfg_scale,
        seed=seed,
        size=f"{width}x{height}",
    )
    if not prompt or not prompt.strip():
        _log_call_end("Generate_Image", "error=empty prompt")
        raise gr.Error("Please provide a non-empty prompt.")

    # A fixed quality suffix is appended to every user prompt.
    enhanced_prompt = f"{prompt} | ultra detail, ultra elaboration, ultra quality, perfect."

    # Resolve the seed once so that every provider attempt in this call asks for
    # the same generation, instead of drawing a new random seed per attempt.
    resolved_seed = seed if seed != -1 else random.randint(1, 1_000_000_000)

    last_error: Exception | None = None
    for provider in ("auto", "replicate", "fal-ai"):
        try:
            client = InferenceClient(api_key=HF_API_TOKEN, provider=provider)
            image = client.text_to_image(
                prompt=enhanced_prompt,
                negative_prompt=negative_prompt,
                model=model_id,
                width=width,
                height=height,
                num_inference_steps=steps,
                guidance_scale=cfg_scale,
                seed=resolved_seed,
            )
            _log_call_end("Generate_Image", f"provider={provider} size={image.size}")
            return image
        except Exception as exc:  # pylint: disable=broad-except
            # Best-effort fallback: remember the failure and try the next provider.
            last_error = exc

    # Every provider failed. Close the call log on *all* failure paths (not only
    # the generic one) before translating the error into a user-facing message.
    msg = str(last_error) if last_error else "Unknown error"
    lowered = msg.lower()
    _log_call_end("Generate_Image", f"error={_truncate_for_log(msg, 200)}")

    if "404" in msg:
        raise gr.Error(
            f"Model not found or unavailable: {model_id}. Check the id and your HF token access."
        ) from last_error
    if "503" in msg:
        raise gr.Error("The model is warming up. Please try again shortly.") from last_error

    # HTTP auth status codes and textual auth hints share the same guidance.
    auth_hints = ("api_key", "hf auth login", "unauthorized", "forbidden")
    if "401" in msg or "403" in msg or any(hint in lowered for hint in auth_hints):
        raise gr.Error(
            "Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation."
        ) from last_error

    raise gr.Error(f"Image generation failed: {msg}") from last_error


def build_interface() -> gr.Interface:
    """Create the Gradio ``Interface`` that exposes ``Generate_Image``.

    The input components are listed in the same order as the parameters of
    ``Generate_Image`` (prompt, model, negative prompt, steps, CFG scale,
    sampler, seed, width, height). The API page is shown only when the
    ``HF_READ_TOKEN`` environment variable is set to a non-empty value.
    """
    # Link shown under the model textbox.
    browse_models_link = (
        '<a href="https://huggingface.co/models?pipeline_tag=text-to-image&inference_provider='
        'nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,'
        'sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending" '
        'target="_blank" rel="noopener noreferrer">Browse models</a>'
    )
    default_negative_prompt = (
        "(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
        "missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
        "mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
    )
    sampler_choices = ["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"]
    # Width and height sliders share the same range, default and step.
    dimension_kwargs = {"minimum": 64, "maximum": 1216, "value": 1024, "step": 32}

    # Components are created in the order they are passed to the interface.
    prompt_box = gr.Textbox(label="Prompt", placeholder="Enter a prompt", lines=2)
    model_box = gr.Textbox(
        label="Model",
        value="black-forest-labs/FLUX.1-Krea-dev",
        placeholder="creator/model-name",
        max_lines=1,
        info=browse_models_link,
    )
    negative_box = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=2)
    steps_slider = gr.Slider(minimum=1, maximum=100, value=35, step=1, label="Steps")
    cfg_slider = gr.Slider(minimum=1.0, maximum=20.0, value=7.0, step=0.1, label="CFG Scale")
    sampler_radio = gr.Radio(label="Sampler", value="DPM++ 2M Karras", choices=sampler_choices)
    seed_slider = gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)")
    width_slider = gr.Slider(label="Width", **dimension_kwargs)
    height_slider = gr.Slider(label="Height", **dimension_kwargs)

    input_components = [
        prompt_box,
        model_box,
        negative_box,
        steps_slider,
        cfg_slider,
        sampler_radio,
        seed_slider,
        width_slider,
        height_slider,
    ]
    result_image = gr.Image(label="Generated Image")
    page_description = (
        '<div style="text-align:center">Generate images via Hugging Face serverless inference. '
        "Default model is FLUX.1-Krea-dev.</div>"
    )
    api_enabled = bool(os.getenv("HF_READ_TOKEN"))

    return gr.Interface(
        fn=Generate_Image,
        inputs=input_components,
        outputs=result_image,
        title="Generate Image",
        description=page_description,
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        show_api=api_enabled,
    )


# Public API of this module: the tool function and the factory for its Gradio UI.
__all__ = ["Generate_Image", "build_interface"]