# Tools/Modules/Generate_Image.py
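"""Generate_Image tool: text-to-image via Hugging Face serverless inference.

Defines the LLM-facing Generate_Image function and a Gradio interface builder
used by the hosting app. The description exposed to the model is kept in
TOOL_SUMMARY below.
"""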
from __future__ import annotations
import os
import random
from typing import Annotated
import gradio as gr
from PIL import Image
from huggingface_hub import InferenceClient
from app import _log_call_end, _log_call_start, _truncate_for_log
from ._docstrings import autodoc
HF_API_TOKEN = os.getenv("HF_READ_TOKEN")
# Single source of truth for the LLM-facing tool description
TOOL_SUMMARY = (
"Generate an image from a text prompt via Hugging Face serverless inference; "
"tunable model/steps/guidance/size, supports negative prompt and seed; returns a PIL.Image. "
"Return the generated media to the user in this format `![Alt text](URL)`."
)
@autodoc(
summary=TOOL_SUMMARY,
)
def Generate_Image(
prompt: Annotated[str, "Text description of the image to generate."],
model_id: Annotated[str, "Hugging Face model id in the form 'creator/model-name' (e.g., black-forest-labs/FLUX.1-Krea-dev)."] = "black-forest-labs/FLUX.1-Krea-dev",
negative_prompt: Annotated[str, "What should NOT appear in the image."] = (
"(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
"missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
"mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
),
steps: Annotated[int, "Number of denoising steps (1–100). Higher = slower, potentially higher quality."] = 35,
cfg_scale: Annotated[float, "Classifier-free guidance scale (1–20). Higher = follow the prompt more closely."] = 7.0,
sampler: Annotated[str, "Sampling method label (UI only). Common options: 'DPM++ 2M Karras', 'DPM++ SDE Karras', 'Euler', 'Euler a', 'Heun', 'DDIM'."] = "DPM++ 2M Karras",
seed: Annotated[int, "Random seed for reproducibility. Use -1 for a random seed per call."] = -1,
width: Annotated[int, "Output width in pixels (64–1216, multiple of 32 recommended)."] = 1024,
height: Annotated[int, "Output height in pixels (64–1216, multiple of 32 recommended)."] = 1024,
) -> Image.Image:
_log_call_start(
"Generate_Image",
prompt=_truncate_for_log(prompt, 200),
model_id=model_id,
steps=steps,
cfg_scale=cfg_scale,
seed=seed,
size=f"{width}x{height}",
)
if not prompt or not prompt.strip():
_log_call_end("Generate_Image", "error=empty prompt")
raise gr.Error("Please provide a non-empty prompt.")
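    # Lightly augment the prompt with generic quality boosters before submission.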
enhanced_prompt = f"{prompt} | ultra detail, ultra elaboration, ultra quality, perfect."
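    # Try providers in order, falling back to the next one when a call fails.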
providers = ["auto", "replicate", "fal-ai"]
last_error: Exception | None = None
for provider in providers:
try:
client = InferenceClient(api_key=HF_API_TOKEN, provider=provider)
image = client.text_to_image(
prompt=enhanced_prompt,
negative_prompt=negative_prompt,
model=model_id,
width=width,
height=height,
num_inference_steps=steps,
guidance_scale=cfg_scale,
seed=seed if seed != -1 else random.randint(1, 1_000_000_000),
)
_log_call_end("Generate_Image", f"provider={provider} size={image.size}")
return image
except Exception as exc: # pylint: disable=broad-except
last_error = exc
continue
    # All providers failed: log the last error once, then surface a friendly message.
    msg = str(last_error) if last_error else "Unknown error"
    lowered = msg.lower()
    _log_call_end("Generate_Image", f"error={_truncate_for_log(msg, 200)}")
    if "404" in msg:
        raise gr.Error(f"Model not found or unavailable: {model_id}. Check the id and your HF token access.")
    if "503" in msg:
        raise gr.Error("The model is warming up. Please try again shortly.")
    if (
        "401" in msg
        or "403" in msg
        or "api_key" in lowered
        or "hf auth login" in lowered
        or "unauthorized" in lowered
        or "forbidden" in lowered
    ):
        raise gr.Error("Please duplicate the space and provide a `HF_READ_TOKEN` to enable Image and Video Generation.")
    raise gr.Error(f"Image generation failed: {msg}")
def build_interface() -> gr.Interface:
return gr.Interface(
fn=Generate_Image,
inputs=[
gr.Textbox(label="Prompt", placeholder="Enter a prompt", lines=2),
gr.Textbox(
label="Model",
value="black-forest-labs/FLUX.1-Krea-dev",
placeholder="creator/model-name",
max_lines=1,
info="<a href=\"https://huggingface.co/models?pipeline_tag=text-to-image&inference_provider=nebius,cerebras,novita,fireworks-ai,together,fal-ai,groq,featherless-ai,nscale,hyperbolic,sambanova,cohere,replicate,scaleway,publicai,hf-inference&sort=trending\" target=\"_blank\" rel=\"noopener noreferrer\">Browse models</a>",
),
gr.Textbox(
label="Negative Prompt",
value=(
"(deformed, distorted, disfigured), poorly drawn, bad anatomy, wrong anatomy, extra limb, "
"missing limb, floating limbs, (mutated hands and fingers), disconnected limbs, mutation, "
"mutated, ugly, disgusting, blurry, amputation, misspellings, typos"
),
lines=2,
),
gr.Slider(minimum=1, maximum=100, value=35, step=1, label="Steps"),
gr.Slider(minimum=1.0, maximum=20.0, value=7.0, step=0.1, label="CFG Scale"),
gr.Radio(
label="Sampler",
value="DPM++ 2M Karras",
choices=["DPM++ 2M Karras", "DPM++ SDE Karras", "Euler", "Euler a", "Heun", "DDIM"],
),
gr.Slider(minimum=-1, maximum=1_000_000_000, value=-1, step=1, label="Seed (-1 = random)"),
gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Width"),
gr.Slider(minimum=64, maximum=1216, value=1024, step=32, label="Height"),
],
outputs=gr.Image(label="Generated Image"),
title="Generate Image",
description=(
"<div style=\"text-align:center\">Generate images via Hugging Face serverless inference. "
"Default model is FLUX.1-Krea-dev.</div>"
),
api_description=TOOL_SUMMARY,
flagging_mode="never",
        show_api=bool(HF_API_TOKEN),
)
__all__ = ["Generate_Image", "build_interface"]
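
# Minimal local run sketch (assumption: executed from the repo root so the
# `from app import ...` above resolves, with HF_READ_TOKEN set in the
# environment); the hosting app normally mounts build_interface() itself.
if __name__ == "__main__":
    build_interface().launch()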