ruslanmv committed
Commit 4e09337 · 1 Parent(s): 71538ab

First commit

Files changed (4):
  1. .vscode/settings.json +1 -1
  2. README.md +38 -2
  3. app.py +110 -212
  4. requirements.txt +11 -0
.vscode/settings.json CHANGED
@@ -10,7 +10,7 @@
   "[python]": {
     "editor.formatOnType": true,
     "editor.codeActionsOnSave": {
-      "source.organizeImports": true
+      "source.organizeImports": "explicit"
     }
   },
   "editor.formatOnSave": true,
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🌍
 colorFrom: gray
 colorTo: blue
 sdk: gradio
-sdk_version: 4.37.1
+sdk_version: 3.39.0
 app_file: app.py
 license: mit
 pinned: false
@@ -13,4 +13,40 @@ duplicated_from: hysts/SD-XL
 load_balancing_strategy: random
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# AI Fast Image Server
+
+A lightweight Gradio app that serves fast **text-to-image** generation using either:
+
+- **SDXL Base 1.0 + LCM** (default), or
+- **SSD-1B + LCM LoRA** (enable via a flag in `app.py`)
+
+The app targets **very few inference steps** (e.g., 4) for speed while keeping good image quality. It falls back to **CPU** automatically if CUDA isn’t available.
+
+---
+
+## Features
+
+- ⚡ **Fast sampling** with **LCM** schedulers
+- 🔁 **Deterministic** results via seed
+- 🖥️ **Auto GPU/CPU** selection (no brittle `nvidia-smi` checks)
+- 🔐 Optional **secret token** gate to prevent abuse
+- 🧩 Switch between **SDXL** and **SSD-1B+LCM LoRA** with a flag
+
+---
+
+## Requirements
+
+Dependencies are pinned for compatibility (notably `diffusers==0.23.0` + `huggingface_hub==0.14.1`):
+
+```txt
+accelerate==0.24.1
+diffusers==0.23.0
+gradio==3.39.0
+huggingface_hub==0.14.1
+invisible-watermark==0.2.0
+Pillow==10.1.0
+torch==2.1.0
+transformers==4.35.0
+safetensors==0.4.0
+numpy>=1.23
+ipython
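
For the secret-token gate and programmatic use described in the new README, below is a minimal client sketch. It assumes the Space is deployed with `run_api = True` in `app.py` (so the `run` endpoint is exposed); the Space id and token value are placeholders.

```python
# Hypothetical client sketch: call the Space's /run endpoint via gradio_client.
# "your-username/ai-fast-image-server" is a placeholder Space id; the last
# positional argument must match the SECRET_TOKEN env var set on the server.
from gradio_client import Client

client = Client("your-username/ai-fast-image-server")  # placeholder
image_path = client.predict(
    "A watercolor landscape, mountains at dawn",  # prompt
    "",     # negative_prompt
    0,      # seed
    1024,   # width
    1024,   # height
    0.0,    # guidance_scale
    4,      # num_inference_steps
    "default_secret",  # secret_token
    api_name="/run",
)
print(image_path)  # local path of the downloaded result image
```

The positional arguments follow the `inputs` list wired to `prompt.submit(..., api_name="run")` in `app.py`.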
app.py CHANGED
@@ -1,122 +1,106 @@
+# ---- Flags ----
 run_api = False
 SSD_1B = False
-import os
-
-# Use GPU
-gpu_info = os.popen("nvidia-smi").read()
-if "failed" in gpu_info:
-    print("Not connected to a GPU")
-    is_gpu = False
-else:
-    print(gpu_info)
-    is_gpu = True
-print(is_gpu)
-
 
+# ---- Standard imports ----
+import os
+import subprocess
+import numpy as np
 from IPython.display import clear_output
 
-
-def check_enviroment():
+# ---- Minimal, deterministic env bootstrap (optional) ----
+# Prefer pinning in requirements.txt instead of installing here.
+def check_environment():
     try:
-        import torch
-
-        print("Enviroment is already installed.")
+        import torch  # noqa: F401
+        print("Environment is already installed.")
     except ImportError:
-        print("Enviroment not found. Installing...")
-        # Install requirements from requirements.txt
-        os.system("pip install -r requirements.txt")
-        # Install gradio version 3.48.0
-        os.system("pip install gradio==3.39.0")
-        # Install python-dotenv
-        os.system("pip install python-dotenv")
-        # Clear the output
+        print("Environment not found. Installing pinned dependencies...")
+        # Strongly prefer doing this via requirements.txt at build time.
+        os.system("pip install --upgrade pip")
+        os.system("pip install diffusers==0.30.0 transformers>=4.41.0 accelerate>=0.31.0 huggingface_hub>=0.23.4 safetensors>=0.4.2 gradio==4.37.1 python-dotenv")
         clear_output()
+        print("Environment installed successfully.")
 
-        print("Enviroment installed successfully.")
-
-
-# Call the function to check and install Packages if necessary
-check_enviroment()
-
+check_environment()
 
-from IPython.display import clear_output
-import os
-import gradio as gr
-import numpy as np
-import PIL
-import base64
-import io
+# ---- App imports (safe after environment check) ----
 import torch
+import gradio as gr
+from PIL import Image
 from diffusers import UNet2DConditionModel, DiffusionPipeline, LCMScheduler
 
-# SDXL
-from diffusers import UNet2DConditionModel, DiffusionPipeline, LCMScheduler
+# Optional: only imported if SSD_1B=True
+# from diffusers import AutoPipelineForText2Image
 
-# Get the current directory
+# ---- Config / constants ----
 current_dir = os.getcwd()
-model_path = os.path.join(current_dir)
-# Set the cache path
 cache_path = os.path.join(current_dir, "cache")
+os.makedirs(cache_path, exist_ok=True)
+
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
 SECRET_TOKEN = os.getenv("SECRET_TOKEN", "default_secret")
 
-# Uncomment the following line if you are using PyTorch 1.10 or later
-# os.environ["TORCH_USE_CUDA_DSA"] = "1"
-
-if is_gpu:
-    # Uncomment the following line if you want to enable CUDA launch blocking
-    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
-else:
-    # Uncomment the following line if you want to use CPU instead of GPU
-    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-
-
-# Get the current directory
-current_dir = os.getcwd()
-model_path = os.path.join(current_dir)
-
-# Set the cache path
-cache_path = os.path.join(current_dir, "cache")
-
+# ---- GPU / NVML detection (robust) ----
+def print_nvidia_smi():
+    try:
+        proc = subprocess.run(["nvidia-smi"], capture_output=True, text=True)
+        if proc.returncode == 0:
+            print(proc.stdout)
+        else:
+            # Show the stderr to aid debugging, but don't trust it for logic
+            print(proc.stderr or "nvidia-smi returned a non-zero exit code.")
+    except FileNotFoundError:
+        print("nvidia-smi not found on PATH.")
+
+print_nvidia_smi()
+
+is_gpu = torch.cuda.is_available()
+print(f"CUDA available: {is_gpu}")
+
+# dtype & device
+dtype = torch.float16 if is_gpu else torch.float32
+device = torch.device("cuda") if is_gpu else torch.device("cpu")
+
+# Optional: fewer surprises when CUDA is flaky
+if not is_gpu:
+    # Avoid cuda-related env flags when no GPU
+    os.environ.pop("CUDA_LAUNCH_BLOCKING", None)
+
+# ---- Pipeline setup ----
 if not SSD_1B:
-
+    # SDXL base + LCM UNet
     unet = UNet2DConditionModel.from_pretrained(
         "latent-consistency/lcm-sdxl",
-        torch_dtype=torch.float16,
-        variant="fp16",
+        torch_dtype=dtype,
+        variant="fp16" if is_gpu else None,
         cache_dir=cache_path,
     )
     pipe = DiffusionPipeline.from_pretrained(
         "stabilityai/stable-diffusion-xl-base-1.0",
         unet=unet,
-        torch_dtype=torch.float16,
-        variant="fp16",
+        torch_dtype=dtype,
+        variant="fp16" if is_gpu else None,
        cache_dir=cache_path,
    )
-
     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-    if torch.cuda.is_available():
-        pipe.to("cuda")
+    pipe.to(device)
 else:
-    # SSD-1B
-    from diffusers import LCMScheduler, AutoPipelineForText2Image
-
+    # SSD-1B + LCM LoRA
+    from diffusers import AutoPipelineForText2Image  # local import
     pipe = AutoPipelineForText2Image.from_pretrained(
         "segmind/SSD-1B",
-        torch_dtype=torch.float16,
-        variant="fp16",
+        torch_dtype=dtype,
+        variant="fp16" if is_gpu else None,
         cache_dir=cache_path,
     )
     pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-    if torch.cuda.is_available():
-        pipe.to("cuda")
-
-    # load and fuse
+    pipe.to(device)
     pipe.load_lora_weights("latent-consistency/lcm-lora-ssd-1b")
     pipe.fuse_lora()
 
-
+# ---- Core generate function ----
 def generate(
     prompt: str,
     negative_prompt: str = "",
@@ -126,15 +110,18 @@ def generate(
     guidance_scale: float = 0.0,
     num_inference_steps: int = 4,
     secret_token: str = "",
-) -> PIL.Image.Image:
+) -> Image.Image:
     if secret_token != SECRET_TOKEN:
-        raise gr.Error(
-            f"Invalid secret token. Please fork the original space if you want to use it for yourself."
-        )
+        raise gr.Error("Invalid secret token. Set SECRET_TOKEN on the server or pass the correct token.")
+    # Make sure sizes are sane on CPU
+    width = int(np.clip(width, 256, MAX_IMAGE_SIZE))
+    height = int(np.clip(height, 256, MAX_IMAGE_SIZE))
 
-    generator = torch.Generator().manual_seed(seed)
+    generator = torch.Generator(device=device)
+    if seed is not None:
+        generator = generator.manual_seed(int(seed))
 
-    image = pipe(
+    out = pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
         width=width,
@@ -143,18 +130,14 @@
         num_inference_steps=num_inference_steps,
         generator=generator,
         output_type="pil",
-    ).images[0]
-    return image
-
+    )
+    return out.images[0]
 
 clear_output()
 
-from IPython.display import display
-
-
-def generate_image(prompt="A beautiful and sexy girl"):
-    # Generate the image using the prompt
-    generated_image = generate(
+# ---- Optional notebook helper ----
+def generate_image(prompt="A scenic watercolor landscape, mountains at dawn"):
+    img = generate(
         prompt=prompt,
         negative_prompt="",
         seed=0,
@@ -162,145 +145,60 @@ def generate_image(prompt="A beautiful and sexy girl"):
         height=1024,
         guidance_scale=0.0,
         num_inference_steps=4,
-        secret_token="default_secret",  # Replace with your secret token
+        secret_token=SECRET_TOKEN,
     )
-    # Display the image in the Jupyter Notebook
-    display(generated_image)
-
+    from IPython.display import display
+    display(img)
 
+# ---- UI ----
 if not run_api:
-    secret_token = gr.Text(
+    secret_token = gr.Textbox(
         label="Secret Token",
-        max_lines=1,
         placeholder="Enter your secret token",
+        type="password",
     )
-    prompt = gr.Text(
+    prompt = gr.Textbox(
         label="Prompt",
-        show_label=False,
-        max_lines=1,
+        show_label=True,
+        max_lines=2,
         placeholder="Enter your prompt",
-        container=False,
     )
-    result = gr.Image(label="Result", show_label=False)
-    negative_prompt = gr.Text(
+    negative_prompt = gr.Textbox(
        label="Negative prompt",
-        max_lines=1,
-        placeholder="Enter a negative prompt",
-        visible=True,
+        max_lines=2,
+        placeholder="Enter a negative prompt (optional)",
    )
     seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+    width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+    height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+    guidance_scale = gr.Slider(label="Guidance scale", minimum=0, maximum=2, step=0.1, value=0.0)
+    num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=8, step=1, value=4)
 
-    width = gr.Slider(
-        label="Width",
-        minimum=256,
-        maximum=MAX_IMAGE_SIZE,
-        step=32,
-        value=1024,
-    )
-    height = gr.Slider(
-        label="Height",
-        minimum=256,
-        maximum=MAX_IMAGE_SIZE,
-        step=32,
-        value=1024,
-    )
-    guidance_scale = gr.Slider(
-        label="Guidance scale", minimum=0, maximum=2, step=0.1, value=0.0
-    )
-    num_inference_steps = gr.Slider(
-        label="Number of inference steps", minimum=1, maximum=8, step=1, value=4
-    )
-    inputs = [
-        prompt,
-        negative_prompt,
-        seed,
-        width,
-        height,
-        guidance_scale,
-        num_inference_steps,
-        secret_token,
-    ]
     iface = gr.Interface(
         fn=generate,
-        inputs=inputs,
-        outputs=result,
-        title="Image Generator",
-        description="Generate images based on prompts.",
+        inputs=[prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps, secret_token],
+        outputs=gr.Image(label="Result"),
+        title="Image Generator (LCM)",
+        description="Fast SDXL/SSD-1B image generation with LCM. Uses CPU if CUDA is unavailable.",
    )
-
     iface.launch()
 
-
 if run_api:
     with gr.Blocks() as demo:
-        gr.HTML(
-            """
-        <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
-        <div style="text-align: center; color: black;">
-        <p style="color: black;">This space is a REST API to programmatically generate images using LCM LoRA SSD-1B.</p>
-        <p style="color: black;">It is not meant to be directly used through a user interface, but using code and an access key.</p>
-        </div>
-        </div>"""
-        )
-        secret_token = gr.Text(
-            label="Secret Token",
-            max_lines=1,
-            placeholder="Enter your secret token",
-        )
-        prompt = gr.Text(
-            label="Prompt",
-            show_label=False,
-            max_lines=1,
-            placeholder="Enter your prompt",
-            container=False,
-        )
-        result = gr.Image(label="Result", show_label=False)
-        negative_prompt = gr.Text(
-            label="Negative prompt",
-            max_lines=1,
-            placeholder="Enter a negative prompt",
-            visible=True,
+        gr.Markdown(
+            "### REST API for LCM Text-to-Image\n"
+            "Use the `/run` endpoint programmatically with your secret."
        )
+        secret_token = gr.Textbox(label="Secret Token", type="password")
+        prompt = gr.Textbox(label="Prompt")
+        negative_prompt = gr.Textbox(label="Negative prompt")
        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+        width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+        height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
+        guidance_scale = gr.Slider(label="Guidance scale", minimum=0, maximum=2, step=0.1, value=0.0)
+        num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=8, step=1, value=4)
 
-        width = gr.Slider(
-            label="Width",
-            minimum=256,
-            maximum=MAX_IMAGE_SIZE,
-            step=32,
-            value=1024,
-        )
-        height = gr.Slider(
-            label="Height",
-            minimum=256,
-            maximum=MAX_IMAGE_SIZE,
-            step=32,
-            value=1024,
-        )
-        guidance_scale = gr.Slider(
-            label="Guidance scale", minimum=0, maximum=2, step=0.1, value=0.0
-        )
-        num_inference_steps = gr.Slider(
-            label="Number of inference steps", minimum=1, maximum=8, step=1, value=4
-        )
-
-        inputs = [
-            prompt,
-            negative_prompt,
-            seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-            secret_token,
-        ]
-        prompt.submit(
-            fn=generate,
-            inputs=inputs,
-            outputs=result,
-            api_name="run",
-        )
+        inputs = [prompt, negative_prompt, seed, width, height, guidance_scale, num_inference_steps, secret_token]
+        prompt.submit(fn=generate, inputs=inputs, outputs=gr.Image(), api_name="run")
 
-    # demo.queue(max_size=32).launch()
-    # Launch the Gradio app with multiple workers and debug mode enabled
-    demo.queue(max_size=32).launch(debug=True)
+    demo.queue(max_size=32).launch(debug=False)
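
One caveat with the optional bootstrap in the new `app.py`: the pinned specifiers are passed to `os.system` unquoted, so `>=` is interpreted by the shell as output redirection. A safer sketch (same pins as in the diff, shown only as an illustration) avoids the shell entirely:

```python
# Sketch: install the same pins without invoking a shell, so specifiers such as
# "transformers>=4.41.0" are passed through literally instead of being treated
# as redirections. Prefer requirements.txt at build time regardless.
import subprocess
import sys

PINS = [
    "diffusers==0.30.0",
    "transformers>=4.41.0",
    "accelerate>=0.31.0",
    "huggingface_hub>=0.23.4",
    "safetensors>=0.4.2",
    "gradio==4.37.1",
    "python-dotenv",
]

subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
subprocess.check_call([sys.executable, "-m", "pip", "install", *PINS])
```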
 
 
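Because `generate()` seeds a `torch.Generator` explicitly, repeat calls with the same arguments should reproduce the same image on the same device and dtype. A small check, assuming `generate` and `SECRET_TOKEN` from `app.py` are in scope:

```python
# Determinism sketch: two runs with identical arguments and seed should match
# pixel-for-pixel on the same hardware/dtype (minor nondeterminism is still
# possible with some GPU kernels).
import numpy as np

kwargs = dict(
    prompt="A lighthouse on a rocky coast at sunset",
    negative_prompt="",
    seed=123,
    width=768,
    height=768,
    guidance_scale=0.0,
    num_inference_steps=4,
    secret_token=SECRET_TOKEN,
)

a = np.asarray(generate(**kwargs))
b = np.asarray(generate(**kwargs))
print("identical:", np.array_equal(a, b))
```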
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+# Core stack (kept at your versions)
 accelerate==0.24.1
 diffusers==0.23.0
 gradio==3.39.0
@@ -6,3 +7,13 @@ Pillow==10.1.0
 torch==2.1.0
 transformers==4.35.0
 ipython
+
+# Must-add pins for compatibility
+# diffusers==0.23.0 expects `cached_download` to exist in huggingface_hub
+huggingface_hub==0.14.1
+
+# Recommended: used by diffusers/transformers when loading weights
+safetensors==0.4.0
+
+# Your code imports numpy directly
+numpy>=1.23
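
The `huggingface_hub==0.14.1` pin exists because `diffusers==0.23.0` still imports `cached_download` from `huggingface_hub`, a function later releases removed. A quick sanity check after installing:

```python
# Compatibility sketch: this import succeeds on huggingface_hub==0.14.1 but
# raises ImportError on recent releases where cached_download was removed,
# which would break diffusers==0.23.0 at import time.
from huggingface_hub import cached_download  # noqa: F401
import diffusers

print("diffusers", diffusers.__version__, "imports cleanly with the pinned hub client")
```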