Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -51,8 +51,9 @@ from diffusers import (
 
 # ---------- ZeroGPU decorator (works even off-Spaces) ----------
 try:
-    import spaces  # real decorator on Spaces
-except Exception:
+    import spaces  # real decorator on HF Spaces
+except ImportError:
+    # Local/dev fallback: no-op decorator so app still runs without ZeroGPU
     class _DummySpaces:
         def GPU(self, *args, **kwargs):
             def _wrap(f):
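The diff truncates the fallback at `def _wrap(f):`. A minimal sketch of how such a no-op stand-in typically completes, assuming the wrapper simply returns the function unchanged (the body below is an assumption, not the file's actual code):

# Sketch only: makes @spaces.GPU(...) a no-op outside Hugging Face Spaces.
class _DummySpaces:
    def GPU(self, *args, **kwargs):
        def _wrap(f):
            return f  # no GPU scheduling locally; run the function as-is
        return _wrap

spaces = _DummySpaces()  # assumed module-level stand-in bound to the same name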
@@ -96,7 +97,7 @@ pipe: Optional[DiffusionPipeline] = None
 
 def _gpu_mem_efficiency(p: DiffusionPipeline) -> None:
     """Enable memory-efficient attention and VAE tiling where possible."""
-    enabled = False
+    enabled = False  # tracks whether a memory-efficient attention path was enabled
     try:
         p.enable_xformers_memory_efficient_attention()
         enabled = True
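For context, the usual shape of such a helper after the xformers attempt; a sketch under the assumption that the function falls back to attention slicing and always tries VAE tiling, as its docstring suggests:

# Hypothetical continuation of _gpu_mem_efficiency: fall back to attention
# slicing if xformers is unavailable, then enable VAE tiling regardless.
if not enabled:
    try:
        p.enable_attention_slicing()  # coarser but dependency-free fallback
        enabled = True
    except Exception:
        pass
try:
    p.enable_vae_tiling()  # lets large images decode in tiles
except Exception:
    pass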
@@ -177,7 +178,8 @@ def _estimate_duration(prompt: str,
                        width: int,
                        height: int,
                        guidance_scale: float,
-                       steps: int) -> int:
+                       steps: int,
+                       secret_token: str) -> int:
     """
     Rough estimate (seconds) to inform ZeroGPU scheduler for better queuing.
     Scale by pixel count and steps. Conservative upper bound.
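The extra parameter is not cosmetic: when `duration` is a callable, ZeroGPU invokes it with the same arguments as the decorated function, so the estimator must accept everything `generate` accepts, even arguments it ignores. A toy illustration with hypothetical names:

# Toy example: the duration callable sees the decorated function's arguments.
def _estimate(prompt: str, steps: int, secret_token: str = "") -> int:
    return 10 + 2 * steps  # seconds; secret_token accepted but unused

@spaces.GPU(duration=_estimate)
def run(prompt: str, steps: int = 4, secret_token: str = "") -> str:
    return f"ran {steps} steps for {prompt!r}"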
@@ -188,32 +190,46 @@ def _estimate_duration(prompt: str,
     est = base + steps * step_cost * max(0.5, px_scale)
     return int(min(120, max(10, est)))
 
-# ----------
-@spaces.GPU(duration=_estimate_duration)  #
-def _generate_gpu_call(
+# ---------- Public generate (token gate) ----------
+@spaces.GPU(duration=_estimate_duration)  # <- MUST decorate the function Gradio calls
+def generate(
     prompt: str,
-    negative_prompt: str,
-    seed: int,
-    width: int,
-    height: int,
-    guidance_scale: float,
-    steps: int,
+    negative_prompt: str = "",
+    seed: int = 0,
+    width: int = DEFAULT_SIZE,
+    height: int = DEFAULT_SIZE,
+    guidance_scale: float = 0.0,
+    steps: int = 4,
+    secret_token: str = "",
 ) -> Image.Image:
-
-
-
-
+    if secret_token != SECRET_TOKEN:
+        # Using gr.Error keeps the nice Gradio toast in the UI
+        raise gr.Error("Invalid secret token. Set SECRET_TOKEN or pass the correct token.")
+
     _p = ensure_pipe()
-    _p.to("cuda", torch.float16)
-    _gpu_mem_efficiency(_p)
 
+    # Clamp user inputs for safety
+    width = int(np.clip(width, 256, MAX_IMAGE_SIZE))
+    height = int(np.clip(height, 256, MAX_IMAGE_SIZE))
+    steps = int(np.clip(steps, 1, 12))
+    guidance_scale = float(np.clip(guidance_scale, 0.0, 2.0))
+
+    # Try to use CUDA when available (ZeroGPU will make it available inside this call)
+    moved_to_cuda = False
     try:
-        width = int(np.clip(width, 256, MAX_IMAGE_SIZE))
-        height = int(np.clip(height, 256, MAX_IMAGE_SIZE))
-        steps = int(np.clip(steps, 1, 12))
-        guidance_scale = float(np.clip(guidance_scale, 0.0, 2.0))
+        if torch.cuda.is_available():
+            _p.to("cuda", torch.float16)
+            _gpu_mem_efficiency(_p)
+            moved_to_cuda = True
+        else:
+            _p.to("cpu", torch.float32)
+    except Exception as e:
+        log.warning(f"Falling back to CPU: {e}")
+        _p.to("cpu", torch.float32)
 
-        gen = torch.Generator(device="cuda")
+    try:
+        device = "cuda" if moved_to_cuda else "cpu"
+        gen = torch.Generator(device=device)
         if seed is not None:
             gen = gen.manual_seed(int(seed))
 
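A possible smoke test for the merged function; hypothetical values, with SECRET_TOKEN, DEFAULT_SIZE, and MAX_IMAGE_SIZE coming from app.py's own config:

# Hypothetical local check: out-of-range width/steps are clamped, and a bad
# token raises gr.Error before any model work happens.
import os

img = generate(
    prompt="a watercolor fox, minimal background",
    seed=42,
    width=4096,  # clamped into [256, MAX_IMAGE_SIZE]
    steps=50,    # clamped into [1, 12]
    secret_token=os.environ.get("SECRET_TOKEN", ""),
)
img.save("out.png")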
@@ -229,35 +245,13 @@ def _generate_gpu_call(
         )
         return out.images[0]
     finally:
+        # Return model to CPU so the GPU can be released immediately after call
         try:
             _p.to("cpu", torch.float32)
             _p.enable_vae_tiling()
         except Exception:
             pass
 
-# ---------- Public generate (token gate) ----------
-def generate(
-    prompt: str,
-    negative_prompt: str = "",
-    seed: int = 0,
-    width: int = DEFAULT_SIZE,
-    height: int = DEFAULT_SIZE,
-    guidance_scale: float = 0.0,
-    num_inference_steps: int = 4,
-    secret_token: str = "",
-) -> Image.Image:
-    if secret_token != SECRET_TOKEN:
-        raise gr.Error("Invalid secret token. Set SECRET_TOKEN or pass the correct token.")
-    return _generate_gpu_call(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        seed=seed,
-        width=width,
-        height=height,
-        guidance_scale=guidance_scale,
-        steps=num_inference_steps,
-    )
-
 # ---------- Optional warmup (CPU only for ZeroGPU) ----------
 def warmup():
     try:
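The added comment names the key ZeroGPU discipline: leave the model on CPU when the decorated call exits. The same move-then-restore pattern could be expressed as a context manager; a sketch, not code from this app:

from contextlib import contextmanager

import torch

@contextmanager
def on_cuda(pipeline):
    # Move to GPU for the duration of the block and always restore to CPU,
    # even if generation raises; mirrors the try/finally in generate().
    pipeline.to("cuda", torch.float16)
    try:
        yield pipeline
    finally:
        pipeline.to("cpu", torch.float32)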