ruslanmv committed on
Commit
20ea5a4
·
1 Parent(s): 35220ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -47
app.py CHANGED
@@ -51,8 +51,9 @@ from diffusers import (
51
 
52
  # ---------- ZeroGPU decorator (works even off-Spaces) ----------
53
  try:
54
- import spaces # real decorator on Spaces
55
- except Exception:
 
56
  class _DummySpaces:
57
  def GPU(self, *args, **kwargs):
58
  def _wrap(f):
@@ -96,7 +97,7 @@ pipe: Optional[DiffusionPipeline] = None
96
 
97
  def _gpu_mem_efficiency(p: DiffusionPipeline) -> None:
98
  """Enable memory-efficient attention and VAE tiling where possible."""
99
- enabled = False
100
  try:
101
  p.enable_xformers_memory_efficient_attention()
102
  enabled = True
@@ -177,7 +178,8 @@ def _estimate_duration(prompt: str,
177
  width: int,
178
  height: int,
179
  guidance_scale: float,
180
- steps: int) -> int:
 
181
  """
182
  Rough estimate (seconds) to inform ZeroGPU scheduler for better queuing.
183
  Scale by pixel count and steps. Conservative upper bound.
@@ -188,32 +190,46 @@ def _estimate_duration(prompt: str,
188
  est = base + steps * step_cost * max(0.5, px_scale)
189
  return int(min(120, max(10, est)))
190
 
191
- # ---------- GPU-decorated inference (Spaces detects this) ----------
192
- @spaces.GPU(duration=_estimate_duration) # no-op outside Spaces
193
- def _generate_gpu_call(
194
  prompt: str,
195
- negative_prompt: str,
196
- seed: Optional[int],
197
- width: int,
198
- height: int,
199
- guidance_scale: float,
200
- steps: int,
 
201
  ) -> Image.Image:
202
- """
203
- Runs under a ZeroGPU-allocated context. We move the pipeline to CUDA at the
204
- start and back to CPU at the end so that it remains usable when GPU is released.
205
- """
206
  _p = ensure_pipe()
207
- _p.to("cuda", torch.float16)
208
- _gpu_mem_efficiency(_p)
209
 
 
 
 
 
 
 
 
 
210
  try:
211
- width = int(np.clip(width, 256, MAX_IMAGE_SIZE))
212
- height = int(np.clip(height, 256, MAX_IMAGE_SIZE))
213
- steps = int(np.clip(steps, 1, 12))
214
- guidance_scale = float(np.clip(guidance_scale, 0.0, 2.0))
 
 
 
 
 
215
 
216
- gen = torch.Generator(device="cuda")
 
 
217
  if seed is not None:
218
  gen = gen.manual_seed(int(seed))
219
 
@@ -229,35 +245,13 @@ def _generate_gpu_call(
229
  )
230
  return out.images[0]
231
  finally:
 
232
  try:
233
  _p.to("cpu", torch.float32)
234
  _p.enable_vae_tiling()
235
  except Exception:
236
  pass
237
 
238
- # ---------- Public generate (token gate) ----------
239
- def generate(
240
- prompt: str,
241
- negative_prompt: str = "",
242
- seed: int = 0,
243
- width: int = DEFAULT_SIZE,
244
- height: int = DEFAULT_SIZE,
245
- guidance_scale: float = 0.0,
246
- num_inference_steps: int = 4,
247
- secret_token: str = "",
248
- ) -> Image.Image:
249
- if secret_token != SECRET_TOKEN:
250
- raise gr.Error("Invalid secret token. Set SECRET_TOKEN or pass the correct token.")
251
- return _generate_gpu_call(
252
- prompt=prompt,
253
- negative_prompt=negative_prompt,
254
- seed=seed,
255
- width=width,
256
- height=height,
257
- guidance_scale=guidance_scale,
258
- steps=num_inference_steps,
259
- )
260
-
261
  # ---------- Optional warmup (CPU only for ZeroGPU) ----------
262
  def warmup():
263
  try:
 
51
 
52
  # ---------- ZeroGPU decorator (works even off-Spaces) ----------
53
  try:
54
+ import spaces # real decorator on HF Spaces
55
+ except ImportError:
56
+ # Local/dev fallback: no-op decorator so app still runs without ZeroGPU
57
  class _DummySpaces:
58
  def GPU(self, *args, **kwargs):
59
  def _wrap(f):
 
97
 
98
  def _gpu_mem_efficiency(p: DiffusionPipeline) -> None:
99
  """Enable memory-efficient attention and VAE tiling where possible."""
100
+ enabled = false_flag = False
101
  try:
102
  p.enable_xformers_memory_efficient_attention()
103
  enabled = True
 
178
  width: int,
179
  height: int,
180
  guidance_scale: float,
181
+ steps: int,
182
+ secret_token: str) -> int:
183
  """
184
  Rough estimate (seconds) to inform ZeroGPU scheduler for better queuing.
185
  Scale by pixel count and steps. Conservative upper bound.
 
190
  est = base + steps * step_cost * max(0.5, px_scale)
191
  return int(min(120, max(10, est)))
192
 
193
+ # ---------- Public generate (token gate) ----------
194
+ @spaces.GPU(duration=_estimate_duration) # <- MUST decorate the function Gradio calls
195
+ def generate(
196
  prompt: str,
197
+ negative_prompt: str = "",
198
+ seed: int = 0,
199
+ width: int = DEFAULT_SIZE,
200
+ height: int = DEFAULT_SIZE,
201
+ guidance_scale: float = 0.0,
202
+ steps: int = 4,
203
+ secret_token: str = "",
204
  ) -> Image.Image:
205
+ if secret_token != SECRET_TOKEN:
206
+ # Using gr.Error keeps the nice Gradio toast in UI
207
+ raise gr.Error("Invalid secret token. Set SECRET_TOKEN or pass the correct token.")
208
+
209
  _p = ensure_pipe()
 
 
210
 
211
+ # Clamp user inputs for safety
212
+ width = int(np.clip(width, 256, MAX_IMAGE_SIZE))
213
+ height = int(np.clip(height, 256, MAX_IMAGE_SIZE))
214
+ steps = int(np.clip(steps, 1, 12))
215
+ guidance_scale = float(np.clip(guidance_scale, 0.0, 2.0))
216
+
217
+ # Try to use CUDA when available (ZeroGPU will make it available inside this call)
218
+ moved_to_cuda = False
219
  try:
220
+ if torch.cuda.is_available():
221
+ _p.to("cuda", torch.float16)
222
+ _gpu_mem_efficiency(_p)
223
+ moved_to_cuda = True
224
+ else:
225
+ _p.to("cpu", torch.float32)
226
+ except Exception as e:
227
+ log.warning(f"Falling back to CPU: {e}")
228
+ _p.to("cpu", torch.float32)
229
 
230
+ try:
231
+ device = "cuda" if moved_to_cuda else "cpu"
232
+ gen = torch.Generator(device=device)
233
  if seed is not None:
234
  gen = gen.manual_seed(int(seed))
235
 
 
245
  )
246
  return out.images[0]
247
  finally:
248
+ # Return model to CPU so the GPU can be released immediately after call
249
  try:
250
  _p.to("cpu", torch.float32)
251
  _p.enable_vae_tiling()
252
  except Exception:
253
  pass
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  # ---------- Optional warmup (CPU only for ZeroGPU) ----------
256
  def warmup():
257
  try: