Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -141,35 +141,67 @@ def resize_for_model(image_path):
|
|
| 141 |
)
|
| 142 |
return new_img, target_size
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
@spaces.GPU()
|
| 145 |
def generate_image(text_prompt, session_id, image_height = 1024, image_width = 1024):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
print("image generation used")
|
|
|
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
image_h, image_w = scale_hw_to_area_divisible(image_height, image_width, area=1024 * 1024)
|
| 153 |
-
|
| 154 |
-
flux_model.to("cuda")
|
| 155 |
-
|
| 156 |
-
image = flux_model(
|
| 157 |
-
text_prompt,
|
| 158 |
-
height=image_h,
|
| 159 |
-
width=image_w,
|
| 160 |
-
num_inference_steps = 28,
|
| 161 |
-
guidance_scale=4.5,
|
| 162 |
-
generator=torch.Generator().manual_seed(int(1234))
|
| 163 |
-
).images[0]
|
| 164 |
-
|
| 165 |
-
flux_model.to("cpu")
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
-
image.save(output_path)
|
| 172 |
-
return output_path
|
| 173 |
|
| 174 |
def generate_scene(
|
| 175 |
text_prompt,
|
|
@@ -186,42 +218,48 @@ def generate_scene(
|
|
| 186 |
audio_negative_prompt = "",
|
| 187 |
progress=gr.Progress(track_tqdm=True)
|
| 188 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
text_prompt_processed = (text_prompt or "").strip()
|
| 190 |
if session_id is None:
|
| 191 |
session_id = uuid.uuid4().hex
|
| 192 |
-
|
| 193 |
if not text_prompt_processed:
|
| 194 |
raise gr.Error("Please enter a prompt.")
|
| 195 |
-
|
| 196 |
-
if not flux_model and not image:
|
| 197 |
-
generate_image(text_prompt,session_id)
|
| 198 |
-
print("no image provived will")
|
| 199 |
-
#raise gr.Error("Please provide an image")
|
| 200 |
-
|
| 201 |
-
|
| 202 |
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
-
|
|
|
|
|
|
|
| 206 |
|
| 207 |
-
|
|
|
|
|
|
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
| 225 |
|
| 226 |
def get_duration(
|
| 227 |
text_prompt,
|
|
@@ -248,6 +286,7 @@ def get_duration(
|
|
| 248 |
return int(sample_steps * 3 + warmup + image_generation_s)
|
| 249 |
|
| 250 |
|
|
|
|
| 251 |
@spaces.GPU(duration=get_duration)
|
| 252 |
def generate_video(
|
| 253 |
text_prompt,
|
|
@@ -264,29 +303,40 @@ def generate_video(
|
|
| 264 |
audio_negative_prompt = "",
|
| 265 |
progress=gr.Progress(track_tqdm=True)
|
| 266 |
):
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
| 269 |
if session_id is None:
|
| 270 |
session_id = uuid.uuid4().hex
|
| 271 |
|
| 272 |
-
|
| 273 |
-
|
| 274 |
if not image:
|
|
|
|
| 275 |
image = generate_image(text_prompt, session_id)
|
| 276 |
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
| 278 |
image_path = image
|
| 279 |
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
output_path = os.path.join(output_dir, f"generated_video.mp4")
|
| 283 |
|
|
|
|
|
|
|
| 284 |
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
video_frame_width = target_size[0]
|
| 288 |
video_frame_height = target_size[1]
|
| 289 |
|
|
|
|
| 290 |
generated_video, generated_audio, _ = ovi_engine.generate(
|
| 291 |
text_prompt=text_prompt,
|
| 292 |
image_path=image_path,
|
|
@@ -303,12 +353,10 @@ def generate_video(
|
|
| 303 |
)
|
| 304 |
|
| 305 |
save_video(output_path, generated_video, generated_audio, fps=24, sample_rate=16000)
|
| 306 |
-
|
| 307 |
-
print(f"{session_id} video generation succeeded")
|
| 308 |
|
| 309 |
return output_path
|
| 310 |
|
| 311 |
-
|
| 312 |
def cleanup(request: gr.Request):
|
| 313 |
|
| 314 |
sid = request.session_hash
|
|
|
|
| 141 |
)
|
| 142 |
return new_img, target_size
|
| 143 |
|
| 144 |
+
|
| 145 |
+
def _ensure_output_dir(session_id):
    """Return the results directory for ``session_id``, creating it if needed.

    The directory is ``<PROCESSED_RESULTS>/<session_id>``, where
    ``PROCESSED_RESULTS`` is read from the process environment on every call
    (a ``KeyError`` propagates when the variable is not set).

    Args:
        session_id: Name of the sub-directory to use for this session.

    Returns:
        The path of the (now existing) session directory.
    """
    results_root = os.environ["PROCESSED_RESULTS"]
    session_dir = os.path.join(results_root, session_id)
    # exist_ok=True: repeated calls for the same session must not fail.
    os.makedirs(session_dir, exist_ok=True)
    return session_dir
|
| 149 |
+
|
| 150 |
@spaces.GPU()
def generate_image(text_prompt, session_id, image_height = 1024, image_width = 1024):
    """Create the still image for a session and return its file path.

    The image is written to ``generate_image.png`` inside the directory
    returned by ``_ensure_output_dir(session_id)``.  When ``flux_model`` is
    ``None``, or when the generation call raises, ``image_example`` is copied
    to that same path instead, provided it exists on disk.

    Args:
        text_prompt: Prompt text.  ``None`` is treated as an empty string and
            the value is passed through ``clean_text`` before use.
        session_id: Identifier used to locate the per-session output directory.
        image_height: Requested height; converted with ``int()``.
        image_width: Requested width; converted with ``int()``.

    Returns:
        Path (string) of the generated image, or of the copied example image.

    Raises:
        gr.Error: If no image could be produced and no example image is
            available.  Errors from creating the output directory or from
            converting the requested dimensions happen outside the guarded
            section and propagate unchanged.
    """
    print("image generation used")
    text_prompt = clean_text(text_prompt or "")

    # Every result, generated or fallback, lands at the same per-session path
    # so callers can rely on a single location.
    output_dir = _ensure_output_dir(session_id)
    output_path = os.path.join(output_dir, "generate_image.png")

    if flux_model is None:
        # No generator loaded: the example image is the only possible result.
        if _copy_example_image(output_path):
            print(f"Flux model not available — using example image {image_example}")
            return output_path
        raise gr.Error("Image generation model not available and no example image found.")

    # Presumably snaps the requested size to dimensions the model accepts;
    # the helper is defined elsewhere in the file -- TODO confirm.
    image_h, image_w = scale_hw_to_area_divisible(int(image_height), int(image_width), area=1024 * 1024)

    try:
        # Move the model to the GPU only for the duration of the call.
        flux_model.to("cuda")
        gen = flux_model(
            text_prompt,
            height=image_h,
            width=image_w,
            num_inference_steps = 28,
            guidance_scale=4.5,
            # Fixed seed: every call starts from the same generator state.
            generator=torch.Generator(device="cuda").manual_seed(1234)
        )
        image = gen.images[0]
        image.save(output_path)
        print(f"Saved generated image to {output_path}")
        return output_path
    except Exception as e:
        # Boundary handler: log, try the example image, otherwise surface a
        # UI error that keeps the original exception attached as its cause.
        print(f"⚠️ generate_image failed: {e}")
        if _copy_example_image(output_path):
            print(f"Falling back to example image {image_example}")
            return output_path
        raise gr.Error(f"Image generation failed: {e}") from e
    finally:
        # Best-effort: always try to move the model back to the CPU.  A
        # failure here must not mask the result or the error above, but it is
        # logged instead of being silently discarded.
        try:
            flux_model.to("cpu")
        except Exception as cleanup_error:
            print(f"⚠️ could not move flux_model back to CPU: {cleanup_error}")


def _copy_example_image(output_path):
    """Copy ``image_example`` to ``output_path``; return True if a copy was made."""
    if image_example and os.path.exists(image_example):
        shutil.copy(image_example, output_path)
        return True
    return False
|
| 204 |
|
|
|
|
|
|
|
| 205 |
|
| 206 |
def generate_scene(
|
| 207 |
text_prompt,
|
|
|
|
| 218 |
audio_negative_prompt = "",
|
| 219 |
progress=gr.Progress(track_tqdm=True)
|
| 220 |
):
|
| 221 |
+
"""
|
| 222 |
+
Top-level helper that ensures there's an image (generates one if necessary)
|
| 223 |
+
and then calls generate_video.
|
| 224 |
+
"""
|
| 225 |
text_prompt_processed = (text_prompt or "").strip()
|
| 226 |
if session_id is None:
|
| 227 |
session_id = uuid.uuid4().hex
|
| 228 |
+
|
| 229 |
if not text_prompt_processed:
|
| 230 |
raise gr.Error("Please enter a prompt.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
+
# If user did not supply an image (None or empty), try to generate one and use it.
|
| 233 |
+
if not image:
|
| 234 |
+
print("No image provided; attempting to generate one.")
|
| 235 |
+
image = generate_image(text_prompt_processed, session_id)
|
| 236 |
+
print(f"Generated/fallback image path: {image}")
|
| 237 |
|
| 238 |
+
# If image is a dict-like from Gradio, try to extract file path (defensive)
|
| 239 |
+
if isinstance(image, dict) and "name" in image:
|
| 240 |
+
image = image["name"]
|
| 241 |
|
| 242 |
+
# final check - ensure file exists
|
| 243 |
+
if not image or not os.path.exists(image):
|
| 244 |
+
raise gr.Error("No usable image available (generation failed and no fallback).")
|
| 245 |
|
| 246 |
+
print(f"{session_id} is generating scene with {sample_steps} steps (image: {image})")
|
| 247 |
+
|
| 248 |
+
return generate_video(
|
| 249 |
+
text_prompt=text_prompt_processed,
|
| 250 |
+
sample_steps=sample_steps,
|
| 251 |
+
image=image,
|
| 252 |
+
session_id=session_id,
|
| 253 |
+
video_seed=video_seed,
|
| 254 |
+
solver_name=solver_name,
|
| 255 |
+
shift=shift,
|
| 256 |
+
video_guidance_scale=video_guidance_scale,
|
| 257 |
+
audio_guidance_scale=audio_guidance_scale,
|
| 258 |
+
slg_layer=slg_layer,
|
| 259 |
+
video_negative_prompt=video_negative_prompt,
|
| 260 |
+
audio_negative_prompt=audio_negative_prompt,
|
| 261 |
+
progress=progress
|
| 262 |
+
)
|
| 263 |
|
| 264 |
def get_duration(
|
| 265 |
text_prompt,
|
|
|
|
| 286 |
return int(sample_steps * 3 + warmup + image_generation_s)
|
| 287 |
|
| 288 |
|
| 289 |
+
|
| 290 |
@spaces.GPU(duration=get_duration)
|
| 291 |
def generate_video(
|
| 292 |
text_prompt,
|
|
|
|
| 303 |
audio_negative_prompt = "",
|
| 304 |
progress=gr.Progress(track_tqdm=True)
|
| 305 |
):
|
| 306 |
+
"""
|
| 307 |
+
Generates a video using ovi_engine given a guaranteed image path (string).
|
| 308 |
+
"""
|
| 309 |
+
print("generate_video called")
|
| 310 |
if session_id is None:
|
| 311 |
session_id = uuid.uuid4().hex
|
| 312 |
|
| 313 |
+
# If image is not provided for any reason, try generating one now.
|
|
|
|
| 314 |
if not image:
|
| 315 |
+
print("No image passed to generate_video; generating now...")
|
| 316 |
image = generate_image(text_prompt, session_id)
|
| 317 |
|
| 318 |
+
# If Gradio passed a dict or other structure, extract file path
|
| 319 |
+
if isinstance(image, dict) and "name" in image:
|
| 320 |
+
image_path = image["name"]
|
| 321 |
+
else:
|
| 322 |
image_path = image
|
| 323 |
|
| 324 |
+
if not image_path or not os.path.exists(image_path):
|
| 325 |
+
raise gr.Error("Image path is missing or the file does not exist. Cannot generate video.")
|
|
|
|
| 326 |
|
| 327 |
+
output_dir = _ensure_output_dir(session_id)
|
| 328 |
+
output_path = os.path.join(output_dir, "generated_video.mp4")
|
| 329 |
|
| 330 |
+
# Resize/pad and get the target dims for the model
|
| 331 |
+
try:
|
| 332 |
+
_, target_size = resize_for_model(image_path)
|
| 333 |
+
except Exception as e:
|
| 334 |
+
raise gr.Error(f"Failed to open/resize image: {e}")
|
| 335 |
|
| 336 |
video_frame_width = target_size[0]
|
| 337 |
video_frame_height = target_size[1]
|
| 338 |
|
| 339 |
+
# Call your ovi_engine (unchanged)
|
| 340 |
generated_video, generated_audio, _ = ovi_engine.generate(
|
| 341 |
text_prompt=text_prompt,
|
| 342 |
image_path=image_path,
|
|
|
|
| 353 |
)
|
| 354 |
|
| 355 |
save_video(output_path, generated_video, generated_audio, fps=24, sample_rate=16000)
|
| 356 |
+
print(f"{session_id} video generation succeeded: {output_path}")
|
|
|
|
| 357 |
|
| 358 |
return output_path
|
| 359 |
|
|
|
|
| 360 |
def cleanup(request: gr.Request):
|
| 361 |
|
| 362 |
sid = request.session_hash
|