OVI

Running on Zero

App Files Files Community

rahul7star committed 28 days ago

Commit

4d9f075

verified ·

1 Parent(s): 4e07d6b

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -62

app.py CHANGED Viewed

@@ -141,35 +141,67 @@ def resize_for_model(image_path):
     )
     return new_img, target_size
 @spaces.GPU()
 def generate_image(text_prompt, session_id, image_height = 1024, image_width = 1024):
     print("image generation used")
-    if flux_model is None:
-        return None
-    text_prompt = clean_text(text_prompt)
-    image_h, image_w = scale_hw_to_area_divisible(image_height, image_width, area=1024 * 1024)
-    flux_model.to("cuda")
-    image = flux_model(
-        text_prompt,
-        height=image_h,
-        width=image_w,
-        num_inference_steps = 28,
-        guidance_scale=4.5,
-        generator=torch.Generator().manual_seed(int(1234))
-    ).images[0]
-    flux_model.to("cpu")
-    output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
-    os.makedirs(output_dir, exist_ok=True)
-    output_path = os.path.join(output_dir, f"generate_image.png")
-    image.save(output_path)
-    return output_path
 def generate_scene(
     text_prompt,
@@ -186,42 +218,48 @@ def generate_scene(
     audio_negative_prompt = "",
     progress=gr.Progress(track_tqdm=True)
 ):
     text_prompt_processed = (text_prompt or "").strip()
     if session_id is None:
         session_id = uuid.uuid4().hex
     if not text_prompt_processed:
         raise gr.Error("Please enter a prompt.")
-    if not flux_model and not image:
-        generate_image(text_prompt,session_id)
-        print("no image provived will")
-        #raise gr.Error("Please provide an image")
-    print(f"{session_id} is generating scene with {sample_steps} steps")
-    tags = ["<ENDAUDCAP>", "<AUDCAP>", "<E>", "<S>"]
-    found = [tag for tag in tags if tag in text_prompt]
-    if bool(found):
-        print(f"{session_id} used tags")
-    return generate_video(text_prompt,
-                        sample_steps,
-                        image,
-                        session_id,
-                        video_seed,
-                        solver_name,
-                        shift,
-                        video_guidance_scale,
-                        audio_guidance_scale,
-                        slg_layer,
-                        video_negative_prompt,
-                        audio_negative_prompt,
-                        progress)
 def get_duration(
     text_prompt,
@@ -248,6 +286,7 @@ def get_duration(
     return int(sample_steps * 3 + warmup + image_generation_s)
 @spaces.GPU(duration=get_duration)
 def generate_video(
     text_prompt,
@@ -264,29 +303,40 @@ def generate_video(
     audio_negative_prompt = "",
     progress=gr.Progress(track_tqdm=True)
 ):
-    print("text uis")
-    print(text_prompt)
     if session_id is None:
         session_id = uuid.uuid4().hex
-    image_path = None
     if not image:
         image = generate_image(text_prompt, session_id)
-    if image is not None:
         image_path = image
-    output_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
-    os.makedirs(output_dir, exist_ok=True)
-    output_path = os.path.join(output_dir, f"generated_video.mp4")
-    _, target_size = resize_for_model(image_path)
     video_frame_width = target_size[0]
     video_frame_height = target_size[1]
     generated_video, generated_audio, _ = ovi_engine.generate(
         text_prompt=text_prompt,
         image_path=image_path,
@@ -303,12 +353,10 @@ def generate_video(
     )
     save_video(output_path, generated_video, generated_audio, fps=24, sample_rate=16000)
-    print(f"{session_id} video generation succeeded")
     return output_path
 def cleanup(request: gr.Request):
     sid = request.session_hash

     )
     return new_img, target_size
+def _ensure_output_dir(session_id):
+    """
+    Create (if needed) and return the results folder for one session.
+
+    The folder is the PROCESSED_RESULTS environment variable joined with
+    session_id; indexing os.environ raises KeyError when the variable is unset.
+    """
+    session_dir = os.path.join(os.environ["PROCESSED_RESULTS"], session_id)
+    # exist_ok=True so repeated calls for the same session are not an error
+    os.makedirs(session_dir, exist_ok=True)
+    return session_dir
 @spaces.GPU()
 def generate_image(text_prompt, session_id, image_height = 1024, image_width = 1024):
+    """
+    Generate an image from text_prompt with flux_model and save it in the session folder.
+
+    Args:
+        text_prompt: Prompt text; None is treated as an empty string before cleaning.
+        session_id: Name of the per-session output folder (see _ensure_output_dir).
+        image_height: Requested height, converted with int() before scaling.
+        image_width: Requested width, converted with int() before scaling.
+
+    Returns:
+        Path of "generate_image.png" inside the session folder. The file holds the
+        generated image or, when flux_model is None or generation fails, a copy of
+        image_example.
+
+    Raises:
+        gr.Error: flux_model is None or generation failed, and no example image exists.
+    """
     print("image generation used")
+    text_prompt = clean_text(text_prompt or "")
+    output_dir = _ensure_output_dir(session_id)
+    output_path = os.path.join(output_dir, "generate_image.png")
+    if flux_model is None:
+        # No model loaded: copy the example image into the session folder so
+        # downstream code can always rely on a path under the processed results.
+        if image_example and os.path.exists(image_example):
+            shutil.copy(image_example, output_path)
+            print(f"Flux model not available — using example image {image_example}")
+            return output_path
+        raise gr.Error("Image generation model not available and no example image found.")
+    # ensure requested dims are divisible/compatible
+    image_h, image_w = scale_hw_to_area_divisible(int(image_height), int(image_width), area=1024 * 1024)
+    try:
+        # move model to GPU and generate; the finally clause moves it back to CPU
+        flux_model.to("cuda")
+        gen = flux_model(
+            text_prompt,
+            height=image_h,
+            width=image_w,
+            num_inference_steps = 28,
+            guidance_scale=4.5,
+            generator=torch.Generator(device="cuda").manual_seed(1234)
+        )
+        image = gen.images[0]
+        image.save(output_path)
+        print(f"Saved generated image to {output_path}")
+        return output_path
+    except Exception as e:
+        # report the failure, then fall back to the example image if present
+        print(f"⚠️ generate_image failed: {e}")
+        if image_example and os.path.exists(image_example):
+            shutil.copy(image_example, output_path)
+            print(f"Falling back to example image {image_example}")
+            return output_path
+        # chain the original exception so the root cause stays in the traceback
+        raise gr.Error(f"Image generation failed: {e}") from e
+    finally:
+        try:
+            flux_model.to("cpu")
+        except Exception as cleanup_error:
+            # Best effort: a failed move back to CPU must not mask the result or
+            # the error above, but it is reported instead of silently ignored.
+            print(f"⚠️ could not move flux_model back to CPU: {cleanup_error}")
 def generate_scene(
     text_prompt,
     audio_negative_prompt = "",
     progress=gr.Progress(track_tqdm=True)
 ):
+    """
+    Top-level helper that ensures there's an image (generates one if necessary)
+    and then calls generate_video.
+    """
     text_prompt_processed = (text_prompt or "").strip()
     if session_id is None:
         session_id = uuid.uuid4().hex
     if not text_prompt_processed:
         raise gr.Error("Please enter a prompt.")
+    # If user did not supply an image (None or empty), try to generate one and use it.
+    if not image:
+        print("No image provided; attempting to generate one.")
+        image = generate_image(text_prompt_processed, session_id)
+        print(f"Generated/fallback image path: {image}")
+    # If image is a dict-like from Gradio, try to extract file path (defensive)
+    if isinstance(image, dict) and "name" in image:
+        image = image["name"]
+    # final check - ensure file exists
+    if not image or not os.path.exists(image):
+        raise gr.Error("No usable image available (generation failed and no fallback).")
+    print(f"{session_id} is generating scene with {sample_steps} steps (image: {image})")
+    return generate_video(
+        text_prompt=text_prompt_processed,
+        sample_steps=sample_steps,
+        image=image,
+        session_id=session_id,
+        video_seed=video_seed,
+        solver_name=solver_name,
+        shift=shift,
+        video_guidance_scale=video_guidance_scale,
+        audio_guidance_scale=audio_guidance_scale,
+        slg_layer=slg_layer,
+        video_negative_prompt=video_negative_prompt,
+        audio_negative_prompt=audio_negative_prompt,
+        progress=progress
+    )
 def get_duration(
     text_prompt,
     return int(sample_steps * 3 + warmup + image_generation_s)
 @spaces.GPU(duration=get_duration)
 def generate_video(
     text_prompt,
     audio_negative_prompt = "",
     progress=gr.Progress(track_tqdm=True)
 ):
+    """
+    Generates a video using ovi_engine given a guaranteed image path (string).
+    """
+    print("generate_video called")
     if session_id is None:
         session_id = uuid.uuid4().hex
+    # If image is not provided for any reason, try generating one now.
     if not image:
+        print("No image passed to generate_video; generating now...")
         image = generate_image(text_prompt, session_id)
+    # If Gradio passed a dict or other structure, extract file path
+    if isinstance(image, dict) and "name" in image:
+        image_path = image["name"]
+    else:
         image_path = image
+    if not image_path or not os.path.exists(image_path):
+        raise gr.Error("Image path is missing or the file does not exist. Cannot generate video.")
+    output_dir = _ensure_output_dir(session_id)
+    output_path = os.path.join(output_dir, "generated_video.mp4")
+    # Resize/pad and get the target dims for the model
+    try:
+        _, target_size = resize_for_model(image_path)
+    except Exception as e:
+        raise gr.Error(f"Failed to open/resize image: {e}")
     video_frame_width = target_size[0]
     video_frame_height = target_size[1]
+    # Call your ovi_engine (unchanged)
     generated_video, generated_audio, _ = ovi_engine.generate(
         text_prompt=text_prompt,
         image_path=image_path,
     )
     save_video(output_path, generated_video, generated_audio, fps=24, sample_rate=16000)
+    print(f"{session_id} video generation succeeded: {output_path}")
     return output_path
 def cleanup(request: gr.Request):
     sid = request.session_hash