Spaces:

rahul7star
/

Kandinsky

Paused

App Files Files Community

rahul7star committed 25 days ago

Commit

fef2666

verified ·

1 Parent(s): f68fbe5

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -173

app.py CHANGED Viewed

@@ -1,188 +1,136 @@
-# app.py
-import spaces
 import os
-import sys
 import time
-import subprocess
 import tempfile
-import shutil
 from pathlib import Path
-from huggingface_hub import hf_hub_download
 import gradio as gr
-# ====================================
-# Helper utilities
-# ====================================
-def sh(cmd, check=True, env=None):
-    """Shell helper that prints output live."""
-    print(f"RUN: {cmd}")
     try:
-        completed = subprocess.run(cmd, shell=True, check=check, capture_output=True, text=True, env=env)
-        print(completed.stdout)
-        if completed.stderr:
-            print("ERR:", completed.stderr, file=sys.stderr)
-        return completed.returncode, completed.stdout
-    except subprocess.CalledProcessError as e:
-        print("Command failed:", e, file=sys.stderr)
-        print(e.stdout)
-        print(e.stderr, file=sys.stderr)
-        return e.returncode, e.stdout if hasattr(e, "stdout") else ""
-# ====================================
-# FlashAttention install (startup)
-# ====================================
-def try_install_flash_attention():
-    """Download and install FlashAttention wheel from rahul7star/flash-attn-3 repo."""
-    try:
-        print("🔹 Attempting to install FlashAttention...")
-        wheel = hf_hub_download(
-            repo_id="rahul7star/flash-attn-3",
-            repo_type="model",
-            filename="128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl",
         )
-        print(f"✅ Wheel downloaded: {wheel}")
-        sh(f"pip install {wheel}")
-        import importlib, site
-        site.addsitedir(site.getsitepackages()[0])
-        importlib.invalidate_caches()
-        print("✅ FlashAttention installed successfully.")
-    except Exception as e:
-        print(f"⚠️ Could not install FlashAttention: {e}")
-        print("Continuing without it...")
-# ====================================
-# Model download (startup)
-# ====================================
-def ensure_models_downloaded():
-    """Run download_models.py once at startup to fetch model weights."""
-    marker = Path(".models_ready")
-    if marker.exists():
-        print("✅ Models already downloaded (marker found).")
-        return True
-    if not Path("download_models.py").exists():
-        print("❌ Missing download_models.py in repo. Please include it.")
-        return False
-    print("⬇️ Downloading model weights via download_models.py ...")
-    try:
-        rc, _ = sh(f"{sys.executable} download_models.py", check=True)
-        marker.write_text("ok")
-        print("✅ Model download complete.")
-        return True
     except Exception as e:
-        print(f"❌ Model download failed: {e}")
-        return False
-# ====================================
-# Inference runner (text/image → video)
-# ====================================
-def run_inference(prompt: str, image_path: str | None = None):
-    """Run test.py with prompt + optional image. Returns path to video."""
-    workdir = os.getcwd()
-    out_video = Path(workdir) / "output.mp4"
-    if out_video.exists():
-        out_video.unlink(missing_ok=True)
-    cmd = [sys.executable, "test.py", "--prompt", f"\"{prompt}\""]
-    if image_path:
-        cmd += ["--image_path", f"\"{image_path}\""]
-    cmd_str = " ".join(cmd)
-    print(f"🚀 Running inference: {cmd_str}")
-    try:
-        proc = subprocess.run(cmd_str, shell=True, capture_output=True, text=True, check=True)
-        print(proc.stdout)
-        if proc.stderr:
-            print(proc.stderr, file=sys.stderr)
-    except subprocess.CalledProcessError as e:
-        print("❌ Inference failed:", e)
-        print(e.stdout)
-        print(e.stderr)
         return None
-    # Find the resulting .mp4
-    if out_video.exists():
-        return str(out_video)
-    vids = sorted(Path(workdir).glob("*.mp4"), key=lambda p: p.stat().st_mtime, reverse=True)
-    return str(vids[0]) if vids else None
-# ====================================
-# Gradio callback
-# ====================================
-@spaces.GPU(duration=50)
-def generate(prompt, image):
-    """Main Gradio callback for generating video."""
-    status = []
-    temp_img_path = None
-    if image is not None:
-        tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
-        image.save(tmp, format="PNG")
-        tmp.close()
-        temp_img_path = tmp.name
-        status.append(f"📸 Saved image: {temp_img_path}")
-    try:
-        video_path = run_inference(prompt, image_path=temp_img_path)
-        if not video_path:
-            status.append("❌ No video produced. Check test.py output.")
-            return None, "\n".join(status)
-    except Exception as e:
-        status.append(f"❌ Inference failed: {e}")
-        return None, "\n".join(status)
-    dest_dir = Path("outputs"); dest_dir.mkdir(exist_ok=True)
-    ts = int(time.time())
-    dest = dest_dir / f"t2v_output_{ts}.mp4"
-    shutil.copy(video_path, dest)
-    status.append(f"✅ Video generated: {dest}")
-    return str(dest), "\n".join(status)
-# ====================================
-# UI builder
-# ====================================
-def build_ui():
-    with gr.Blocks(title="Text+Image → Video (Spaces GPU)") as demo:
-        gr.Markdown("## 🎬 Kandinsky / T2V Video Generator\nProvide a text prompt and optional image to generate short video clips using GPU inference.")
-        with gr.Row():
-            with gr.Column(scale=3):
-                prompt = gr.Textbox(label="Prompt", placeholder="A dog in a red hat, cinematic lighting", value="A dog in a red hat")
-                image_in = gr.Image(label="Optional input image", type="pil")
-                generate_btn = gr.Button("🎥 Generate Video", variant="primary")
-                status = gr.Textbox(label="Logs", lines=8)
-            with gr.Column(scale=2):
-                out_video = gr.Video(label="Output video")
-        generate_btn.click(fn=generate, inputs=[prompt, image_in], outputs=[out_video, status])
-    return demo
-# ====================================
-# App startup
-# ====================================
 if __name__ == "__main__":
-    print("🚀 Starting Text+Image → Video Gradio App")
-    print("Python:", sys.executable)
-    print("CUDA_VISIBLE_DEVICES:", os.environ.get("CUDA_VISIBLE_DEVICES", "(not set)"))
-    # Install FlashAttention + download models ONCE at startup
-    try_install_flash_attention()
-    ensure_models_downloaded()
-    Path("outputs").mkdir(exist_ok=True)
-    demo = build_ui()
-    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))

 import os
+import warnings
+import logging
 import time
 import tempfile
 from pathlib import Path
 import gradio as gr
+import torch
+from huggingface_hub import hf_hub_download
+# GPU management for Hugging Face Spaces
+import spaces
+# ==========================================================
+# 1️⃣ Install FlashAttention dynamically
+# ==========================================================
+# Best-effort install: fetch a prebuilt FlashAttention wheel from the Hub and
+# install it into the running interpreter. Any failure is reported and the
+# app continues without FlashAttention.
+try:
+    import importlib
+    import site
+    import subprocess
+    import sys
+    print("Attempting to download and install FlashAttention wheel...")
+    flash_attention_wheel = hf_hub_download(
+        repo_id="rahul7star/flash-attn-3",
+        repo_type="model",
+        filename="128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl",
+    )
+    # `python -m pip` installs into this interpreter's environment, and
+    # check_call raises CalledProcessError on a non-zero exit status, so a
+    # failed install reaches the except branch instead of being announced as
+    # a success. Passing an argument list also avoids shell interpretation of
+    # the wheel path.
+    subprocess.check_call(
+        [sys.executable, "-m", "pip", "install", flash_attention_wheel]
+    )
+    # Re-process the first site-packages directory (including any new .pth
+    # files) and drop the import system's cached directory listings.
+    site.addsitedir(site.getsitepackages()[0])
+    importlib.invalidate_caches()
+    print("✅ FlashAttention installed successfully.")
+except Exception as e:
+    print(f"⚠️ Could not install FlashAttention: {e}")
+    print("Continuing without FlashAttention...")
+# ==========================================================
+# 2️⃣ Kandinsky Import & Setup
+# ==========================================================
+# Suppress every Python warning process-wide and limit the "torch" logger to
+# errors; both are configured before the pipeline package is imported.
+warnings.filterwarnings("ignore")
+logging.getLogger("torch").setLevel(logging.ERROR)
+from kandinsky import get_T2V_pipeline
+# Preload model (config path should exist in the repo)
+CONFIG_PATH = "./configs/config_5s_sft.yaml"
+# Load pipeline on GPU
+# NOTE(review): this runs at import time, outside any @spaces.GPU function —
+# confirm that "cuda:0" is usable at this point on the target Space hardware.
+print("🔄 Loading Kandinsky T2V pipeline...")
+pipe = get_T2V_pipeline(
+    device_map={"dit": "cuda:0", "vae": "cuda:0", "text_embedder": "cuda:0"},
+    conf_path=CONFIG_PATH,
+    offload=False,
+    magcache=False,
+)
+print("✅ Kandinsky T2V pipeline loaded successfully.")
+# ==========================================================
+# 3️⃣ Generation Function
+# ==========================================================
+@spaces.GPU
+def generate_video(prompt, negative_prompt, image=None, width=768, height=512, duration=5, steps=None, guidance=None, scheduler_scale=5.0, expand_prompt=1):
+    """Generate a video with the preloaded pipeline and return its file path.
+
+    Args:
+        prompt: Text passed to the pipeline as its positional argument.
+        negative_prompt: Accepted from the UI but not forwarded to the
+            pipeline by this function.
+            NOTE(review): wire it through once the pipeline's keyword for a
+            negative prompt has been confirmed.
+        image: Optional value forwarded as ``image``.
+        width: Output width; only the (width, height) pairs 512x512,
+            512x768 and 768x512 are accepted.
+        height: Output height; see ``width``.
+        duration: Forwarded as ``time_length``.
+        steps: Forwarded as ``num_steps``.
+        guidance: Forwarded as ``guidance_weight``.
+        scheduler_scale: Forwarded as ``scheduler_scale``.
+        expand_prompt: Forwarded as ``expand_prompts``.
+
+    Returns:
+        The path of the written ``.mp4`` as a string, or ``None`` when the
+        resolution is unsupported or generation raises (the error and its
+        traceback are printed).
+    """
+    import traceback
+    supported_resolutions = {(512, 512), (512, 768), (768, 512)}
+    output_path = None
     try:
+        if (width, height) not in supported_resolutions:
+            raise ValueError(f"Unsupported resolution: ({width}x{height}). Supported: 512x512, 512x768, 768x512")
+        # Reserve a unique file name in the temp directory. A name derived
+        # from whole seconds collides when two requests start within the
+        # same second, letting one video overwrite the other.
+        with tempfile.NamedTemporaryFile(prefix="kandinsky_", suffix=".mp4", delete=False) as placeholder:
+            output_path = Path(placeholder.name)
+        start = time.perf_counter()
+        # Run pipeline (image optional); the video is written to save_path,
+        # so the call's return value is not needed here.
+        pipe(
+            prompt,
+            time_length=duration,
+            width=width,
+            height=height,
+            num_steps=steps,
+            guidance_weight=guidance,
+            scheduler_scale=scheduler_scale,
+            expand_prompts=expand_prompt,
+            save_path=str(output_path),
+            image=image,
         )
+        elapsed = time.perf_counter() - start
+        print(f"✅ Generated video in {elapsed:.2f}s: {output_path}")
+        return str(output_path)
     except Exception as e:
+        print(f"❌ Generation failed: {e}")
+        # Keep the stack trace in the logs; the message alone rarely
+        # identifies which pipeline stage failed.
+        traceback.print_exc()
+        # Do not leave an empty or partial placeholder file behind.
+        if output_path is not None:
+            output_path.unlink(missing_ok=True)
         return None
+# ==========================================================
+# 4️⃣ Gradio UI
+# ==========================================================
+# Build the Blocks layout: all inputs and the generate button live in the
+# left column (scale=2), the resulting video in the right column (scale=1).
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## 🎬 Kandinsky 5.0 T2V Lite — Text & Image to Video Generator")
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt = gr.Textbox(label="Prompt", placeholder="A dog in a red hat running in the snow")
+            negative_prompt = gr.Textbox(
+                label="Negative Prompt",
+                value="Static, 2D cartoon, cartoon, 2d animation, paintings, images, worst quality, low quality, ugly, deformed, walking backwards"
+            )
+            image = gr.Image(label="Optional Input Image", type="filepath")
+            # NOTE(review): the two radios can be combined into 768x768,
+            # which generate_video rejects as an unsupported resolution.
+            with gr.Row():
+                width = gr.Radio(choices=[512, 768], value=768, label="Width")
+                height = gr.Radio(choices=[512, 768], value=512, label="Height")
+            duration = gr.Slider(1, 10, value=5, step=1, label="Video Duration (seconds)")
+            # NOTE(review): these two sliders are labelled optional and start
+            # with value=None — confirm what Gradio actually submits for a
+            # slider without an initial value; if it falls back to the
+            # minimum, the callback never receives None for these inputs.
+            steps = gr.Slider(1, 50, value=None, step=1, label="Sampling Steps (optional)")
+            guidance = gr.Slider(1.0, 10.0, value=None, step=0.5, label="Guidance Weight (optional)")
+            scheduler_scale = gr.Slider(1.0, 10.0, value=5.0, step=0.5, label="Scheduler Scale")
+            expand_prompt = gr.Checkbox(value=True, label="Expand Prompt")
+            generate_btn = gr.Button("🚀 Generate Video", variant="primary")
+        with gr.Column(scale=1):
+            video_output = gr.Video(label="Generated Video")
+    # The inputs list is passed positionally, so its order must match the
+    # parameter order of generate_video.
+    generate_btn.click(
+        fn=generate_video,
+        inputs=[prompt, negative_prompt, image, width, height, duration, steps, guidance, scheduler_scale, expand_prompt],
+        outputs=[video_output]
+    )
+# ==========================================================
+# 5️⃣ Launch
+# ==========================================================
+# Launch the app only when this file is executed as a script.
 if __name__ == "__main__":
+    demo.launch()