rahul7star committed on
Commit
9cd917b
Β·
verified Β·
1 Parent(s): c3c58ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -100
app.py CHANGED
@@ -1,137 +1,244 @@
1
- import spaces
2
-
3
  import os
 
 
4
  import warnings
5
  import logging
6
- import time
7
  import tempfile
 
8
  from pathlib import Path
 
9
 
10
- import gradio as gr
11
  import torch
 
 
12
  from huggingface_hub import hf_hub_download
13
 
14
- # GPU management for Hugging Face Spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # ==========================================================
17
- # 1️⃣ Install FlashAttention dynamically
18
  # ==========================================================
19
- try:
20
- print("Attempting to download and install FlashAttention wheel...")
21
- flash_attention_wheel = hf_hub_download(
22
- repo_id="rahul7star/flash-attn-3",
23
- repo_type="model",
24
- filename="128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl",
25
- )
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- os.system(f"pip install {flash_attention_wheel}")
28
 
29
- import importlib, site
30
- site.addsitedir(site.getsitepackages()[0])
31
- importlib.invalidate_caches()
 
 
 
 
 
32
 
33
- print("βœ… FlashAttention installed successfully.")
34
- except Exception as e:
35
- print(f"⚠️ Could not install FlashAttention: {e}")
36
- print("Continuing without FlashAttention...")
 
 
 
 
 
 
 
 
 
37
 
38
 
39
  # ==========================================================
40
- # 2️⃣ Kandinsky Import & Setup
41
  # ==========================================================
42
- warnings.filterwarnings("ignore")
43
- logging.getLogger("torch").setLevel(logging.ERROR)
44
-
45
- from kandinsky import get_T2V_pipeline
 
 
 
 
 
 
 
46
 
47
- # Preload model (config path should exist in the repo)
48
- CONFIG_PATH = "./configs/config_5s_sft.yaml"
49
 
50
- # Load pipeline on GPU
51
- print("πŸ”„ Loading Kandinsky T2V pipeline...")
52
- pipe = get_T2V_pipeline(
53
- device_map={"dit": "cuda:0", "vae": "cuda:0", "text_embedder": "cuda:0"},
54
- conf_path=CONFIG_PATH,
55
- offload=False,
56
- magcache=False,
57
- )
58
- print("βœ… Kandinsky T2V pipeline loaded successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  # ==========================================================
62
- # 3️⃣ Generation Function
63
  # ==========================================================
64
- @spaces.GPU(duration = 30)
65
- def generate_video(prompt, negative_prompt, image=None, width=768, height=512, duration=5, steps=None, guidance=None, scheduler_scale=5.0, expand_prompt=1):
66
- """Generate a video using Kandinsky 5 T2V pipeline"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  try:
68
- if (width, height) not in [(512, 512), (512, 768), (768, 512)]:
69
- raise ValueError(f"Unsupported resolution: ({width}x{height}). Supported: 512x512, 512x768, 768x512")
70
-
71
- output_path = Path(tempfile.gettempdir()) / f"kandinsky_{int(time.time())}.mp4"
72
-
73
- start = time.perf_counter()
74
-
75
- # Run pipeline (image optional)
76
- result = pipe(
77
- prompt,
78
- time_length=duration,
79
- width=width,
80
- height=height,
81
- num_steps=steps,
82
- guidance_weight=guidance,
83
- scheduler_scale=scheduler_scale,
84
- expand_prompts=expand_prompt,
85
- save_path=str(output_path),
86
- image=image,
87
  )
88
-
89
- elapsed = time.perf_counter() - start
90
- print(f"βœ… Generated video in {elapsed:.2f}s: {output_path}")
91
- return str(output_path)
92
  except Exception as e:
93
- print(f"❌ Generation failed: {e}")
94
- return None
95
 
96
 
97
  # ==========================================================
98
- # 4️⃣ Gradio UI
99
  # ==========================================================
100
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
101
- gr.Markdown("## 🎬 Kandinsky 5.0 T2V Lite β€” Text & Image to Video Generator")
102
-
103
- with gr.Row():
104
- with gr.Column(scale=2):
105
- prompt = gr.Textbox(label="Prompt", placeholder="A dog in a red hat running in the snow")
106
- negative_prompt = gr.Textbox(
107
- label="Negative Prompt",
108
- value="Static, 2D cartoon, cartoon, 2d animation, paintings, images, worst quality, low quality, ugly, deformed, walking backwards"
109
- )
110
- image = gr.Image(label="Optional Input Image", type="filepath")
111
-
112
- with gr.Row():
113
- width = gr.Radio(choices=[512, 768], value=768, label="Width")
114
- height = gr.Radio(choices=[512, 768], value=512, label="Height")
115
-
116
- duration = gr.Slider(1, 10, value=5, step=1, label="Video Duration (seconds)")
117
- steps = gr.Slider(1, 50, value=None, step=1, label="Sampling Steps (optional)")
118
- guidance = gr.Slider(1.0, 10.0, value=None, step=0.5, label="Guidance Weight (optional)")
119
- scheduler_scale = gr.Slider(1.0, 10.0, value=5.0, step=0.5, label="Scheduler Scale")
120
- expand_prompt = gr.Checkbox(value=True, label="Expand Prompt")
121
-
122
- generate_btn = gr.Button("πŸš€ Generate Video", variant="primary")
123
-
124
- with gr.Column(scale=1):
125
- video_output = gr.Video(label="Generated Video")
 
 
 
 
 
 
 
 
126
 
127
- generate_btn.click(
128
- fn=generate_video,
129
- inputs=[prompt, negative_prompt, image, width, height, duration, steps, guidance, scheduler_scale, expand_prompt],
130
- outputs=[video_output]
131
- )
132
 
133
  # ==========================================================
134
- # 5️⃣ Launch
135
  # ==========================================================
136
  if __name__ == "__main__":
137
- demo.launch()
 
 
 
 
 
1
+ # app.py
 
2
  import os
3
+ import sys
4
+ import time
5
  import warnings
6
  import logging
 
7
  import tempfile
8
+ import shutil
9
  from pathlib import Path
10
+ import subprocess
11
 
 
12
  import torch
13
+ import gradio as gr
14
+ import spaces
15
  from huggingface_hub import hf_hub_download
16
 
17
+ # ==========================================================
18
+ # Helper: shell command runner
19
+ # ==========================================================
20
def sh(cmd, check=True):
    """Run *cmd* in a shell, echoing its output.

    Prints the command, its captured stdout (to stdout) and stderr
    (to sys.stderr). Returns ``(returncode, stdout)``. When *check*
    is true, a non-zero exit status raises CalledProcessError.
    """
    print(f"RUN: {cmd}")
    # NOTE(review): shell=True with an interpolated string — only pass
    # trusted, internally-built commands through this helper.
    proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    out, err = proc.stdout, proc.stderr
    if out:
        print(out)
    if err:
        print(err, file=sys.stderr)
    if check and proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd)
    return proc.returncode, out
30
+
31
 
32
  # ==========================================================
33
+ # Install FlashAttention (best effort)
34
  # ==========================================================
35
def try_install_flash_attention():
    """Best-effort install of a prebuilt FlashAttention wheel.

    Downloads the wheel from the Hub, pip-installs it, and refreshes the
    import machinery so the new package becomes visible in this process.
    Returns True on success, False on any failure — the app keeps running
    without FlashAttention either way.
    """
    try:
        print("Attempting to download and install FlashAttention wheel...")
        wheel_path = hf_hub_download(
            repo_id="rahul7star/flash-attn-3",
            repo_type="model",
            filename="128/flash_attn_3-3.0.0b1-cp39-abi3-linux_x86_64.whl",
        )
        sh(f"pip install {wheel_path}")
        # Make the freshly installed package importable without restarting.
        import site
        import importlib
        site.addsitedir(site.getsitepackages()[0])
        importlib.invalidate_caches()
        print("βœ… FlashAttention installed successfully.")
    except Exception as e:
        print(f"⚠️ Could not install FlashAttention: {e}")
        print("Continuing without FlashAttention...")
        return False
    return True
53
 
 
54
 
55
+ # ==========================================================
56
+ # Ensure models downloaded before UI starts
57
+ # ==========================================================
58
def ensure_models_downloaded(marker_file=".models_ready"):
    """Run download_models.py once, using *marker_file* as a done-flag.

    Returns True when the models are (already) present, False when the
    download script is missing or fails. On success the marker file is
    written so subsequent calls become a cheap no-op.
    """
    marker_path = Path(marker_file)
    if marker_path.exists():
        print("βœ… Models already downloaded (marker found).")
        return True

    if not Path("download_models.py").exists():
        print("❌ download_models.py not found. Please include it in repo.")
        return False

    print("πŸ“¦ Running download_models.py to fetch model artifacts...")
    try:
        sh(f"{sys.executable} download_models.py", check=True)
        marker_path.write_text("ok")
        print("βœ… Model download complete. Marker created.")
        return True
    except Exception as e:
        print(f"❌ Model download failed: {e}")
        return False
77
 
78
 
79
  # ==========================================================
80
+ # Import Kandinsky pipeline (after models ready)
81
  # ==========================================================
82
def disable_warnings():
    """Silence noisy warnings and verbose torch logging.

    Filters all Python warnings, raises the ``torch`` logger threshold to
    ERROR, and — best effort — turns down torch's dynamo/inductor log
    streams. That last step uses the private ``torch._logging`` API, whose
    availability and signature vary across torch releases, so any failure
    there is swallowed: log configuration must never break generation.
    """
    warnings.filterwarnings("ignore")
    logging.getLogger("torch").setLevel(logging.ERROR)
    try:
        # Private API — guard so an incompatible torch version is harmless.
        torch._logging.set_logs(
            dynamo=logging.ERROR,
            dynamic=logging.ERROR,
            aot=logging.ERROR,
            inductor=logging.ERROR,
            guards=False,
            recompiles=False,
        )
    except Exception as e:
        print(f"⚠️ Could not adjust torch log levels: {e}")
93
 
 
 
94
 
95
+ # ==========================================================
96
+ # Video generation merged from test.py
97
+ # ==========================================================
98
# Pipelines are expensive to build; cache them per (config, offload, magcache)
# so repeated generations with the same settings reuse the loaded model.
_PIPE_CACHE = {}


def _resolve_t2v_config():
    """Return the path to config_5s_sft.yaml, searching the tree as a fallback."""
    conf_path = Path("configs/config_5s_sft.yaml")
    if conf_path.exists():
        return conf_path
    candidates = list(Path(".").rglob("config_5s_sft.yaml"))
    if not candidates:
        raise FileNotFoundError("config_5s_sft.yaml not found.")
    return candidates[0]


def generate_kandinsky_video(prompt, negative_prompt, width, height, video_duration,
                             expand_prompt, sample_steps, guidance_weight,
                             scheduler_scale, output_filename, offload=False, magcache=False):
    """Generate a video with the Kandinsky 5.0 T2V pipeline and save it.

    Parameters
    ----------
    prompt : str
        Text description of the scene.
    negative_prompt : str
        Currently UNUSED — the pipeline call below does not forward it.
        NOTE(review): confirm whether the kandinsky pipeline accepts a
        negative prompt and wire it through if so.
    width, height : int
        Frame size; only 512x512, 512x768 and 768x512 are accepted.
    video_duration : int
        Clip length in seconds (passed as ``time_length``).
    expand_prompt : int
        Forwarded to ``expand_prompts`` (prompt-expansion toggle).
    sample_steps, guidance_weight : int | float | None
        Sampler overrides; ``None`` defers to the pipeline defaults.
    scheduler_scale : float
        Forwarded to the pipeline scheduler.
    output_filename : str
        Destination path for the rendered .mp4.
    offload, magcache : bool
        Pipeline memory/speed options; part of the cache key.

    Returns
    -------
    str
        ``output_filename`` on success.

    Raises
    ------
    ValueError
        For an unsupported (width, height) combination.
    FileNotFoundError
        When config_5s_sft.yaml cannot be located.
    """
    from kandinsky import get_T2V_pipeline

    # Validate resolution up front with a clear error.
    if (width, height) not in [(512, 512), (512, 768), (768, 512)]:
        raise ValueError(f"Unsupported video size ({width}x{height}). Use 512x512, 512x768, or 768x512.")

    conf_path = _resolve_t2v_config()
    print(f"πŸ”§ Using config: {conf_path}")

    # Load the pipeline only on a cache miss — loading dominates call time.
    cache_key = (str(conf_path), bool(offload), bool(magcache))
    pipe = _PIPE_CACHE.get(cache_key)
    if pipe is None:
        print("πŸ”„ Loading Kandinsky 5.0 T2V pipeline...")
        pipe = get_T2V_pipeline(
            device_map={"dit": "cuda:0", "vae": "cuda:0", "text_embedder": "cuda:0"},
            conf_path=str(conf_path),
            offload=offload,
            magcache=magcache,
        )
        _PIPE_CACHE[cache_key] = pipe
        print("βœ… Pipeline loaded successfully.")

    # Generate and time the run; the pipeline writes the file itself.
    start_time = time.perf_counter()
    pipe(
        prompt,
        time_length=video_duration,
        width=width,
        height=height,
        num_steps=sample_steps,
        guidance_weight=guidance_weight,
        scheduler_scale=scheduler_scale,
        expand_prompts=expand_prompt,
        save_path=output_filename,
    )
    elapsed = time.perf_counter() - start_time
    print(f"βœ… Video generated: {output_filename} (took {elapsed:.2f}s)")
    return output_filename
144
 
145
 
146
  # ==========================================================
147
+ # Gradio callback
148
  # ==========================================================
149
@spaces.GPU(duration=60)
def generate(prompt, negative_prompt, width, height, duration, expand_prompt, sample_steps,
             guidance_weight, scheduler_scale, install_flash, force_download):
    """Gradio callback: prepare the environment, then render one video.

    Returns a ``(video_path_or_None, log_text)`` pair matching the two
    UI outputs. All progress/problem messages are accumulated as log lines.
    """
    log_lines = []
    disable_warnings()

    # Optional FlashAttention install, on user request.
    if install_flash:
        installed = try_install_flash_attention()
        log_lines.append(f"FlashAttention install: {'OK' if installed else 'FAILED'}")

    # Optionally force a re-download by dropping the done-marker.
    if force_download:
        marker = Path(".models_ready")
        if marker.exists():
            marker.unlink()
            log_lines.append("Removed model marker for re-download.")

    ok_models = ensure_models_downloaded()
    log_lines.append(f"Models ready: {ok_models}")
    if not ok_models:
        return None, "\n".join(log_lines)

    try:
        out_dir = Path("outputs")
        out_dir.mkdir(exist_ok=True)
        out_path = out_dir / f"kandinsky_{int(time.time())}.mp4"
        result_path = generate_kandinsky_video(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=int(width),
            height=int(height),
            video_duration=int(duration),
            expand_prompt=int(expand_prompt),
            sample_steps=int(sample_steps) if sample_steps else None,
            guidance_weight=float(guidance_weight) if guidance_weight else None,
            scheduler_scale=float(scheduler_scale),
            output_filename=str(out_path),
        )
        log_lines.append(f"Video generated at {result_path}")
        return str(result_path), "\n".join(log_lines)
    except Exception as e:
        log_lines.append(f"❌ Generation failed: {e}")
        return None, "\n".join(log_lines)
195
 
196
 
197
  # ==========================================================
198
+ # Gradio UI
199
  # ==========================================================
200
def build_ui():
    """Assemble and return the Gradio Blocks interface."""
    with gr.Blocks(title="Kandinsky 5.0 β€” Text/Image to Video", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🎬 Kandinsky 5.0 β€” Text + Image β†’ Video Generator (Spaces GPU)")

        with gr.Row():
            # Left column: all generation controls.
            with gr.Column(scale=3):
                prompt_box = gr.Textbox(label="Prompt", value="A dog in a red hat", placeholder="Describe your scene")
                negative_box = gr.Textbox(
                    label="Negative Prompt",
                    value="Static, 2D cartoon, worst quality, deformed"
                )
                # NOTE(review): 768x768 is selectable here but is rejected by
                # the generator, which only allows 512x512, 512x768, 768x512.
                width_radio = gr.Radio([512, 768], value=768, label="Width")
                height_radio = gr.Radio([512, 768], value=512, label="Height")
                duration_slider = gr.Slider(1, 10, value=5, step=1, label="Video duration (seconds)")
                expand_box = gr.Checkbox(label="Expand Prompt", value=True)
                steps_num = gr.Number(label="Sample Steps (optional)", value=None)
                guidance_num = gr.Number(label="Guidance Weight (optional)", value=None)
                sched_num = gr.Number(label="Scheduler Scale", value=5.0)
                flash_box = gr.Checkbox(label="Install FlashAttention (optional)", value=False)
                redl_box = gr.Checkbox(label="Force re-download models", value=False)

                run_btn = gr.Button("πŸš€ Generate Video", variant="primary")
                status_box = gr.Textbox(label="Logs / Status", interactive=False, lines=10)
            # Right column: the rendered result.
            with gr.Column(scale=2):
                video_out = gr.Video(label="Generated Video")

        # Wire the button to the GPU-backed callback; input order must match
        # the generate() signature.
        run_btn.click(
            fn=generate,
            inputs=[prompt_box, negative_box, width_radio, height_radio, duration_slider,
                    expand_box, steps_num, guidance_num, sched_num,
                    flash_box, redl_box],
            outputs=[video_out, status_box]
        )
    return demo
234
 
 
 
 
 
 
235
 
236
  # ==========================================================
237
+ # Main entrypoint
238
  # ==========================================================
239
# Script entrypoint: pre-create the output directory, fetch the models once,
# then build and serve the Gradio UI.
if __name__ == "__main__":
    print("πŸš€ Starting Kandinsky T2V Gradio App...")
    Path("outputs").mkdir(exist_ok=True)  # destination for generated .mp4 files
    # Best effort at startup: a failed download is logged by the helper but
    # does not prevent the UI from launching (the callback retries it).
    ensure_models_downloaded()
    demo = build_ui()
    # Bind on all interfaces; Spaces/containers supply PORT, default 7860.
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))