# Spaces: decart-ai/lucy-edit-dev
# Running on Zero
# File size: 7,839 Bytes
# 182aa1d

import gradio as gr
import torch
import spaces
from typing import List
from PIL import Image
from diffusers import AutoencoderKLWan, LucyEditPipeline
from diffusers.utils import export_to_video, load_video
import tempfile
import os

# Identifier passed to both from_pretrained() calls below.
model_id = "decart-ai/Lucy-Edit-Dev"
# The VAE is loaded on its own in float32 while the pipeline is loaded in
# bfloat16 — presumably to keep encode/decode precision higher; TODO confirm.
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = LucyEditPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
# Moved to the GPU once at import time; process_video() reuses this global.
pipe.to("cuda")

def calculate_resolution(input_width, input_height, max_dimension=832, *,
                         max_short_side=480, square_size=512):
    """Calculate an output resolution that preserves the input aspect ratio.

    Near-square inputs (aspect ratio within 0.95-1.05) map to a fixed square.
    Otherwise the longer side is capped at ``max_dimension`` (never upscaled
    beyond the input), the shorter side is capped at ``max_short_side``, and
    both sides are rounded to a multiple of 16.

    Args:
        input_width: Width of the source video in pixels (must be > 0).
        input_height: Height of the source video in pixels (must be > 0).
        max_dimension: Upper bound for the longer side.
        max_short_side: Upper bound for the shorter side.
        square_size: Side length returned for near-square inputs.

    Returns:
        A ``(width, height)`` tuple of ints; for non-square inputs each is a
        multiple of 16 and at least 16.

    Raises:
        ValueError: If either input dimension is not positive.
    """
    if input_width <= 0 or input_height <= 0:
        raise ValueError(
            f"Video dimensions must be positive, got {input_width}x{input_height}"
        )

    def round_to_16(x):
        # Clamp to one 16-pixel block so tiny or extreme-aspect inputs can
        # never round down to a zero-sized dimension.
        return max(16, int(round(x / 16.0) * 16))

    aspect_ratio = input_width / input_height

    # Square videos
    if 0.95 <= aspect_ratio <= 1.05:
        return square_size, square_size

    if aspect_ratio > 1:
        # Landscape: fit the width first, then cap the height.
        new_width = min(max_dimension, input_width)
        new_height = int(new_width / aspect_ratio)
        if new_height > max_short_side:
            new_height = max_short_side
            new_width = int(new_height * aspect_ratio)
    else:
        # Portrait: fit the height first, then cap the width.
        new_height = min(max_dimension, input_height)
        new_width = int(new_height * aspect_ratio)
        if new_width > max_short_side:
            new_width = max_short_side
            new_height = int(new_width / aspect_ratio)

    return round_to_16(new_width), round_to_16(new_height)

@spaces.GPU(duration=90)
def process_video(
    video_path,
    prompt,
    negative_prompt,
    num_frames,
    auto_resize,
    manual_height,
    manual_width,
    guidance_scale,
    progress=gr.Progress()
):
    """Edit a video with the Lucy Edit pipeline and return the result's path.

    Args:
        video_path: Path of the input video file.
        prompt: Text describing the desired edit.
        negative_prompt: Text describing what should not appear.
        num_frames: Maximum number of leading frames to edit; lowered to the
            clip length when the video is shorter.
        auto_resize: When true, the output size comes from
            ``calculate_resolution``; otherwise the manual values are used.
        manual_height: Output height used when ``auto_resize`` is off.
        manual_width: Output width used when ``auto_resize`` is off.
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker updated at each stage.

    Returns:
        Path of a temporary ``.mp4`` file containing the edited video.

    Raises:
        gr.Error: If no video was supplied or any processing step fails.
    """
    if not video_path:
        raise gr.Error("Please upload an input video first.")

    # Slider values may arrive as floats; slicing and resizing need ints.
    num_frames = int(num_frames)

    try:
        # Load and preprocess video
        progress(0.2, desc="Loading video...")

        # Decode the file once; the same frames provide the source
        # dimensions and the pipeline input.
        frames = load_video(video_path)
        if not frames:
            raise ValueError("Could not load video or video is empty")
        original_width, original_height = frames[0].size

        # Calculate dimensions
        if auto_resize:
            width, height = calculate_resolution(original_width, original_height)
            gr.Info(f"Auto-resized from {original_width}x{original_height} to {width}x{height} (preserving aspect ratio)")
        else:
            width, height = int(manual_width), int(manual_height)
            if abs((original_width/original_height) - (width/height)) > 0.1:
                gr.Warning(f"Output aspect ratio ({width}x{height}) differs significantly from input ({original_width}x{original_height}). Video may appear stretched.")

        # Keep at most num_frames leading frames and resize them to the target.
        video = [frame.resize((width, height)) for frame in frames[:num_frames]]

        # Ensure we have the right number of frames
        if len(video) < num_frames:
            gr.Warning(f"Video has only {len(video)} frames, using all available frames.")
            num_frames = len(video)

        # Generate edited video
        progress(0.5, desc="Generating edited video...")
        output = pipe(
            prompt=prompt,
            video=video,
            negative_prompt=negative_prompt,
            height=height,
            width=width,
            num_frames=num_frames,
            guidance_scale=guidance_scale,
        ).frames[0]

        # Export to temporary file. Only the unique name is reserved here;
        # delete=False keeps the file on disk after the handle closes so it
        # can be written below and returned to the caller.
        progress(0.9, desc="Exporting video...")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
            output_path = tmp_file.name

        export_to_video(output, output_path, fps=24)

        progress(1.0, desc="Complete!")
        return output_path

    except Exception as e:
        # gr.Error is an exception: it is only reported when raised, so
        # constructing it without `raise` silently discarded the failure.
        raise gr.Error(f"An error occurred: {str(e)}") from e

# Create Gradio interface
# Two-column layout: inputs and settings on the left, edited video on the right.
with gr.Blocks(title="Lucy Edit - Video Editing with Text") as demo:
    gr.Markdown("""# 🎬 Lucy Edit - AI Video Editing""")
    
    with gr.Row():
        with gr.Column(scale=1):
            # Input controls
            video_input = gr.Video(label="Input Video")
            
            # `placeholder` was passed twice (a SyntaxError). The example
            # prompt is now the pre-filled default value; the hint text
            # remains the placeholder shown when the box is empty.
            prompt = gr.Textbox(
                label="Edit Prompt",
                placeholder="Describe what you want to change in the video...",
                lines=3,
                value="Change the apron and blouse to a classic clown costume"
            )
            
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (optional)",
                    placeholder="Describe what you DON'T want in the video...",
                    lines=2
                )
                auto_resize = gr.Checkbox(
                    label="Auto-resize (preserve aspect ratio)",
                    value=True,
                    info="Automatically calculate dimensions based on input video"
                )
                
                num_frames = gr.Slider(
                    label="Number of Frames",
                    minimum=1,
                    maximum=120,
                    value=81,
                    step=1,
                    info="More frames = longer processing time"
                )
                
                # Manual output size; process_video only reads these when
                # auto-resize is unchecked.
                with gr.Row():
                    manual_height = gr.Slider(
                        label="Height (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=480,
                        step=32
                    )
                    manual_width = gr.Slider(
                        label="Width (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=832,
                        step=32
                    )
                
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=20.0,
                    value=5.0,
                    step=0.5,
                    info="Higher values follow the prompt more strictly"
                )
            
            generate_btn = gr.Button("🎨 Generate Edited Video", variant="primary")
            
        with gr.Column(scale=1):
            video_output = gr.Video(label="Edited Video")
    
    # Event handlers: the input order must match process_video's parameters.
    generate_btn.click(
        fn=process_video,
        inputs=[
            video_input,
            prompt,
            negative_prompt,
            num_frames,
            auto_resize,
            manual_height,
            manual_width,
            guidance_scale
        ],
        outputs=video_output
    )

# Start the Gradio app only when this file is run as a script.
# NOTE(review): share=True requests a public share link; this is probably
# unnecessary when the app is hosted as a Space — confirm before changing.
if __name__ == "__main__":
    demo.launch(share=True)