Spaces: Running on Zero
update
app.py CHANGED
@@ -1,12 +1,95 @@
+import os
+
+import numpy as np
 import spaces
 import gradio as gr
-
+import torch
+from diffusers.training_utils import set_seed
+
+from depthcrafter.depth_crafter_ppl import DepthCrafterPipeline
+from depthcrafter.unet import DiffusersUNetSpatioTemporalConditionModelDepthCrafter
+
+import uuid
+import random
+from huggingface_hub import hf_hub_download
+
+from depthcrafter.utils import read_video_frames, vis_sequence_depth, save_video
 
 examples = [
     ["examples/example_01.mp4", 25, 1.2, 1024, 195],
 ]
 
 
+unet = DiffusersUNetSpatioTemporalConditionModelDepthCrafter.from_pretrained(
+    "tencent/DepthCrafter",
+    subfolder="unet",
+    low_cpu_mem_usage=True,
+    torch_dtype=torch.float16,
+)
+pipe = DepthCrafterPipeline.from_pretrained(
+    "stabilityai/stable-video-diffusion-img2vid-xt",
+    unet=unet,
+    torch_dtype=torch.float16,
+    variant="fp16",
+)
+pipe.to("cuda")
+
+
+@spaces.GPU(duration=120)
+def infer_depth(
+    video: str,
+    num_denoising_steps: int,
+    guidance_scale: float,
+    max_res: int = 1024,
+    process_length: int = 195,
+    #
+    save_folder: str = "./demo_output",
+    window_size: int = 110,
+    overlap: int = 25,
+    target_fps: int = 15,
+    seed: int = 42,
+    track_time: bool = True,
+    save_npz: bool = False,
+):
+    set_seed(seed)
+
+    frames, target_fps = read_video_frames(video, process_length, target_fps, max_res)
+    print(f"==> video name: {video}, frames shape: {frames.shape}")
+
+    # inference the depth map using the DepthCrafter pipeline
+    with torch.inference_mode():
+        res = pipe(
+            frames,
+            height=frames.shape[1],
+            width=frames.shape[2],
+            output_type="np",
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_denoising_steps,
+            window_size=window_size,
+            overlap=overlap,
+            track_time=track_time,
+        ).frames[0]
+    # convert the three-channel output to a single channel depth map
+    res = res.sum(-1) / res.shape[-1]
+    # normalize the depth map to [0, 1] across the whole video
+    res = (res - res.min()) / (res.max() - res.min())
+    # visualize the depth map and save the results
+    vis = vis_sequence_depth(res)
+    # save the depth map and visualization with the target FPS
+    save_path = os.path.join(save_folder, os.path.splitext(os.path.basename(video))[0])
+    os.makedirs(os.path.dirname(save_path), exist_ok=True)
+    if save_npz:
+        np.savez_compressed(save_path + ".npz", depth=res)
+    save_video(res, save_path + "_depth.mp4", fps=target_fps)
+    save_video(vis, save_path + "_vis.mp4", fps=target_fps)
+    save_video(frames, save_path + "_input.mp4", fps=target_fps)
+    return [
+        save_path + "_input.mp4",
+        save_path + "_vis.mp4",
+        # save_path + "_depth.mp4",
+    ]
+
+
 def construct_demo():
     with gr.Blocks(analytics_enabled=False) as depthcrafter_iface:
         gr.Markdown(
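For reference (not part of the commit): the new module-level pipeline and the `infer_depth` entry point can be exercised without the Gradio UI. A minimal smoke-test sketch, assuming it runs inside this Space's repo so the `depthcrafter` package, the bundled example video, and a GPU are available:

# Standalone smoke test for infer_depth (illustrative sketch, not in the commit).
# Assumes the Space repo layout: app.py, the depthcrafter package, and
# examples/example_01.mp4, with a GPU so the module-level pipe loads on import.
from app import infer_depth

if __name__ == "__main__":
    # Same settings as the bundled example row:
    # 25 denoising steps, guidance 1.2, max resolution 1024, up to 195 frames.
    input_mp4, vis_mp4 = infer_depth(
        "examples/example_01.mp4",
        num_denoising_steps=25,
        guidance_scale=1.2,
        max_res=1024,
        process_length=195,
        save_npz=True,  # also dump the normalized depth array as .npz
    )
    print("saved:", input_mp4, vis_mp4)

The two returned paths mirror the outputs wired to the demo's two video components: the preprocessed input clip and the depth visualization.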
@@ -29,11 +112,6 @@ def construct_demo():
             <a style='font-size:18px;color: #000000' href='https://depthcrafter.github.io/'> [Project Page] </a> </div>
             """
         )
-        # demo
-        depthcrafter_demo = DepthCrafterDemo(
-            unet_path="tencent/DepthCrafter",
-            pre_train_path="stabilityai/stable-video-diffusion-img2vid-xt",
-        )
 
         with gr.Row(equal_height=True):
             with gr.Column(scale=1):
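This hunk drops the `DepthCrafterDemo` instance that used to be built inside `construct_demo()`: with the move to ZeroGPU, model setup now happens once at module import (first hunk) and GPU time is claimed only inside the decorated handler. A minimal sketch of that pattern, with a placeholder model standing in for the real pipeline (illustrative only; it assumes the `spaces` package of a ZeroGPU Space):

# ZeroGPU pattern sketch; the Linear layer is a stand-in for the real pipeline.
import spaces
import torch

# Build (and place) the model once, when the app module is imported,
# mirroring the module-level unet/pipe setup in the first hunk.
model = torch.nn.Linear(4, 1).to("cuda")

@spaces.GPU(duration=120)  # GPU is attached only for the duration of each call
def predict(values):
    with torch.inference_mode():
        x = torch.tensor(values, dtype=torch.float32, device="cuda")
        return model(x).item()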
@@ -105,12 +183,12 @@ def construct_demo():
                 process_length,
             ],
             outputs=[output_video_1, output_video_2],
-            fn=
+            fn=infer_depth,
             cache_examples=False,
         )
 
         generate_btn.click(
-            fn=
+            fn=infer_depth,
             inputs=[
                 input_video,
                 num_denoising_steps,
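The final hunk only redirects the callbacks: both the examples widget and the generate button now call the module-level `infer_depth` directly instead of a method on the removed `DepthCrafterDemo` instance. A stripped-down sketch of this wiring pattern (component names are illustrative, not the ones defined elsewhere in app.py, and it assumes the bundled example video exists):

# Minimal Gradio wiring sketch mirroring the fn=infer_depth hookup in the diff.
# Component names are hypothetical; fake_infer stands in for infer_depth.
import gradio as gr

def fake_infer(video, steps):
    # Echo the input twice, matching the two-video output signature.
    return video, video

with gr.Blocks() as demo:
    inp = gr.Video(label="Input Video")
    steps = gr.Slider(1, 50, value=25, step=1, label="Denoising steps")
    out1 = gr.Video(label="Preprocessed video")
    out2 = gr.Video(label="Depth visualization")
    btn = gr.Button("Generate")

    # Button and examples both point at the same callable, as in the commit.
    btn.click(fn=fake_infer, inputs=[inp, steps], outputs=[out1, out2])
    gr.Examples(
        examples=[["examples/example_01.mp4", 25]],
        inputs=[inp, steps],
        outputs=[out1, out2],
        fn=fake_infer,
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.launch()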