Spaces: Running on Zero
update
app.py CHANGED
@@ -1,12 +1,95 @@
+import os
+
+import numpy as np
 import spaces
 import gradio as gr
-
+import torch
+from diffusers.training_utils import set_seed
+
+from depthcrafter.depth_crafter_ppl import DepthCrafterPipeline
+from depthcrafter.unet import DiffusersUNetSpatioTemporalConditionModelDepthCrafter
+
+import uuid
+import random
+from huggingface_hub import hf_hub_download
+
+from depthcrafter.utils import read_video_frames, vis_sequence_depth, save_video
 
 examples = [
     ["examples/example_01.mp4", 25, 1.2, 1024, 195],
 ]
 
 
+unet = DiffusersUNetSpatioTemporalConditionModelDepthCrafter.from_pretrained(
+    "tencent/DepthCrafter",
+    subfolder="unet",
+    low_cpu_mem_usage=True,
+    torch_dtype=torch.float16,
+)
+pipe = DepthCrafterPipeline.from_pretrained(
+    "stabilityai/stable-video-diffusion-img2vid-xt",
+    unet=unet,
+    torch_dtype=torch.float16,
+    variant="fp16",
+)
+pipe.to("cuda")
+
+
+@spaces.GPU(duration=120)
+def infer_depth(
+    video: str,
+    num_denoising_steps: int,
+    guidance_scale: float,
+    max_res: int = 1024,
+    process_length: int = 195,
+    #
+    save_folder: str = "./demo_output",
+    window_size: int = 110,
+    overlap: int = 25,
+    target_fps: int = 15,
+    seed: int = 42,
+    track_time: bool = True,
+    save_npz: bool = False,
+):
+    set_seed(seed)
+
+    frames, target_fps = read_video_frames(video, process_length, target_fps, max_res)
+    print(f"==> video name: {video}, frames shape: {frames.shape}")
+
+    # inference the depth map using the DepthCrafter pipeline
+    with torch.inference_mode():
+        res = pipe(
+            frames,
+            height=frames.shape[1],
+            width=frames.shape[2],
+            output_type="np",
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_denoising_steps,
+            window_size=window_size,
+            overlap=overlap,
+            track_time=track_time,
+        ).frames[0]
+    # convert the three-channel output to a single channel depth map
+    res = res.sum(-1) / res.shape[-1]
+    # normalize the depth map to [0, 1] across the whole video
+    res = (res - res.min()) / (res.max() - res.min())
+    # visualize the depth map and save the results
+    vis = vis_sequence_depth(res)
+    # save the depth map and visualization with the target FPS
+    save_path = os.path.join(save_folder, os.path.splitext(os.path.basename(video))[0])
+    os.makedirs(os.path.dirname(save_path), exist_ok=True)
+    if save_npz:
+        np.savez_compressed(save_path + ".npz", depth=res)
+    save_video(res, save_path + "_depth.mp4", fps=target_fps)
+    save_video(vis, save_path + "_vis.mp4", fps=target_fps)
+    save_video(frames, save_path + "_input.mp4", fps=target_fps)
+    return [
+        save_path + "_input.mp4",
+        save_path + "_vis.mp4",
+        # save_path + "_depth.mp4",
+    ]
+
+
 def construct_demo():
     with gr.Blocks(analytics_enabled=False) as depthcrafter_iface:
         gr.Markdown(
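For reference (not part of the commit): the new module-level pipeline and the `infer_depth` entry point can be exercised without the Gradio UI. A minimal smoke-test sketch, assuming it runs inside this Space's repo so the `depthcrafter` package, the bundled example video, and a GPU are available:

# Standalone smoke test for infer_depth (illustrative sketch, not in the commit).
# Assumes the Space repo layout: app.py, the depthcrafter package, and
# examples/example_01.mp4, with a GPU so the module-level pipe loads on import.
from app import infer_depth

if __name__ == "__main__":
    # Same settings as the bundled example row:
    # 25 denoising steps, guidance 1.2, max resolution 1024, up to 195 frames.
    input_mp4, vis_mp4 = infer_depth(
        "examples/example_01.mp4",
        num_denoising_steps=25,
        guidance_scale=1.2,
        max_res=1024,
        process_length=195,
        save_npz=True,  # also dump the normalized depth array as .npz
    )
    print("saved:", input_mp4, vis_mp4)

The two returned paths mirror the outputs wired to the demo's two video components: the preprocessed input clip and the depth visualization.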
@@ -29,11 +112,6 @@ def construct_demo():
             <a style='font-size:18px;color: #000000' href='https://depthcrafter.github.io/'> [Project Page] </a> </div>
             """
         )
-        # demo
-        depthcrafter_demo = DepthCrafterDemo(
-            unet_path="tencent/DepthCrafter",
-            pre_train_path="stabilityai/stable-video-diffusion-img2vid-xt",
-        )
 
         with gr.Row(equal_height=True):
             with gr.Column(scale=1):
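This hunk drops the `DepthCrafterDemo` instance that used to be built inside `construct_demo()`: with the move to ZeroGPU, model setup now happens once at module import (first hunk) and GPU time is claimed only inside the decorated handler. A minimal sketch of that pattern, with a placeholder model standing in for the real pipeline (illustrative only; it assumes the `spaces` package of a ZeroGPU Space):

# ZeroGPU pattern sketch; the Linear layer is a stand-in for the real pipeline.
import spaces
import torch

# Build (and place) the model once, when the app module is imported,
# mirroring the module-level unet/pipe setup in the first hunk.
model = torch.nn.Linear(4, 1).to("cuda")

@spaces.GPU(duration=120)  # GPU is attached only for the duration of each call
def predict(values):
    with torch.inference_mode():
        x = torch.tensor(values, dtype=torch.float32, device="cuda")
        return model(x).item()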
@@ -105,12 +183,12 @@ def construct_demo():
                 process_length,
             ],
             outputs=[output_video_1, output_video_2],
-            fn=
+            fn=infer_depth,
             cache_examples=False,
         )
 
         generate_btn.click(
-            fn=
+            fn=infer_depth,
             inputs=[
                 input_video,
                 num_denoising_steps,
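The final hunk only redirects the callbacks: both the examples widget and the generate button now call the module-level `infer_depth` directly instead of a method on the removed `DepthCrafterDemo` instance. A stripped-down sketch of this wiring pattern (component names are illustrative, not the ones defined elsewhere in app.py, and it assumes the bundled example video exists):

# Minimal Gradio wiring sketch mirroring the fn=infer_depth hookup in the diff.
# Component names are hypothetical; fake_infer stands in for infer_depth.
import gradio as gr

def fake_infer(video, steps):
    # Echo the input twice, matching the two-video output signature.
    return video, video

with gr.Blocks() as demo:
    inp = gr.Video(label="Input Video")
    steps = gr.Slider(1, 50, value=25, step=1, label="Denoising steps")
    out1 = gr.Video(label="Preprocessed video")
    out2 = gr.Video(label="Depth visualization")
    btn = gr.Button("Generate")

    # Button and examples both point at the same callable, as in the commit.
    btn.click(fn=fake_infer, inputs=[inp, steps], outputs=[out1, out2])
    gr.Examples(
        examples=[["examples/example_01.mp4", 25]],
        inputs=[inp, steps],
        outputs=[out1, out2],
        fn=fake_infer,
        cache_examples=False,
    )

if __name__ == "__main__":
    demo.launch()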