Spaces commit: Upload 4 files

Files changed:
- README.md +1 -1
- app.py +23 -4
- diffusers_vdm/improved_clip_vision.py +1 -1
- requirements.txt +2 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🎨
 colorFrom: gray
 colorTo: green
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.46.1
 app_file: app.py
 pinned: false
 ---
app.py CHANGED
@@ -23,6 +23,14 @@ from transformers import CLIPTextModel, CLIPTokenizer
 from diffusers_vdm.pipeline import LatentVideoDiffusionPipeline
 from diffusers_vdm.utils import resize_and_center_crop, save_bcthw_as_mp4
 
+IS_ZERO_GPU = bool(os.getenv("SPACES_ZERO_GPU"))
+IS_GPU_MODE = True if IS_ZERO_GPU else (True if torch.cuda.is_available() else False)
+if IS_ZERO_GPU:
+    import subprocess
+    subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
+torch.set_float32_matmul_precision("high")
+torch.backends.cuda.matmul.allow_tf32 = True
+
 # Disable gradients globally
 torch.set_grad_enabled(False)
 
@@ -34,7 +42,6 @@ class ModifiedUNet(UNet2DConditionModel):
         unet_add_coded_conds(unet=m, added_number_count=1)
         return m
 
-
 model_name = 'lllyasviel/paints_undo_single_frame'
 tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer")
 text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder").to(torch.float16).to("cuda")
@@ -57,7 +64,6 @@ k_sampler = KDiffusionSampler(
     linear=True
 )
 
-
 def find_best_bucket(h, w, options):
     min_metric = float('inf')
     best_bucket = None
@@ -106,7 +112,13 @@ def interrogator_process(x):
     return image_description
 
 
-
+def process_get_duration(input_fg, prompt, input_undo_steps, image_width, image_height, seed, steps, n_prompt, cfg, progres):
+    def_duration = 15.
+    def_steps = 50.
+    return int(def_duration * steps / def_steps)
+
+
+@spaces.GPU(duration=process_get_duration)
 def process(input_fg, prompt, input_undo_steps, image_width, image_height, seed, steps, n_prompt, cfg,
             progress=gr.Progress()):
     rng = torch.Generator(device="cuda").manual_seed(int(seed))
@@ -192,7 +204,14 @@ def process_video_inner(image_1, image_2, prompt, seed=123, steps=25, cfg_scale=
     return video, image_1, image_2
 
 
-
+def process_video_get_duration(keyframes, prompt, steps, cfg, fps, seed, progress):
+    def_duration = 180.
+    def_steps = 50.
+    def_fps = 4.
+    return int(def_duration * steps / def_steps * fps / def_fps)
+
+
+@spaces.GPU(duration=process_video_get_duration)
 def process_video(keyframes, prompt, steps, cfg, fps, seed, progress=gr.Progress()):
     result_frames = []
     cropped_images = []
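Note: the new module-level block at the top of app.py detects whether the app is running on a ZeroGPU Space via the SPACES_ZERO_GPU environment variable set by the Spaces runtime, clears any stale offload cache, and enables TF32 matmuls. A minimal sketch of how such flags are typically consumed later in an app; the get_device helper below is illustrative and not part of this commit:

    import os
    import torch

    IS_ZERO_GPU = bool(os.getenv("SPACES_ZERO_GPU"))        # set by the ZeroGPU runtime
    IS_GPU_MODE = IS_ZERO_GPU or torch.cuda.is_available()  # local CUDA also counts as GPU mode

    def get_device() -> str:
        # Illustrative helper: choose the device string used for .to(...) calls.
        return "cuda" if IS_GPU_MODE else "cpu"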
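Note: the two *_get_duration helpers are wired into the spaces.GPU decorators. On ZeroGPU, the duration argument may be a callable that receives the same arguments as the decorated function and returns the number of seconds of GPU time to request, so the allocation scales with the selected step count (and with FPS on the video path). A minimal sketch of the pattern, assuming the spaces package is installed; the function names here are illustrative:

    import spaces

    def estimate_duration(image, steps):
        # Scale the requested GPU window with the sampling step count,
        # anchored at 15 s for the reference 50 steps (mirrors process_get_duration).
        return int(15.0 * steps / 50.0)

    @spaces.GPU(duration=estimate_duration)  # the callable receives the same args as run()
    def run(image, steps):
        ...  # GPU work happens inside the allocated window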
diffusers_vdm/improved_clip_vision.py CHANGED
@@ -35,7 +35,7 @@ def arbitrary_positional_encoding(p, H, W):
     return weight
 
 
-def improved_clipvision_embedding_forward(self, pixel_values):
+def improved_clipvision_embedding_forward(self, pixel_values, interpolate_pos_encoding=False, *args, **kwargs):
     pixel_values = pixel_values * 0.5 + 0.5
     pixel_values = preprocess(pixel_values)
     batch_size = pixel_values.shape[0]
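Note: the widened signature keeps the replacement embedding forward compatible with newer transformers releases, which pass an interpolate_pos_encoding keyword when calling the CLIP vision embedding layer. A minimal sketch of the same defensive pattern for a monkey-patched method; the patch target and wiring below are illustrative, not this repository's code:

    from transformers.models.clip.modeling_clip import CLIPVisionEmbeddings

    _original_forward = CLIPVisionEmbeddings.forward

    def patched_forward(self, pixel_values, interpolate_pos_encoding=False, *args, **kwargs):
        # Accept (and here ignore) extra keywords that newer callers may pass,
        # so the patch keeps working when the upstream call site changes.
        return _original_forward(self, pixel_values)

    CLIPVisionEmbeddings.forward = patched_forward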
requirements.txt CHANGED
@@ -13,4 +13,5 @@ xformers
 onnxruntime
 av
 torchvision
-spaces
+spaces
+pydantic==2.10.6