John6666 committed
Commit 09ef7da · verified · 1 Parent(s): 35315ef

Upload 4 files

README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🎨
 colorFrom: gray
 colorTo: green
 sdk: gradio
-sdk_version: 5.25.2
+sdk_version: 5.46.1
 app_file: app.py
 pinned: false
 ---
app.py CHANGED
@@ -23,6 +23,14 @@ from transformers import CLIPTextModel, CLIPTokenizer
 from diffusers_vdm.pipeline import LatentVideoDiffusionPipeline
 from diffusers_vdm.utils import resize_and_center_crop, save_bcthw_as_mp4
 
+IS_ZERO_GPU = bool(os.getenv("SPACES_ZERO_GPU"))
+IS_GPU_MODE = True if IS_ZERO_GPU else (True if torch.cuda.is_available() else False)
+if IS_ZERO_GPU:
+    import subprocess
+    subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
+torch.set_float32_matmul_precision("high")
+torch.backends.cuda.matmul.allow_tf32 = True
+
 # Disable gradients globally
 torch.set_grad_enabled(False)
 
@@ -34,7 +42,6 @@ class ModifiedUNet(UNet2DConditionModel):
         unet_add_coded_conds(unet=m, added_number_count=1)
         return m
 
-
 model_name = 'lllyasviel/paints_undo_single_frame'
 tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer")
 text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder").to(torch.float16).to("cuda")
@@ -57,7 +64,6 @@ k_sampler = KDiffusionSampler(
     linear=True
 )
 
-
 def find_best_bucket(h, w, options):
     min_metric = float('inf')
     best_bucket = None
@@ -106,7 +112,13 @@ def interrogator_process(x):
     return image_description
 
 
-@spaces.GPU()
+def process_get_duration(input_fg, prompt, input_undo_steps, image_width, image_height, seed, steps, n_prompt, cfg, progres):
+    def_duration = 15.
+    def_steps = 50.
+    return int(def_duration * steps / def_steps)
+
+
+@spaces.GPU(duration=process_get_duration)
 def process(input_fg, prompt, input_undo_steps, image_width, image_height, seed, steps, n_prompt, cfg,
             progress=gr.Progress()):
     rng = torch.Generator(device="cuda").manual_seed(int(seed))
@@ -192,7 +204,14 @@ def process_video_inner(image_1, image_2, prompt, seed=123, steps=25, cfg_scale=
     return video, image_1, image_2
 
 
-@spaces.GPU(duration=360)
+def process_video_get_duration(keyframes, prompt, steps, cfg, fps, seed, progress):
+    def_duration = 180.
+    def_steps = 50.
+    def_fps = 4.
+    return int(def_duration * steps / def_steps * fps / def_fps)
+
+
+@spaces.GPU(duration=process_video_get_duration)
 def process_video(keyframes, prompt, steps, cfg, fps, seed, progress=gr.Progress()):
     result_frames = []
     cropped_images = []
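
Note on the app.py changes above: the SPACES_ZERO_GPU environment variable is only set inside Hugging Face ZeroGPU Spaces, so the new startup block clears the ZeroGPU offload cache and enables TF32 matmul only in that environment. The fixed @spaces.GPU() durations are also replaced with duration callables; as used here, spaces.GPU accepts a function that receives the same arguments as the decorated function and returns the number of GPU seconds to reserve, so the request scales with the chosen steps (and fps for the video path). A minimal sketch of that pattern, with a hypothetical my_task function that is not part of this repo:

import spaces  # Hugging Face ZeroGPU helper, available when the Space runs on ZeroGPU hardware

def my_task_get_duration(image, steps):
    # Hypothetical estimator: baseline of 15 s at 50 steps, scaled linearly with the step count.
    return int(15.0 * steps / 50.0)

@spaces.GPU(duration=my_task_get_duration)  # duration may be a constant or a callable taking the same arguments
def my_task(image, steps):
    # GPU-bound work runs here while the requested reservation is active.
    return image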
diffusers_vdm/improved_clip_vision.py CHANGED
@@ -35,7 +35,7 @@ def arbitrary_positional_encoding(p, H, W):
     return weight
 
 
-def improved_clipvision_embedding_forward(self, pixel_values):
+def improved_clipvision_embedding_forward(self, pixel_values, interpolate_pos_encoding=False, *args, **kwargs):
     pixel_values = pixel_values * 0.5 + 0.5
     pixel_values = preprocess(pixel_values)
     batch_size = pixel_values.shape[0]
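
Note on the improved_clip_vision.py change: newer transformers releases pass an interpolate_pos_encoding keyword down to the CLIP vision embedding module, so the monkey-patched forward has to accept it (it can simply ignore it), and the trailing *args/**kwargs keep the patch tolerant of further signature additions. A minimal sketch of the same patching pattern, assuming only the signature needs to widen (the _original_forward handle is illustrative, not code from this repo):

from transformers.models.clip.modeling_clip import CLIPVisionEmbeddings

_original_forward = CLIPVisionEmbeddings.forward  # keep a handle to the stock implementation

def patched_forward(self, pixel_values, interpolate_pos_encoding=False, *args, **kwargs):
    # Accept (and ignore) keywords that newer transformers versions forward to the embeddings,
    # then delegate to the stock implementation.
    return _original_forward(self, pixel_values)

CLIPVisionEmbeddings.forward = patched_forward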
requirements.txt CHANGED
@@ -13,4 +13,5 @@ xformers
 onnxruntime
 av
 torchvision
-spaces
+spaces
+pydantic==2.10.6
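
Note on the requirements.txt change: spaces stays in the list and pydantic is pinned to 2.10.6, presumably to avoid the API-schema errors some Gradio 5.x releases hit when pydantic 2.11.x gets installed. A quick post-install sanity check (hypothetical, not part of the repo):

import pydantic

# Confirm the pin actually resolved; a stray 2.11.x install has been reported to break
# Gradio's schema generation.
assert pydantic.VERSION.startswith("2.10"), f"unexpected pydantic {pydantic.VERSION}"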