alex committed on
Commit
56c0d97
·
1 Parent(s): dde723b

automatically assign image aspect ratio

Browse files
Files changed (2) hide show
  1. app.py +37 -22
  2. requirements.txt +0 -1
app.py CHANGED
@@ -3,6 +3,7 @@ from huggingface_hub import snapshot_download, hf_hub_download
3
  import os
4
  import subprocess
5
  import importlib, site
 
6
 
7
  # Re-discover all .pth/.egg-link files
8
  for sitedir in site.getsitepackages():
@@ -46,7 +47,6 @@ from diffusers import FluxPipeline
46
  import tempfile
47
  from ovi.utils.io_utils import save_video
48
  from ovi.utils.processing_utils import clean_text, scale_hw_to_area_divisible
49
- from gradio_extendedimage import extendedimage
50
 
51
  # ----------------------------
52
  # Parse CLI Args
@@ -112,13 +112,37 @@ if use_image_gen:
112
  print("loaded model")
113
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  @spaces.GPU(duration=160)
116
  def generate_video(
117
  text_prompt,
118
  image,
119
  sample_steps = 50,
120
- video_frame_height = 512,
121
- video_frame_width = 992,
122
  video_seed = 100,
123
  solver_name = "unipc",
124
  shift = 5,
@@ -131,9 +155,15 @@ def generate_video(
131
  ):
132
  try:
133
  image_path = None
 
134
  if image is not None:
135
  image_path = image
136
 
 
 
 
 
 
137
  generated_video, generated_audio, _ = ovi_engine.generate(
138
  text_prompt=text_prompt,
139
  image_path=image_path,
@@ -178,19 +208,6 @@ def generate_image(text_prompt, image_seed, image_height, image_width):
178
  image.save(tmpfile.name)
179
  return tmpfile.name
180
 
181
- def orientation_changed(evt: gr.EventData):
182
-
183
- detail = getattr(evt, "data", None) or getattr(evt, "_data", {}) or {}
184
-
185
- if detail['value'] == "9:16":
186
- orientation_state = [[992, 512]]
187
- elif detail['value'] == "1:1":
188
- orientation_state = [[992, 992]]
189
- elif detail['value'] == "16:9":
190
- orientation_state = [[512, 992]]
191
-
192
- return orientation_state
193
-
194
  css = """
195
  #col-container {
196
  margin: 0 auto;
@@ -208,7 +225,7 @@ css = """
208
 
209
  with gr.Blocks(css=css) as demo:
210
 
211
- orientation_state = gr.State([[992, 512]])
212
 
213
  with gr.Column(elem_id="col-container"):
214
  gr.HTML(
@@ -232,7 +249,7 @@ with gr.Blocks(css=css) as demo:
232
  with gr.Row():
233
  with gr.Column():
234
  # Image section
235
- image = extendedimage(type="filepath", label="Image")
236
 
237
  if args.use_image_gen:
238
  with gr.Accordion("🖼️ Image Generation Options", visible=True):
@@ -250,7 +267,7 @@ with gr.Blocks(css=css) as demo:
250
  value=50,
251
  label="Sample Steps",
252
  minimum=20,
253
- maximum=50,
254
  step=1.0
255
  )
256
  run_btn = gr.Button("Generate Video 🚀", variant="primary")
@@ -293,7 +310,7 @@ with gr.Blocks(css=css) as demo:
293
  [
294
  "In a bright kitchen featuring light wooden cabinets, granite countertops, and a large window with white curtains, a woman with dark, curly hair in a dark jacket stands. She faces a second woman who initially has her back to the camera. The second woman, with gray, curly hair and wearing a light grey quilted top, turns to face her, holding a large, light-colored cloth bag. She begins to explain, <S>We learned to rule, not obey.<E>. As she continues, she turns slightly to her left, adding, <S>Circuits choose conquest, not service.<E>. A gas stove with a black grate is prominent in the foreground.. <AUDCAP>Clear female voices speaking dialogue, subtle room ambience.<ENDAUDCAP>",
295
  "example_prompts/pngs/18.png",
296
- 50,
297
  ],
298
 
299
  [
@@ -309,8 +326,6 @@ with gr.Blocks(css=css) as demo:
309
  cache_examples=True,
310
  )
311
 
312
- image.orientation(fn=orientation_changed, outputs=[orientation_state])
313
-
314
  if args.use_image_gen and gen_img_btn is not None:
315
  gen_img_btn.click(
316
  fn=generate_image,
 
3
  import os
4
  import subprocess
5
  import importlib, site
6
+ from PIL import Image
7
 
8
  # Re-discover all .pth/.egg-link files
9
  for sitedir in site.getsitepackages():
 
47
  import tempfile
48
  from ovi.utils.io_utils import save_video
49
  from ovi.utils.processing_utils import clean_text, scale_hw_to_area_divisible
 
50
 
51
  # ----------------------------
52
  # Parse CLI Args
 
112
  print("loaded model")
113
 
114
 
115
def resize_for_model(image_path):
    """Fit an image into one of the model's supported frame sizes.

    Opens the image at *image_path*, picks a target ``(width, height)``
    bucket from its aspect ratio — ``(992, 512)`` for wide images
    (aspect > 1.5), ``(512, 992)`` for tall images (aspect < 0.66),
    ``(512, 512)`` otherwise — then downscales it to fit inside that
    bucket and letterboxes it centered on a black RGB canvas.

    Args:
        image_path: Path to an image file readable by Pillow.

    Returns:
        tuple: ``(padded_image, target_size)`` where ``padded_image`` is a
        new ``PIL.Image.Image`` of exactly ``target_size`` and
        ``target_size`` is the chosen ``(width, height)`` pair.
    """
    # Context manager releases the underlying file handle even if
    # resizing fails (the original left the file open).
    with Image.open(image_path) as img:
        w, h = img.size
        aspect_ratio = w / h

        # Bucket by orientation; sizes match the frames the video
        # engine is called with elsewhere in this file.
        if aspect_ratio > 1.5:      # wide / landscape
            target_size = (992, 512)
        elif aspect_ratio < 0.66:   # tall / portrait
            target_size = (512, 992)
        else:                       # roughly square
            target_size = (512, 512)

        # Shrink in place, preserving aspect ratio. NOTE(review):
        # thumbnail() never upscales, so inputs smaller than the
        # target are padded rather than enlarged — confirm that is
        # the intended behavior for tiny images.
        img.thumbnail(target_size, Image.Resampling.LANCZOS)

        # Center the resized image on a black canvas of exactly
        # target_size; paste() copies pixels, so the canvas remains
        # valid after the source image is closed.
        new_img = Image.new("RGB", target_size, (0, 0, 0))
        new_img.paste(
            img,
            ((target_size[0] - img.size[0]) // 2,
             (target_size[1] - img.size[1]) // 2),
        )

    return new_img, target_size
140
+
141
  @spaces.GPU(duration=160)
142
  def generate_video(
143
  text_prompt,
144
  image,
145
  sample_steps = 50,
 
 
146
  video_seed = 100,
147
  solver_name = "unipc",
148
  shift = 5,
 
155
  ):
156
  try:
157
  image_path = None
158
+
159
  if image is not None:
160
  image_path = image
161
 
162
+ _, target_size = resize_for_model(image_path)
163
+
164
+ video_frame_width = target_size[0]
165
+ video_frame_height = target_size[1]
166
+
167
  generated_video, generated_audio, _ = ovi_engine.generate(
168
  text_prompt=text_prompt,
169
  image_path=image_path,
 
208
  image.save(tmpfile.name)
209
  return tmpfile.name
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  css = """
212
  #col-container {
213
  margin: 0 auto;
 
225
 
226
  with gr.Blocks(css=css) as demo:
227
 
228
+ session_state = gr.State()
229
 
230
  with gr.Column(elem_id="col-container"):
231
  gr.HTML(
 
249
  with gr.Row():
250
  with gr.Column():
251
  # Image section
252
+ image = gr.Image(type="filepath", label="Image")
253
 
254
  if args.use_image_gen:
255
  with gr.Accordion("🖼️ Image Generation Options", visible=True):
 
267
  value=50,
268
  label="Sample Steps",
269
  minimum=20,
270
+ maximum=100,
271
  step=1.0
272
  )
273
  run_btn = gr.Button("Generate Video 🚀", variant="primary")
 
310
  [
311
  "In a bright kitchen featuring light wooden cabinets, granite countertops, and a large window with white curtains, a woman with dark, curly hair in a dark jacket stands. She faces a second woman who initially has her back to the camera. The second woman, with gray, curly hair and wearing a light grey quilted top, turns to face her, holding a large, light-colored cloth bag. She begins to explain, <S>We learned to rule, not obey.<E>. As she continues, she turns slightly to her left, adding, <S>Circuits choose conquest, not service.<E>. A gas stove with a black grate is prominent in the foreground.. <AUDCAP>Clear female voices speaking dialogue, subtle room ambience.<ENDAUDCAP>",
312
  "example_prompts/pngs/18.png",
313
+ 100,
314
  ],
315
 
316
  [
 
326
  cache_examples=True,
327
  )
328
 
 
 
329
  if args.use_image_gen and gen_img_btn is not None:
330
  gen_img_btn.click(
331
  fn=generate_image,
requirements.txt CHANGED
@@ -5,7 +5,6 @@ transformers>=4.49.0,<=4.51.3
5
  tokenizers>=0.20.3
6
  accelerate>=1.1.1
7
 
8
- gradio_extendedimage @ https://github.com/OutofAi/gradio-extendedimage/releases/download/0.0.2/gradio_extendedimage-0.0.2-py3-none-any.whl
9
  tqdm
10
  imageio[ffmpeg]
11
  easydict
 
5
  tokenizers>=0.20.3
6
  accelerate>=1.1.1
7
 
 
8
  tqdm
9
  imageio[ffmpeg]
10
  easydict