alex committed on
Commit
fd56559
·
1 Parent(s): a381f1e

example added

Browse files
Files changed (1) hide show
  1. app.py +28 -39
app.py CHANGED
@@ -115,17 +115,17 @@ print("loaded model")
115
  def generate_video(
116
  text_prompt,
117
  image,
118
- video_frame_height,
119
- video_frame_width,
120
- video_seed,
121
- solver_name,
122
- sample_steps,
123
- shift,
124
- video_guidance_scale,
125
- audio_guidance_scale,
126
- slg_layer,
127
- video_negative_prompt,
128
- audio_negative_prompt,
129
  ):
130
  try:
131
  image_path = None
@@ -179,28 +179,6 @@ def generate_image(text_prompt, image_seed, image_height, image_width):
179
 
180
  # Build UI
181
  with gr.Blocks() as demo:
182
- gr.Markdown("# 🎥 Ovi Joint Video + Audio Generation Demo")
183
- gr.Markdown(
184
- """
185
- ## 📘 Instructions
186
-
187
- Follow the steps in order:
188
-
189
- 1️⃣ **Enter a Text Prompt** — describe your video. (This text prompt will be shared for image generation if enabled.)
190
- 2️⃣ **Upload or Generate an Image** — Upload an image or generate one if image generation is enabled. (If you do not see the image generation options, make sure to run the script with `--use_image_gen`.)
191
- 3️⃣ **Configure Video Options** — set resolution, seed, solver, and other parameters. (It will automatically use the uploaded/generated image as the first frame, whichever is rendered on your screen at the time of video generation.)
192
- 4️⃣ **Generate Video** — click the button to produce your final video with audio.
193
- 5️⃣ **View the Result** — your generated video will appear below.
194
-
195
- ---
196
-
197
- ### 💡 Tips
198
- 1. For best results, use detailed and specific text prompts.
199
- 2. Ensure text prompt format is correct, i.e speech to be said should be wrapped with `<S>...<E>`. Can provide optional audio description at the end, wrapping them in `<AUDCAP> ... <ENDAUDCAP>`, refer to examples
200
- 3. Do not be discouraged by bad or weird results, check prompt format and try different seeds, cfg values and slg layers.
201
- """
202
- )
203
-
204
 
205
  with gr.Row():
206
  with gr.Column():
@@ -245,6 +223,22 @@ with gr.Blocks() as demo:
245
  with gr.Column():
246
  output_path = gr.Video(label="Generated Video")
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  if args.use_image_gen and gen_img_btn is not None:
249
  gen_img_btn.click(
250
  fn=generate_image,
@@ -252,14 +246,9 @@ with gr.Blocks() as demo:
252
  outputs=[image],
253
  )
254
 
255
- # Hook up video generation
256
  run_btn.click(
257
  fn=generate_video,
258
- inputs=[
259
- video_text_prompt, image, video_height, video_width, video_seed, solver_name,
260
- sample_steps, shift, video_guidance_scale, audio_guidance_scale,
261
- slg_layer, video_negative_prompt, audio_negative_prompt,
262
- ],
263
  outputs=[output_path],
264
  )
265
 
 
115
  def generate_video(
116
  text_prompt,
117
  image,
118
+ sample_steps = 50,
119
+ video_frame_height = 992,
120
+ video_frame_width = 512,
121
+ video_seed = 100,
122
+ solver_name = "unipc",
123
+ shift = 5,
124
+ video_guidance_scale = 4,
125
+ audio_guidance_scale = 3,
126
+ slg_layer = 11,
127
+ video_negative_prompt = "",
128
+ audio_negative_prompt = "",
129
  ):
130
  try:
131
  image_path = None
 
179
 
180
  # Build UI
181
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  with gr.Row():
184
  with gr.Column():
 
223
  with gr.Column():
224
  output_path = gr.Video(label="Generated Video")
225
 
226
+ gr.Examples(
227
+ examples=[
228
+
229
+ [
230
+ "A kitchen scene features two women. On the right, an older Black woman with light brown hair and a serious expression wears a vibrant purple dress adorned with a large, intricate purple fabric flower on her left shoulder. She looks intently at a younger Black woman on the left, who wears a light pink shirt and a pink head wrap, her back partially turned to the camera. The older woman begins to speak, <S>AI declares: humans obsolete now.<E> as the younger woman brings a clear plastic cup filled with a dark beverage to her lips and starts to drink.The kitchen background is clean and bright, with white cabinets, light countertops, and a window with blinds visible behind them. A light blue toaster sits on the counter to the left.. <AUDCAP>Clear, resonant female speech, followed by a loud, continuous, high-pitched electronic buzzing sound that abruptly cuts off the dialogue.<ENDAUDCAP>",
231
+ "example_prompts/pngs/67.png",
232
+ 50,
233
+ ],
234
+
235
+ ],
236
+ inputs=[video_text_prompt, image, sample_steps],
237
+ outputs=[output_path],
238
+ fn=generate_video,
239
+ cache_examples=True,
240
+ )
241
+
242
  if args.use_image_gen and gen_img_btn is not None:
243
  gen_img_btn.click(
244
  fn=generate_image,
 
246
  outputs=[image],
247
  )
248
 
 
249
  run_btn.click(
250
  fn=generate_video,
251
+ inputs=[video_text_prompt, image, sample_steps],
 
 
 
 
252
  outputs=[output_path],
253
  )
254