akhaliq HF Staff committed on
Commit
3ae4fdd
·
verified ·
1 Parent(s): 1f6e6f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +224 -367
app.py CHANGED
@@ -1,439 +1,296 @@
1
  import gradio as gr
2
  import os
3
  from huggingface_hub import InferenceClient
4
- import tempfile
5
- import shutil
6
  from pathlib import Path
7
- from typing import Optional, Union
8
- import time
9
-
10
- # -------------------------
11
- # Utilities
12
- # -------------------------
13
-
14
- def cleanup_temp_files():
15
- try:
16
- temp_dir = tempfile.gettempdir()
17
- for file_path in Path(temp_dir).glob("*.mp4"):
18
- try:
19
- if file_path.stat().st_mtime < (time.time() - 300):
20
- file_path.unlink(missing_ok=True)
21
- except Exception:
22
- pass
23
- except Exception as e:
24
- print(f"Cleanup error: {e}")
25
-
26
- def _client_from_token(token: Optional[str]) -> InferenceClient:
27
- if not token:
28
- raise gr.Error("Please sign in first. This app requires your Hugging Face login.")
29
- # IMPORTANT: do not set bill_to when using user OAuth tokens
30
- return InferenceClient(
31
- provider="fal-ai",
32
- api_key=token,
33
- )
34
 
35
- def _save_bytes_as_temp_mp4(data: bytes) -> str:
36
- temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
37
- try:
38
- temp_file.write(data)
39
- temp_file.flush()
40
- return temp_file.name
41
- finally:
42
- temp_file.close()
43
 
44
- def text_to_video(prompt, token: gr.OAuthToken | None, duration=5, aspect_ratio="16:9", resolution="720p", *_):
45
- """Generate video from text prompt"""
46
- try:
47
- if token is None or not getattr(token, "token", None):
48
- return None, "❌ Sign in with Hugging Face to continue. This app uses your inference provider credits."
49
-
50
- if not prompt or prompt.strip() == "":
51
- return None, "Please enter a text prompt"
52
-
53
- cleanup_temp_files()
54
-
55
- # Create client with user's token
56
- client = _client_from_token(token.token)
57
-
58
- # Generate video from text
59
- try:
60
- video = client.text_to_video(
61
- prompt,
62
- model="akhaliq/veo3.1-fast",
63
- )
64
- except Exception as e:
65
- import requests
66
- if isinstance(e, requests.HTTPError) and getattr(e.response, "status_code", None) == 403:
67
- return None, "❌ Access denied by provider (403). Make sure your HF account has credits/permission for provider 'fal-ai' and model 'akhaliq/veo3.1-fast'."
68
- raise
69
-
70
- # Save the video to a temporary file
71
- video_path = _save_bytes_as_temp_mp4(video)
72
-
73
- return video_path, f"βœ… Video generated successfully from prompt: '{prompt[:50]}...'"
74
 
75
- except gr.Error as e:
76
- return None, f"❌ {str(e)}"
77
- except Exception as e:
78
- return None, f"❌ Generation failed. If this keeps happening, check your provider quota or try again later."
79
-
80
- def image_to_video(image, prompt, token: gr.OAuthToken | None, duration=5, aspect_ratio="16:9", resolution="720p", *_):
81
- """Generate video from image and prompt"""
82
  try:
83
- if token is None or not getattr(token, "token", None):
84
- return None, "❌ Sign in with Hugging Face to continue. This app uses your inference provider credits."
85
-
86
- if image is None:
87
- return None, "Please upload an image"
88
-
89
- if not prompt or prompt.strip() == "":
90
- return None, "Please enter a prompt describing the motion"
91
-
92
- cleanup_temp_files()
93
 
94
  # Read the image file
95
  if isinstance(image, str):
96
- # If image is a file path
97
  with open(image, "rb") as image_file:
98
  input_image = image_file.read()
99
  else:
100
- # If image is already bytes or similar
101
- import io
102
- from PIL import Image as PILImage
103
-
104
- # Convert to bytes if necessary
105
- if isinstance(image, PILImage.Image):
106
- buffer = io.BytesIO()
107
- image.save(buffer, format='PNG')
108
- input_image = buffer.getvalue()
109
- else:
110
- # Assume it's a numpy array or similar
111
- pil_image = PILImage.fromarray(image)
112
- buffer = io.BytesIO()
113
- pil_image.save(buffer, format='PNG')
114
- input_image = buffer.getvalue()
115
 
116
- # Create client with user's token
117
- client = _client_from_token(token.token)
118
 
119
- # Generate video from image
120
- try:
121
- video = client.image_to_video(
122
- input_image,
123
- prompt=prompt,
124
- model="akhaliq/veo3.1-fast-image-to-video",
125
- )
126
- except Exception as e:
127
- import requests
128
- if isinstance(e, requests.HTTPError) and getattr(e.response, "status_code", None) == 403:
129
- return None, "❌ Access denied by provider (403). Make sure your HF account has credits/permission for provider 'fal-ai' and model 'akhaliq/veo3.1-fast-image-to-video'."
130
- raise
131
 
132
  # Save the video to a temporary file
133
- video_path = _save_bytes_as_temp_mp4(video)
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
- return video_path, f"βœ… Video generated successfully with motion: '{prompt[:50]}...'"
 
 
136
 
137
- except gr.Error as e:
138
- return None, f"❌ {str(e)}"
139
  except Exception as e:
140
- return None, f"❌ Generation failed. If this keeps happening, check your provider quota or try again later."
141
-
142
- def clear_text_tab():
143
- """Clear text-to-video tab"""
144
- return "", None, ""
145
-
146
- def clear_image_tab():
147
- """Clear image-to-video tab"""
148
- return None, "", None, ""
149
-
150
- # Custom CSS for better styling
151
- custom_css = """
152
- .container {
153
- max-width: 1200px;
154
- margin: auto;
155
- }
156
- .header-link {
157
- text-decoration: none;
158
- color: #2196F3;
159
- font-weight: bold;
160
- }
161
- .header-link:hover {
162
- text-decoration: underline;
163
- }
164
- .status-box {
165
- padding: 10px;
166
- border-radius: 5px;
167
- margin-top: 10px;
168
- }
169
- .notice {
170
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
171
- color: white;
172
- padding: 14px 16px;
173
- border-radius: 12px;
174
- margin: 18px auto 6px;
175
- max-width: 860px;
176
- text-align: center;
177
- font-size: 0.98rem;
178
- }
179
- .mobile-link-container {
180
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
181
- padding: 1.5em;
182
- border-radius: 10px;
183
- text-align: center;
184
- margin: 1em 0;
185
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
186
- }
187
- .mobile-link {
188
- color: white !important;
189
- font-size: 1.2em;
190
- font-weight: bold;
191
- text-decoration: none;
192
- display: inline-block;
193
- padding: 0.5em 1.5em;
194
- background: rgba(255, 255, 255, 0.2);
195
- border-radius: 25px;
196
- transition: all 0.3s ease;
197
- }
198
- .mobile-link:hover {
199
- background: rgba(255, 255, 255, 0.3);
200
- transform: translateY(-2px);
201
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
202
- }
203
- .mobile-text {
204
- color: white;
205
- margin-bottom: 0.5em;
206
- font-size: 1.1em;
207
- }
208
- """
209
 
210
  # Create the Gradio interface
211
- with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="AI Video Generator (Paid)") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  gr.Markdown(
213
  """
214
- # 🎬 AI Video Generator
215
- ### Generate stunning videos from text or animate your images with AI
216
- #### Powered by VEO 3.1 Fast Model via Hugging Face Inference API (provider: fal-ai)
 
 
 
217
  """
218
  )
219
 
220
  gr.HTML(
221
  """
222
- <div style="text-align:center; max-width:900px; margin:0 auto;">
223
- <h1 style="font-size:2.2em; margin-bottom:6px;">🎬 Sora-2</h1>
224
- <p style="color:#777; margin:0 0 8px;">Generate videos via the Hugging Face Inference API (provider: fal-ai)</p>
225
- <div class="notice">
226
- <b>Heads up:</b> This is a paid app that uses <b>your</b> inference provider credits when you run generations.
227
- Free users get <b>$0.10 in included credits</b>. <b>PRO users</b> get <b>$2 in included credits</b>
228
- and can continue using beyond that (with billing).
229
- <a href='http://huggingface.co/subscribe/pro?source=veo3' target='_blank' style='color:#fff; text-decoration:underline; font-weight:bold;'>Subscribe to PRO</a>
230
- for more credits. Please sign in with your Hugging Face account to continue.
231
- </div>
232
- <p style="font-size: 0.9em; color: #999; margin-top: 10px;">
233
- Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color:#fff; text-decoration:underline;">anycoder</a>
234
- </p>
235
  </div>
236
  """
237
  )
238
 
239
- # Add mobile link section
 
 
240
  gr.HTML(
241
  """
242
- <div class="mobile-link-container">
243
- <div class="mobile-text">πŸ“± On mobile? Use the optimized version:</div>
244
- <a href="https://akhaliq-veo3-1-fast.hf.space" target="_blank" class="mobile-link">
245
- πŸš€ Open Mobile Version
246
- </a>
 
 
 
 
247
  </div>
248
  """
249
  )
250
 
251
  gr.HTML(
252
  """
253
- <p style="text-align: center; font-size: 0.9em; color: #999; margin-top: 10px;">
254
- Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color:#667eea; text-decoration:underline;">anycoder</a>
255
- </p>
 
 
 
 
 
 
 
256
  """
257
  )
258
 
259
- # Add login button - required for OAuth
260
- login_btn = gr.LoginButton("Sign in with Hugging Face")
261
-
262
- with gr.Tabs() as tabs:
263
- # Text-to-Video Tab
264
- with gr.Tab("πŸ“ Text to Video", id=0):
265
- gr.Markdown("### Transform your text descriptions into dynamic videos")
 
266
 
267
- with gr.Row():
268
- with gr.Column(scale=1):
269
- text_prompt = gr.Textbox(
270
- label="Text Prompt",
271
- placeholder="Describe the video you want to create... (e.g., 'A young man walking on the street during sunset')",
272
- lines=4,
273
- max_lines=6
274
- )
275
-
276
- with gr.Row():
277
- text_generate_btn = gr.Button("🎬 Generate Video", variant="primary", scale=2)
278
- text_clear_btn = gr.ClearButton(value="πŸ—‘οΈ Clear", scale=1)
279
-
280
- text_status = gr.Textbox(
281
- label="Status",
282
- interactive=False,
283
- visible=True,
284
- elem_classes=["status-box"]
285
- )
286
-
287
- with gr.Column(scale=1):
288
- text_video_output = gr.Video(
289
- label="Generated Video",
290
- autoplay=True,
291
- show_download_button=True,
292
- height=400
293
- )
294
 
295
- # Examples for text-to-video
296
- gr.Examples(
297
- examples=[
298
- ["A serene beach at sunset with gentle waves"],
299
- ["A bustling city street with neon lights at night"],
300
- ["A majestic eagle soaring through mountain peaks"],
301
- ["An astronaut floating in space near the International Space Station"],
302
- ["Cherry blossoms falling in slow motion in a Japanese garden"],
303
- ],
304
- inputs=text_prompt,
305
- label="Example Prompts"
306
  )
307
-
308
- # Image-to-Video Tab
309
- with gr.Tab("πŸ–ΌοΈ Image to Video", id=1):
310
- gr.Markdown("### Bring your static images to life with motion")
311
 
312
- with gr.Row():
313
- with gr.Column(scale=1):
314
- image_input = gr.Image(
315
- label="Upload Image",
316
- type="pil",
317
- height=300
318
- )
319
-
320
- image_prompt = gr.Textbox(
321
- label="Motion Prompt",
322
- placeholder="Describe how the image should move... (e.g., 'The cat starts to dance')",
323
- lines=3,
324
- max_lines=5
325
- )
326
-
327
- with gr.Row():
328
- image_generate_btn = gr.Button("🎬 Animate Image", variant="primary", scale=2)
329
- image_clear_btn = gr.ClearButton(value="πŸ—‘οΈ Clear", scale=1)
330
-
331
- image_status = gr.Textbox(
332
- label="Status",
333
- interactive=False,
334
- visible=True,
335
- elem_classes=["status-box"]
336
- )
337
-
338
- with gr.Column(scale=1):
339
- image_video_output = gr.Video(
340
- label="Generated Video",
341
- autoplay=True,
342
- show_download_button=True,
343
- height=400
344
- )
345
 
346
- # Examples for image-to-video
347
  gr.Examples(
348
  examples=[
349
- [None, "The person starts walking forward"],
350
- [None, "The animal begins to run"],
351
- [None, "Camera slowly zooms in while the subject smiles"],
352
- [None, "The flowers sway gently in the breeze"],
353
- [None, "The clouds move across the sky in time-lapse"],
354
  ],
355
- inputs=[image_input, image_prompt],
356
- label="Example Motion Prompts"
 
 
 
 
 
 
 
357
  )
358
-
359
- # How to Use section
360
- with gr.Accordion("πŸ“– How to Use", open=False):
361
- gr.Markdown(
362
- """
363
- ### Text to Video:
364
- 1. Enter a detailed description of the video you want to create
365
- 2. Optionally adjust advanced settings (duration, aspect ratio, resolution)
366
- 3. Click "Generate Video" and wait for the AI to create your video
367
- 4. Download or preview your generated video
368
-
369
- ### Image to Video:
370
- 1. Upload an image you want to animate
371
- 2. Describe the motion or action you want to add to the image
372
- 3. Optionally adjust advanced settings
373
- 4. Click "Animate Image" to bring your image to life
374
- 5. Download or preview your animated video
375
-
376
- ### Tips for Better Results:
377
- - Be specific and descriptive in your prompts
378
- - For image-to-video, describe natural motions that fit the image
379
- - Use high-quality input images for better results
380
- - Experiment with different prompts to get the desired effect
381
 
382
- ### Mobile Users:
383
- - For the best mobile experience, use the optimized version at: https://akhaliq-veo3-1-fast.hf.space
384
- """
385
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
386
 
387
- # Event handlers - FIXED: removed login_btn from inputs
388
- text_generate_btn.click(
389
- fn=text_to_video,
390
- inputs=[text_prompt],
391
- outputs=[text_video_output, text_status],
392
- show_progress="full",
393
  queue=False,
394
  api_name=False,
395
- show_api=False
396
- )
397
-
398
- text_clear_btn.click(
399
- fn=clear_text_tab,
400
- inputs=[],
401
- outputs=[text_prompt, text_video_output, text_status],
402
- queue=False
403
  )
404
 
405
- image_generate_btn.click(
406
- fn=image_to_video,
407
- inputs=[image_input, image_prompt],
408
- outputs=[image_video_output, image_status],
409
- show_progress="full",
410
  queue=False,
411
- api_name=False,
412
- show_api=False
413
  )
414
 
415
- image_clear_btn.click(
416
- fn=clear_image_tab,
417
- inputs=[],
418
- outputs=[image_input, image_prompt, image_video_output, image_status],
419
- queue=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  )
421
 
422
  # Launch the app
423
  if __name__ == "__main__":
424
- try:
425
- cleanup_temp_files()
426
- if os.path.exists("gradio_cached_examples"):
427
- shutil.rmtree("gradio_cached_examples", ignore_errors=True)
428
- except Exception as e:
429
- print(f"Initial cleanup error: {e}")
430
-
431
- demo.queue(status_update_rate="auto", api_open=False, default_concurrency_limit=None)
432
  demo.launch(
433
  show_api=False,
434
- share=False,
435
- show_error=True,
436
  enable_monitoring=False,
437
  quiet=True,
438
- ssr_mode=True
439
  )
 
1
  import gradio as gr
2
  import os
3
  from huggingface_hub import InferenceClient
 
 
4
  from pathlib import Path
5
+ import tempfile
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # Initialize the inference client
8
+ client = InferenceClient(
9
+ provider="fal-ai",
10
+ api_key=os.environ.get("HF_TOKEN"),
11
+ bill_to="huggingface",
12
+ )
 
 
13
 
14
def generate_video_with_auth(image, prompt, profile: gr.OAuthProfile | None, progress=gr.Progress()):
    """
    Generate a video from an image using the Ovi model, gated behind HF OAuth.

    Args:
        image: Input image — either a filesystem path (str) or a PIL-style
            object exposing ``.save(fp, format=...)``.
        prompt: Text prompt describing the desired motion/animation.
        profile: OAuth profile injected by Gradio; ``None`` means the user
            is not signed in.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Path to the generated ``.mp4`` file.

    Raises:
        gr.Error: If the user is not signed in, inputs are missing, or the
            provider call fails.
    """
    # Guard clauses: fail fast with user-facing errors before doing any work.
    if profile is None:
        raise gr.Error("Click Sign in with Hugging Face button to use this app for free")

    if image is None:
        raise gr.Error("Please upload an image first!")

    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter a prompt describing the desired motion!")

    try:
        progress(0.2, desc="Processing image...")

        # Obtain the raw image bytes. For non-path inputs, serialize in
        # memory instead of round-tripping through an undeleted temp file
        # (the previous approach leaked a .png per call and reopened an
        # unclosed NamedTemporaryFile, which fails on Windows).
        if isinstance(image, str):
            with open(image, "rb") as image_file:
                input_image = image_file.read()
        else:
            import io
            buffer = io.BytesIO()
            image.save(buffer, format="PNG")
            input_image = buffer.getvalue()

        progress(0.4, desc="Generating video with AI...")

        # Generate the video via the shared inference client.
        video = client.image_to_video(
            input_image,
            prompt=prompt,
            model="chetwinlow1/Ovi",
        )

        progress(0.9, desc="Finalizing video...")

        # Persist the result to a temp .mp4. Close the handle immediately so
        # the path can be safely re-opened for writing on all platforms.
        output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        output_file.close()

        if isinstance(video, str) and os.path.exists(video):
            # Provider returned a local file path — copy it across.
            import shutil
            shutil.copy(video, output_file.name)
        else:
            # Bytes (or bytes-like) payload — write it out directly.
            with open(output_file.name, "wb") as f:
                f.write(video)

        progress(1.0, desc="Complete!")

        return output_file.name

    except gr.Error:
        # User-facing errors pass through untouched.
        raise
    except Exception as e:
        # Chain the original exception so server logs keep the traceback.
        raise gr.Error(f"Error generating video: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  # Create the Gradio interface
85
+ with gr.Blocks(
86
+ theme=gr.themes.Soft(
87
+ primary_hue="blue",
88
+ secondary_hue="indigo",
89
+ ),
90
+ css="""
91
+ .header-link {
92
+ font-size: 0.9em;
93
+ color: #666;
94
+ text-decoration: none;
95
+ margin-bottom: 1em;
96
+ display: inline-block;
97
+ }
98
+ .header-link:hover {
99
+ color: #333;
100
+ text-decoration: underline;
101
+ }
102
+ .main-header {
103
+ text-align: center;
104
+ margin-bottom: 2em;
105
+ }
106
+ .info-box {
107
+ background-color: #f0f7ff;
108
+ border-left: 4px solid #4285f4;
109
+ padding: 1em;
110
+ margin: 1em 0;
111
+ border-radius: 4px;
112
+ }
113
+ .auth-warning {
114
+ color: #ff6b00;
115
+ font-weight: bold;
116
+ text-align: center;
117
+ margin: 1em 0;
118
+ }
119
+ """,
120
+ title="Image to Video Generator with Ovi",
121
+ ) as demo:
122
+
123
+ gr.HTML(
124
+ """
125
+ <div class="main-header">
126
+ <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" class="header-link">
127
+ Built with anycoder ✨
128
+ </a>
129
+ </div>
130
+ """
131
+ )
132
+
133
  gr.Markdown(
134
  """
135
+ # 🎬 Image to Video Generator with Ovi
136
+
137
+ Transform your static images into dynamic videos with synchronized audio using AI! Upload an image and describe the motion you want to see.
138
+
139
+ Powered by Ovi: Twin Backbone Cross-Modal Fusion for Audio-Video Generation via [HuggingFace Inference Providers](https://huggingface.co/docs/huggingface_hub/guides/inference).
140
+
141
  """
142
  )
143
 
144
  gr.HTML(
145
  """
146
+ <div class="auth-warning">
147
+ ⚠️ You must Sign in with Hugging Face using the button below to use this app.
 
 
 
 
 
 
 
 
 
 
 
148
  </div>
149
  """
150
  )
151
 
152
+ # Add login button - required for OAuth
153
+ gr.LoginButton()
154
+
155
  gr.HTML(
156
  """
157
+ <div class="info-box">
158
+ <strong>πŸ’‘ Tips for best results:</strong>
159
+ <ul>
160
+ <li>Use clear, well-lit images with a single main subject</li>
161
+ <li>Write specific prompts describing the desired motion or action</li>
162
+ <li>Keep prompts concise and focused on movement and audio elements</li>
163
+ <li>Processing generates 5-second videos at 24 FPS with synchronized audio</li>
164
+ <li>Processing may take 30-60 seconds depending on server load</li>
165
+ </ul>
166
  </div>
167
  """
168
  )
169
 
170
  gr.HTML(
171
  """
172
+ <div class="info-box">
173
+ <strong>✨ Special Tokens for Enhanced Control:</strong>
174
+ <ul>
175
+ <li><strong>Speech:</strong> <code>&lt;S&gt;Your speech content here&lt;E&gt;</code> - Text enclosed in these tags will be converted to speech</li>
176
+ <li><strong>Audio Description:</strong> <code>&lt;AUDCAP&gt;Audio description here&lt;ENDAUDCAP&gt;</code> - Describes the audio or sound effects present in the video</li>
177
+ </ul>
178
+ <br>
179
+ <strong>πŸ“ Example Prompt:</strong><br>
180
+ <code>Dogs bark loudly at a man wearing a red shirt. The man says &lt;S&gt;Please stop barking at me!&lt;E&gt;. &lt;AUDCAP&gt;Dogs barking, angry man yelling in stern voice&lt;ENDAUDCAP&gt;.</code>
181
+ </div>
182
  """
183
  )
184
 
185
+ with gr.Row():
186
+ with gr.Column(scale=1):
187
+ image_input = gr.Image(
188
+ label="πŸ“Έ Upload Image",
189
+ type="filepath",
190
+ sources=["upload", "clipboard"],
191
+ height=400,
192
+ )
193
 
194
+ prompt_input = gr.Textbox(
195
+ label="✍️ Text Prompt",
196
+ lines=3,
197
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
+ generate_btn = gr.Button(
200
+ "🎬 Generate Video",
201
+ variant="primary",
202
+ size="lg",
 
 
 
 
 
 
 
203
  )
 
 
 
 
204
 
205
+ clear_btn = gr.Button(
206
+ "πŸ—‘οΈ Clear",
207
+ variant="secondary",
208
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
 
210
  gr.Examples(
211
  examples=[
212
+ [
213
+ "5.png",
214
+ 'A bearded man wearing large dark sunglasses and a blue patterned cardigan sits in a studio, actively speaking into a large, suspended microphone. He has headphones on and gestures with his hands, displaying rings on his fingers. Behind him, a wall is covered with red, textured sound-dampening foam on the left, and a white banner on the right features the "CHOICE FM" logo and various social media handles like "@ilovechoicefm" with "RALEIGH" below it. The man intently addresses the microphone, articulating, <S>is talent. It\'s all about authenticity. You gotta be who you really are, especially if you\'re working<E>. He leans forward slightly as he speaks, maintaining a serious expression behind his sunglasses.. <AUDCAP>Clear male voice speaking into a microphone, a low background hum.<ENDAUDCAP>'
215
+ ]
 
216
  ],
217
+ inputs=[image_input, prompt_input],
218
+ label="Example",
219
+ )
220
+
221
+ with gr.Column(scale=1):
222
+ video_output = gr.Video(
223
+ label="πŸŽ₯ Generated Video",
224
+ height=400,
225
+ autoplay=True,
226
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
 
228
+ gr.Markdown(
229
+ """
230
+ ### About Ovi Model
231
+
232
+ **Ovi: Twin Backbone Cross-Modal Fusion for Audio-Video Generation**
233
+
234
+ Developed by Chetwin Low, Weimin Wang (Character AI) & Calder Katyal (Yale University)
235
+
236
+ 🌟 **Key Features:**
237
+ - 🎬 **Video+Audio Generation**: Generates synchronized video and audio content simultaneously
238
+ - πŸ“ **Flexible Input**: Supports text-only or text+image conditioning
239
+ - ⏱️ **5-second Videos**: Generates 5-second videos at 24 FPS
240
+ - πŸ“ **Multiple Aspect Ratios**: Supports 720Γ—720 area at various ratios (9:16, 16:9, 1:1, etc)
241
+
242
+ Ovi is a veo-3 like model that simultaneously generates both video and audio content from text or text+image inputs.
243
+ """
244
+ )
245
 
246
+ # Event handlers with authentication
247
+ generate_btn.click(
248
+ fn=generate_video_with_auth,
249
+ inputs=[image_input, prompt_input],
250
+ outputs=[video_output],
 
251
  queue=False,
252
  api_name=False,
253
+ show_api=False,
 
 
 
 
 
 
 
254
  )
255
 
256
+ clear_btn.click(
257
+ fn=lambda: (None, "", None),
258
+ inputs=None,
259
+ outputs=[image_input, prompt_input, video_output],
 
260
  queue=False,
 
 
261
  )
262
 
263
+ gr.Markdown(
264
+ """
265
+ ---
266
+
267
+ ### πŸš€ How it works
268
+
269
+ 1. **Sign in** with your Hugging Face account
270
+ 2. **Upload** your image - any photo or illustration
271
+ 3. **Describe** the motion you want to see in the prompt
272
+ 4. **Generate** and watch your image come to life!
273
+
274
+ ### ⚠️ Notes
275
+
276
+ - Video generation may take 30-60 seconds
277
+ - Generates 5-second videos at 24 FPS with synchronized audio
278
+ - Supports multiple aspect ratios (9:16, 16:9, 1:1, etc) at 720Γ—720 area
279
+ - Requires a valid HuggingFace token with Inference API access
280
+ - Best results with clear, high-quality images
281
+ - The model works best with realistic subjects and natural motions
282
+
283
+ ### πŸ”— Resources
284
+
285
+ - [Ovi Model Card](https://huggingface.co/chetwinlow1/Ovi)
286
+ - [Character AI](https://character.ai)
287
+ """
288
  )
289
 
290
  # Launch the app
291
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
292
  demo.launch(
293
  show_api=False,
 
 
294
  enable_monitoring=False,
295
  quiet=True,
 
296
  )