|
|
import io
import os
import shutil
import tempfile
from pathlib import Path

import gradio as gr
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _read_image_bytes(image):
    """Return the encoded bytes of *image*.

    A string or path-like value is treated as a file path and read as-is.
    Any other object is expected to provide a PIL-style
    ``save(fp, format=...)`` method and is serialized to PNG in memory, so
    no temporary file is left behind.
    """
    if isinstance(image, (str, os.PathLike)):
        return Path(image).read_bytes()

    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return buffer.getvalue()


def _save_video_to_temp_file(video):
    """Store *video* in a new temporary ``.mp4`` file and return its path.

    *video* is either the path of an existing file (which is copied) or the
    raw, bytes-like video data (which is written out).
    """
    # delete=False: the file has to outlive this function because only its
    # path is returned. Leaving the ``with`` block closes the handle right
    # away so the path can be reopened by name below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as output_file:
        output_path = Path(output_file.name)

    try:
        if isinstance(video, str) and os.path.exists(video):
            shutil.copy(video, output_path)
        else:
            output_path.write_bytes(video)
    except Exception:
        # Do not leave an empty or partial file behind when writing fails.
        output_path.unlink(missing_ok=True)
        raise
    return str(output_path)


def generate_video_with_auth(
    image,
    prompt,
    token: gr.OAuthToken | None,
    progress=gr.Progress(),
):
    """
    Generate a video from an image using the Ovi model with authentication check.

    Args:
        image: Input image (PIL Image or file path)
        prompt: Text prompt describing the desired motion/animation
        token: OAuth token for authentication
        progress: Gradio progress tracker

    Returns:
        Path to the generated video file

    Raises:
        gr.Error: If the user is not signed in, the image or prompt is
            missing, or any step of the generation fails (the underlying
            exception is attached as ``__cause__``).
    """
    # Validate up front so nothing is sent to the provider for a request
    # that cannot succeed.
    if token is None or not getattr(token, "token", None):
        raise gr.Error("Please sign in with Hugging Face to use this paid app")

    if image is None:
        raise gr.Error("Please upload an image first!")

    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt describing the desired motion!")

    try:
        progress(0.2, desc="Processing image...")
        input_image = _read_image_bytes(image)

        progress(0.4, desc="Generating video with AI...")
        # The signed-in user's own token is used as the API key for the call.
        client = InferenceClient(
            provider="fal-ai",
            api_key=token.token,
        )
        video = client.image_to_video(
            input_image,
            prompt=prompt,
            model="chetwinlow1/Ovi",
        )

        progress(0.9, desc="Finalizing video...")
        output_path = _save_video_to_temp_file(video)

        progress(1.0, desc="Complete!")
        return output_path

    except Exception as e:
        # Surface every failure as a Gradio error while keeping the original
        # exception chained for server-side debugging.
        raise gr.Error(f"Error generating video: {e}") from e
|
|
|
|
|
|
|
|
# Declarative UI for the app. The resulting Blocks object is bound to `demo`.
# NOTE(review): the indented Markdown bodies below rely on gr.Markdown
# stripping common leading indentation (inspect.cleandoc) — confirm for the
# Gradio version this app is pinned to.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
    ),
    css="""
    .header-link {
        font-size: 0.9em;
        color: #666;
        text-decoration: none;
        margin-bottom: 1em;
        display: inline-block;
    }
    .header-link:hover {
        color: #333;
        text-decoration: underline;
    }
    .main-header {
        text-align: center;
        margin-bottom: 2em;
    }
    .info-box {
        background-color: #f0f7ff;
        border-left: 4px solid #4285f4;
        padding: 1em;
        margin: 1em 0;
        border-radius: 4px;
    }
    .auth-warning {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        padding: 14px 16px;
        border-radius: 12px;
        margin: 18px auto 6px;
        max-width: 860px;
        text-align: center;
        font-size: 0.98rem;
        font-weight: bold;
    }
    """,
    title="Image to Video Generator with Ovi (Paid)",
) as demo:

    # Attribution link shown above the title.
    gr.HTML(
        """
        <div class="main-header">
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" class="header-link">
                Built with anycoder ✨
            </a>
        </div>
        """
    )

    gr.Markdown(
        """
        # 🎬 Image to Video Generator with Ovi

        Transform your static images into dynamic videos with synchronized audio using AI! Upload an image and describe the motion you want to see.

        Powered by Ovi: Twin Backbone Cross-Modal Fusion for Audio-Video Generation via [HuggingFace Inference Providers](https://huggingface.co/docs/huggingface_hub/guides/inference).
        """
    )

    # Billing notice: generation is charged to the signed-in user's credits.
    gr.HTML(
        """
        <div class="auth-warning">
            💳 <b>PAID APP:</b> This app uses <b>YOUR</b> inference provider credits.
            Free users get $0.10 in included credits. PRO users get $2 in credits and can continue with billing.
            <a href='https://huggingface.co/subscribe/pro?source=ovi' target='_blank' style='color:#fff; text-decoration:underline;'>Subscribe to PRO</a> for more credits.
            Please sign in below to continue.
            <br><a href='https://huggingface.co/settings/inference-providers/overview' target='_blank' style='color:#fff; text-decoration:underline; font-weight:bold;'>Check your billing usage here</a>
        </div>
        """
    )

    # Sign-in control; the click handler rejects requests without a token.
    gr.LoginButton()

    gr.HTML(
        """
        <div class="info-box">
            <strong>💡 Tips for best results:</strong>
            <ul>
                <li>Use clear, well-lit images with a single main subject</li>
                <li>Write specific prompts describing the desired motion or action</li>
                <li>Keep prompts concise and focused on movement and audio elements</li>
                <li>Processing generates 5-second videos at 24 FPS with synchronized audio</li>
                <li>Processing may take 30-60 seconds depending on server load</li>
            </ul>
        </div>
        """
    )

    # The special tokens are HTML-escaped: written raw, the browser would
    # parse <S>, <E>, <AUDCAP> and <ENDAUDCAP> as tags and hide them.
    gr.HTML(
        """
        <div class="info-box">
            <strong>✨ Special Tokens for Enhanced Control:</strong>
            <ul>
                <li><strong>Speech:</strong> <code>&lt;S&gt;Your speech content here&lt;E&gt;</code> - Text enclosed in these tags will be converted to speech</li>
                <li><strong>Audio Description:</strong> <code>&lt;AUDCAP&gt;Audio description here&lt;ENDAUDCAP&gt;</code> - Describes the audio or sound effects present in the video</li>
            </ul>
            <br>
            <strong>📝 Example Prompt:</strong><br>
            <code>Dogs bark loudly at a man wearing a red shirt. The man says &lt;S&gt;Please stop barking at me!&lt;E&gt;. &lt;AUDCAP&gt;Dogs barking, angry man yelling in stern voice&lt;ENDAUDCAP&gt;.</code>
        </div>
        """
    )

    with gr.Row():
        # Left column: inputs, action buttons and a ready-made example.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="📸 Upload Image",
                type="filepath",
                sources=["upload", "clipboard"],
                height=400,
            )

            prompt_input = gr.Textbox(
                label="✏️ Text Prompt",
                lines=3,
                placeholder="Describe the motion and audio you want to see...",
            )

            generate_btn = gr.Button(
                "🎬 Generate Video",
                variant="primary",
                size="lg",
            )

            clear_btn = gr.Button(
                "🗑️ Clear",
                variant="secondary",
            )

            # The example prompt is plain text for the textbox, so its
            # special tokens stay unescaped here.
            gr.Examples(
                examples=[
                    [
                        "5.png",
                        'A bearded man wearing large dark sunglasses and a blue patterned cardigan sits in a studio, actively speaking into a large, suspended microphone. He has headphones on and gestures with his hands, displaying rings on his fingers. Behind him, a wall is covered with red, textured sound-dampening foam on the left, and a white banner on the right features the "CHOICE FM" logo and various social media handles like "@ilovechoicefm" with "RALEIGH" below it. The man intently addresses the microphone, articulating, <S>is talent. It\'s all about authenticity. You gotta be who you really are, especially if you\'re working<E>. He leans forward slightly as he speaks, maintaining a serious expression behind his sunglasses.. <AUDCAP>Clear male voice speaking into a microphone, a low background hum.<ENDAUDCAP>',
                    ]
                ],
                inputs=[image_input, prompt_input],
                label="Example",
            )

        # Right column: the generated video and background on the model.
        with gr.Column(scale=1):
            video_output = gr.Video(
                label="🎥 Generated Video",
                height=400,
                autoplay=True,
            )

            gr.Markdown(
                """
                ### About Ovi Model

                **Ovi: Twin Backbone Cross-Modal Fusion for Audio-Video Generation**

                Developed by Chetwin Low, Weimin Wang (Character AI) & Calder Katyal (Yale University)

                🌟 **Key Features:**
                - 🎬 **Video+Audio Generation**: Generates synchronized video and audio content simultaneously
                - 📝 **Flexible Input**: Supports text-only or text+image conditioning
                - ⏱️ **5-second Videos**: Generates 5-second videos at 24 FPS
                - 📐 **Multiple Aspect Ratios**: Supports 720×720 area at various ratios (9:16, 16:9, 1:1, etc)

                Ovi is a veo-3 like model that simultaneously generates both video and audio content from text or text+image inputs.

                ---

                ### 💳 Pricing & Credits

                This is a **paid app** that charges your HuggingFace inference provider account:
                - **Free users**: $0.10 in included credits
                - **PRO users**: $2 in included credits + ability to continue with billing
                - Each video generation consumes credits based on processing time

                [Subscribe to PRO](https://huggingface.co/subscribe/pro?source=ovi) for more credits!
                """
            )

    # Only the image and prompt are wired as inputs; the handler's `token`
    # parameter is not listed here.
    # NOTE(review): the handler reports progress through gr.Progress while
    # queue=False keeps this event off the queue — confirm that progress
    # updates are actually displayed with this setting.
    generate_btn.click(
        fn=generate_video_with_auth,
        inputs=[image_input, prompt_input],
        outputs=[video_output],
        queue=False,
        api_name=False,
        show_api=False,
    )

    # Reset the image, the prompt and the video in one step.
    clear_btn.click(
        fn=lambda: (None, "", None),
        inputs=None,
        outputs=[image_input, prompt_input, video_output],
        queue=False,
    )

    gr.Markdown(
        """
        ---

        ### 🚀 How it works

        1. **Sign in** with your Hugging Face account (required for paid app)
        2. **Upload** your image - any photo or illustration
        3. **Describe** the motion you want to see in the prompt
        4. **Generate** and watch your image come to life with synchronized audio!
        5. **Credits are deducted** from your HuggingFace inference provider account

        ### ⚠️ Notes

        - **This is a PAID app** - uses your inference provider credits
        - Video generation may take 30-60 seconds
        - Generates 5-second videos at 24 FPS with synchronized audio
        - Supports multiple aspect ratios (9:16, 16:9, 1:1, etc) at 720×720 area
        - Best results with clear, high-quality images
        - The model works best with realistic subjects and natural motions
        - Free accounts have limited credits - upgrade to PRO for more

        ### 🔗 Resources

        - [Ovi Model Card](https://huggingface.co/chetwinlow1/Ovi)
        - [Character AI](https://character.ai)
        - [Subscribe to PRO](https://huggingface.co/subscribe/pro?source=ovi)
        - [Inference API Documentation](https://huggingface.co/docs/huggingface_hub/guides/inference)
        """
    )
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the app only when this file is executed as a script.
    launch_options = {
        "show_api": False,
        "enable_monitoring": False,
        "quiet": True,
    }
    demo.launch(**launch_options)