# sora-2.0 / app.py — Sora-2 text/image-to-video Gradio Space (author: akhaliq, HF Staff)
# Commit f75d68c (verified), file size 10.1 kB; viewer chrome from the original
# "raw / history / blame" page removed so this file parses as Python.
import gradio as gr
import os
import tempfile
import shutil
from typing import Optional, Tuple, Union
from huggingface_hub import InferenceClient
from pathlib import Path
from PIL import Image
import io
import time
# Initialize Hugging Face Inference Client with fal-ai provider
# Module-level default client, shared by all requests that do not supply their
# own API key. NOTE(review): if HF_TOKEN is unset, api_key is None and this
# client cannot authenticate — the generate_* functions guard for that before
# making any API call.
client = InferenceClient(
    provider="fal-ai",
    api_key=os.environ.get("HF_TOKEN"),
    bill_to="huggingface",
)
def cleanup_temp_files():
    """Best-effort reaper for stale generated videos.

    Deletes any ``*.mp4`` in the system temp directory older than five
    minutes, so repeated generations don't fill up the Space's storage.
    Never raises: per-file errors are skipped, and any unexpected failure
    is only printed.
    """
    try:
        stale_before = time.time() - 300  # 5-minute grace period
        for mp4 in Path(tempfile.gettempdir()).glob("*.mp4"):
            try:
                if mp4.stat().st_mtime < stale_before:
                    mp4.unlink(missing_ok=True)
            except Exception:
                # File may be in use or already gone — leave it for next pass.
                pass
    except Exception as e:
        print(f"Cleanup error: {e}")
def generate_video(
    prompt: str,
    duration: int = 8,
    size: str = "1280x720",
    api_key: Optional[str] = None
) -> Tuple[Optional[str], str]:
    """Generate video using Sora-2 through Hugging Face Inference API with fal-ai provider.

    Args:
        prompt: Text description of the video to generate.
        duration: Requested clip length in seconds. NOTE(review): not
            forwarded to the API call — kept for interface compatibility.
        size: Requested resolution as "WIDTHxHEIGHT". NOTE(review): also not
            forwarded to the API call.
        api_key: Optional per-request HF token; falls back to the module-level
            client (which uses HF_TOKEN) when omitted.

    Returns:
        Tuple of (path to the generated .mp4, or None on failure,
        human-readable status message).
    """
    # Validate cheap preconditions before any work — mirrors the prompt check
    # in generate_video_from_image; gr.Examples calls this function directly,
    # bypassing the auth wrapper's own check.
    if not prompt or prompt.strip() == "":
        return None, "❌ Please enter a prompt"
    # Fail fast when no credentials are available at all.
    if not os.environ.get("HF_TOKEN") and not api_key:
        return None, "❌ Please set HF_TOKEN environment variable."
    cleanup_temp_files()
    try:
        if api_key:
            # Caller-supplied key: build a dedicated client for this request.
            temp_client = InferenceClient(
                provider="fal-ai",
                api_key=api_key,
                bill_to="huggingface",
            )
        else:
            temp_client = client
        video_bytes = temp_client.text_to_video(
            prompt,
            model="akhaliq/sora-2",
        )
        # Persist to a named temp file so Gradio can serve it; delete=False so
        # the file survives the handle close (cleanup_temp_files reaps it later).
        temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
        try:
            temp_file.write(video_bytes)
            temp_file.flush()
            video_path = temp_file.name
        finally:
            temp_file.close()
        return video_path, "βœ… Video generated successfully!"
    except Exception as e:
        return None, f"❌ Error generating video: {str(e)}"
def generate_video_from_image(
    image: Union[str, bytes],
    prompt: str,
    api_key: Optional[str] = None
) -> Tuple[Optional[str], str]:
    """Generate a video from a single input image + prompt using Sora-2 image-to-video.

    Args:
        image: Filesystem path to an image, or raw encoded image bytes.
        prompt: Text describing how the scene should evolve.
        api_key: Optional per-request HF token; falls back to the module-level
            client (which uses HF_TOKEN) when omitted.

    Returns:
        Tuple of (path to the generated .mp4, or None on failure,
        human-readable status message).
    """
    # Validate cheap preconditions before any work.
    if not prompt or prompt.strip() == "":
        return None, "❌ Please enter a prompt"
    # Fail fast when no credentials are available at all (checked before
    # constructing any client, consistent with the text-to-video path).
    if not os.environ.get("HF_TOKEN") and not api_key:
        return None, "❌ Please set HF_TOKEN environment variable."
    cleanup_temp_files()
    try:
        if api_key:
            # Caller-supplied key: build a dedicated client for this request.
            temp_client = InferenceClient(
                provider="fal-ai",
                api_key=api_key,
                bill_to="huggingface",
            )
        else:
            temp_client = client
        # Accept either a filesystem path or raw encoded bytes.
        if isinstance(image, str):
            img = Image.open(image)
        elif isinstance(image, (bytes, bytearray)):
            img = Image.open(io.BytesIO(image))
        else:
            return None, "❌ Invalid image input. Please upload an image."
        # Downscale oversized images to fit 1920x1080, preserving aspect ratio.
        max_width = 1920
        max_height = 1080
        if img.width > max_width or img.height > max_height:
            ratio = min(max_width / img.width, max_height / img.height)
            new_width = int(img.width * ratio)
            new_height = int(img.height * ratio)
            img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
        # Re-encode as JPEG for a smaller upload. JPEG has no alpha channel, so
        # composite transparent/palette images onto a white background first.
        img_byte_arr = io.BytesIO()
        if img.mode in ('RGBA', 'LA', 'P'):
            rgb_img = Image.new('RGB', img.size, (255, 255, 255))
            # Use the alpha band as the paste mask when one exists.
            rgb_img.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
            img = rgb_img
        img.save(img_byte_arr, format='JPEG', quality=95)
        input_image = img_byte_arr.getvalue()
        video_bytes = temp_client.image_to_video(
            input_image,
            prompt=prompt,
            model="akhaliq/sora-2-image-to-video",
        )
        # Persist to a named temp file so Gradio can serve it; delete=False so
        # the file survives the handle close (cleanup_temp_files reaps it later).
        temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
        try:
            temp_file.write(video_bytes)
            temp_file.flush()
            video_path = temp_file.name
        finally:
            temp_file.close()
        return video_path, "βœ… Video generated from image successfully!"
    except Exception as e:
        return None, f"❌ Error generating video from image: {str(e)}"
def generate_with_auth(
    prompt: str,
    profile: gr.OAuthProfile | None
) -> Tuple[Optional[str], str]:
    """Login-gated entry point for text-to-video generation.

    Gradio injects ``profile`` from the OAuth session; a missing profile means
    the user never signed in, so we surface a gr.Error instead of generating.
    """
    if profile is None:
        raise gr.Error("Click Sign in with Hugging Face button to use this app for free")
    if not prompt or not prompt.strip():
        return None, "❌ Please enter a prompt"
    # Delegate with the app defaults; api_key=None means the server-side
    # HF_TOKEN client is used.
    return generate_video(prompt, duration=8, size="1280x720", api_key=None)
def generate_with_auth_image(
    prompt: str,
    image_path: Optional[str],
    profile: gr.OAuthProfile | None
) -> Tuple[Optional[str], str]:
    """Login-gated entry point for image-to-video generation.

    Requires both a signed-in OAuth profile (injected by Gradio) and an
    uploaded image before delegating to the generator.
    """
    if profile is None:
        raise gr.Error("Click Sign in with Hugging Face button to use this app for free")
    if not image_path:
        return None, "❌ Please upload an image"
    # api_key=None: use the server-side HF_TOKEN client.
    return generate_video_from_image(image=image_path, prompt=prompt, api_key=None)
def create_ui():
    """Build and return the Gradio Blocks app.

    Layout: header banner, HF login button, a text→video panel, an
    image→video panel, and one example prompt. Event handlers are the
    login-gated wrappers defined above.
    """
    # Dark-mode logo swap plus subtitle spacing tweaks.
    css = '''
    .logo-dark{display: none}
    .dark .logo-dark{display: block !important}
    .dark .logo-light{display: none}
    #sub_title{margin-top: -20px !important}
    '''
    with gr.Blocks(title="Sora-2 Text-to-Video Generator", theme=gr.themes.Soft(), css=css) as demo:
        # Header banner (raw HTML for full styling control).
        gr.HTML("""
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <h1 style="font-size: 2.5em; margin-bottom: 0.5em;">
                🎬 Sora-2 Text-to-Video Generator
            </h1>
            <p style="font-size: 1.1em; color: #666; margin-bottom: 20px;">Generate stunning videos using OpenAI's Sora-2 model</p>
            <p style='color: orange;'>⚠️ You must Sign in with Hugging Face using the button to use this app.</p>
            <p style="font-size: 0.9em; color: #999; margin-top: 15px;">
                Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #667eea;">anycoder</a>
            </p>
        </div>
        """)
        # Add login button - required for OAuth
        gr.LoginButton()
        # Text -> Video
        with gr.Row():
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="Enter your prompt",
                    placeholder="Describe the video you want to create...",
                    lines=4
                )
                generate_btn = gr.Button("πŸŽ₯ Generate Video", variant="primary", size="lg")
            with gr.Column(scale=1):
                video_output = gr.Video(label="Generated Video", height=400, interactive=False, show_download_button=True)
                status_output = gr.Textbox(label="Status", interactive=False, visible=True)
        # NOTE(review): the handler's gr.OAuthProfile parameter is not listed in
        # `inputs` — Gradio injects it from the login session via the type hint.
        generate_btn.click(
            fn=generate_with_auth,
            inputs=[prompt_input],
            outputs=[video_output, status_output],
            queue=False
        )
        # Image -> Video UI
        gr.HTML("""
        <div style="text-align: center; margin: 40px 0 10px;">
            <h3 style="margin-bottom: 8px;">πŸ–ΌοΈ ➜ 🎬 Image β†’ Video (beta)</h3>
            <p style="color:#666; margin:0;">Turn a single image into a short video with a guiding prompt.</p>
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=1):
                img_prompt_input = gr.Textbox(
                    label="Describe how the scene should evolve",
                    placeholder="e.g., The cat starts to dance and spins playfully",
                    lines=3,
                )
                # type="filepath": handler receives a path string on disk.
                image_input = gr.Image(label="Upload an image", type="filepath")
                generate_img_btn = gr.Button("πŸŽ₯ Generate from Image", variant="primary")
            with gr.Column(scale=1):
                video_output_img = gr.Video(label="Generated Video (from Image)", height=400, interactive=False, show_download_button=True)
                status_output_img = gr.Textbox(label="Status", interactive=False, visible=True)
        generate_img_btn.click(
            fn=generate_with_auth_image,
            inputs=[img_prompt_input, image_input],
            outputs=[video_output_img, status_output_img],
            queue=False
        )
        # Example usage guidance
        # NOTE(review): examples call generate_video directly (no login gate);
        # cache_examples=False so no example video is pre-generated at startup.
        gr.Examples(
            examples=[
                "A majestic golden eagle soaring through a vibrant sunset sky"
            ],
            inputs=prompt_input,
            outputs=video_output,
            fn=generate_video,  # Examples use the original function
            cache_examples=False,
            api_name=False,
            show_api=False,
        )
    return demo
if __name__ == "__main__":
    # One-time startup housekeeping: reap stale temp videos and drop any
    # leftover example cache from previous runs. Best-effort only.
    try:
        cleanup_temp_files()
        cache_dir = "gradio_cached_examples"
        if os.path.exists(cache_dir):
            shutil.rmtree(cache_dir, ignore_errors=True)
    except Exception as e:
        print(f"Initial cleanup error: {e}")
    demo_app = create_ui()
    # No auth parameters here: OAuth is enabled via Space metadata
    # (hf_oauth: true in README.md).
    demo_app.launch(
        show_api=False,
        enable_monitoring=False,
        quiet=True,
        max_threads=10
    )