import gradio as gr import os from huggingface_hub import InferenceClient import tempfile from pathlib import Path from PIL import Image as PILImage import io # Initialize the Hugging Face Inference client client = InferenceClient( provider="fal-ai", api_key=os.environ.get("HF_TOKEN"), # Your HF token must be set in the environment bill_to="huggingface", ) # --- Core Functions --- def text_to_video(prompt, duration=5, aspect_ratio="16:9", resolution="720p"): """Generate video from text prompt""" try: if not prompt or prompt.strip() == "": return None, "⚠️ Please enter a text prompt." # Generate video from text video = client.text_to_video( prompt, model="akhaliq/veo3.1-fast", ) # Save the video to a temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file: tmp_file.write(video) video_path = tmp_file.name return video_path, f"✅ Video generated successfully from prompt: '{prompt[:50]}...'" except Exception as e: return None, f"❌ Error generating video: {str(e)}" def image_to_video(image, prompt, duration=5, aspect_ratio="16:9", resolution="720p"): """Generate video from image and prompt""" try: if image is None: return None, "⚠️ Please upload an image." if not prompt or prompt.strip() == "": return None, "⚠️ Please enter a motion description." # Convert image to bytes if isinstance(image, PILImage.Image): buffer = io.BytesIO() image.save(buffer, format="PNG") input_image = buffer.getvalue() else: pil_image = PILImage.fromarray(image) buffer = io.BytesIO() pil_image.save(buffer, format="PNG") input_image = buffer.getvalue() # Generate video from image video = client.image_to_video( input_image, prompt=prompt, model="akhaliq/veo3.1-fast-image-to-video", ) # Save the video to a temporary file with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file: tmp_file.write(video) video_path = tmp_file.name return video_path, f"✅ Video generated successfully with motion: '{prompt[:50]}...'" except Exception as e: return None, f"❌ Error generating video: {str(e)}" def clear_text_tab(): """Clear text-to-video tab""" return "", None, "" def clear_image_tab(): """Clear image-to-video tab""" return None, "", None, "" # --- Custom CSS --- custom_css = """ .container { max-width: 1200px; margin: auto; } .status-box { padding: 10px; border-radius: 5px; margin-top: 10px; } """ # --- Gradio App --- with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="AI Video Generator") as demo: gr.Markdown( """ # 🎬 AI Video Generator ### Generate stunning videos from text or animate your images with AI #### Powered by VEO 3.1 Fast Model """ ) with gr.Tabs() as tabs: # Text-to-Video Tab with gr.Tab("📝 Text to Video"): gr.Markdown("### Transform your text descriptions into dynamic videos") with gr.Row(): with gr.Column(scale=1): text_prompt = gr.Textbox( label="Text Prompt", placeholder="Describe the video you want to create...", lines=4, max_lines=6 ) with gr.Accordion("Advanced Settings", open=False): text_duration = gr.Slider(1, 10, value=5, step=1, label="Duration (seconds)") text_aspect_ratio = gr.Dropdown( ["16:9", "9:16", "1:1", "4:3", "21:9"], value="16:9", label="Aspect Ratio" ) text_resolution = gr.Dropdown( ["480p", "720p", "1080p"], value="720p", label="Resolution" ) with gr.Row(): text_generate_btn = gr.Button("🎬 Generate Video", variant="primary") text_clear_btn = gr.ClearButton(value="🗑️ Clear") text_status = gr.Textbox( label="Status", interactive=False, visible=True, elem_classes=["status-box"] ) with gr.Column(scale=1): text_video_output = gr.Video( label="Generated Video", autoplay=True, show_download_button=True, height=400 ) gr.Examples( examples=[ ["A serene beach at sunset with gentle waves"], ["A bustling city street with neon lights at night"], ["A majestic eagle soaring through mountain peaks"], ["An astronaut floating in space near the ISS"], ["Cherry blossoms falling in a Japanese garden"], ], inputs=text_prompt, label="Example Prompts", ) # Image-to-Video Tab with gr.Tab("🖼️ Image to Video"): gr.Markdown("### Bring your static images to life with motion") with gr.Row(): with gr.Column(scale=1): image_input = gr.Image(label="Upload Image", type="pil", height=300) image_prompt = gr.Textbox( label="Motion Prompt", placeholder="Describe how the image should move...", lines=3, max_lines=5 ) with gr.Accordion("Advanced Settings", open=False): image_duration = gr.Slider(1, 10, value=5, step=1, label="Duration (seconds)") image_aspect_ratio = gr.Dropdown( ["16:9", "9:16", "1:1", "4:3", "21:9"], value="16:9", label="Aspect Ratio" ) image_resolution = gr.Dropdown( ["480p", "720p", "1080p"], value="720p", label="Resolution" ) with gr.Row(): image_generate_btn = gr.Button("🎬 Animate Image", variant="primary") image_clear_btn = gr.ClearButton(value="🗑️ Clear") image_status = gr.Textbox( label="Status", interactive=False, visible=True, elem_classes=["status-box"] ) with gr.Column(scale=1): image_video_output = gr.Video( label="Generated Video", autoplay=True, show_download_button=True, height=400 ) gr.Examples( examples=[ [None, "The person starts walking forward"], [None, "The animal begins to run"], [None, "Camera slowly zooms in while the subject smiles"], [None, "The flowers sway gently in the breeze"], [None, "The clouds move across the sky in time-lapse"], ], inputs=[image_input, image_prompt], label="Example Motion Prompts", ) # --- Event handlers --- text_generate_btn.click( fn=text_to_video, inputs=[text_prompt, text_duration, text_aspect_ratio, text_resolution], outputs=[text_video_output, text_status], ) text_clear_btn.click( fn=clear_text_tab, inputs=[], outputs=[text_prompt, text_video_output, text_status], ) image_generate_btn.click( fn=image_to_video, inputs=[image_input, image_prompt, image_duration, image_aspect_ratio, image_resolution], outputs=[image_video_output, image_status], ) image_clear_btn.click( fn=clear_image_tab, inputs=[], outputs=[image_input, image_prompt, image_video_output, image_status], ) # Launch the app if __name__ == "__main__": demo.launch(show_api=False, share=False, quiet=True)