Ovi

Running

App Files Community

akhaliq (HF Staff) committed 28 days ago

Commit

9e7a994

verified ·

1 Parent(s): 42f1e7f

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -256

app.py CHANGED Viewed

@@ -1,49 +1,11 @@
 import gradio as gr
 import os
 from huggingface_hub import InferenceClient
-import tempfile
-import shutil
 from pathlib import Path
-from typing import Optional
-import time
-# -------------------------
-# Utilities
-# -------------------------
-def cleanup_temp_files():
-    """Clean up old temporary video files"""
-    try:
-        temp_dir = tempfile.gettempdir()
-        for file_path in Path(temp_dir).glob("*.mp4"):
-            try:
-                if file_path.stat().st_mtime < (time.time() - 300):
-                    file_path.unlink(missing_ok=True)
-            except Exception:
-                pass
-    except Exception as e:
-        print(f"Cleanup error: {e}")
-def _client_from_token(token: Optional[str]) -> InferenceClient:
-    """Create InferenceClient from user's OAuth token"""
-    if not token:
-        raise gr.Error("Please sign in first. This app requires your Hugging Face login.")
-    # IMPORTANT: do not set bill_to when using user OAuth tokens
-    # This ensures the user is billed, not Hugging Face
-    return InferenceClient(
-        provider="fal-ai",
-        api_key=token,
-    )
-def _save_bytes_as_temp_mp4(data: bytes) -> str:
-    """Save video bytes to temporary MP4 file"""
-    temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    try:
-        temp_file.write(data)
-        temp_file.flush()
-        return temp_file.name
-    finally:
-        temp_file.close()
 def generate_video_with_auth(image, prompt, profile: gr.OAuthProfile | None, progress=gr.Progress()):
     """
@@ -56,162 +18,149 @@ def generate_video_with_auth(image, prompt, profile: gr.OAuthProfile | None, pro
         progress: Gradio progress tracker
     Returns:
-        Tuple of (video_path, status_message)
     """
     try:
-        # Check authentication
-        if profile is None:
-            return None, "❌ Sign in with Hugging Face to continue. This app uses your inference provider credits."
-        if image is None:
-            return None, "❌ Please upload an image first!"
-        if not prompt or prompt.strip() == "":
-            return None, "❌ Please enter a prompt describing the desired motion!"
         progress(0.2, desc="Processing image...")
-        cleanup_temp_files()
         # Read the image file
         if isinstance(image, str):
-            # If image is a file path
             with open(image, "rb") as image_file:
                 input_image = image_file.read()
         else:
-            # If image is PIL Image or array
-            import io
-            from PIL import Image as PILImage
-            if isinstance(image, PILImage.Image):
-                buffer = io.BytesIO()
-                image.save(buffer, format='PNG')
-                input_image = buffer.getvalue()
-            else:
-                # Assume it's a numpy array
-                pil_image = PILImage.fromarray(image)
-                buffer = io.BytesIO()
-                pil_image.save(buffer, format='PNG')
-                input_image = buffer.getvalue()
         progress(0.4, desc="Generating video with AI...")
-        # Create client with user's OAuth token (not HF_TOKEN)
-        # IMPORTANT: Do not use bill_to parameter - this ensures user gets billed
         client = InferenceClient(
             provider="fal-ai",
-            api_key=profile.oauth_info.access_token,  # Use user's token
         )
         # Generate video using the inference client
-        try:
-            video = client.image_to_video(
-                input_image,
-                prompt=prompt,
-                model="chetwinlow1/Ovi",
-            )
-        except Exception as e:
-            import requests
-            if isinstance(e, requests.HTTPError) and getattr(e.response, "status_code", None) == 403:
-                return None, "❌ Access denied by provider (403). Make sure your HF account has credits/permission for provider 'fal-ai' and model 'chetwinlow1/Ovi'."
-            raise
         progress(0.9, desc="Finalizing video...")
         # Save the video to a temporary file
-        video_path = _save_bytes_as_temp_mp4(video)
         progress(1.0, desc="Complete!")
-        return video_path, f"✅ Video generated successfully! Prompt: '{prompt[:60]}...'"
-    except gr.Error as e:
-        return None, f"❌ {str(e)}"
     except Exception as e:
-        return None, f"❌ Generation failed. If this keeps happening, check your provider quota or try again later. Error: {str(e)}"
-def clear_all():
-    """Clear all inputs and outputs"""
-    return None, "", None, ""
-# Custom CSS for better styling
-custom_css = """
-.container {
-    max-width: 1200px;
-    margin: auto;
-}
-.header-link {
-    text-decoration: none;
-    color: #2196F3;
-    font-weight: bold;
-}
-.header-link:hover {
-    text-decoration: underline;
-}
-.status-box {
-    padding: 10px;
-    border-radius: 5px;
-    margin-top: 10px;
-}
-.notice {
-    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-    color: white;
-    padding: 14px 16px;
-    border-radius: 12px;
-    margin: 18px auto 6px;
-    max-width: 860px;
-    text-align: center;
-    font-size: 0.98rem;
-}
-.info-box {
-    background-color: #f0f7ff;
-    border-left: 4px solid #4285f4;
-    padding: 1em;
-    margin: 1em 0;
-    border-radius: 4px;
-}
-.special-tokens-box {
-    background: linear-gradient(135deg, #ffeaa7 0%, #fdcb6e 100%);
-    padding: 1em;
-    margin: 1em 0;
-    border-radius: 8px;
-    border-left: 4px solid #e17055;
-}
-"""
 # Create the Gradio interface
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Ovi Image-to-Video Generator (Paid)") as demo:
-    # Header with payment notice
     gr.HTML(
         """
-        <div style="text-align:center; padding:2em 1em 1em;">
-            <h1 style="font-size:2.2em; margin-bottom:6px;">🎬 Ovi: Image-to-Video with Audio</h1>
-            <p style="color:#777; margin:0 0 8px;">Generate synchronized video and audio from images</p>
-            <div class="notice">
-                <b>Heads up:</b> This is a paid app that uses <b>your</b> inference provider credits when you run generations.
-                Free users get <b>$0.10 in included credits</b>. <b>PRO users</b> get <b>$2 in included credits</b>
-                and can continue using beyond that (with billing).
-                <a href='http://huggingface.co/subscribe/pro?source=ovi' target='_blank' style='color:#fff; text-decoration:underline; font-weight:bold;'>Subscribe to PRO</a>
-                for more credits. Please sign in with your Hugging Face account to continue.
-            </div>
-            <p style="font-size: 0.9em; color: #999; margin-top: 10px;">
-                Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color:#667eea; text-decoration:underline;">anycoder</a>
-            </p>
         </div>
         """
     )
     gr.Markdown(
         """
-        ### Transform your static images into dynamic videos with synchronized audio using AI!
-        Powered by **Ovi: Twin Backbone Cross-Modal Fusion for Audio-Video Generation** via [HuggingFace Inference Providers](https://huggingface.co/docs/huggingface_hub/guides/inference)
         """
     )
     # Add login button - required for OAuth
-    login_btn = gr.LoginButton("Sign in with Hugging Face")
     gr.HTML(
         """
@@ -230,7 +179,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Ovi Image-to-Video
     gr.HTML(
         """
-        <div class="special-tokens-box">
             <strong>✨ Special Tokens for Enhanced Control:</strong>
             <ul>
                 <li><strong>Speech:</strong> <code>&lt;S&gt;Your speech content here&lt;E&gt;</code> - Text enclosed in these tags will be converted to speech</li>
@@ -247,36 +196,26 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Ovi Image-to-Video
         with gr.Column(scale=1):
             image_input = gr.Image(
                 label="📸 Upload Image",
-                type="pil",
                 sources=["upload", "clipboard"],
                 height=400,
             )
             prompt_input = gr.Textbox(
                 label="✍️ Text Prompt",
-                placeholder="Describe the motion and audio you want... (e.g., 'A person walking forward while talking')",
-                lines=4,
-                max_lines=6
             )
-            with gr.Row():
-                generate_btn = gr.Button(
-                    "🎬 Generate Video",
-                    variant="primary",
-                    scale=2
-                )
-                clear_btn = gr.Button(
-                    "🗑️ Clear",
-                    variant="secondary",
-                    scale=1
-                )
-            status_output = gr.Textbox(
-                label="Status",
-                interactive=False,
-                visible=True,
-                elem_classes=["status-box"]
             )
             gr.Examples(
@@ -287,7 +226,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Ovi Image-to-Video
                     ]
                 ],
                 inputs=[image_input, prompt_input],
-                label="Example Prompts",
             )
         with gr.Column(scale=1):
@@ -295,7 +234,6 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Ovi Image-to-Video
                 label="🎥 Generated Video",
                 height=400,
                 autoplay=True,
-                show_download_button=True
             )
             gr.Markdown(
@@ -316,107 +254,70 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Ovi Image-to-Video
                 ---
-                ### 💳 Pricing Information
-                This app uses the Hugging Face Inference API (provider: fal-ai) which charges based on usage:
                 - **Free users**: $0.10 in included credits
                 - **PRO users**: $2 in included credits + ability to continue with billing
-                [Subscribe to PRO](http://huggingface.co/subscribe/pro?source=ovi) for more credits and features!
                 """
             )
-    # How to Use section
-    with gr.Accordion("📖 How to Use", open=False):
-        gr.Markdown(
-            """
-            ### Getting Started:
-            1. **Sign in** with your Hugging Face account using the button above
-            2. **Upload** your image - any photo or illustration
-            3. **Describe** the motion and audio you want in the prompt
-            4. **Use special tokens** for speech and audio descriptions (optional but recommended)
-            5. **Generate** and watch your image come to life with synchronized audio!
-            ### Special Tokens Guide:
-            **Speech Token**: `<S>text<E>`
-            - Use this to add spoken dialogue to your video
-            - Example: `The person says <S>Hello, how are you?<E>`
-            **Audio Description Token**: `<AUDCAP>description<ENDAUDCAP>`
-            - Use this to describe background sounds and audio effects
-            - Example: `<AUDCAP>Birds chirping, gentle wind blowing<ENDAUDCAP>`
-            ### Tips for Better Results:
-            - Be specific and descriptive in your prompts
-            - Combine visual motion descriptions with audio elements
-            - Use high-quality input images for better results
-            - Experiment with different prompts and special tokens
-            - Processing takes 30-60 seconds per generation
-            ### ⚠️ Important Notes:
-            - This is a **paid app** that uses your inference provider credits
-            - Each generation consumes credits based on processing time
-            - Free accounts have limited credits ($0.10)
-            - PRO accounts get more credits ($2) and can continue with billing
-            - Videos are 5 seconds long at 24 FPS
-            - Supports multiple aspect ratios (9:16, 16:9, 1:1, etc)
-            """
-        )
-    gr.Markdown(
-        """
-        ---
-        ### 🔗 Resources
-        - [Ovi Model Card](https://huggingface.co/chetwinlow1/Ovi)
-        - [Character AI](https://character.ai)
-        - [Hugging Face Inference API Docs](https://huggingface.co/docs/huggingface_hub/guides/inference)
-        - [Subscribe to PRO](http://huggingface.co/subscribe/pro?source=ovi)
-        ### 📊 Model Specifications
-        - **Provider**: fal-ai
-        - **Model**: chetwinlow1/Ovi
-        - **Output**: 5-second videos at 24 FPS with audio
-        - **Input**: Image + Text prompt
-        - **Resolution**: 720×720 area (various aspect ratios)
-        """
-    )
     # Event handlers with authentication
     generate_btn.click(
         fn=generate_video_with_auth,
         inputs=[image_input, prompt_input],
-        outputs=[video_output, status_output],
-        show_progress="full",
         queue=False,
         api_name=False,
         show_api=False,
     )
     clear_btn.click(
-        fn=clear_all,
-        inputs=[],
-        outputs=[image_input, prompt_input, video_output, status_output],
         queue=False,
     )
 # Launch the app
 if __name__ == "__main__":
-    try:
-        cleanup_temp_files()
-        if os.path.exists("gradio_cached_examples"):
-            shutil.rmtree("gradio_cached_examples", ignore_errors=True)
-    except Exception as e:
-        print(f"Initial cleanup error: {e}")
-    demo.queue(status_update_rate="auto", api_open=False, default_concurrency_limit=None)
     demo.launch(
         show_api=False,
-        share=False,
-        show_error=True,
         enable_monitoring=False,
         quiet=True,
     )

 import gradio as gr
 import os
 from huggingface_hub import InferenceClient
 from pathlib import Path
+import tempfile
+# DO NOT create a global client for paid apps
+# Each user's client will be created using their OAuth token
 def generate_video_with_auth(image, prompt, profile: gr.OAuthProfile | None, progress=gr.Progress()):
     """
         progress: Gradio progress tracker
     Returns:
+        Path to the generated video file
     """
+    if profile is None:
+        raise gr.Error("Please sign in with Hugging Face to use this paid app")
+    if image is None:
+        raise gr.Error("Please upload an image first!")
+    if not prompt or prompt.strip() == "":
+        raise gr.Error("Please enter a prompt describing the desired motion!")
     try:
         progress(0.2, desc="Processing image...")
         # Read the image file
         if isinstance(image, str):
             with open(image, "rb") as image_file:
                 input_image = image_file.read()
         else:
+            # If image is a PIL Image, save it temporarily
+            temp_image = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
+            image.save(temp_image.name)
+            with open(temp_image.name, "rb") as image_file:
+                input_image = image_file.read()
         progress(0.4, desc="Generating video with AI...")
+        # CRITICAL FOR PAID APPS: Create client with user's OAuth token
+        # Do NOT use bill_to parameter - this makes the USER pay, not HuggingFace
         client = InferenceClient(
             provider="fal-ai",
+            api_key=profile.oauth_info.access_token,
         )
         # Generate video using the inference client
+        video = client.image_to_video(
+            input_image,
+            prompt=prompt,
+            model="chetwinlow1/Ovi",
+        )
         progress(0.9, desc="Finalizing video...")
         # Save the video to a temporary file
+        output_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+        # Check if video is bytes or a file path
+        if isinstance(video, bytes):
+            with open(output_path.name, "wb") as f:
+                f.write(video)
+        elif isinstance(video, str) and os.path.exists(video):
+            # If it's a path, copy it
+            import shutil
+            shutil.copy(video, output_path.name)
+        else:
+            # Try to write it directly
+            with open(output_path.name, "wb") as f:
+                f.write(video)
         progress(1.0, desc="Complete!")
+        return output_path.name
     except Exception as e:
+        raise gr.Error(f"Error generating video: {str(e)}")
 # Create the Gradio interface
+with gr.Blocks(
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="indigo",
+    ),
+    css="""
+        .header-link {
+            font-size: 0.9em;
+            color: #666;
+            text-decoration: none;
+            margin-bottom: 1em;
+            display: inline-block;
+        }
+        .header-link:hover {
+            color: #333;
+            text-decoration: underline;
+        }
+        .main-header {
+            text-align: center;
+            margin-bottom: 2em;
+        }
+        .info-box {
+            background-color: #f0f7ff;
+            border-left: 4px solid #4285f4;
+            padding: 1em;
+            margin: 1em 0;
+            border-radius: 4px;
+        }
+        .auth-warning {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 14px 16px;
+            border-radius: 12px;
+            margin: 18px auto 6px;
+            max-width: 860px;
+            text-align: center;
+            font-size: 0.98rem;
+            font-weight: bold;
+        }
+    """,
+    title="Image to Video Generator with Ovi (Paid)",
+) as demo:
     gr.HTML(
         """
+        <div class="main-header">
+            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" class="header-link">
+                Built with anycoder ✨
+            </a>
         </div>
         """
     )
     gr.Markdown(
         """
+        # 🎬 Image to Video Generator with Ovi
+        Transform your static images into dynamic videos with synchronized audio using AI! Upload an image and describe the motion you want to see.
+        Powered by Ovi: Twin Backbone Cross-Modal Fusion for Audio-Video Generation via [HuggingFace Inference Providers](https://huggingface.co/docs/huggingface_hub/guides/inference).
+        """
+    )
+    gr.HTML(
+        """
+        <div class="auth-warning">
+            💳 <b>PAID APP:</b> This app uses <b>YOUR</b> inference provider credits.
+            Free users get $0.10 in included credits. PRO users get $2 in credits and can continue with billing.
+            <a href='http://huggingface.co/subscribe/pro?source=ovi' target='_blank' style='color:#fff; text-decoration:underline;'>Subscribe to PRO</a> for more credits.
+            Please sign in below to continue.
+        </div>
         """
     )
     # Add login button - required for OAuth
+    gr.LoginButton()
     gr.HTML(
         """
     gr.HTML(
         """
+        <div class="info-box">
             <strong>✨ Special Tokens for Enhanced Control:</strong>
             <ul>
                 <li><strong>Speech:</strong> <code>&lt;S&gt;Your speech content here&lt;E&gt;</code> - Text enclosed in these tags will be converted to speech</li>
         with gr.Column(scale=1):
             image_input = gr.Image(
                 label="📸 Upload Image",
+                type="filepath",
                 sources=["upload", "clipboard"],
                 height=400,
             )
             prompt_input = gr.Textbox(
                 label="✍️ Text Prompt",
+                lines=3,
+                placeholder="Describe the motion and audio you want to see..."
             )
+            generate_btn = gr.Button(
+                "🎬 Generate Video",
+                variant="primary",
+                size="lg",
+            )
+            clear_btn = gr.Button(
+                "🗑️ Clear",
+                variant="secondary",
             )
             gr.Examples(
                     ]
                 ],
                 inputs=[image_input, prompt_input],
+                label="Example",
             )
         with gr.Column(scale=1):
                 label="🎥 Generated Video",
                 height=400,
                 autoplay=True,
             )
             gr.Markdown(
                 ---
+                ### 💳 Pricing & Credits
+                This is a **paid app** that charges your HuggingFace inference provider account:
                 - **Free users**: $0.10 in included credits
                 - **PRO users**: $2 in included credits + ability to continue with billing
+                - Each video generation consumes credits based on processing time
+                [Subscribe to PRO](http://huggingface.co/subscribe/pro?source=ovi) for more credits!
                 """
             )
     # Event handlers with authentication
+    # NOTE: Do NOT pass profile as input - Gradio injects it automatically
     generate_btn.click(
         fn=generate_video_with_auth,
         inputs=[image_input, prompt_input],
+        outputs=[video_output],
         queue=False,
         api_name=False,
         show_api=False,
     )
     clear_btn.click(
+        fn=lambda: (None, "", None),
+        inputs=None,
+        outputs=[image_input, prompt_input, video_output],
         queue=False,
     )
+    gr.Markdown(
+        """
+        ---
+        ### 🚀 How it works
+        1. **Sign in** with your Hugging Face account (required for paid app)
+        2. **Upload** your image - any photo or illustration
+        3. **Describe** the motion you want to see in the prompt
+        4. **Generate** and watch your image come to life with synchronized audio!
+        5. **Credits are deducted** from your HuggingFace inference provider account
+        ### ⚠️ Notes
+        - **This is a PAID app** - uses your inference provider credits
+        - Video generation may take 30-60 seconds
+        - Generates 5-second videos at 24 FPS with synchronized audio
+        - Supports multiple aspect ratios (9:16, 16:9, 1:1, etc) at 720×720 area
+        - Best results with clear, high-quality images
+        - The model works best with realistic subjects and natural motions
+        - Free accounts have limited credits - upgrade to PRO for more
+        ### 🔗 Resources
+        - [Ovi Model Card](https://huggingface.co/chetwinlow1/Ovi)
+        - [Character AI](https://character.ai)
+        - [Subscribe to PRO](http://huggingface.co/subscribe/pro?source=ovi)
+        - [Inference API Documentation](https://huggingface.co/docs/huggingface_hub/guides/inference)
+        """
+    )
 # Launch the app
 if __name__ == "__main__":
     demo.launch(
         show_api=False,
         enable_monitoring=False,
         quiet=True,
     )