Spaces:

Realmeas
/

Myblinkcaption

Sleeping

App Files Files Community

Realmeas committed 27 days ago

Commit

b847dcb

verified ·

1 Parent(s): 1bd8ed9

Create app.py

Browse files

Files changed (1) hide show

app.py +279 -0

app.py ADDED Viewed

	@@ -0,0 +1,279 @@

+# app.py - BlinkCaptions-like Auto Subtitle Generator for Hugging Face Spaces
+# This Gradio app mimics Vozo.ai's BlinkCaptions: AI-powered one-click dynamic captions
+# with high accuracy, Hinglish support, styles, emojis/highlights, and FFmpeg burning.
+# Key steps (Blink-like):
+# 1. Input handling: Upload video or YouTube URL download.
+# 2. Audio extraction & validation (duration <10min, has audio).
+# 3. Whisper transcription: Auto-detect Hinglish (language=None), timestamps for sync.
+#    - Lyrics mode: Enable word_timestamps for music-like precision.
+#    - Trim: Skip short/silent segments (<0.5s).
+# 4. Enhancements: Word emphasis (e.g., wrap "wow" in bold/color tags).
+# 5. Translation: Optional, via googletrans (sample language list; argostranslate is a commented-out alternative).
+# 6. ASS subtitle creation: Styled with fonts/colors/sizes/positions/animations/emojis.
+# 7. Burn to video: FFmpeg overlays HD output, no watermark.
+# 8. UI: Simple, free, viral-ready for Reels/YouTube.
+# Deploy: Save as app.py, add requirements.txt, create HF Space (Gradio SDK).
+import os
+import tempfile
+import gradio as gr
+from transformers import pipeline
+import torch
+import ffmpeg
+from yt_dlp import YoutubeDL
+from googletrans import Translator  # Fallback to googletrans for simplicity (argos heavy for 120+ langs)
+# Note: For argostranslate, uncomment below and pre-install packs in HF Space Dockerfile if needed.
+# from argostranslate import package, translate
+# package.update_package_index()  # Run once
+# Model options: UI label -> Hugging Face model id passed to the ASR pipeline.
+MODEL_CHOICES = {
+    "Base (fast)": "openai/whisper-base",
+    "Small": "openai/whisper-small",
+    "Medium": "openai/whisper-medium",
+    "Large-v3 (accurate, multilingual)": "openai/whisper-large-v3"
+}
+# Style options offered in the UI dropdowns. The SIZES and COLORS entries are
+# looked up by exact string in create_ass_subtitles, so keep both in sync.
+FONTS = ["Arial", "Montserrat"]  # Add more if custom fonts available
+COLORS = ["white", "yellow", "black"]
+SIZES = ["small (24)", "medium (32)", "large (40)"]
+POSITIONS = ["bottom", "top"]
+# Target-language codes for translation; "en" is treated as "do not translate".
+LANGUAGES = ["en", "hi", "fr", "es"]  # Sample; extend to 120+ with googletrans
+# Single module-level googletrans client shared by every request.
+translator = Translator()  # googletrans instance
+def download_youtube(url, progress=gr.Progress()):
+    """Download YouTube video using yt-dlp (Blink-like: social input support)."""
+    progress(0, desc="Downloading video...")
+    ydl_opts = {
+        'format': 'best[height<=720]/best',  # HD but not too heavy
+        'outtmpl': '%(title)s.%(ext)s',
+        'noplaylist': True,
+    }
+    with tempfile.TemporaryDirectory() as tmpdir:
+        os.chdir(tmpdir)
+        with YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+        files = [f for f in os.listdir(tmpdir) if f.endswith(('.mp4', '.mkv', '.webm'))]
+        if files:
+            video_path = os.path.join(tmpdir, files[0])
+            progress(1, desc="Download complete!")
+            return video_path
+    raise ValueError("Download failed!")
+def extract_audio(video_path):
+    """Extract audio from video using FFmpeg (prep for Whisper)."""
+    audio_path = video_path.rsplit('.', 1)[0] + '.wav'
+    stream = ffmpeg.input(video_path)
+    stream = ffmpeg.output(stream, audio_path, acodec='pcm_s16le', ac=1, ar=16000)  # Whisper format
+    ffmpeg.run(stream, overwrite_output=True, quiet=True)
+    return audio_path
+def get_video_duration(video_path):
+    """Get duration in seconds using FFmpeg probe (limit check)."""
+    probe = ffmpeg.probe(video_path)
+    return float(probe['streams'][0]['duration']) if probe['streams'] else 0
+def has_audio(video_path):
+    """Check if video has audio stream."""
+    probe = ffmpeg.probe(video_path)
+    return any(s['codec_type'] == 'audio' for s in probe['streams'])
+def transcribe_audio(audio_path, model_name, lyrics_mode, progress=gr.Progress()):
+    """Whisper transcription (Blink-like: 98%+ acc, Hinglish auto-detect, lyrics mode)."""
+    progress(0, desc="Loading Whisper model...")
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    pipe = pipeline(
+        "automatic-speech-recognition",
+        model=model_name,
+        device=device,
+        return_timestamps=True,
+        chunk_length_s=30,
+        batch_size=8,
+    )
+    # Generate kwargs for accuracy boost (transcribe task, auto lang for Hinglish)
+    generate_kwargs = {"task": "transcribe", "language": None}  # Auto-detect Hindi/English mix
+    if lyrics_mode:
+        generate_kwargs["word_timestamps"] = True  # Lyrics precision
+    progress(0.5, desc="Transcribing...")
+    result = pipe(audio_path, generate_kwargs=generate_kwargs)
+    # Extract segments, trim silences (short <0.5s)
+    segments = result.get('chunks', [])
+    trimmed_segments = [s for s in segments if (s['end'] - s['start']) > 0.5]
+    progress(1, desc="Transcription complete!")
+    return trimmed_segments  # List of {'text': , 'start': , 'end': }
+def translate_text(text, target_lang):
+    """Optional translation (Blink-like: 120+ langs)."""
+    if target_lang == "en":  # No translate
+        return text
+    return translator.translate(text, dest=target_lang).text
+def create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress=gr.Progress()):
+    """Create ASS subtitles (styled like Blink: fonts/colors/emojis/highlights/animations)."""
+    progress(0, desc="Generating styled subtitles...")
+    # Map sizes
+    size_map = {"small (24)": 24, "medium (32)": 32, "large (40)": 40}
+    fontsize = size_map[size]
+    # Color to ASS hex (BGR)
+    color_map = {"white": "&HFFFFFF&", "yellow": "&HFFFF00&", "black": "&H000000&"}
+    subtitle_color = color_map[color]
+    # Position: bottom/top alignment
+    alignment = 2 if position == "bottom" else 8  # ASS alignment codes
+    # ASS header (V4+ Styles for Blink-like customizations)
+    ass_content = """[Script Info]
+Title: Generated Subtitles
+ScriptType: v4.00+
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+Style: Default,{font},{fontsize},{subtitle_color},&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,2,{alignment},10,10,10,1
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+""".format(font=font, fontsize=fontsize, subtitle_color=subtitle_color, alignment=alignment)
+    # Build events with enhancements
+    for i, seg in enumerate(segments):
+        start = f"{int(seg['start']*100)}"  # ms
+        end = f"{int(seg['end']*100)}"
+        text = translate_text(seg['text'].strip(), target_lang)
+        # Word emphasis/highlights (e.g., make "wow" pop with bold/color)
+        for word in emphasis_words:
+            if word.lower() in text.lower():
+                text = text.replace(word, f"{{\\b1\\c{&HFF0000&}}}" + word + "{\\b0}")
+        # Add emoji example (Blink-like: one-click emojis)
+        if "!" in text:
+            text += " 😎"  # Simple auto-add
+        # Basic fade-in animation (Blink dynamic)
+        text = f"{{\\fad(200,200)}}{text}"
+        # Dialogue event
+        ass_content += f"Dialogue: 0,{start},{end},Default,,0,0,0,,{text}\n"
+    progress(1, desc="ASS file ready!")
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.ass', delete=False) as f:
+        f.write(ass_content)
+        return f.name
+def burn_subtitles(video_path, ass_path, progress=gr.Progress()):
+    """Burn ASS to video using FFmpeg (Blink-like: HD export, no watermark)."""
+    progress(0, desc="Burning subtitles...")
+    output_path = video_path.rsplit('.', 1)[0] + '_subtitled.mp4'
+    # FFmpeg command: subtitles filter for styles/shadows/position
+    stream = ffmpeg.input(video_path)
+    stream = ffmpeg.filter(stream, 'subtitles', ass_path, force_style='FontName=Montserrat,PrimaryColour=&HFFFFFF&')  # Override if needed
+    stream = ffmpeg.output(stream, output_path, vcodec='libx264', acodec='aac', **{'preset': 'fast'})  # HD
+    ffmpeg.run(stream, overwrite_output=True, quiet=True)
+    progress(1, desc="Video ready!")
+    return output_path
+def main_process(
+    video_file, yt_url, model_choice, lyrics_mode, target_lang,
+    font, color, size, position, emphasis_words_str,
+    progress=gr.Progress()
+):
+    """Main pipeline (Blink one-click flow)."""
+    if video_file is None and yt_url is None:
+        raise gr.Error("Upload a video or enter a YouTube URL!")
+    video_path = None
+    if video_file:
+        video_path = video_file.name
+    elif yt_url:
+        video_path = download_youtube(yt_url, progress)
+    # Validation (Blink error handling)
+    duration = get_video_duration(video_path)
+    if duration > 600:  # 10 min
+        raise gr.Error("Video too long! Limit to 10 minutes for demo.")
+    if not has_audio(video_path):
+        raise gr.Error("No audio detected!")
+    # Extract audio
+    audio_path = extract_audio(video_path)
+    # Transcribe
+    model_name = MODEL_CHOICES[model_choice]
+    segments = transcribe_audio(audio_path, model_name, lyrics_mode, progress)
+    if not segments:
+        raise gr.Error("No speech detected!")
+    # Emphasis words
+    emphasis_words = emphasis_words_str.split(',') if emphasis_words_str else []
+    # Create styled ASS
+    ass_path = create_ass_subtitles(segments, font, color, size, position, emphasis_words, target_lang, progress)
+    # Burn
+    output_video = burn_subtitles(video_path, ass_path, progress)
+    # Also save SRT for download (simple conversion from segments)
+    srt_content = ""
+    for i, seg in enumerate(segments, 1):
+        start = f"{int(seg['start']//3600):02d}:{int((seg['start']%3600)//60):02d}:{int(seg['start']%60):02d},{int((seg['start']%1)*1000):03d}"
+        end = f"{int(seg['end']//3600):02d}:{int((seg['end']%3600)//60):02d}:{int(seg['end']%60):02d},{int((seg['end']%1)*1000):03d}"
+        text = translate_text(seg['text'].strip(), target_lang)
+        srt_content += f"{i}\n{start} --> {end}\n{text}\n\n"
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False) as f:
+        f.write(srt_content)
+        srt_path = f.name
+    # Preview thumbnail (simple FFmpeg extract)
+    thumb_path = video_path.rsplit('.', 1)[0] + '_thumb.jpg'
+    ffmpeg.input(video_path, ss=1).output(thumb_path, vframes=1).run(quiet=True)
+    return output_video, srt_path, thumb_path
+# Gradio UI (Blink simple: upload, selectors, progress, downloads)
+# Components appear in the page in the order they are created below.
+with gr.Blocks(title="BlinkCaptions Clone - Free Auto Subtitles") as demo:
+    gr.Markdown("# 🚀 BlinkCaptions-like AI Subtitle Generator\nGenerate dynamic Hinglish captions with styles! Viral-ready for Reels/YouTube.")
+    # Row 1: video source on the left, transcription options on the right.
+    with gr.Row():
+        with gr.Column(scale=1):
+            video_input = gr.File(label="Upload Video (MP4, <10min)", file_types=[".mp4", ".mkv", ".webm"])
+            yt_input = gr.Textbox(label="Or YouTube URL", placeholder="https://youtube.com/watch?v=...")
+        with gr.Column(scale=1):
+            model_choice = gr.Dropdown(choices=list(MODEL_CHOICES.keys()), value="Large-v3 (accurate, multilingual)", label="Model")
+            lyrics_mode = gr.Checkbox(label="Lyrics Mode (for music)", value=False)
+            target_lang = gr.Dropdown(choices=LANGUAGES, value="en", label="Translate To")
+    # Row 2: caption style selectors, populated from the module-level lists.
+    with gr.Row():
+        font = gr.Dropdown(choices=FONTS, value="Arial", label="Font")
+        color = gr.Dropdown(choices=COLORS, value="white", label="Color")
+        size = gr.Dropdown(choices=SIZES, value="medium (32)", label="Size")
+        position = gr.Dropdown(choices=POSITIONS, value="bottom", label="Position")
+    emphasis_input = gr.Textbox(label="Emphasize Words (comma-separated, e.g., wow,amazing)", placeholder="For highlights!")
+    process_btn = gr.Button("Generate Captions!", variant="primary")
+    # Row 3: results, in the same order as main_process's return tuple.
+    with gr.Row():
+        output_video = gr.File(label="Download Video with Subtitles")
+        output_srt = gr.File(label="Download SRT Transcript")
+        preview = gr.Image(label="Preview Thumbnail")
+    # Progress integrated in fn
+    # The inputs list is positional: it must match main_process's parameter
+    # order (the trailing progress argument is not listed here).
+    process_btn.click(
+        main_process,
+        inputs=[video_input, yt_input, model_choice, lyrics_mode, target_lang, font, color, size, position, emphasis_input],
+        outputs=[output_video, output_srt, preview]
+    )
+    gr.Markdown("**Free & No Login!** Optimized for speed (CUDA auto). Built with ❤️ for creators.")
+# Start the server only when run as a script: all interfaces, port 7860.
+# NOTE(review): share=True may be unnecessary when hosted on a Space; confirm.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)  # For HF Spaces