Spaces:

suprimedev
/

speh32

Sleeping

App Files Files Community

suprimedev committed on Oct 21

Commit

145d58c

verified ·

1 Parent(s): c9db7d2

Update app.py

Browse files

Files changed (1) hide show

app.py +140 -288

app.py CHANGED Viewed

@@ -12,449 +12,301 @@ import time
 # تنظیمات اولیه
 recognizer = sr.Recognizer()
-recognizer.energy_threshold = 300
 recognizer.dynamic_energy_threshold = True
-recognizer.pause_threshold = 0.8  # اضافه: pause detection بهتر برای real-time
-# صف برای accumulate کردن chunk‌های real-time (هر chunk کوچک ~0.5-1s)
-audio_queue = queue.Queue(maxsize=20)  # محدود برای جلوگیری از overflow
-CHUNK_PROCESS_INTERVAL = 3.0  # ثانیه: هر چند ثانیه chunk‌ها را process کنیم
-MIN_AUDIO_DURATION = 2.0  # ثانیه: حداقل طول صوت برای process
-# متغیر global برای ذخیره متن real-time
 current_transcript = ""
 transcript_lock = threading.Lock()
-# Thread background برای پردازش مداوم queue
 def background_processor():
-    """Thread جداگانه برای polling queue و process chunk‌های accumulate‌شده"""
-    accumulated_audio = []  # لیست برای جمع chunk‌ها
-    last_process_time = time.time()
     while True:
         try:
-            # اگر queue خالی نیست، chunk جدید اضافه کن
             if not audio_queue.empty():
-                audio_tuple = audio_queue.get(timeout=0.1)
-                if audio_tuple:
-                    sample_rate, audio_data = audio_tuple
-                    # Normalize و append به accumulated
-                    if audio_data is not None and len(audio_data) > 0:
-                        # Clip به [-1, 1] برای جلوگیری از overflow
-                        audio_data = np.clip(audio_data, -1.0, 1.0)
-                        accumulated_audio.append((sample_rate, audio_data))
-                        print(f"Chunk accumulated: {len(audio_data)/sample_rate:.2f}s")  # log
-            # هر INTERVAL، اگر طول accumulated کافی باشد، process کن
-            current_time = time.time()
-            total_duration = sum(len(data)/rate for rate, data in accumulated_audio) if accumulated_audio else 0
-            if (current_time - last_process_time >= CHUNK_PROCESS_INTERVAL) and total_duration >= MIN_AUDIO_DURATION:
-                # Merge chunk‌ها به یک audio واحد
-                if len(accumulated_audio) > 0:
-                    merged_rate = accumulated_audio[0][0]  # فرض sample_rate ثابت
-                    merged_data = np.concatenate([data for _, data in accumulated_audio])
                     text = process_audio_chunk((merged_rate, merged_data))
-                    if text and text != "[خطا در اتصال]":
                         with transcript_lock:
                             if current_transcript:
                                 current_transcript += " " + text
                             else:
                                 current_transcript = text
-                        print(f"Processed: {text}")  # log
-                    # Reset accumulated
-                    accumulated_audio = []
-                    last_process_time = current_time
-            time.sleep(0.1)  # poll هر 100ms
-        except queue.Empty:
-            continue
         except Exception as e:
-            print(f"Error in background processor: {e}")
-            time.sleep(0.5)
-# شروع thread background (یک بار)
 processor_thread = threading.Thread(target=background_processor, daemon=True)
 processor_thread.start()
 def numpy_to_audio_segment(audio_data, sample_rate):
-    """تبدیل numpy array به AudioSegment (بهبود: clip و handling بهتر)"""
     if audio_data is None or len(audio_data) == 0:
         return None
-    # نرمال‌سازی و clip به int16
-    if audio_data.dtype == np.float32 or audio_data.dtype == np.float64:
-        audio_data = np.clip(audio_data, -1.0, 1.0)  # clip برای جلوگیری از distortion
         audio_data = (audio_data * 32767).astype(np.int16)
-    # ذخیره در buffer موقت
     buffer = io.BytesIO()
     with wave.open(buffer, 'wb') as wav_file:
         wav_file.setnchannels(1)
         wav_file.setsampwidth(2)
         wav_file.setframerate(sample_rate)
         wav_file.writeframes(audio_data.tobytes())
     buffer.seek(0)
     return AudioSegment.from_wav(buffer)
 def process_audio_chunk(audio_tuple):
-    """پردازش یک قطعه صوتی با Google Speech Recognition (بهبود: handling chunk کوتاه)"""
     try:
         if audio_tuple is None:
             return ""
         sample_rate, audio_data = audio_tuple
-        duration = len(audio_data) / sample_rate if sample_rate > 0 else 0
-        if duration < MIN_AUDIO_DURATION:
-            return ""  # skip chunk‌های خیلی کوتاه
-        # تبدیل به AudioSegment
         audio_segment = numpy_to_audio_segment(audio_data, sample_rate)
-        if audio_segment is None or len(audio_segment) < MIN_AUDIO_DURATION * 1000:
             return ""
-        # ذخیره موقت برای speech recognition
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-            audio_segment.export(tmp_file.name, format="wav")
-            tmp_path = tmp_file.name
-        # استفاده از speech recognition با adjust برای chunk
         with sr.AudioFile(tmp_path) as source:
-            recognizer.adjust_for_ambient_noise(source, duration=0.5)  # adjust noise برای real-time
             audio = recognizer.record(source)
-        # تشخیص با اولویت فارسی
         text = ""
         try:
-            text = recognizer.recognize_google(audio, language='fa-IR')
         except sr.UnknownValueError:
-            # تلاش با انگلیسی
             try:
                 text = recognizer.recognize_google(audio, language='en-US')
             except sr.UnknownValueError:
-                text = ""  # بهبود: لاگ
-                print("No speech detected in chunk")
         except sr.RequestError as e:
-            text = "[خطا در اتصال]"
             print(f"Google API error: {e}")
-        # پاک کردن فایل موقت
         if os.path.exists(tmp_path):
             os.unlink(tmp_path)
-        return text.strip()
     except Exception as e:
-        print(f"خطا در پردازش: {e}")
         return ""
-def transcribe_realtime(audio):
-    """تبدیل real-time صوت به متن (بهبود: queue chunk و return current)"""
-    global current_transcript
     if audio is None:
-        return current_transcript
-    # Add to queue (non-blocking)
     try:
         audio_queue.put(audio, block=False)
     except queue.Full:
-        print("Queue full, skipping chunk")  # log
-    # Return current transcript برای آپدیت UI
     with transcript_lock:
         return current_transcript
-def clear_realtime():
-    """پاک کردن متن real-time"""
     global current_transcript
     with transcript_lock:
         current_transcript = ""
-    # Clear queue هم
     while not audio_queue.empty():
-        try:
-            audio_queue.get_nowait()
-        except queue.Empty:
-            break
     return ""
-# توابع file و UI بدون تغییر (برای اختصار، اما کامل هستند)
 def transcribe_file(audio_file, chunk_duration=30):
-    """تبدیل فایل صوتی به متن با تقسیم به بخش‌ها"""
     if audio_file is None:
-        yield "لطفاً یک فایل صوتی آپلود کنید", ""
         return
     try:
-        # خواندن فایل صوتی
         audio = AudioSegment.from_file(audio_file)
         duration_ms = len(audio)
-        chunk_duration_ms = chunk_duration * 1000
         all_text = []
-        progress_text = ""
-        # تقسیم و پردازش
-        num_chunks = (duration_ms + chunk_duration_ms - 1) // chunk_duration_ms
-        for i in range(0, duration_ms, chunk_duration_ms):
-            chunk = audio[i:i + chunk_duration_ms]
-            # ذخیره chunk موقت
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-                chunk.export(tmp_file.name, format="wav")
-                tmp_path = tmp_file.name
-            # تشخیص گفتار
             try:
                 with sr.AudioFile(tmp_path) as source:
                     audio_data = recognizer.record(source)
-                # تلاش با فارسی
                 try:
                     text = recognizer.recognize_google(audio_data, language='fa-IR')
                 except sr.UnknownValueError:
-                    # تلاش با انگلیسی
                     try:
                         text = recognizer.recognize_google(audio_data, language='en-US')
                     except sr.UnknownValueError:
                         text = ""
                 except sr.RequestError:
-                    text = "[خطا در اتصال به سرویس]"
-                if text and text != "[خطا در اتصال به سرویس]":
                     all_text.append(text)
             except Exception as e:
-                print(f"خطا در پردازش chunk: {e}")
-            # پاک کردن فایل موقت
             if os.path.exists(tmp_path):
                 os.unlink(tmp_path)
-            # به‌روزرسانی پیشرفت
-            current_chunk = (i // chunk_duration_ms) + 1
-            progress = min((i + chunk_duration_ms) / duration_ms * 100, 100)
-            progress_text = f"پیشرفت: {progress:.1f}% - بخش {current_chunk} از {num_chunks}"
-            yield " ".join(all_text), progress_text
-            # تاخیر کوتاه برای جلوگیری از rate limiting
             time.sleep(0.5)
-        final_text = " ".join(all_text)
-        yield final_text, "تبدیل کامل شد! ✅"
     except Exception as e:
-        yield f"خطا: {str(e)}", "خطا در پردازش ❌"
 def save_text(text):
-    """ذخیره متن در فایل موقت و برگرداندن آن"""
     if not text.strip():
         return None
     temp_file = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt', encoding='utf-8')
     temp_file.write(text)
     temp_file.close()
     return temp_file.name
-# رابط کاربری Gradio (بدون تغییر، اما اتصال real-time بهبود یافته)
 with gr.Blocks(
-    title="تبدیل گفتار به متن",
     theme=gr.themes.Soft(),
     css="""
-    .gradio-container {
-        font-family: 'Vazir', 'Tahoma', sans-serif !important;
-    }
-    .rtl {
-        direction: rtl;
-        text-align: right;
-    }
     """
 ) as demo:
     gr.HTML("""
     <div style="text-align: center; max-width: 800px; margin: 0 auto;">
-        <h1 style="font-size: 2.5em; margin-bottom: 0.5em;">
-            🎤 تبدیل گفتار به متن
-        </h1>
         <p style="font-size: 1.1em; color: #666; margin-bottom: 2em;">
-            ابزار قدرتمند تبدیل صوت به متن با پشتیبانی از زبان فارسی و انگلیسی
         </p>
     </div>
     """)
     with gr.Tabs():
-        # تب Real-time
         with gr.TabItem("🎙️ ضبط مستقیم"):
-            gr.Markdown("### میکروفون خود را فعال کرده و شروع به صحبت کنید", elem_classes="rtl")
             with gr.Row():
-                with gr.Column(scale=1):
-                    audio_input = gr.Audio(
-                        sources=["microphone"],
-                        type="numpy",
-                        streaming=True,
-                        label="میکروفون",
-                        show_label=True,
-                        elem_classes="rtl"
-                    )
-                    with gr.Row():
-                        clear_btn = gr.Button(
-                            "🗑️ پاک کردن متن",
-                            variant="secondary",
-                            size="sm"
-                        )
             realtime_output = gr.Textbox(
-                label="متن تشخیص داده شده",
-                placeholder="شروع به صحبت کنید و متن اینجا ظاهر می‌شود... (پس از 3-5 ثانیه)",
-                lines=12,
                 elem_classes="rtl",
                 rtl=True,
-                show_copy_button=True
-            )
-            # اتصال برای real-time transcription (بهبود: stream هر chunk به queue)
-            audio_input.stream(
-                transcribe_realtime,
-                inputs=[audio_input],
-                outputs=[realtime_output],
-                show_progress=False
             )
-            clear_btn.click(
-                clear_realtime,
-                outputs=[realtime_output]
-            )
-        # تب فایل صوتی (بدون تغییر)
-        with gr.TabItem("📁 فایل صوتی"):
-            gr.Markdown("### فایل صوتی خود را انتخاب کنید", elem_classes="rtl")
-            with gr.Row():
-                with gr.Column(scale=3):
-                    file_input = gr.Audio(
-                        sources=["upload"],
-                        type="filepath",
-                        label="انتخاب فایل صوتی",
-                        elem_classes="rtl"
-                    )
-                with gr.Column(scale=1):
-                    chunk_duration = gr.Slider(
-                        minimum=10,
-                        maximum=60,
-                        value=30,
-                        step=5,
-                        label="مدت هر بخش (ثانیه)",
-                        elem_classes="rtl"
-                    )
-            process_btn = gr.Button(
-                "🚀 شروع تبدیل",
-                variant="primary",
-                size="lg"
-            )
-            progress_label = gr.Textbox(
-                label="وضعیت پردازش",
-                interactive=False,
-                elem_classes="rtl"
-            )
-            file_output = gr.Textbox(
-                label="متن تشخیص داده شده",
-                placeholder="متن پس از پردازش اینجا نمایش داده می‌شود...",
-                lines=12,
-                elem_classes="rtl",
-                rtl=True,
-                show_copy_button=True
             )
-            # دکمه‌های ذخیره و پاک کردن
             with gr.Row():
-                save_btn = gr.Button(
-                    "💾 ذخیره متن",
-                    variant="secondary"
-                )
-                clear_file_btn = gr.Button(
-                    "🗑️ پاک کردن",
-                    variant="secondary"
-                )
-                download_file = gr.File(
-                    label="دانلود فایل متن",
-                    visible=False,
-                    elem_classes="rtl"
-                )
-            # اتصالات
-            process_btn.click(
-                transcribe_file,
-                inputs=[file_input, chunk_duration],
-                outputs=[file_output, progress_label]
-            )
-            save_btn.click(
-                save_text,
-                inputs=[file_output],
-                outputs=[download_file]
-            ).then(
-                lambda: gr.update(visible=True),
-                outputs=[download_file]
-            )
-            clear_file_btn.click(
-                lambda: ("", ""),
-                outputs=[file_output, progress_label]
-            )
-    # بخش راهنما (بهبود: نکته real-time)
-    with gr.Accordion("📖 راهنمای استفاده", open=False, elem_classes="rtl"):
         gr.Markdown("""
-        ### نحوه استفاده:
-        **برای ضبط مستقیم (بهبود یافته):**
-        1. به تب "ضبط مستقیم" بروید
-        2. اجازه دسترسی به میکروفون را بدهید
-        3. حداقل 3-5 ثانیه واضح صحبت کنید (chunk‌ها جمع می‌شوند)
-        4. متن هر 3 ثانیه آپدیت می‌شود
-        **برای فایل صوتی:**
-        1. به تب "فایل صوتی" بروید
-        2. فایل مورد نظر را انتخاب کنید
-        3. مدت زمان تقسیم‌بندی را تنظیم کنید (پیش‌فرض: 30 ثانیه)
-        4. روی "شروع تبدیل" کلیک کنید
-        5. منتظر بمانید تا پردازش کامل شود
-        ### فرمت‌های پشتیبانی شده:
-        - MP3, WAV, M4A, FLAC, OGG, MP4, AVI, MOV
-        ### نکات مهم:
-        - 🎯 برای دقت بیشتر، از فایل‌های با کیفیت بالا استفاده کنید
-        - 🔇 نویز پس‌زمینه را به حداقل برسانید
-        - 🗣️ واضح و شمرده صحبت کنید (حداقل 3 ثانیه)
-        - 🌐 اتصال اینترنت پایدار داشته باشید
-        - ⚠️ Real-time ممکن است 1-2 ثانیه تاخیر داشته باشد
         """, elem_classes="rtl")
-    # فوتر
-    gr.HTML("""
-    <div style="text-align: center; margin-top: 2em; padding: 1em; background-color: #f8f9fa;">
-        <p style="color: #666;">
-            ساخته شده با ❤️ | نسخه 2.1 (بهبود real-time)
-        </p>
-    </div>
-    """)
-# اجرای برنامه
 if __name__ == "__main__":
-    demo.queue().launch(
-        share=True,
-        show_error=True
-    )

 # Initial setup
 recognizer = sr.Recognizer()
+recognizer.energy_threshold = 400  # slightly higher for real-time use
 recognizer.dynamic_energy_threshold = True
+recognizer.pause_threshold = 0.5  # more sensitive to pauses
+# Queue that accumulates real-time audio chunks
+audio_queue = queue.Queue(maxsize=10)  # kept small for real-time use
+PROCESS_INTERVAL = 3.0  # process every 3 seconds
+MIN_DURATION = 1.5  # at least 1.5 s of audio is required before processing
+# Global transcript; written by the background worker and read by the UI callbacks
 current_transcript = ""
 # Guards every read and write of current_transcript
 transcript_lock = threading.Lock()
+# Background thread that processes the audio queue
 def background_processor():
     """Turn queued microphone chunks into transcript text, forever.

     Intended to run in a daemon thread. Each pass drains ``audio_queue``
     into a local buffer. Once ``PROCESS_INTERVAL`` seconds have elapsed and
     the buffer holds at least ``MIN_DURATION`` seconds of audio, the chunks
     are concatenated, handed to ``process_audio_chunk`` and the recognised
     text is appended to ``current_transcript`` under ``transcript_lock``.

     Never returns. Unexpected errors are logged and the loop resumes after
     a one-second back-off.
     """
     # The transcript is rebound below; without this declaration Python treats
     # the name as a local and the first read raises UnboundLocalError.
     global current_transcript

     # Connection-failure markers must never reach the transcript. The long
     # form is what this loop has always checked for; the short form is what
     # process_audio_chunk may return.
     error_markers = ("[خطا در اتصال]", "[خطا اتصال]")
     accumulated = []
     last_process = time.time()
     while True:
         try:
             # Drain everything that is queued right now instead of a single
             # chunk per poll, so a fast producer cannot overflow the queue.
             while True:
                 try:
                     audio_tuple = audio_queue.get_nowait()
                 except queue.Empty:
                     break
                 if not audio_tuple or audio_tuple[1] is None:
                     continue
                 rate, data = audio_tuple
                 data = np.asarray(data)
                 if not rate or data.size == 0:
                     continue
                 # Only float audio lives in [-1, 1]. Clipping integer PCM
                 # (what Gradio normally delivers) would flatten it to silence.
                 if np.issubdtype(data.dtype, np.floating):
                     data = np.clip(data, -1.0, 1.0)
                 accumulated.append((rate, data))
                 print(f"Accumulated chunk: {len(data)/rate:.2f}s")

             now = time.time()
             total_dur = sum(len(d) / r for r, d in accumulated)
             if accumulated and now - last_process >= PROCESS_INTERVAL and total_dur >= MIN_DURATION:
                 # Take ownership of the batch before doing anything that can
                 # fail, so a bad batch is never resubmitted on the next pass.
                 batch, accumulated = accumulated, []
                 last_process = now
                 merged_rate = batch[0][0]  # assumes every chunk shares one sample rate
                 merged_data = np.concatenate([d for _, d in batch])
                 text = process_audio_chunk((merged_rate, merged_data))
                 if text and text not in error_markers:
                     with transcript_lock:
                         if current_transcript:
                             current_transcript += " " + text
                         else:
                             current_transcript = text
                     print(f"✅ Processed: {text[:50]}...")
             time.sleep(0.2)  # poll interval
         except Exception as e:
             # Top-level boundary of the worker: log and keep the thread alive.
             print(f"Background error: {e}")
             time.sleep(1)
+# Start the background worker once, at import time. daemon=True means the
+# thread does not keep the process alive at shutdown.
 processor_thread = threading.Thread(target=background_processor, daemon=True)
 processor_thread.start()
 def numpy_to_audio_segment(audio_data, sample_rate):
     """Wrap raw samples in a mono, 16-bit ``AudioSegment``.

     Args:
         audio_data: Sample array, either ``(samples,)`` or
             ``(samples, channels)``. Float input is expected in [-1, 1];
             integer input is assumed to already be 16-bit PCM.
         sample_rate: Sampling rate in Hz, written into the WAV header.

     Returns:
         An ``AudioSegment`` built from an in-memory WAV, or ``None`` when
         there is no audio.
     """
     if audio_data is None or len(audio_data) == 0:
         return None

     samples = np.asarray(audio_data)
     # Any float width (not just float32/float64) is scaled to int16.
     if np.issubdtype(samples.dtype, np.floating):
         samples = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
     # The header below declares one channel, so multi-channel input must be
     # down-mixed; writing interleaved frames as mono would garble the audio.
     if samples.ndim > 1:
         mixed = samples.reshape(samples.shape[0], -1).mean(axis=1)
         samples = mixed.astype(samples.dtype)

     buffer = io.BytesIO()
     with wave.open(buffer, 'wb') as wav_file:
         wav_file.setnchannels(1)
         wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
         wav_file.setframerate(sample_rate)
         wav_file.writeframes(samples.tobytes())
     buffer.seek(0)
     return AudioSegment.from_wav(buffer)
 def process_audio_chunk(audio_tuple):
     """Recognise speech in one block of audio via Google Speech Recognition.

     Persian (``fa-IR``) is tried first, then English (``en-US``).

     Args:
         audio_tuple: ``(sample_rate, samples)`` or ``None``.

     Returns:
         The recognised text with surrounding whitespace removed; ``""`` when
         the input is missing, shorter than ``MIN_DURATION`` seconds, contains
         no recognisable speech, or processing fails; ``"[خطا در اتصال]"``
         when the recognition service cannot be reached.
     """
     if audio_tuple is None:
         return ""

     tmp_path = None
     try:
         sample_rate, audio_data = audio_tuple
         if audio_data is None or not sample_rate:
             return ""
         if len(audio_data) / sample_rate < MIN_DURATION:
             return ""
         audio_segment = numpy_to_audio_segment(audio_data, sample_rate)
         if audio_segment is None or len(audio_segment) < MIN_DURATION * 1000:
             return ""

         # Reserve a path, close the handle, then export: writing to a file
         # that is still open fails on some platforms.
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
             tmp_path = tmp.name
         audio_segment.export(tmp_path, format="wav")

         # No adjust_for_ambient_noise() here: on a file source it consumes
         # the start of the stream, so record() would lose the first words.
         with sr.AudioFile(tmp_path) as source:
             audio = recognizer.record(source)

         # The RequestError handler wraps both language attempts, so a network
         # failure during the English fallback is reported the same way.
         try:
             try:
                 # If you have an API key, pass key="YOUR_GOOGLE_API_KEY" as well.
                 text = recognizer.recognize_google(audio, language='fa-IR')  # Persian first
             except sr.UnknownValueError:
                 try:
                     text = recognizer.recognize_google(audio, language='en-US')
                 except sr.UnknownValueError:
                     print("No speech in chunk")
                     text = ""
         except sr.RequestError as e:
             print(f"Google API error: {e}")
             # Same marker the background worker filters out of the transcript.
             text = "[خطا در اتصال]"
         return text.strip()
     except Exception as e:
         # Best-effort by design: a bad chunk must not kill the caller.
         print(f"Process error: {e}")
         return ""
     finally:
         # Runs on every exit path, so the temp file can no longer leak.
         if tmp_path and os.path.exists(tmp_path):
             os.unlink(tmp_path)
def handle_realtime_audio(audio):
    """Queue one piece of microphone audio for the background worker.

    Args:
        audio: Value delivered by the Gradio audio component, or ``None``.

    Returns:
        A no-op ``gr.update()`` in every case.
    """
    if audio is not None:
        try:
            audio_queue.put_nowait(audio)
        except queue.Full:
            # Dropping the chunk keeps the UI callback from ever blocking.
            print("Queue full, skip")
    return gr.update()
def get_current_transcript():
    """Return a snapshot of the live transcript (polled by the UI)."""
    with transcript_lock:
        snapshot = current_transcript
    return snapshot
def clear_transcript():
    """Reset the live transcript and discard audio still waiting in the queue.

    Returns:
        An empty string, which the caller uses to blank the transcript box.
    """
    global current_transcript
    with transcript_lock:
        current_transcript = ""
    # The worker thread may take the last item between an empty() check and
    # the get, so drain until the queue itself reports Empty.
    while True:
        try:
            audio_queue.get_nowait()
        except queue.Empty:
            break
    return ""
+# File transcription (unchanged, works fine)
 def transcribe_file(audio_file, chunk_duration=30):
     """Transcribe an audio file piece by piece, yielding progress as it goes.

     The file is cut into ``chunk_duration``-second pieces. Each piece is
     recognised with Google Speech Recognition, Persian first and English as
     a fallback. Pieces that fail are logged and skipped.

     Args:
         audio_file: Path of the uploaded file, or ``None``.
         chunk_duration: Piece length in seconds (int or float, must be > 0).

     Yields:
         ``(text_so_far, status_message)`` after every piece, then a final
         pair once the whole file is done. Fatal errors are reported as a
         yielded pair instead of being raised.
     """
     if audio_file is None:
         yield "لطفاً فایل آپلود کنید", ""
         return
     try:
         audio = AudioSegment.from_file(audio_file)
         duration_ms = len(audio)
         # A slider may hand over a float; range() needs a positive int step.
         chunk_ms = int(round(chunk_duration * 1000))
         if chunk_ms <= 0:
             raise ValueError("chunk_duration must be positive")
         all_text = []
         num_chunks = (duration_ms + chunk_ms - 1) // chunk_ms
         for index, start in enumerate(range(0, duration_ms, chunk_ms), start=1):
             chunk = audio[start:start + chunk_ms]
             tmp_path = None
             try:
                 # Reserve a path, close the handle, then export to it.
                 with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                     tmp_path = tmp.name
                 chunk.export(tmp_path, format="wav")
                 with sr.AudioFile(tmp_path) as source:
                     audio_data = recognizer.record(source)
                 # One RequestError handler covers both language attempts.
                 try:
                     try:
                         text = recognizer.recognize_google(audio_data, language='fa-IR')
                     except sr.UnknownValueError:
                         try:
                             text = recognizer.recognize_google(audio_data, language='en-US')
                         except sr.UnknownValueError:
                             text = ""
                 except sr.RequestError as e:
                     # Service unreachable: skip this piece, keep going.
                     print(f"File chunk error: {e}")
                     text = ""
                 if text:
                     all_text.append(text)
             except Exception as e:
                 print(f"File chunk error: {e}")
             finally:
                 # Remove the temp file even when export or recognition failed.
                 if tmp_path and os.path.exists(tmp_path):
                     os.unlink(tmp_path)
             progress = min((start + chunk_ms) / duration_ms * 100, 100)
             yield " ".join(all_text), f"پیشرفت: {progress:.1f}% - بخش {index} از {num_chunks}"
             time.sleep(0.5)  # brief pause between requests to the service
         yield " ".join(all_text), "کامل شد! ✅"
     except Exception as e:
         yield f"خطا: {e}", "خطا ❌"
 def save_text(text):
     """Write ``text`` to a new UTF-8 ``.txt`` temp file and return its path.

     Args:
         text: Transcript to save; may be ``None``.

     Returns:
         Path of the created file, or ``None`` when there is nothing to save
         (``None``, empty, or whitespace-only input). The file is not deleted
         automatically; the caller owns it.
     """
     # `not text` also covers None, which has no .strip().
     if not text or not text.strip():
         return None
     with tempfile.NamedTemporaryFile(
         mode='w', delete=False, suffix='.txt', encoding='utf-8'
     ) as temp_file:
         temp_file.write(text)
     return temp_file.name
+# Gradio UI with Timer for live updates
 # Gradio UI: a live-microphone tab fed by the background worker and polled by
 # a timer, a file-upload tab, a help accordion and a footer.
 with gr.Blocks(
     title="تبدیل گفتار به متن - Real-time Fixed",
     theme=gr.themes.Soft(),
     css="""
     .gradio-container { font-family: 'Vazir', 'Tahoma', sans-serif !important; }
     .rtl { direction: rtl; text-align: right; }
     """
 ) as demo:
     gr.HTML("""
     <div style="text-align: center; max-width: 800px; margin: 0 auto;">
         <h1 style="font-size: 2.5em; margin-bottom: 0.5em;">🎤 تبدیل گفتار به متن</h1>
         <p style="font-size: 1.1em; color: #666; margin-bottom: 2em;">
             Real-time با Google API - پردازش background | قابل توزیع روی مرورگرها
         </p>
     </div>
     """)
     with gr.Tabs():
         with gr.TabItem("🎙️ ضبط مستقیم"):
             gr.Markdown("### فعال کنید و 5+ ثانیه واضح صحبت کنید (متن هر 3s آپدیت می‌شه)", elem_classes="rtl")
             with gr.Row():
                 # streaming=True makes the component deliver audio in chunks
                 # while recording; without it the handler only ever sees one
                 # finished recording and nothing is real-time.
                 audio_input = gr.Audio(
                     sources=["microphone"],
                     type="numpy",
                     streaming=True,
                     label="میکروفون (ضبط رو شروع کن)",
                     elem_classes="rtl"
                 )
                 start_btn = gr.Button("▶️ شروع real-time", variant="primary")
                 # Hidden until "start" is pressed, so the pair acts as a toggle.
                 stop_btn = gr.Button("⏹️ توقف", variant="secondary", visible=False)
             realtime_output = gr.Textbox(
                 label="متن live",
                 placeholder="پس از 3-5s صحبت، متن ظاهر می‌شه...",
                 lines=10,
                 elem_classes="rtl",
                 rtl=True,
                 show_copy_button=True,
                 interactive=False
             )
             clear_btn = gr.Button("🗑️ پاک کردن", variant="secondary")

             # Events
             def _enqueue_chunk(audio):
                 """Forward a streamed chunk to the queue; returns nothing
                 because the stream event has no output component."""
                 handle_realtime_audio(audio)

             audio_input.stream(_enqueue_chunk, inputs=[audio_input])
             # Timer pulls the transcript into the textbox every 2 s.
             timer = gr.Timer(2.0)
             timer.tick(get_current_transcript, outputs=[realtime_output])
             clear_btn.click(clear_transcript, outputs=[realtime_output])

             # NOTE(review): these buttons never controlled the microphone
             # (the old handler only echoed the audio value back); recording
             # is started and stopped from the microphone widget itself. They
             # now simply swap places so neither can disappear for good.
             def toggle_recording(active):
                 """Return visibility updates for (start_btn, stop_btn)."""
                 return gr.update(visible=not active), gr.update(visible=active)

             start_btn.click(lambda: toggle_recording(True), outputs=[start_btn, stop_btn])
             stop_btn.click(lambda: toggle_recording(False), outputs=[start_btn, stop_btn])
         with gr.TabItem("📁 فایل صوتی"):
             gr.Markdown("### فایل آپلود کن و تبدیل کن", elem_classes="rtl")
             with gr.Row():
                 file_input = gr.Audio(sources=["upload"], type="filepath", label="فایل صوتی", elem_classes="rtl")
                 # Keywords throughout: `step` is not reliably positional.
                 chunk_slider = gr.Slider(
                     minimum=10, maximum=60, value=30, step=5,
                     label="بخش‌بندی (s)", elem_classes="rtl"
                 )
             process_btn = gr.Button("🚀 تبدیل", variant="primary")
             progress_label = gr.Textbox(label="وضعیت", interactive=False, elem_classes="rtl")
             file_output = gr.Textbox(label="متن", lines=10, elem_classes="rtl", rtl=True, show_copy_button=True)
             with gr.Row():
                 save_btn = gr.Button("💾 ذخیره", variant="secondary")
                 clear_file_btn = gr.Button("🗑️ پاک", variant="secondary")
                 download_file = gr.File(label="دانلود TXT", visible=False, elem_classes="rtl")
             process_btn.click(
                 transcribe_file,
                 inputs=[file_input, chunk_slider],
                 outputs=[file_output, progress_label]
             )
             # `outputs=` must be named: the second positional slot of an
             # event listener is `inputs`, which would feed the components to
             # the zero-argument lambdas and update nothing.
             save_btn.click(
                 save_text,
                 inputs=[file_output],
                 outputs=[download_file]
             ).then(
                 lambda: gr.update(visible=True),
                 outputs=[download_file]
             )
             clear_file_btn.click(lambda: ("", ""), outputs=[file_output, progress_label])
     with gr.Accordion("📖 راهنما", open=False, elem_classes="rtl"):
         gr.Markdown("""
         ### استفاده:
         - **Real-time**: میکروفون رو فعال کن، 5s+ صحبت کن. هر 3s متن آپدیت می‌شه (background).
         - **فایل**: آپلود و دکمه بزن.
         ### نکات:
         - 🗣️ واضح صحبت کن، نویز کم.
         - 🌐 اینترنت پایدار (Google API).
         - 📱 روی موبایل/دسکتاپ کار می‌کنه (mic access بده).
         - ⚠️ محدودیت Google: ~60 req/min - برای حجم بالا، API key اضافه کن (در کد کامنت).
         - توزیع: share لینک رو share کن، همه browserها ساپورت.
         """, elem_classes="rtl")
     gr.HTML('<div style="text-align: center; margin-top: 2em; padding: 1em; background: #f8f9fa;"><p style="color: #666;">نسخه 2.2 - Fixed Real-time با Timer | Google Backend</p></div>')
 # Launch the server only when this file is executed as a script.
 if __name__ == "__main__":
+    # Enable the request queue, listen on all interfaces at port 7860 and
+    # request a public share link; show_error surfaces handler errors in the UI.
+    demo.queue().launch(share=True, show_error=True, server_name="0.0.0.0", server_port=7860)