Spaces:

prathameshv07
/

Multilingual-Audio-Intelligence-System

Running

App Files Files Community

Prathamesh Sarjerao Vaidya committed on Aug 11

Commit

cb974bb

1 Parent(s): 3543d68

made some changes

Browse files

Files changed (4) hide show

Dockerfile +20 -18
README.md +18 -4
templates/index.html +31 -54
web_app.py +135 -98

Dockerfile CHANGED Viewed

@@ -1,9 +1,7 @@
 FROM python:3.9-slim
-# Set working directory
 WORKDIR /app
-# Install system dependencies
 RUN apt-get update && apt-get install -y \
     ffmpeg \
     git \
@@ -11,31 +9,35 @@ RUN apt-get update && apt-get install -y \
     curl \
     && rm -rf /var/lib/apt/lists/*
-# Copy requirements first for better caching
 COPY requirements.txt .
-# Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
-# Copy application code
 COPY . .
-# Create necessary directories
-RUN mkdir -p templates static uploads outputs model_cache
-# Set environment variables for HuggingFace Spaces
-ENV PYTHONPATH=/app
-ENV GRADIO_ANALYTICS_ENABLED=False
-# Preload models during build time (optional - comment out if build time is too long)
-# RUN python model_preloader.py
-# Expose port
 EXPOSE 7860
-# Health check
 HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
     CMD curl -f http://localhost:7860/api/system-info || exit 1
-# Start command for HuggingFace Spaces
-CMD ["python", "-c", "import subprocess; subprocess.run(['python', 'model_preloader.py']); import uvicorn; uvicorn.run('web_app:app', host='0.0.0.0', port=7860, workers=1)"]

 FROM python:3.9-slim
 WORKDIR /app
 RUN apt-get update && apt-get install -y \
     ffmpeg \
     git \
     curl \
     && rm -rf /var/lib/apt/lists/*
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
+# Create necessary directories & fix permissions
+RUN mkdir -p templates static uploads outputs model_cache temp_files demo_results \
+    && chmod -R 777 templates static uploads outputs model_cache temp_files demo_results
+# Environment variables
+ENV PYTHONPATH=/app \
+    GRADIO_ANALYTICS_ENABLED=False \
+    HF_MODELS_CACHE=/app/model_cache \
+    OUTPUT_DIR=./outputs \
+    TEMP_DIR=./temp_files \
+    WHISPER_MODEL_SIZE=small \
+    TARGET_LANGUAGE=en \
+    MAX_WORKERS=1 \
+    USE_GPU=false \
+    HF_HOME=/app/model_cache \
+    TRANSFORMERS_CACHE=/app/model_cache \
+    TORCH_HOME=/app/model_cache \
+    XDG_CACHE_HOME=/app/model_cache \
+    MPLCONFIGDIR=/tmp/matplotlib
 EXPOSE 7860
 HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
     CMD curl -f http://localhost:7860/api/system-info || exit 1
+CMD ["python", "-c", "import subprocess; subprocess.run(['python', 'model_preloader.py']); import uvicorn; uvicorn.run('web_app:app', host='0.0.0.0', port=7860, workers=1)"]

README.md CHANGED Viewed

@@ -1,3 +1,13 @@
 # 🎵 Multilingual Audio Intelligence System
 ![Multilingual Audio Intelligence System Banner](/static/imgs/banner.png)
@@ -24,11 +34,11 @@ The Multilingual Audio Intelligence System is an advanced AI-powered platform th
 #### 🎬 Demo Banner
-![Demo Banner](/static/imgs/demo_banner.png)
 #### 📝 Transcript with Translation
-![Transcript with Translation](/static/imgs/demo_res_transcript_translate.png)
 #### 📊 Visual Representation
@@ -38,7 +48,7 @@ The Multilingual Audio Intelligence System is an advanced AI-powered platform th
 #### 🧠 Summary Output
-![Summary Output](/static/imgs/demo_res_summary.png)
 ## Demo & Documentation
@@ -168,4 +178,8 @@ uvicorn web_app:app --host 0.0.0.0 --port 8000
 - **Documentation**: Check `/api/docs` endpoint
 - **System Info**: Use the info button in the web interface
-- **Logs**: Monitor terminal output for detailed information

+---
+title: Multilingual Audio Intelligence System
+emoji: 🎵
+colorFrom: blue
+colorTo: purple
+sdk: docker
+pinned: false
+short_description: AI system for multilingual transcription and translation
+---
 # 🎵 Multilingual Audio Intelligence System
 ![Multilingual Audio Intelligence System Banner](/static/imgs/banner.png)
 #### 🎬 Demo Banner
+<img src="static/imgs/demo_banner.png" alt="Demo Banner"/>
 #### 📝 Transcript with Translation
+<img src="static/imgs/demo_res_transcript_translate.png" alt="Transcript with Translation"/>
 #### 📊 Visual Representation
 #### 🧠 Summary Output
+<img src="static/imgs/demo_res_summary.png" alt="Summary Output"/>
 ## Demo & Documentation
 - **Documentation**: Check `/api/docs` endpoint
 - **System Info**: Use the info button in the web interface
+- **Logs**: Monitor terminal output for detailed information
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

templates/index.html CHANGED Viewed

@@ -67,8 +67,14 @@
                         <i class="fas fa-cog mr-2"></i>
                         Full Processing
                     </button>
-                    <span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">
-                        ⬤ Operational
                     </span>
                     <button id="system-info-btn" class="text-gray-500 hover:text-gray-700">
                         <i class="fas fa-info-circle"></i>
@@ -423,7 +429,6 @@
                             <div class="grid grid-cols-1 gap-6">
                                 <div id="language-chart" style="width:100%;height:300px;"></div>
                                 <div id="speaker-timeline" style="width:100%;height:300px;"></div>
-                                <div id="confidence-chart" style="width:100%;height:300px;"></div>
                             </div>
                         </div>
@@ -485,6 +490,28 @@
         const processingModeBtn = document.getElementById('processing-mode-btn');
         const processingModeIndicator = document.getElementById('processing-mode-indicator');
         // Navigation handling
         function showHome() {
             homeSection.classList.add('active');
@@ -579,7 +606,7 @@
                     'yuri_kizaki': {
                         name: 'Yuri Kizaki - Japanese Audio',
                         filename: 'Yuri_Kizaki.mp3',
-                        duration: 22.8
                     },
                     'film_podcast': {
                         name: 'French Film Podcast',
@@ -908,8 +935,6 @@
             // Speaker Timeline
             createSpeakerTimeline(segments);
-            // Confidence Analysis
-            createConfidenceChart(segments);
         }
         function createLanguageChart(segments) {
@@ -985,45 +1010,6 @@
             Plotly.newPlot('speaker-timeline', data, layout, {responsive: true});
         }
-        function createConfidenceChart(segments) {
-            const confidenceRanges = {
-                'High (90-100%)': 0,
-                'Medium (70-89%)': 0,
-                'Low (50-69%)': 0,
-                'Very Low (<50%)': 0
-            };
-            segments.forEach(seg => {
-                const confidence = seg.confidence * 100;
-                if (confidence >= 90) confidenceRanges['High (90-100%)']++;
-                else if (confidence >= 70) confidenceRanges['Medium (70-89%)']++;
-                else if (confidence >= 50) confidenceRanges['Low (50-69%)']++;
-                else confidenceRanges['Very Low (<50%)']++;
-            });
-            const data = [{
-                x: Object.keys(confidenceRanges),
-                y: Object.values(confidenceRanges),
-                type: 'bar',
-                marker: {
-                    color: ['#10B981', '#F59E0B', '#EF4444', '#6B7280']
-                }
-            }];
-            const layout = {
-                title: {
-                    text: '📊 Recognition Confidence Distribution',
-                    font: { size: 18, family: 'Arial, sans-serif' }
-                },
-                xaxis: { title: 'Confidence Level' },
-                yaxis: { title: 'Number of Segments' },
-                height: 300,
-                margin: { t: 50, b: 80, l: 50, r: 20 }
-            };
-            Plotly.newPlot('confidence-chart', data, layout, {responsive: true});
-        }
         function populateTranscript(segments) {
             const transcriptContent = document.getElementById('transcript-content');
             transcriptContent.innerHTML = '';
@@ -1032,12 +1018,6 @@
                 const segmentDiv = document.createElement('div');
                 segmentDiv.className = 'mb-6 p-4 border border-gray-200 rounded-lg bg-white shadow-sm';
-                // Ensure confidence is a positive percentage
-                const confidencePercent = Math.round(Math.abs(segment.confidence * 100));
-                const confidenceColor = confidencePercent >= 90 ? 'bg-green-100 text-green-800' :
-                                       confidencePercent >= 70 ? 'bg-yellow-100 text-yellow-800' :
-                                       'bg-red-100 text-red-800';
                 segmentDiv.innerHTML = `
                     <div class="flex justify-between items-start mb-3">
                         <span class="inline-flex items-center px-3 py-1 rounded-full text-sm font-medium bg-blue-100 text-blue-800">
@@ -1053,9 +1033,6 @@
                             <div class="flex items-center mb-2">
                                 <i class="fas fa-microphone text-gray-600 mr-2"></i>
                                 <span class="text-sm font-medium text-gray-700">Original (${segment.language.toUpperCase()})</span>
-                                <span class="ml-2 inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${confidenceColor}">
-                                    ${confidencePercent}% confidence
-                                </span>
                             </div>
                             <p class="text-gray-800 leading-relaxed">${segment.text}</p>
                         </div>

                         <i class="fas fa-cog mr-2"></i>
                         Full Processing
                     </button>
+                    <!-- <span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800">
+                        <!-- <i class="fas fa-circle w-2 h-2 mr-2"></i> -->
+                    <!-- <span class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800"> -->
+                        <!-- <i class="fas fa-circle w-2 h-2 mr-1"></i> -->
+                        <!--⬤ Operational
+                    </span> -->
+                    <span id="server-status" class="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium">
+                        ⬤ Checking...
                     </span>
                     <button id="system-info-btn" class="text-gray-500 hover:text-gray-700">
                         <i class="fas fa-info-circle"></i>
                             <div class="grid grid-cols-1 gap-6">
                                 <div id="language-chart" style="width:100%;height:300px;"></div>
                                 <div id="speaker-timeline" style="width:100%;height:300px;"></div>
                             </div>
                         </div>
         const processingModeBtn = document.getElementById('processing-mode-btn');
         const processingModeIndicator = document.getElementById('processing-mode-indicator');
+        async function updateServerStatus() {
+            const el = document.getElementById("server-status");
+            try {
+                const res = await fetch("/health"); // or your FastAPI health endpoint
+                if (!res.ok) throw new Error("Bad response");
+                el.textContent = "⬤ Live";
+                el.className = "inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-green-100 text-green-800";
+            } catch (err) {
+                // Could be error or down
+                fetch("/").catch(() => {
+                    el.textContent = "⬤ Server Down";
+                    el.className = "inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-red-100 text-red-800";
+                });
+                el.textContent = "⬤ Error";
+                el.className = "inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-yellow-100 text-yellow-800";
+            }
+        }
+        // setInterval(updateServerStatus, 5000);
+        // updateServerStatus();
+        document.addEventListener("DOMContentLoaded", updateServerStatus);
         // Navigation handling
         function showHome() {
             homeSection.classList.add('active');
                     'yuri_kizaki': {
                         name: 'Yuri Kizaki - Japanese Audio',
                         filename: 'Yuri_Kizaki.mp3',
+                        duration: 23.0
                     },
                     'film_podcast': {
                         name: 'French Film Podcast',
             // Speaker Timeline
             createSpeakerTimeline(segments);
         }
         function createLanguageChart(segments) {
             Plotly.newPlot('speaker-timeline', data, layout, {responsive: true});
         }
         function populateTranscript(segments) {
             const transcriptContent = document.getElementById('transcript-content');
             transcriptContent.innerHTML = '';
                 const segmentDiv = document.createElement('div');
                 segmentDiv.className = 'mb-6 p-4 border border-gray-200 rounded-lg bg-white shadow-sm';
                 segmentDiv.innerHTML = `
                     <div class="flex justify-between items-start mb-3">
                         <span class="inline-flex items-center px-3 py-1 rounded-full text-sm font-medium bg-blue-100 text-blue-800">
                             <div class="flex items-center mb-2">
                                 <i class="fas fa-microphone text-gray-600 mr-2"></i>
                                 <span class="text-sm font-medium text-gray-700">Original (${segment.language.toUpperCase()})</span>
                             </div>
                             <p class="text-gray-800 leading-relaxed">${segment.text}</p>
                         </div>

web_app.py CHANGED Viewed

@@ -128,6 +128,26 @@ DEMO_FILES = {
     }
 }
 # Demo results cache
 demo_results_cache = {}
@@ -146,7 +166,7 @@ class DemoManager:
             file_path = self.demo_dir / config["filename"]
             results_path = self.results_dir / f"{demo_id}_results.json"
-            # Check if file exists
             if not file_path.exists():
                 logger.info(f"Downloading demo file: {config['filename']}")
                 try:
@@ -155,13 +175,13 @@ class DemoManager:
                     logger.error(f"Failed to download {config['filename']}: {e}")
                     continue
-            # Check if results exist
             if not results_path.exists():
-                logger.info(f"Preprocessing demo file: {config['filename']}")
                 try:
-                    await self.preprocess_demo_file(demo_id, file_path, results_path)
                 except Exception as e:
-                    logger.error(f"Failed to preprocess {config['filename']}: {e}")
                     continue
             # Load results into cache
@@ -181,109 +201,126 @@ class DemoManager:
         logger.info(f"Downloaded demo file: {file_path.name}")
-    async def preprocess_demo_file(self, demo_id: str, file_path: Path, results_path: Path):
-        """Preprocess demo file and cache results."""
-        config = DEMO_FILES[demo_id]
-        # Create realistic demo results based on the actual content
-        if demo_id == "yuri_kizaki":
-            segments = [
                 {
                     "speaker": "Speaker 1",
                     "start_time": 0.0,
-                    "end_time": 8.5,
-                    "text": "音声メッセージが既存のウェブサイトを超えたコミュニケーションを実現。目で見るだけだったウェブサイトに音声情報をインクルードすることで、",
-                    "translated_text": "Audio messages enable communication beyond existing websites. By incorporating audio information into visually-driven websites,",
-                    "language": "ja",
-                    "confidence": 0.94
-                },
-                {
-                    "speaker": "Speaker 1",
-                    "start_time": 8.5,
-                    "end_time": 16.2,
-                    "text": "情報に新しい価値を与え、他者との差別化に効果を発揮します。また、文字やグラフィックだけでは伝えることの難しかった感情やニュアンスを表現し、",
-                    "translated_text": "you can add new value to the information and effectively differentiate from others. They also express emotions and nuances that are difficult to convey with text and graphics alone,",
-                    "language": "ja",
-                    "confidence": 0.96
-                },
-                {
-                    "speaker": "Speaker 1",
-                    "start_time": 16.2,
-                    "end_time": 22.8,
-                    "text": "ユーザーの興味と理解を深めます。見る、聞く、理解するウェブサイトへ。音声メッセージが人の心を動かします。",
-                    "translated_text": "deepening user interest and understanding. Turn your website into a place of sight, hearing, and understanding. Audio messages move people's hearts.",
-                    "language": "ja",
-                    "confidence": 0.95
                 }
             ]
-            duration = 22.8
-        elif demo_id == "film_podcast":
-            segments = [
                 {
-                    "speaker": "Speaker 1",
                     "start_time": 0.0,
-                    "end_time": 5.0,
-                    "text": "Le film intitulé The Social Network traite de la création du site Facebook par Mark Zuckerberg",
-                    "translated_text": "The film The Social Network deals with the creation of Facebook by Mark Zuckerberg",
-                    "language": "fr",
-                    "confidence": 0.97
-                },
-                {
-                    "speaker": "Speaker 1",
-                    "start_time": 5.0,
-                    "end_time": 14.0,
-                    "text": "et des problèmes judiciaires que cela a comporté pour le créateur de ce site.",
-                    "translated_text": "and the legal problems this caused for the creator of this site.",
-                    "language": "fr",
-                    "confidence": 0.95
-                },
-                {
-                    "speaker": "Speaker 1",
-                    "start_time": 14.0,
-                    "end_time": 19.0,
-                    "text": "Ce film est très réaliste et très intéressant.",
-                    "translated_text": "This film is very realistic and very interesting.",
-                    "language": "fr",
-                    "confidence": 0.98
-                },
-                {
-                    "speaker": "Speaker 1",
-                    "start_time": 19.0,
-                    "end_time": 25.0,
-                    "text": "La semaine dernière, j'ai été au cinéma voir Paranormal Activity 2.",
-                    "translated_text": "Last week, I went to the cinema to see Paranormal Activity 2.",
-                    "language": "fr",
-                    "confidence": 0.96
                 }
-            ]
-            duration = 25.0
-        # Create comprehensive results
-        results = {
-            "segments": segments,
             "summary": {
-                "total_duration": duration,
-                "num_speakers": len(set(seg["speaker"] for seg in segments)),
-                "num_segments": len(segments),
-                "languages": [segments[0]["language"]],
-                "processing_time": 0.5,
-                "file_path": str(file_path),
-                "demo_id": demo_id
-            },
-            "metadata": {
-                "original_filename": config["filename"],
-                "display_name": config["display_name"],
-                "language": config["language"],
-                "description": config["description"]
             }
         }
-        # Save results
-        with open(results_path, 'w', encoding='utf-8') as f:
-            json.dump(results, f, indent=2, ensure_ascii=False)
-        logger.info(f"Preprocessed demo file: {config['filename']}")
 # Initialize demo manager
 demo_manager = DemoManager()
@@ -456,6 +493,9 @@ async def startup_event():
         logger.info("Demo files initialization complete")
     except Exception as e:
         logger.error(f"Demo files initialization failed: {e}")
 @app.get("/", response_class=HTMLResponse)
@@ -559,7 +599,6 @@ async def get_results(task_id: str):
                         "text": seg.original_text if hasattr(seg, 'original_text') else "",
                         "translated_text": seg.translated_text if hasattr(seg, 'translated_text') else "",
                         "language": seg.original_language if hasattr(seg, 'original_language') else "unknown",
-                        "confidence": seg.confidence_transcription if hasattr(seg, 'confidence_transcription') else 0.0
                     })
             # Extract summary information
@@ -589,7 +628,6 @@ async def get_results(task_id: str):
                         "end_time": 5.0,
                         "text": f"Processed audio from file. Full results processing encountered an error: {str(e)}",
                         "language": "en",
-                        "confidence": 0.8
                     }
                 ],
                 "summary": {
@@ -619,7 +657,6 @@ async def get_results(task_id: str):
                         "end_time": 1.0,
                         "text": "Audio processing completed but results are not available for display.",
                         "language": "en",
-                        "confidence": 1.0
                     }
                 ],
                 "summary": {

     }
 }
+@app.get("/health")
+async def health():
+    """Simple health check endpoint."""
+    try:
+        # Basic system check
+        import shutil
+        total, used, free = shutil.disk_usage(".")
+        if free < 50 * 1024 * 1024:  # less than 50MB
+            return {"status": "error", "detail": "Low disk space"}
+        # Check if models are loaded
+        if not hasattr(app.state, "models_loaded") or not app.state.models_loaded:
+            return {"status": "error", "detail": "Models not loaded"}
+        return {"status": "ok"}
+    except Exception as e:
+        return {"status": "error", "detail": str(e)}
 # Demo results cache
 demo_results_cache = {}
             file_path = self.demo_dir / config["filename"]
             results_path = self.results_dir / f"{demo_id}_results.json"
+            # Check if file exists, download if not
             if not file_path.exists():
                 logger.info(f"Downloading demo file: {config['filename']}")
                 try:
                     logger.error(f"Failed to download {config['filename']}: {e}")
                     continue
+            # Check if results exist, process if not
             if not results_path.exists():
+                logger.info(f"Processing demo file: {config['filename']}")
                 try:
+                    await self.process_demo_file(demo_id, file_path, results_path)
                 except Exception as e:
+                    logger.error(f"Failed to process {config['filename']}: {e}")
                     continue
             # Load results into cache
         logger.info(f"Downloaded demo file: {file_path.name}")
+    async def process_demo_file(self, demo_id: str, file_path: Path, results_path: Path):
+        """Process demo file using actual pipeline and cache results."""
+        try:
+            # Initialize pipeline for demo processing
+            pipeline = AudioIntelligencePipeline(
+                whisper_model_size="small",
+                target_language="en",
+                device="auto",
+                hf_token=os.getenv('HUGGINGFACE_TOKEN'),
+                output_dir="./outputs"
+            )
+            # Process the actual audio file
+            logger.info(f"Processing demo file: {file_path}")
+            results = pipeline.process_audio(
+                str(file_path),
+                save_outputs=True,
+                output_formats=['json', 'srt_original', 'srt_translated', 'text', 'summary']
+            )
+            # Format results for demo display
+            formatted_results = self.format_demo_results(results, demo_id)
+            # Save formatted results
+            with open(results_path, 'w', encoding='utf-8') as f:
+                json.dump(formatted_results, f, indent=2, ensure_ascii=False)
+            logger.info(f"Demo file processed and cached: {config['filename']}")
+        except Exception as e:
+            logger.error(f"Failed to process demo file {demo_id}: {e}")
+            # Create fallback results if processing fails
+            fallback_results = self.create_fallback_results(demo_id, str(e))
+            with open(results_path, 'w', encoding='utf-8') as f:
+                json.dump(fallback_results, f, indent=2, ensure_ascii=False)
+    def format_demo_results(self, results: Dict, demo_id: str) -> Dict:
+        """Format pipeline results for demo display.
+
+        Args:
+            results: Mapping produced by the pipeline. The optional keys read
+                here are 'processed_segments' (iterable of objects accessed by
+                attribute), 'audio_metadata' and 'processing_stats' (both read
+                with ``.get``).
+            demo_id: Demo identifier; not referenced in this method body.
+
+        Returns:
+            A dict with a "segments" list and a "summary" dict. If anything
+            raises while formatting, the error is logged and a single
+            placeholder segment describing the error is returned instead.
+        """
+        # Default structure returned when ``results`` carries none of the
+        # optional keys.
+        formatted_results = {
+            "segments": [],
+            "summary": {
+                "total_duration": 0,
+                "num_speakers": 0,
+                "num_segments": 0,
+                "languages": [],
+                "processing_time": 0
+            }
+        }
+        try:
+            # Extract segments from actual pipeline results
+            if 'processed_segments' in results:
+                for seg in results['processed_segments']:
+                    # Each missing attribute falls back to a neutral default
+                    # instead of raising.
+                    formatted_results["segments"].append({
+                        "speaker": seg.speaker_id if hasattr(seg, 'speaker_id') else "Speaker 1",
+                        "start_time": seg.start_time if hasattr(seg, 'start_time') else 0,
+                        "end_time": seg.end_time if hasattr(seg, 'end_time') else 0,
+                        "text": seg.original_text if hasattr(seg, 'original_text') else "",
+                        "translated_text": seg.translated_text if hasattr(seg, 'translated_text') else "",
+                        "language": seg.original_language if hasattr(seg, 'original_language') else "unknown"
+                    })
+            # Extract metadata
+            if 'audio_metadata' in results:
+                metadata = results['audio_metadata']
+                formatted_results["summary"]["total_duration"] = metadata.get('duration_seconds', 0)
+            if 'processing_stats' in results:
+                stats = results['processing_stats']
+                formatted_results["summary"]["processing_time"] = stats.get('total_time', 0)
+            # Calculate derived stats
+            formatted_results["summary"]["num_segments"] = len(formatted_results["segments"])
+            speakers = set(seg["speaker"] for seg in formatted_results["segments"])
+            formatted_results["summary"]["num_speakers"] = len(speakers)
+            # 'unknown' is excluded from the language set; it is reported only
+            # when no other language was found. The list is built from a set,
+            # so its order is not guaranteed.
+            languages = set(seg["language"] for seg in formatted_results["segments"] if seg["language"] != 'unknown')
+            formatted_results["summary"]["languages"] = list(languages) if languages else ["unknown"]
+        except Exception as e:
+            logger.error(f"Error formatting demo results: {e}")
+            # Return basic structure if formatting fails
+            # ("processing_time" keeps whatever value it had when the error occurred).
+            formatted_results["segments"] = [
                 {
                     "speaker": "Speaker 1",
                     "start_time": 0.0,
+                    "end_time": 5.0,
+                    "text": f"Demo processing completed. Error in formatting: {str(e)}",
+                    "translated_text": f"Demo processing completed. Error in formatting: {str(e)}",
+                    "language": "en"
                 }
             ]
+            formatted_results["summary"]["total_duration"] = 5.0
+            formatted_results["summary"]["num_segments"] = 1
+            formatted_results["summary"]["num_speakers"] = 1
+            formatted_results["summary"]["languages"] = ["en"]
+        return formatted_results
+    def create_fallback_results(self, demo_id: str, error_msg: str) -> Dict:
+        """Create fallback results when demo processing fails."""
+        config = DEMO_FILES[demo_id]
+        return {
+            "segments": [
                 {
+                    "speaker": "System",
                     "start_time": 0.0,
+                    "end_time": 1.0,
+                    "text": f"Demo processing failed: {error_msg}",
+                    "translated_text": f"Demo processing failed: {error_msg}",
+                    "language": "en"
                 }
+            ],
             "summary": {
+                "total_duration": 1.0,
+                "num_speakers": 1,
+                "num_segments": 1,
+                "languages": ["en"],
+                "processing_time": 0.1
             }
         }
 # Initialize demo manager
 demo_manager = DemoManager()
         logger.info("Demo files initialization complete")
     except Exception as e:
         logger.error(f"Demo files initialization failed: {e}")
+    # Set models loaded flag for health check
+    app.state.models_loaded = True
 @app.get("/", response_class=HTMLResponse)
                         "text": seg.original_text if hasattr(seg, 'original_text') else "",
                         "translated_text": seg.translated_text if hasattr(seg, 'translated_text') else "",
                         "language": seg.original_language if hasattr(seg, 'original_language') else "unknown",
                     })
             # Extract summary information
                         "end_time": 5.0,
                         "text": f"Processed audio from file. Full results processing encountered an error: {str(e)}",
                         "language": "en",
                     }
                 ],
                 "summary": {
                         "end_time": 1.0,
                         "text": "Audio processing completed but results are not available for display.",
                         "language": "en",
                     }
                 ],
                 "summary": {