Spaces:
Running
Running
| import functools | |
| import numpy as np | |
| from faster_whisper.vad import VadOptions | |
| from fastapi import ( | |
| File, | |
| UploadFile, | |
| ) | |
| from fastapi import APIRouter, BackgroundTasks, Depends, Response, status | |
| from typing import List, Dict | |
| from datetime import datetime | |
| from modules.vad.silero_vad import SileroVAD | |
| from modules.whisper.data_classes import VadParams | |
| from backend.common.audio import read_audio | |
| from backend.common.models import QueueResponse | |
| from backend.db.task.dao import add_task_to_db, update_task_status_in_db | |
| from backend.db.task.models import TaskStatus, TaskType | |
# Router that groups the voice-activity-detection endpoints under the
# ``/vad`` URL prefix and the "Voice Activity Detection" tag.
vad_router = APIRouter(
    prefix="/vad",
    tags=["Voice Activity Detection"],
)
@functools.lru_cache(maxsize=1)
def get_vad_model() -> SileroVAD:
    """Return the shared, ready-to-use Silero VAD inferencer.

    The inferencer is constructed and ``update_model()`` is called on the
    first call only. ``functools.lru_cache`` then hands the same instance
    back to every later caller, so a VAD task no longer builds and
    initialises a fresh ``SileroVAD`` each time it runs.

    Returns:
        SileroVAD: The cached inferencer instance.

    NOTE(review): the instance is now shared by every caller -- confirm
    that ``SileroVAD.run`` is safe to invoke on a shared instance.
    """
    inferencer = SileroVAD()
    inferencer.update_model()
    return inferencer
def run_vad(
    audio: np.ndarray,
    params: VadOptions,
    identifier: str,
) -> List[Dict]:
    """Run voice activity detection for a task and record its progress.

    The task identified by ``identifier`` is first marked ``IN_PROGRESS``.
    The VAD model is then obtained and run on ``audio``, and the task is
    updated to ``COMPLETED`` together with the detected speech chunks and
    the time (in seconds) spent obtaining and running the model.

    Args:
        audio: Audio array passed to the VAD model.
        params: VAD options forwarded to the model as ``vad_parameters``.
        identifier: Identifier of the task whose status is updated.

    Returns:
        The speech chunks returned by the VAD model.

    NOTE(review): an exception raised while running the model propagates
    before the second status update, so the task stays ``IN_PROGRESS``.
    """
    in_progress_fields = {
        "uuid": identifier,
        "status": TaskStatus.IN_PROGRESS,
        "updated_at": datetime.utcnow(),
    }
    update_task_status_in_db(identifier=identifier, update_data=in_progress_fields)

    # The timer starts before the model is fetched, so model set-up time is
    # included in the reported duration.
    started_at = datetime.utcnow()
    vad_model = get_vad_model()
    # The model also returns an audio array; only the chunks are used here.
    _processed_audio, speech_chunks = vad_model.run(
        audio=audio,
        vad_parameters=params,
    )
    finished_at = datetime.utcnow()
    duration_seconds = (finished_at - started_at).total_seconds()

    completed_fields = {
        "uuid": identifier,
        "status": TaskStatus.COMPLETED,
        "updated_at": datetime.utcnow(),
        "result": speech_chunks,
        "duration": duration_seconds,
    }
    update_task_status_in_db(identifier=identifier, update_data=completed_fields)

    return speech_chunks
async def vad(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(..., description="Audio or video file to detect voices."),
    params: VadParams = Depends()
) -> QueueResponse:
    """Queue a voice-activity-detection task for the given audio.

    The audio is decoded, a task row is created with status ``QUEUED``, and
    ``run_vad`` is scheduled as a background task so the response returns
    without waiting for detection to finish.

    Args:
        background_tasks: FastAPI background-task scheduler that receives
            the ``run_vad`` job.
        file: Uploaded audio or video file. A NumPy array is also accepted
            and is used directly as the audio, without decoding.
        params: VAD settings; its fields are copied into ``VadOptions`` and
            its ``model_dump()`` is stored with the task.

    Returns:
        QueueResponse: The new task identifier with status ``QUEUED``.

    NOTE(review): no route decorator is visible on this function in the
    reviewed source -- confirm it is registered on ``vad_router``.
    """
    if isinstance(file, np.ndarray):
        # Already-decoded audio: there is no metadata to read.
        audio, info = file, None
    else:
        audio, info = await read_audio(file=file)

    vad_options = VadOptions(
        threshold=params.threshold,
        min_speech_duration_ms=params.min_speech_duration_ms,
        max_speech_duration_s=params.max_speech_duration_s,
        min_silence_duration_ms=params.min_silence_duration_ms,
        speech_pad_ms=params.speech_pad_ms
    )

    identifier = add_task_to_db(
        status=TaskStatus.QUEUED,
        # A NumPy array has no ``filename`` attribute; fall back to None
        # instead of raising AttributeError on the array branch.
        # NOTE(review): assumes add_task_to_db accepts file_name=None -- confirm.
        file_name=getattr(file, "filename", None),
        audio_duration=info.duration if info else None,
        task_type=TaskType.VAD,
        task_params=params.model_dump(),
    )

    background_tasks.add_task(run_vad, audio=audio, params=vad_options, identifier=identifier)

    return QueueResponse(identifier=identifier, status=TaskStatus.QUEUED, message="VAD task has queued")