Spaces:
Sleeping
Sleeping
import asyncio
import os
from io import BytesIO

import speech_recognition as sr
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
# Application instance, created with FastAPI's default settings.
app = FastAPI()
class TranscriptionResponse(BaseModel):
    """Response body carrying the text recognized from an uploaded audio file."""

    # Transcript produced by the speech recognizer.
    text: str
# Accepted upload MIME types, mapped to the container format name handed to
# pydub when the filename does not provide a usable extension.
_FORMAT_BY_CONTENT_TYPE = {
    "audio/wav": "wav",
    "audio/x-wav": "wav",
    "audio/mpeg": "mp3",
    "audio/mp3": "mp3",
    "audio/flac": "flac",
}


def _recognize_speech(audio_bytes: bytes, audio_format: str) -> str:
    """Decode *audio_bytes*, re-encode to WAV in memory and return the transcript.

    This is synchronous, blocking work (audio decoding plus a request to the
    Google Web Speech API) and is meant to be run off the event loop.
    """
    segment = AudioSegment.from_file(BytesIO(audio_bytes), format=audio_format)

    # speech_recognition's AudioFile is fed WAV data, so normalise every input
    # format to WAV first.
    wav_buffer = BytesIO()
    segment.export(wav_buffer, format="wav")
    wav_buffer.seek(0)

    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_buffer) as source:
        recording = recognizer.record(source)
    return recognizer.recognize_google(recording, language="en-US")


# NOTE(review): no route decorator is visible on this function in this view --
# confirm that it is registered on `app` (e.g. via `@app.post(...)`).
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to English (en-US) text.

    Args:
        file: Uploaded audio. Its content type must be one of the keys of
            ``_FORMAT_BY_CONTENT_TYPE``.

    Returns:
        TranscriptionResponse holding the recognized text.

    Raises:
        HTTPException: 400 for an unsupported content type, an empty upload or
            audio that cannot be decoded; 422 when no speech could be
            recognized; 502 when the recognition service request fails; 500
            for any other failure.
    """
    # Ignore MIME parameters (e.g. "audio/wav; codecs=1") and letter case.
    content_type = (file.content_type or "").split(";", 1)[0].strip().lower()
    if content_type not in _FORMAT_BY_CONTENT_TYPE:
        raise HTTPException(status_code=400, detail="Unsupported file type")

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    # Prefer the filename extension (the original behaviour); fall back to the
    # validated content type when the filename is missing or has no extension.
    extension = os.path.splitext(file.filename or "")[1].lstrip(".").lower()
    audio_format = extension or _FORMAT_BY_CONTENT_TYPE[content_type]

    try:
        # Decoding and the recognition request both block, so keep them off
        # the event loop.
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(
            None, _recognize_speech, audio_bytes, audio_format
        )
    except CouldntDecodeError as exc:
        raise HTTPException(
            status_code=400, detail="Could not decode audio file"
        ) from exc
    except sr.UnknownValueError as exc:
        # str(exc) is empty for this error, so supply an explicit message.
        raise HTTPException(
            status_code=422, detail="Speech could not be understood"
        ) from exc
    except sr.RequestError as exc:
        raise HTTPException(
            status_code=502, detail=f"Speech recognition service error: {exc}"
        ) from exc
    except Exception as exc:
        # Top-level boundary: report anything unexpected as a server error.
        raise HTTPException(
            status_code=500, detail=str(exc) or type(exc).__name__
        ) from exc

    return TranscriptionResponse(text=text)