# app.py — Voice2Text API (Hugging Face Space by Ravishankarsharma, revision 08bbe1b)
import os
import tempfile
from typing import Optional
import whisper
import nltk
from nltk.tokenize import sent_tokenize
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from transformers import pipeline
# ---------------- Config Paths ----------------
# Colab exposes /content; anywhere else (e.g. a hosted container) fall back
# to /tmp, which is writable without special permissions.
RUNNING_IN_COLAB = os.path.exists("/content")
if RUNNING_IN_COLAB:
    BASE_DIR = "/content"
else:
    BASE_DIR = "/tmp"
CACHE_DIR = os.path.join(BASE_DIR, "huggingface")  # Hugging Face model cache
UPLOAD_DIR = os.path.join(BASE_DIR, "uploads")     # uploaded audio + optional default_audio.wav
NLTK_DIR = os.path.join(BASE_DIR, "nltk_data")     # NLTK tokenizer data
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(NLTK_DIR, exist_ok=True)
# Point every cache location the HF stack consults at CACHE_DIR so nothing
# is written to a possibly read-only home directory.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favor of HF_HOME (also set below) — confirm target version.
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_HOME"] = CACHE_DIR
os.environ["XDG_CACHE_HOME"] = CACHE_DIR
# Ensure NLTK searches the directory we download data into.
nltk.data.path.append(NLTK_DIR)
# ---------------- NLTK Setup ----------------
# Download the Punkt sentence tokenizer only when it is not already cached.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt", download_dir=NLTK_DIR, quiet=True)
# Newer NLTK releases load "punkt_tab" instead of "punkt"; fetch both so
# sent_tokenize works regardless of the installed NLTK version.
try:
    nltk.data.find("tokenizers/punkt_tab")
except LookupError:
    nltk.download("punkt_tab", download_dir=NLTK_DIR, quiet=True)
# ---------------- Load Models ----------------
# Both models are loaded once at import time so requests don't pay the
# (multi-second) startup cost per call.
asr_model = whisper.load_model("base")  # Whisper "base" checkpoint for speech-to-text
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")  # abstractive summarizer
# ---------------- FastAPI App ----------------
app = FastAPI(
    title="Voice2Text API",
    description="Audio Transcription + Summarization + Default Audio Processing",
    version="2.4.0"
)
# Open CORS so browser front-ends hosted elsewhere can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm whether
# credentials are actually required here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ---------------- Routes ----------------
@app.post("/transcribe/")
async def transcribe_audio(
    file: Optional[UploadFile] = File(None),
    summary: Optional[bool] = Form(True)
):
    """Transcribe uploaded audio, or the server's default audio when no file
    is provided, optionally appending a BART summary of the transcript.

    Form fields:
        file: optional audio upload; when omitted, UPLOAD_DIR/default_audio.wav
              is used instead.
        summary: when truthy (default), a summary of the transcript is added.

    Returns:
        {"transcription": str} or {"transcription": str, "summary": str}.

    Raises:
        HTTPException 404 when no file is uploaded and the default audio is
        missing; HTTPException 500 for any other processing failure.
    """
    tmp_path = None
    created_temp = False  # only delete files this request created
    try:
        if file is not None:
            # Persist the upload to disk: whisper's transcribe() takes a path.
            with tempfile.NamedTemporaryFile(delete=False, dir=UPLOAD_DIR, suffix=".wav") as tmp:
                tmp.write(await file.read())
                tmp_path = tmp.name
            created_temp = True
        else:
            # Fall back to the server-side default audio.
            default_audio_path = os.path.join(UPLOAD_DIR, "default_audio.wav")
            if not os.path.exists(default_audio_path):
                raise HTTPException(
                    status_code=404,
                    detail="Default audio file not found on server."
                )
            tmp_path = default_audio_path

        result = asr_model.transcribe(tmp_path)
        transcription = result.get("text", "")

        if summary and transcription.strip():
            # Summarize three sentences at a time so each chunk stays within
            # the summarization model's input limit.
            sentences = sent_tokenize(transcription)
            chunks = [" ".join(sentences[i:i + 3]) for i in range(0, len(sentences), 3)]
            summarized_text = " ".join(
                summarizer(chunk, max_length=60, min_length=25, do_sample=False)[0]["summary_text"]
                for chunk in chunks
            )
            return {"transcription": transcription, "summary": summarized_text.strip()}
        return {"transcription": transcription}
    except HTTPException:
        # Bug fix: deliberate HTTP errors (e.g. the 404 above) previously fell
        # into the blanket handler below and were re-wrapped as 500s.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing failed: {e}")
    finally:
        # Remove only the temp file we created — never the shared default audio.
        if created_temp and tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
@app.get("/auto_summarize/")
async def auto_summarize():
    """Summarize the default audio when it exists; otherwise summarize a
    built-in sample text.

    Returns a dict with "transcription" (str, or None on the sample-text
    path) and "summary" keys.
    """
    audio_path = os.path.join(UPLOAD_DIR, "default_audio.wav")

    if not os.path.exists(audio_path):
        # No default audio on disk — fall back to summarizing canned text.
        sample_text = """
        Artificial Intelligence and Machine Learning are transforming industries
        by enabling automation, advanced analytics, and data-driven decision making.
        These technologies are particularly useful in healthcare, finance, and education.
        """
        output = summarizer(sample_text, max_length=100, min_length=25, do_sample=False)
        return {"transcription": None, "summary": output[0]["summary_text"]}

    transcript = asr_model.transcribe(audio_path).get("text", "")
    if not transcript.strip():
        return {"transcription": "", "summary": ""}

    # Summarize three sentences at a time so each chunk respects the
    # summarization model's input limit.
    sentences = sent_tokenize(transcript)
    pieces = []
    for start in range(0, len(sentences), 3):
        chunk = " ".join(sentences[start:start + 3])
        pieces.append(
            summarizer(chunk, max_length=60, min_length=25, do_sample=False)[0]["summary_text"]
        )
    return {"transcription": transcript, "summary": " ".join(pieces).strip()}
@app.get("/summarize_direct/")
async def summarize_direct():
    """Summarize the built-in sample text on a plain GET.

    Returns a dict with a single "summary" key.
    """
    sample_text = """
    Artificial Intelligence and Machine Learning are transforming industries
    by enabling automation, advanced analytics, and data-driven decision making.
    These technologies are particularly useful in healthcare, finance, and education.
    """
    output = summarizer(sample_text, max_length=100, min_length=25, do_sample=False)
    return {"summary": output[0]["summary_text"]}
@app.get("/summarize_text/")
async def summarize_text(text: str = Query(..., description="Text to summarize")):
    """Summarize caller-supplied text passed via the ``text`` query parameter.

    Raises HTTPException 400 when the text is empty or whitespace-only.
    Returns a dict with a single "summary" key.
    """
    if not text.strip():
        raise HTTPException(status_code=400, detail="Text input cannot be empty.")
    output = summarizer(text, max_length=120, min_length=30, do_sample=False)
    return {"summary": output[0]["summary_text"]}