"""Audio transcription + summarization service.

Exposes the same pipeline two ways:

* a Gradio UI mounted at ``/gradio``
* a JSON endpoint at ``POST /api/transcribe/``

Each processed file is moved into ``UPLOAD_DIR`` and a row with its path,
transcription and summary is appended to ``CSV_DIR/transcriptions.csv``.
"""

import csv
import os
import shutil
import tempfile
import uuid

import gradio as gr
import whisper
from fastapi import FastAPI, File, UploadFile
from fastapi.concurrency import run_in_threadpool
# NOTE: no longer used for mounting (Gradio is an ASGI app, not WSGI); the
# import is kept so anything else relying on this module's names still works.
from fastapi.middleware.wsgi import WSGIMiddleware  # noqa: F401
from transformers import pipeline

# ================== DIRECTORIES =====================
UPLOAD_DIR = "uploads"
CSV_DIR = "csv_files"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(CSV_DIR, exist_ok=True)

# ================== LOAD MODELS =====================
print("Loading Whisper model...")
whisper_model = whisper.load_model("base")
print("Loading summarizer model...")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


# ================== FUNCTION =======================
def _store_upload(audio_path):
    """Move *audio_path* into ``UPLOAD_DIR`` and return its new location.

    If a different file with the same name is already stored, a short random
    suffix is added so the new upload is never silently replaced by (or
    confused with) the older one.
    """
    filename = os.path.basename(audio_path)
    destination = os.path.join(UPLOAD_DIR, filename)

    # The file already lives in the uploads folder: nothing to move.
    if os.path.abspath(audio_path) == os.path.abspath(destination):
        return destination

    if os.path.exists(destination):
        stem, ext = os.path.splitext(filename)
        destination = os.path.join(
            UPLOAD_DIR, f"{stem}_{uuid.uuid4().hex[:8]}{ext}"
        )

    # shutil.move (unlike os.rename) also works when the source is on another
    # filesystem, e.g. the system temp directory.
    shutil.move(audio_path, destination)
    return destination


def transcribe_and_summarize(audio_path):
    """Transcribe an audio file, summarize the text and log both to a CSV.

    Args:
        audio_path: Path to the audio file on disk, or ``None`` when nothing
            was uploaded. The file is moved into ``UPLOAD_DIR``.

    Returns:
        A ``(transcription, summary, csv_file)`` tuple. On failure the first
        element carries a human-readable error message, the summary is
        ``"No summary."`` and ``csv_file`` is ``None``. If only summarization
        fails, the summary element carries the error message instead.
    """
    if audio_path is None:
        return "No file uploaded.", "No summary.", None

    # Save audio to uploads folder
    try:
        saved_audio_path = _store_upload(audio_path)
    except OSError as e:
        return f"Could not store upload: {e}", "No summary.", None

    # Transcription
    try:
        result = whisper_model.transcribe(saved_audio_path)
        transcription = result["text"]
    except Exception as e:
        return f"Transcription failed: {e}", "No summary.", None

    # Summarization
    try:
        # truncation=True keeps long transcripts within the model's maximum
        # input length instead of failing on them.
        summary = summarizer(
            transcription,
            max_length=60,
            min_length=10,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
    except Exception as e:
        summary = f"Summarization failed: {e}"

    # Save CSV
    csv_file = os.path.join(CSV_DIR, "transcriptions.csv")
    # Write the header for a brand-new file and for an existing-but-empty one.
    needs_header = (
        not os.path.isfile(csv_file) or os.path.getsize(csv_file) == 0
    )
    with open(csv_file, mode="a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(["Audio File", "Transcription", "Summary"])
        writer.writerow([saved_audio_path, transcription, summary])

    return transcription, summary, csv_file


# ================== GRADIO INTERFACE =================
gradio_interface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=[
        gr.Textbox(label="🔊 Transcription"),
        gr.Textbox(label="📝 Summary"),
        gr.File(label="📄 Download CSV with Audio Links"),
    ],
    title="🎤 Audio Transcriber + Summarizer",
    description="Upload an audio file → Transcribe it with Whisper → Summarize it using BART → Download CSV.",
)

# ================== FASTAPI APP ======================
api_app = FastAPI(title="Audio Transcriber + Summarizer API")


@api_app.post("/api/transcribe/")
async def transcribe_api(file: UploadFile = File(...)):
    """Transcribe and summarize an uploaded audio file.

    Returns a JSON object with the keys ``transcription``, ``summary`` and
    ``csv_file`` (same values as :func:`transcribe_and_summarize`).
    """
    # The client-supplied filename is untrusted: keep only its final path
    # component so it cannot escape the temporary directory.
    client_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(client_name)
    if safe_name in ("", ".", ".."):
        safe_name = "upload"

    # A private per-request directory avoids clashes between concurrent
    # uploads that share a filename.
    temp_dir = tempfile.mkdtemp(prefix="upload_")
    temp_path = os.path.join(temp_dir, safe_name)
    try:
        # Save uploaded file temporarily
        with open(temp_path, "wb") as f:
            f.write(await file.read())

        # Model inference is blocking; run it in a worker thread so the
        # event loop stays responsive.
        transcription, summary, csv_file = await run_in_threadpool(
            transcribe_and_summarize, temp_path
        )
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

    return {"transcription": transcription, "summary": summary, "csv_file": csv_file}


# Mount Gradio on FastAPI
api_app = gr.mount_gradio_app(api_app, gradio_interface, path="/gradio")

# ================== ENTRY POINT =====================
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(api_app, host="0.0.0.0", port=8000)