Ravishankarsharma committed on
Commit
e011168
·
verified ·
1 Parent(s): 957349b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -69
app.py CHANGED
@@ -1,88 +1,70 @@
1
  import os
2
  import csv
 
 
 
3
  import whisper
4
- import gradio as gr
5
  from transformers import pipeline
6
- from fastapi import FastAPI, UploadFile, File
7
- from fastapi.middleware.wsgi import WSGIMiddleware
8
 
9
# ================== DIRECTORIES =====================
# Working folders: uploaded audio goes to UPLOAD_DIR, the CSV log to CSV_DIR.
UPLOAD_DIR, CSV_DIR = "uploads", "csv_files"
for _directory in (UPLOAD_DIR, CSV_DIR):
    # exist_ok=True: an already existing folder is not an error.
    os.makedirs(_directory, exist_ok=True)
del _directory  # keep the loop variable out of the module namespace

# ================== LOAD MODELS =====================
# Both models are loaded once, at import time.
print("Loading Whisper model...")
whisper_model = whisper.load_model("base")
print("Loading summarizer model...")
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)
20
-
21
- # ================== FUNCTION =======================
22
def transcribe_and_summarize(audio_path):
    """Transcribe an audio file, summarize the transcript and log both to CSV.

    The audio file is moved into ``UPLOAD_DIR``, transcribed with the
    module-level ``whisper_model`` and summarized with the module-level
    ``summarizer``. One row is then appended to
    ``CSV_DIR/transcriptions.csv``; a header row is written first when that
    file does not exist yet.

    Args:
        audio_path: Path of the audio file, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(transcription, summary, csv_file)`` tuple. When there is no
        upload or transcription fails, the first two items are messages and
        ``csv_file`` is ``None``. When only summarization fails, ``summary``
        holds the error message and the row is still written.
    """
    import shutil  # local import: only this function needs it

    if audio_path is None:
        return "No file uploaded.", "No summary.", None

    # Move the audio into the uploads folder.
    audio_filename = os.path.basename(audio_path)
    saved_audio_path = os.path.join(UPLOAD_DIR, audio_filename)
    if os.path.abspath(audio_path) != os.path.abspath(saved_audio_path):
        # shutil.move (unlike os.rename) also works when the source lives on
        # another filesystem. Moving unconditionally makes sure a new upload
        # replaces an older file of the same name instead of the stale file
        # being transcribed again.
        shutil.move(audio_path, saved_audio_path)

    # Transcription
    try:
        result = whisper_model.transcribe(saved_audio_path)
        transcription = result["text"]
    except Exception as e:
        return f"Transcription failed: {e}", "No summary.", None

    # Summarization (a failure here is reported in the summary slot only)
    try:
        summary = summarizer(
            transcription, max_length=60, min_length=10, do_sample=False
        )[0]["summary_text"]
    except Exception as e:
        summary = f"Summarization failed: {e}"

    # Save CSV
    csv_file = os.path.join(CSV_DIR, "transcriptions.csv")
    # Must be checked before opening: append mode creates the file.
    file_exists = os.path.isfile(csv_file)
    with open(csv_file, mode="a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(["Audio File", "Transcription", "Summary"])
        writer.writerow([saved_audio_path, transcription, summary])

    return transcription, summary, csv_file
55
-
56
# ================== GRADIO INTERFACE =================
# Web UI around transcribe_and_summarize: one audio upload in; the three
# outputs are listed in the order of the function's return tuple
# (transcription, summary, CSV path).
gradio_interface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=[
        gr.Textbox(label="🔊 Transcription"),
        gr.Textbox(label="📝 Summary"),
        gr.File(label="📄 Download CSV with Audio Links"),
    ],
    title="🎀 Audio Transcriber + Summarizer",
    description="Upload an audio file → Transcribe it with Whisper → Summarize it using BART → Download CSV.",
)

# ================== FASTAPI APP ======================
api_app = FastAPI(title="Audio Transcriber + Summarizer API")
 
 
 
 
 
 
71
 
72
@api_app.post("/api/transcribe/")
async def transcribe_api(file: UploadFile = File(...)):
    """Transcribe and summarize an uploaded audio file.

    The upload is written to ``/tmp`` and handed to
    ``transcribe_and_summarize``.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        A dict with the keys ``transcription``, ``summary`` and ``csv_file``,
        taken from the tuple returned by ``transcribe_and_summarize``.
    """
    # The filename is client-controlled: keep only its last path component
    # (either separator style) so it cannot point outside /tmp.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        safe_name = "upload"

    # Save uploaded file temporarily
    temp_path = f"/tmp/{safe_name}"
    with open(temp_path, "wb") as f:
        f.write(await file.read())

    transcription, summary, csv_file = transcribe_and_summarize(temp_path)
    return {"transcription": transcription, "summary": summary, "csv_file": csv_file}
81
 
82
# Mount Gradio on FastAPI
# Interface.launch() starts a server of its own and returns a tuple rather
# than a WSGI callable, so it cannot be wrapped in WSGIMiddleware.
# gr.mount_gradio_app attaches the interface to the FastAPI app at the given
# path and returns that app.
api_app = gr.mount_gradio_app(api_app, gradio_interface, path="/gradio")

# ================== ENTRY POINT =====================
if __name__ == "__main__":
    import uvicorn

    # Serve the API (and the mounted UI) on all interfaces, port 8000.
    uvicorn.run(api_app, host="0.0.0.0", port=8000)
 
 
 
 
 
1
  import os
2
  import csv
3
+ from datetime import datetime
4
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
5
+ from fastapi.middleware.cors import CORSMiddleware
6
  import whisper
 
7
  from transformers import pipeline
 
 
8
 
9
# ---------------- Config ----------------
# Folder for uploads and the CSV log; the UPLOAD_DIR environment variable
# overrides the default.
UPLOAD_DIR = os.environ.get("UPLOAD_DIR", "./uploads")

# Create the folder when it is missing. If that is not permitted, fall back
# to the current working directory.
if not os.path.exists(UPLOAD_DIR):
    try:
        os.makedirs(UPLOAD_DIR, exist_ok=True)
    except PermissionError:
        UPLOAD_DIR = "."
 
 
 
 
 
17
 
18
# ---------------- App ----------------
app = FastAPI()

# CORS: any origin, method and header may call the API.
# Credentials are disabled because a wildcard origin must not be combined
# with credentialed requests; list explicit origins here if cookies or
# Authorization headers ever need to be sent cross-origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ---------------- Models ----------------
# Both models are loaded once, at import time.
asr_model = whisper.load_model("base")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
32
 
33
+ # ---------------- Utils ----------------
34
def save_to_csv(audio_file: str, transcription: str, summary: str):
    """Append one result row to ``output.csv`` inside ``UPLOAD_DIR``.

    A header row is written first when the CSV file does not exist yet.

    Args:
        audio_file: Name of the audio file the row refers to.
        transcription: Transcribed text.
        summary: Summary of the transcription.
    """
    csv_path = os.path.join(UPLOAD_DIR, "output.csv")
    # Must be checked before opening: append mode creates the file.
    needs_header = not os.path.isfile(csv_path)

    with open(csv_path, mode="a", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        rows = []
        if needs_header:
            rows.append(["Timestamp", "Audio_File", "Transcription", "Summary"])
        rows.append([datetime.now().isoformat(), audio_file, transcription, summary])
        out.writerows(rows)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ # ---------------- Routes ----------------
45
@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...), summary_length: int = Form(100)):
    """Transcribe an uploaded audio file, summarize it and log the result.

    The upload is stored in ``UPLOAD_DIR``, transcribed with ``asr_model``,
    summarized with ``summarizer`` and recorded through ``save_to_csv``.

    Args:
        file: The uploaded audio file (multipart form field ``file``).
        summary_length: Value passed to the summarizer as ``max_length``.

    Returns:
        A dict with the keys ``filename``, ``transcription``, ``summary``
        and ``csv_saved``.

    Raises:
        HTTPException: 400 when the upload has no usable filename or
            ``summary_length`` is not positive; 500 when saving,
            transcribing, summarizing or logging fails.
    """
    # The filename is client-controlled: keep only its last path component
    # (either separator style) so it cannot point outside UPLOAD_DIR.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise HTTPException(status_code=400, detail="Uploaded file has no usable filename")
    if summary_length < 1:
        raise HTTPException(status_code=400, detail="summary_length must be a positive integer")

    try:
        # Save uploaded file
        file_path = os.path.join(UPLOAD_DIR, safe_name)
        with open(file_path, "wb") as buffer:
            buffer.write(await file.read())

        # Transcribe
        result = asr_model.transcribe(file_path)
        transcription = result["text"]

        # Summarize. min_length is capped at summary_length so the bounds
        # stay feasible when the caller asks for fewer than 30 tokens.
        summary = summarizer(
            transcription,
            max_length=summary_length,
            min_length=min(30, summary_length),
            do_sample=False,
        )[0]["summary_text"]

        # Save to CSV
        save_to_csv(safe_name, transcription, summary)

        return {
            "filename": safe_name,
            "transcription": transcription,
            "summary": summary,
            "csv_saved": True,
        }
    except Exception as e:
        # Any processing failure is reported as a server error.
        raise HTTPException(status_code=500, detail=str(e)) from e