Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,13 +1,10 @@
|
|
| 1 |
import os
|
| 2 |
-
import whisper
|
| 3 |
import requests
|
| 4 |
from flask import Flask, request, jsonify, render_template
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
-
from deepgram import DeepgramClient, PrerecordedOptions
|
| 7 |
import tempfile
|
| 8 |
import json
|
| 9 |
import subprocess
|
| 10 |
-
from youtube_transcript_api import YouTubeTranscriptApi
|
| 11 |
|
| 12 |
|
| 13 |
import warnings
|
|
@@ -53,6 +50,50 @@ def download_audio(url, temp_video_path):
|
|
| 53 |
raise Exception(f"Failed to download audio, status code: {response.status_code}")
|
| 54 |
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
@app.route('/process-video', methods=['POST'])
|
| 58 |
def process_video():
|
|
@@ -67,9 +108,9 @@ def process_video():
|
|
| 67 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
|
| 68 |
temp_video_path = temp_video_file.name
|
| 69 |
download_audio(video_url, temp_video_path)
|
| 70 |
-
|
| 71 |
# Step 2: get the information from the downloaded MP4 file synchronously
|
| 72 |
-
video_info = get_information_from_video_using_OCR(temp_video_path)
|
| 73 |
|
| 74 |
if not video_info:
|
| 75 |
return jsonify({"error": "video information extraction failed"}), 500
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import requests
|
| 3 |
from flask import Flask, request, jsonify, render_template
|
| 4 |
from dotenv import load_dotenv
|
|
|
|
| 5 |
import tempfile
|
| 6 |
import json
|
| 7 |
import subprocess
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
import warnings
|
|
|
|
| 50 |
raise Exception(f"Failed to download audio, status code: {response.status_code}")
|
| 51 |
|
| 52 |
|
| 53 |
+
def preprocess_frame(frame):
    """Turn a BGR video frame into a binarized image for OCR.

    The frame is converted to grayscale, smoothed with a median blur of
    aperture size 3, and then thresholded with Otsu's method.

    Args:
        frame: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        The thresholded image produced by ``cv2.threshold``.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.medianBlur(grayscale, 3)
    # THRESH_OTSU lets OpenCV choose the cut-off, so the 0 passed as the
    # threshold value is only a placeholder.
    otsu_flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    binarized = cv2.threshold(smoothed, 0, 255, otsu_flags)[1]
    return binarized
|
| 59 |
+
|
| 60 |
+
def clean_ocr_text(text):
    """Strip OCR noise from ``text`` and drop near-empty lines.

    Every character other than ASCII letters, digits, whitespace and the
    punctuation ``, . ! ? -`` is removed. Each remaining line is trimmed,
    and lines of two characters or fewer after trimming are discarded.

    Args:
        text: Raw OCR output.

    Returns:
        The surviving lines joined with a single newline.
    """
    without_symbols = re.sub(r'[^A-Za-z0-9\s,.!?-]', '', text)

    kept_lines = []
    for raw_line in without_symbols.splitlines():
        candidate = raw_line.strip()
        # Very short fragments are treated as OCR noise.
        if len(candidate) > 2:
            kept_lines.append(candidate)

    return '\n'.join(kept_lines)
|
| 65 |
+
|
| 66 |
+
def get_information_from_video_using_OCR(video_path, interval=1):
    """Extract text from video frames using OCR and return the combined text.

    Frames are read sequentially; roughly one frame per ``interval`` seconds
    is preprocessed, passed to Tesseract, and cleaned. Progress is reported
    with ``print``.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        interval: Seconds between sampled frames (default 1).

    Returns:
        A string in which every non-empty cleaned OCR result is followed by
        a blank line. The string is empty when no text was found or when the
        capture could not be opened / yielded no frames.
    """
    cap = cv2.VideoCapture(video_path)
    text_chunks = []
    try:
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        # The reported FPS can be 0 (unknown) and interval * fps can be
        # fractional or below 1. Clamp to a whole number of frames >= 1 so
        # the modulo below never divides by zero and sub-second intervals
        # actually sample more often.
        frame_interval = max(1, int(interval * fps))
        frame_count = 0

        print("Starting text extraction from video...")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or read failure.
                break

            if frame_count % frame_interval == 0:
                preprocessed_frame = preprocess_frame(frame)
                text = pytesseract.image_to_string(
                    preprocessed_frame, lang='eng', config='--psm 6 --oem 3'
                )
                cleaned_text = clean_ocr_text(text)
                if cleaned_text:
                    text_chunks.append(cleaned_text + "\n\n")
                    print(f"Text found at frame {frame_count}: {cleaned_text[:50]}...")

            frame_count += 1
    finally:
        # Release the capture even if preprocessing or OCR raises.
        cap.release()

    print("Text extraction completed.")
    return "".join(text_chunks)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
|
| 97 |
|
| 98 |
@app.route('/process-video', methods=['POST'])
|
| 99 |
def process_video():
|
|
|
|
| 108 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video_file:
|
| 109 |
temp_video_path = temp_video_file.name
|
| 110 |
download_audio(video_url, temp_video_path)
|
| 111 |
+
interval = 1
|
| 112 |
# Step 2: get the information from the downloaded MP4 file synchronously
|
| 113 |
+
video_info = get_information_from_video_using_OCR(temp_video_path, interval)
|
| 114 |
|
| 115 |
if not video_info:
|
| 116 |
return jsonify({"error": "video information extraction failed"}), 500
|