Spaces:
Runtime error
Commit
·
fbd6bad
1
Parent(s):
3364e9c
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import uuid
|
|
| 6 |
from googletrans import Translator
|
| 7 |
from TTS.api import TTS
|
| 8 |
import ffmpeg
|
| 9 |
-
import whisper
|
| 10 |
from scipy.signal import wiener
|
| 11 |
import soundfile as sf
|
| 12 |
from pydub import AudioSegment
|
|
@@ -26,6 +26,9 @@ ZipFile("ffmpeg.zip").extractall()
|
|
| 26 |
st = os.stat('ffmpeg')
|
| 27 |
os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
| 29 |
def process_video(radio, video, target_language):
|
| 30 |
# Check video duration
|
| 31 |
video_info = ffmpeg.probe(video)
|
|
@@ -60,11 +63,9 @@ def process_video(radio, video, target_language):
|
|
| 60 |
shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
|
| 61 |
subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
whisper_language = result['language']
|
| 67 |
-
|
| 68 |
print(whisper_text)
|
| 69 |
|
| 70 |
language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
|
|
|
|
| 6 |
from googletrans import Translator
|
| 7 |
from TTS.api import TTS
|
| 8 |
import ffmpeg
|
| 9 |
+
from faster_whisper import WhisperModel
|
| 10 |
from scipy.signal import wiener
|
| 11 |
import soundfile as sf
|
| 12 |
from pydub import AudioSegment
|
|
|
|
| 26 |
st = os.stat('ffmpeg')
|
| 27 |
os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
|
| 28 |
|
| 29 |
+
model_size = "small"
|
| 30 |
+
model = WhisperModel(model_size, device="cuda", compute_type="int8")
|
| 31 |
+
|
| 32 |
def process_video(radio, video, target_language):
|
| 33 |
# Check video duration
|
| 34 |
video_info = ffmpeg.probe(video)
|
|
|
|
| 63 |
shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
|
| 64 |
subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
|
| 65 |
|
| 66 |
+
segments, info = model.transcribe(f"{run_uuid}_output_audio_final.wav", beam_size=5)
|
| 67 |
+
whisper_text = " ".join(segment.text for segment in segments)
|
| 68 |
+
whisper_language = info.language
|
|
|
|
|
|
|
| 69 |
print(whisper_text)
|
| 70 |
|
| 71 |
language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
|