app.py CHANGED
@@ -157,7 +157,6 @@ def save_transcription(transcription):
         f.write(transcription)
     return file_path
 
-
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
         return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
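For context, the companion callback `update_model_dropdown` (wired up at the end of this diff) presumably feeds these options back into the model dropdown. A minimal sketch, assuming Gradio's `gr.update` helper; the fallback list is an invention, since only the "faster-batched" branch of `get_model_options` is visible here:

```python
import gradio as gr

def update_model_dropdown(pipeline_type):
    # get_model_options comes from app.py above; the fallback list is
    # hypothetical — the diff only shows the "faster-batched" branch
    options = get_model_options(pipeline_type) or ["openai/whisper-large-v3"]
    # gr.update swaps the dropdown's choices and resets the selection
    return gr.update(choices=options, value=options[0])
```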
@@ -212,15 +211,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
         audio_path = trimmed_audio_path
 
-    if pipeline_type == "faster-batched":
-        start_time_perf = time.time()
-        segments, info = pipe(audio_path, batch_size=batch_size)
-        end_time_perf = time.time()
+    start_time_perf = time.time()
+    if pipeline_type in ["faster-batched", "faster-sequenced"]:
+        segments, info = pipeline(audio_path, batch_size=batch_size)
     else:
-        start_time_perf = time.time()
-        result = pipe(audio_path)
+        result = pipeline(audio_path)
         segments = result["chunks"]
-        end_time_perf = time.time()
+    end_time_perf = time.time()
 
     transcription_time = end_time_perf - start_time_perf
     audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
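The hoisted timing block assumes `pipeline` is a callable that returns either `(segments, info)` (the faster-whisper paths) or a dict with a `"chunks"` key (the transformers path). The diff does not show how `pipeline` is constructed; the sketch below is one plausible setup using the public faster-whisper and transformers APIs, with `load_pipeline` and its wrapping lambdas as assumptions:

```python
from faster_whisper import WhisperModel, BatchedInferencePipeline
from transformers import pipeline as hf_asr_pipeline

def load_pipeline(pipeline_type, model_id, dtype):
    # hypothetical factory; the app's real setup code is not in this diff
    if pipeline_type == "faster-batched":
        model = BatchedInferencePipeline(model=WhisperModel(model_id, compute_type=dtype))
        # .transcribe returns (segments, info), matching `segments, info = pipeline(...)`
        return lambda path, batch_size=8: model.transcribe(path, batch_size=batch_size)
    if pipeline_type == "faster-sequenced":
        model = WhisperModel(model_id, compute_type=dtype)
        return lambda path, batch_size=None: model.transcribe(path)  # batch_size unused here
    # transformers branch: return_timestamps=True populates result["chunks"]
    return hf_asr_pipeline("automatic-speech-recognition", model=model_id,
                           return_timestamps=True)
```

Worth noting: faster-whisper's `transcribe` returns a lazy generator, so the elapsed time measured around the call alone may not include decoding, which only happens as the segments are consumed in the loop below.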
@@ -236,12 +233,12 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
     transcription = ""
 
     for segment in segments:
-        if pipeline_type == "faster-batched":
-            transcription_segment = f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
-        else:
-            transcription_segment = f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
+        transcription_segment = (
+            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
+            if pipeline_type in ["faster-batched", "faster-sequenced"] else
+            f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
+        )
         transcription += transcription_segment
-
         if verbose:
             yield metrics_output, transcription, None
 
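The conditional expression handles the two segment shapes: faster-whisper yields `Segment` objects with attribute access, while the transformers pipeline returns plain dicts. An illustration with made-up values:

```python
from types import SimpleNamespace

# stand-in for a faster-whisper Segment (attribute access)
fw_segment = SimpleNamespace(start=0.0, end=5.24, text=" Hello there.")
# stand-in for a transformers chunk (dict access)
hf_chunk = {"timestamp": (0.0, 5.24), "text": " Hello there."}

print(f"[{fw_segment.start:.2f}s -> {fw_segment.end:.2f}s] {fw_segment.text}")
print(f"[{hf_chunk['timestamp'][0]:.2f}s -> {hf_chunk['timestamp'][1]:.2f}s] {hf_chunk['text']}")
# both lines render as: [0.00s -> 5.24s]  Hello there.
```

One caveat: the transformers pipeline can emit `None` as the end timestamp of the final chunk, which the `:.2f` format would reject, so a guard there may be worthwhile.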
@@ -289,12 +286,11 @@ iface = gr.Interface(
         ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, False]
     ],
     cache_examples=False,
-    live=True
 )
 
-iface.launch()
-
 pipeline_type_dropdown = iface.inputs[1]
 model_dropdown = iface.inputs[2]
 
-pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
+pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
+
+iface.launch()
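Moving `iface.launch()` below the `.change()` wiring matters because `launch()` typically blocks a script's main thread, so an event registered after it would never attach while the app is live. Separately, whether `iface.inputs` exposes live component objects varies across Gradio versions; the more dependable pattern in current Gradio is to create the components inside `gr.Blocks` and wire the event on them directly. A sketch reusing this app's `get_model_options` and `update_model_dropdown` (layout details are assumptions):

```python
import gradio as gr

with gr.Blocks() as demo:
    pipeline_type = gr.Dropdown(
        ["faster-batched", "faster-sequenced", "transformers"],
        value="faster-batched", label="Pipeline type")
    model_id = gr.Dropdown(get_model_options("faster-batched"), label="Model")
    # re-populate the model list whenever the pipeline type changes
    pipeline_type.change(update_model_dropdown, inputs=pipeline_type, outputs=model_id)

demo.launch()
```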