Commit: +verbosity

app.py (changed)
@@ -15,15 +15,13 @@ import yt_dlp
 logging.basicConfig(level=logging.INFO)
 
 # Clone and install faster-whisper from GitHub
-# (we should be able to do this in build.sh in a hf space)
 try:
     subprocess.run(["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git"], check=True)
     subprocess.run(["pip", "install", "-e", "./faster-whisper"], check=True)
 except subprocess.CalledProcessError as e:
-
+    logging.error(f"Error during faster-whisper installation: {e}")
     sys.exit(1)
 
-# Add the faster-whisper directory to the Python path
 sys.path.append("./faster-whisper")
 
 from faster_whisper import WhisperModel
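The comment removed above noted that this install step could move to build time in a Hugging Face Space. As a rough alternative sketch (hypothetical, not what the commit does), the clone plus editable install can also collapse into a single pip call against the GitHub URL, which assumes network access at startup and would make the sys.path tweak unnecessary:

import subprocess
import sys

# Install faster-whisper straight from GitHub in one step; pip performs
# the clone internally, so no local ./faster-whisper checkout is needed.
try:
    subprocess.run(
        [sys.executable, "-m", "pip", "install",
         "git+https://github.com/SYSTRAN/faster-whisper.git"],
        check=True,
    )
except subprocess.CalledProcessError as e:
    print(f"faster-whisper installation failed: {e}", file=sys.stderr)
    sys.exit(1)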
@@ -159,16 +157,18 @@ def save_transcription(transcription):
 
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
-        return ["cstr/whisper-large-v3-turbo-int8_float32"
+        return ["cstr/whisper-large-v3-turbo-int8_float32"]
     elif pipeline_type == "faster-sequenced":
-        return ["
+        return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
     elif pipeline_type == "transformers":
-        return ["openai/whisper-large-v3"
-
-
-
+        return ["openai/whisper-large-v3"]
+    return []
+
 def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
     try:
+        logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
+        verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
+
         if pipeline_type == "faster-batched":
             model = WhisperModel(model_id, device="auto", compute_type=dtype)
             pipeline = BatchedInferencePipeline(model=model)
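For reference, a minimal standalone sketch of the batched path this hunk configures, following the faster-whisper API as published in the SYSTRAN repo (the model name and audio path are placeholders):

from faster_whisper import WhisperModel, BatchedInferencePipeline

# Load a CTranslate2 Whisper model; compute_type mirrors the dtype option above.
model = WhisperModel("deepdml/faster-whisper-large-v3-turbo-ct2",
                     device="auto", compute_type="int8")

# Wrap the model so several audio segments are decoded per forward pass.
pipeline = BatchedInferencePipeline(model=model)

# transcribe() returns a lazy segment generator plus metadata; iterating
# the generator is what actually runs the decoding.
segments, info = pipeline.transcribe("audio.mp3", batch_size=8)
for seg in segments:
    print(f"[{seg.start:.2f}s -> {seg.end:.2f}s] {seg.text}")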
@@ -198,6 +198,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
 
         if isinstance(input_source, str) and (input_source.startswith('http://') or input_source.startswith('https://')):
             audio_path = download_audio(input_source, download_method)
+            verbose_messages += f"Audio file downloaded: {audio_path}\n"
             if audio_path.startswith("Error"):
                 yield f"Error: {audio_path}", "", None
                 return
@@ -207,6 +208,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
         if start_time is not None or end_time is not None:
             trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
             audio_path = trimmed_audio_path
+            verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
 
         start_time_perf = time.time()
         if pipeline_type in ["faster-batched", "faster-sequenced"]:
@@ -225,7 +227,7 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
         )
 
         if verbose:
-            yield metrics_output, "", None
+            yield verbose_messages + metrics_output, "", None
 
         transcription = ""
 
@@ -237,12 +239,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
             )
             transcription += transcription_segment
             if verbose:
-                yield metrics_output, transcription, None
+                yield verbose_messages + metrics_output, transcription, None
 
         transcription_file = save_transcription(transcription)
-        yield metrics_output, transcription, transcription_file
+        yield verbose_messages + metrics_output, transcription, transcription_file
 
     except Exception as e:
+        logging.error(f"An error occurred during transcription: {str(e)}")
         yield f"An error occurred: {str(e)}", "", None
 
     finally:
@@ -261,15 +264,6 @@ def update_model_dropdown(pipeline_type):
     model_choices = get_model_options(pipeline_type)
     return gr.Dropdown.update(choices=model_choices, value=model_choices[0])
 
-def get_model_options(pipeline_type):
-    if pipeline_type == "faster-batched":
-        return ["cstr/whisper-large-v3-turbo-int8_float32"]
-    elif pipeline_type == "faster-sequenced":
-        return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
-    elif pipeline_type == "transformers":
-        return ["openai/whisper-large-v3"]
-    return []
-
 with gr.Blocks() as iface:
     gr.Markdown("# Multi-Pipeline Transcription")
     gr.Markdown("Transcribe audio using multiple pipelines and models.")
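Because transcribe_audio is a generator, each yield is a (status_text, transcription_so_far, file_or_None) triple, and Gradio streams successive yields into the bound output components; that is what makes the new verbose messages appear incrementally in the UI. The same contract can be exercised directly, as in this sketch (the URL and the download_method value are placeholders, not taken from the diff):

# Consume the generator outside Gradio, e.g. from a script.
final_path = None
for status, text, final_path in transcribe_audio(
        "https://example.com/talk.mp3",   # placeholder input URL
        pipeline_type="faster-batched",
        model_id="cstr/whisper-large-v3-turbo-int8_float32",
        dtype="int8",
        batch_size=8,
        download_method="yt-dlp",         # placeholder; the app defines its own choices
        verbose=True,
):
    print(status)  # grows as verbose_messages accumulates

# The last yield carries the saved transcription file path.
print("transcript file:", final_path)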