add stability ts
Browse files
app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import time
|
| 3 |
import tempfile
|
| 4 |
-
from copy import deepcopy
|
| 5 |
from math import floor
|
| 6 |
from typing import Optional, List, Dict, Any
|
| 7 |
|
|
@@ -162,11 +161,11 @@ def get_prediction(inputs, prompt: Optional[str], punctuate_text: bool = True, s
|
|
| 162 |
generate_kwargs = {"language": "japanese", "task": "transcribe"}
|
| 163 |
if prompt:
|
| 164 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors='pt').to(device)
|
| 165 |
-
|
|
|
|
|
|
|
| 166 |
if stabilize_timestamp:
|
| 167 |
-
prediction['chunks'] = fix_timestamp(pipeline_output=prediction['chunks'],
|
| 168 |
-
audio=inputs["array"],
|
| 169 |
-
sample_rate=inputs["sampling_rate"])
|
| 170 |
if punctuate_text:
|
| 171 |
prediction['chunks'] = PUNCTUATOR.punctuate(prediction['chunks'])
|
| 172 |
text = "".join([c['text'] for c in prediction['chunks']])
|
|
@@ -176,9 +175,11 @@ def get_prediction(inputs, prompt: Optional[str], punctuate_text: bool = True, s
|
|
| 176 |
return text, text_timestamped
|
| 177 |
|
| 178 |
|
| 179 |
-
def transcribe(inputs, prompt, punctuate_text, stabilize_timestamp):
    """Gradio handler: decode the submitted audio payload and run transcription.

    Raises:
        gr.Error: when no audio was uploaded/recorded (``inputs`` is None).
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    # Resample/decode via ffmpeg at the model's expected sampling rate.
    sampling_rate = pipe.feature_extractor.sampling_rate
    audio_array = ffmpeg_read(inputs, sampling_rate)
    payload = {"array": audio_array, "sampling_rate": sampling_rate}
    return get_prediction(payload, prompt, punctuate_text, stabilize_timestamp)
|
|
|
|
| 1 |
import os
|
| 2 |
import time
|
| 3 |
import tempfile
|
|
|
|
| 4 |
from math import floor
|
| 5 |
from typing import Optional, List, Dict, Any
|
| 6 |
|
|
|
|
| 161 |
generate_kwargs = {"language": "japanese", "task": "transcribe"}
|
| 162 |
if prompt:
|
| 163 |
generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors='pt').to(device)
|
| 164 |
+
array = inputs["array"]
|
| 165 |
+
sr = inputs["sampling_rate"]
|
| 166 |
+
prediction = pipe(inputs, return_timestamps=True, generate_kwargs=generate_kwargs)
|
| 167 |
if stabilize_timestamp:
|
| 168 |
+
prediction['chunks'] = fix_timestamp(pipeline_output=prediction['chunks'], audio=array, sample_rate=sr)
|
|
|
|
|
|
|
| 169 |
if punctuate_text:
|
| 170 |
prediction['chunks'] = PUNCTUATOR.punctuate(prediction['chunks'])
|
| 171 |
text = "".join([c['text'] for c in prediction['chunks']])
|
|
|
|
| 175 |
return text, text_timestamped
|
| 176 |
|
| 177 |
|
| 178 |
+
def transcribe(inputs: str, prompt, punctuate_text, stabilize_timestamp):
    """Gradio handler: read the submitted audio file, decode it, and transcribe.

    ``inputs`` is a filesystem path to the uploaded/recorded audio.

    Raises:
        gr.Error: when no audio was submitted (``inputs`` is None).
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    # Load the raw bytes, then decode/resample via ffmpeg at the model's rate.
    with open(inputs, "rb") as audio_file:
        raw_bytes = audio_file.read()
    sampling_rate = pipe.feature_extractor.sampling_rate
    audio_array = ffmpeg_read(raw_bytes, sampling_rate)
    payload = {"array": audio_array, "sampling_rate": sampling_rate}
    return get_prediction(payload, prompt, punctuate_text, stabilize_timestamp)
|