Spaces:
Sleeping
Sleeping
Add support for the Whisper model large-v3-turbo.
Browse files
- app.py +0 -2
- config.json5 +4 -0
- docs/options.md +1 -0
- src/translation/translationModel.py +10 -0
- src/whisper/fasterWhisperContainer.py +3 -1
app.py
CHANGED
|
@@ -56,8 +56,6 @@ MAX_FILE_PREFIX_LENGTH = 17
|
|
| 56 |
# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
|
| 57 |
MAX_AUTO_CPU_CORES = 8
|
| 58 |
|
| 59 |
-
WHISPER_MODELS = ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2", "large-v3"]
|
| 60 |
-
|
| 61 |
class VadOptions:
|
| 62 |
def __init__(self, vad: str = None, vadMergeWindow: float = 5, vadMaxMergeSize: float = 150, vadPadding: float = 1, vadPromptWindow: float = 1,
|
| 63 |
vadInitialPromptMode: Union[VadInitialPromptMode, str] = VadInitialPromptMode.PREPREND_FIRST_SEGMENT):
|
|
|
|
| 56 |
# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
|
| 57 |
MAX_AUTO_CPU_CORES = 8
|
| 58 |
|
|
|
|
|
|
|
| 59 |
class VadOptions:
|
| 60 |
def __init__(self, vad: str = None, vadMergeWindow: float = 5, vadMaxMergeSize: float = 150, vadPadding: float = 1, vadPromptWindow: float = 1,
|
| 61 |
vadInitialPromptMode: Union[VadInitialPromptMode, str] = VadInitialPromptMode.PREPREND_FIRST_SEGMENT):
|
config.json5
CHANGED
|
@@ -34,6 +34,10 @@
|
|
| 34 |
{
|
| 35 |
"name": "large-v3",
|
| 36 |
"url": "large-v3"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
}
|
| 38 |
// Uncomment to add custom Japanese models
|
| 39 |
//{
|
|
|
|
| 34 |
{
|
| 35 |
"name": "large-v3",
|
| 36 |
"url": "large-v3"
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"name": "large-v3-turbo",
|
| 40 |
+
"url": "large-v3-turbo"
|
| 41 |
}
|
| 42 |
// Uncomment to add custom Japanese models
|
| 43 |
//{
|
docs/options.md
CHANGED
|
@@ -17,6 +17,7 @@ Select the model that Whisper will use to transcribe the audio:
|
|
| 17 |
| large | 1550 M | N/A | large | ~10 GB | 1x |
|
| 18 |
| large-v2 | 1550 M | N/A | large | ~10 GB | 1x |
|
| 19 |
| large-v3 | 1550 M | N/A | large | ~10 GB | 1x |
|
|
|
|
| 20 |
|
| 21 |
## Language
|
| 22 |
|
|
|
|
| 17 |
| large | 1550 M | N/A | large | ~10 GB | 1x |
|
| 18 |
| large-v2 | 1550 M | N/A | large | ~10 GB | 1x |
|
| 19 |
| large-v3 | 1550 M | N/A | large | ~10 GB | 1x |
|
| 20 |
+
| turbo | 809 M | N/A | turbo | ~6 GB | 8x |
|
| 21 |
|
| 22 |
## Language
|
| 23 |
|
src/translation/translationModel.py
CHANGED
|
@@ -423,6 +423,16 @@ class TranslationModel:
|
|
| 423 |
else: #M2M100 & NLLB
|
| 424 |
output = self.transTranslator(text, max_length=max_length, batch_size=self.batchSize, no_repeat_ngram_size=self.noRepeatNgramSize, num_beams=self.numBeams)
|
| 425 |
result = output[0]['translation_text']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
except Exception as e:
|
| 427 |
print(traceback.format_exc())
|
| 428 |
print("Error translation text: " + str(e))
|
|
|
|
| 423 |
else: #M2M100 & NLLB
|
| 424 |
output = self.transTranslator(text, max_length=max_length, batch_size=self.batchSize, no_repeat_ngram_size=self.noRepeatNgramSize, num_beams=self.numBeams)
|
| 425 |
result = output[0]['translation_text']
|
| 426 |
+
|
| 427 |
+
if len(result) > 2:
|
| 428 |
+
if result[len(result) - 1] == "\"" and result[0] == "\"":
|
| 429 |
+
result = result[1:-1]
|
| 430 |
+
elif result[len(result) - 1] == "'" and result[0] == "'":
|
| 431 |
+
result = result[1:-1]
|
| 432 |
+
elif result[len(result) - 1] == "「" and result[0] == "」":
|
| 433 |
+
result = result[1:-1]
|
| 434 |
+
elif result[len(result) - 1] == "『" and result[0] == "』":
|
| 435 |
+
result = result[1:-1]
|
| 436 |
except Exception as e:
|
| 437 |
print(traceback.format_exc())
|
| 438 |
print("Error translation text: " + str(e))
|
src/whisper/fasterWhisperContainer.py
CHANGED
|
@@ -42,11 +42,13 @@ class FasterWhisperContainer(AbstractWhisperContainer):
|
|
| 42 |
model_url = model_config.url
|
| 43 |
|
| 44 |
if model_config.type == "whisper":
|
| 45 |
-
if model_url not in ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2", "large-v3"]:
|
| 46 |
raise Exception("FasterWhisperContainer does not yet support Whisper models. Use ct2-transformers-converter to convert the model to a faster-whisper model.")
|
| 47 |
if model_url == "large":
|
| 48 |
# large is an alias for large-v1
|
| 49 |
model_url = "large-v1"
|
|
|
|
|
|
|
| 50 |
|
| 51 |
device = self.device
|
| 52 |
|
|
|
|
| 42 |
model_url = model_config.url
|
| 43 |
|
| 44 |
if model_config.type == "whisper":
|
| 45 |
+
if model_url not in ["tiny", "base", "small", "medium", "large", "large-v1", "large-v2", "large-v3", "large-v3-turbo"]:
|
| 46 |
raise Exception("FasterWhisperContainer does not yet support Whisper models. Use ct2-transformers-converter to convert the model to a faster-whisper model.")
|
| 47 |
if model_url == "large":
|
| 48 |
# large is an alias for large-v1
|
| 49 |
model_url = "large-v1"
|
| 50 |
+
elif model_url == "large-v3-turbo":
|
| 51 |
+
model_url = "deepdml/faster-whisper-large-v3-turbo-ct2"
|
| 52 |
|
| 53 |
device = self.device
|
| 54 |
|