Spaces:
Sleeping
Sleeping
update
Browse files- main.py +1 -0
- toolbox/k2_sherpa/nn_models.py +39 -1
main.py
CHANGED
|
@@ -112,6 +112,7 @@ def process(
|
|
| 112 |
loader=m_dict["loader"],
|
| 113 |
decoding_method=decoding_method,
|
| 114 |
num_active_paths=num_active_paths,
|
|
|
|
| 115 |
)
|
| 116 |
|
| 117 |
# transcribe
|
|
|
|
| 112 |
loader=m_dict["loader"],
|
| 113 |
decoding_method=decoding_method,
|
| 114 |
num_active_paths=num_active_paths,
|
| 115 |
+
normalize_samples=m_dict["normalize_samples"],
|
| 116 |
)
|
| 117 |
|
| 118 |
# transcribe
|
toolbox/k2_sherpa/nn_models.py
CHANGED
|
@@ -50,7 +50,19 @@ model_map = {
|
|
| 50 |
"tokens_file_sub_folder": "data/lang_char",
|
| 51 |
"loader": "load_sherpa_offline_recognizer",
|
| 52 |
"normalize_samples": True,
|
| 53 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
],
|
| 55 |
"English": [
|
| 56 |
{
|
|
@@ -156,6 +168,30 @@ def load_sherpa_offline_recognizer_from_paraformer(nn_model_file: str,
|
|
| 156 |
return recognizer
|
| 157 |
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
def load_recognizer(repo_id: str,
|
| 160 |
nn_model_file: str,
|
| 161 |
nn_model_file_sub_folder: str,
|
|
@@ -165,6 +201,7 @@ def load_recognizer(repo_id: str,
|
|
| 165 |
loader: str,
|
| 166 |
decoding_method: str = "greedy_search",
|
| 167 |
num_active_paths: int = 4,
|
|
|
|
| 168 |
):
|
| 169 |
if not os.path.exists(local_model_dir):
|
| 170 |
download_model(
|
|
@@ -185,6 +222,7 @@ def load_recognizer(repo_id: str,
|
|
| 185 |
tokens_file=tokens_file,
|
| 186 |
decoding_method=decoding_method,
|
| 187 |
num_active_paths=num_active_paths,
|
|
|
|
| 188 |
)
|
| 189 |
elif loader == "load_sherpa_offline_recognizer_from_paraformer":
|
| 190 |
recognizer = load_sherpa_offline_recognizer_from_paraformer(
|
|
|
|
| 50 |
"tokens_file_sub_folder": "data/lang_char",
|
| 51 |
"loader": "load_sherpa_offline_recognizer",
|
| 52 |
"normalize_samples": True,
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"repo_id": "zrjin/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2",
|
| 56 |
+
"encoder_model_file": "encoder-epoch-20-avg-1.onnx",
|
| 57 |
+
"encoder_model_file_sub_folder": ".",
|
| 58 |
+
"decoder_model_file": "decoder-epoch-20-avg-1.onnx",
|
| 59 |
+
"decoder_model_file_sub_folder": ".",
|
| 60 |
+
"joiner_model_file": "joiner-epoch-20-avg-1.onnx",
|
| 61 |
+
"joiner_model_file_sub_folder": ".",
|
| 62 |
+
"tokens_file": "tokens.txt",
|
| 63 |
+
"tokens_file_sub_folder": ".",
|
| 64 |
+
"loader": "load_sherpa_offline_recognizer_from_transducer",
|
| 65 |
+
},
|
| 66 |
],
|
| 67 |
"English": [
|
| 68 |
{
|
|
|
|
| 168 |
return recognizer
|
| 169 |
|
| 170 |
|
| 171 |
+
def load_sherpa_offline_recognizer_from_transducer(encoder_model_file: str,
|
| 172 |
+
decoder_model_file: str,
|
| 173 |
+
joiner_model_file: str,
|
| 174 |
+
tokens_file: str,
|
| 175 |
+
sample_rate: int = 16000,
|
| 176 |
+
decoding_method: str = "greedy_search",
|
| 177 |
+
feature_dim: int = 80,
|
| 178 |
+
num_threads: int = 2,
|
| 179 |
+
num_active_paths: int = 2,
|
| 180 |
+
):
|
| 181 |
+
recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
|
| 182 |
+
encoder=encoder_model_file,
|
| 183 |
+
decoder=decoder_model_file,
|
| 184 |
+
joiner=joiner_model_file,
|
| 185 |
+
tokens=tokens_file,
|
| 186 |
+
num_threads=num_threads,
|
| 187 |
+
sample_rate=sample_rate,
|
| 188 |
+
feature_dim=feature_dim,
|
| 189 |
+
decoding_method=decoding_method,
|
| 190 |
+
max_active_paths=num_active_paths,
|
| 191 |
+
)
|
| 192 |
+
return recognizer
|
| 193 |
+
|
| 194 |
+
|
| 195 |
def load_recognizer(repo_id: str,
|
| 196 |
nn_model_file: str,
|
| 197 |
nn_model_file_sub_folder: str,
|
|
|
|
| 201 |
loader: str,
|
| 202 |
decoding_method: str = "greedy_search",
|
| 203 |
num_active_paths: int = 4,
|
| 204 |
+
normalize_samples: bool = False,
|
| 205 |
):
|
| 206 |
if not os.path.exists(local_model_dir):
|
| 207 |
download_model(
|
|
|
|
| 222 |
tokens_file=tokens_file,
|
| 223 |
decoding_method=decoding_method,
|
| 224 |
num_active_paths=num_active_paths,
|
| 225 |
+
normalize_samples=normalize_samples,
|
| 226 |
)
|
| 227 |
elif loader == "load_sherpa_offline_recognizer_from_paraformer":
|
| 228 |
recognizer = load_sherpa_offline_recognizer_from_paraformer(
|