hynt committed on
Commit
ee50021
·
verified ·
1 Parent(s): 5872ec9

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +66 -66
model.py CHANGED
@@ -362,72 +362,72 @@ def _get_offline_pre_trained_model(
362
  return recognizer
363
 
364
 
365
- # @lru_cache(maxsize=10)
366
- # def _get_vietnamese_pretrained_model(
367
- # repo_id: str, decoding_method: str, num_active_paths: int
368
- # ) -> sherpa_onnx.OfflineRecognizer:
369
- # assert repo_id in (
370
- # "csukuangfj/sherpa-onnx-zipformer-vi-int8-2025-04-20",
371
- # "csukuangfj/sherpa-onnx-zipformer-vi-2025-04-20",
372
- # ), repo_id
373
-
374
- # # decoder_model = _get_nn_model_filename(
375
- # # repo_id=repo_id,
376
- # # filename="decoder-epoch-12-avg-8.onnx",
377
- # # subfolder=".",
378
- # # )
379
-
380
- # decoder_model = "decoder-epoch-45-avg-25.onnx"
381
-
382
- # if repo_id == "csukuangfj/sherpa-onnx-zipformer-vi-int8-2025-04-20":
383
- # # encoder_model = _get_nn_model_filename(
384
- # # repo_id=repo_id,
385
- # # filename="encoder-epoch-12-avg-8.int8.onnx",
386
- # # subfolder=".",
387
- # # )
388
-
389
- # encoder_model = "encoder-epoch-45-avg-25.int8.onnx"
390
-
391
- # # joiner_model = _get_nn_model_filename(
392
- # # repo_id=repo_id,
393
- # # filename="joiner-epoch-12-avg-8.int8.onnx",
394
- # # subfolder=".",
395
- # # )
396
- # joiner_model = "joiner-epoch-45-avg-25.int8.onnx"
397
- # elif repo_id == "csukuangfj/sherpa-onnx-zipformer-vi-2025-04-20":
398
- # # encoder_model = _get_nn_model_filename(
399
- # # repo_id=repo_id,
400
- # # filename="encoder-epoch-12-avg-8.onnx",
401
- # # subfolder=".",
402
- # # )
403
-
404
- # encoder_model = "encoder-epoch-45-avg-25.onnx"
405
-
406
- # # joiner_model = _get_nn_model_filename(
407
- # # repo_id=repo_id,
408
- # # filename="joiner-epoch-12-avg-8.onnx",
409
- # # subfolder=".",
410
- # # )
411
- # joiner_model = "joiner-epoch-45-avg-25.onnx"
412
- # else:
413
- # raise ValueError(f"repo_id: {repo_id}")
414
-
415
- # # tokens = _get_token_filename(repo_id=repo_id, subfolder=".")
416
-
417
- # tokens = "tokens.txt"
418
-
419
- # recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
420
- # tokens=tokens,
421
- # encoder=encoder_model,
422
- # decoder=decoder_model,
423
- # joiner=joiner_model,
424
- # num_threads=2,
425
- # sample_rate=16000,
426
- # feature_dim=80,
427
- # decoding_method=decoding_method,
428
- # )
429
-
430
- # return recognizer
431
 
432
 
433
  @lru_cache(maxsize=10)
 
362
  return recognizer
363
 
364
 
365
@lru_cache(maxsize=10)
def _get_gigaspeech_pre_trained_model(
    repo_id: str, decoding_method: str, num_active_paths: int
) -> sherpa_onnx.OfflineRecognizer:
    """Build an offline transducer recognizer for ``repo_id``.

    Results are memoized by ``lru_cache`` on the full argument tuple, so
    repeated calls with the same arguments reuse the same recognizer.

    NOTE(review): despite the ``gigaspeech`` name, the only repo ids
    accepted here are the two ``sherpa-onnx-zipformer-vi`` ones listed
    below -- confirm the name is intentional.

    Args:
        repo_id: Selects which encoder/joiner files are used (int8 or
            float variant).
        decoding_method: Forwarded unchanged to
            ``OfflineRecognizer.from_transducer``.
        num_active_paths: Forwarded as ``max_active_paths``.

    Returns:
        The recognizer created by ``OfflineRecognizer.from_transducer``.

    Raises:
        ValueError: If ``repo_id`` is not one of the supported ids.
    """
    # Supported repo id -> (encoder file, joiner file).
    model_files = {
        "csukuangfj/sherpa-onnx-zipformer-vi-int8-2025-04-20": (
            "encoder-epoch-45-avg-25.int8.onnx",
            "joiner-epoch-45-avg-25.int8.onnx",
        ),
        "csukuangfj/sherpa-onnx-zipformer-vi-2025-04-20": (
            "encoder-epoch-45-avg-25.onnx",
            "joiner-epoch-45-avg-25.onnx",
        ),
    }
    if repo_id not in model_files:
        raise ValueError(f"repo_id: {repo_id}")
    encoder_model, joiner_model = model_files[repo_id]

    # The file names are bare relative paths rather than the result of a
    # hub download keyed by repo_id (the download helper calls were
    # disabled), so the files are presumably expected next to the running
    # process -- TODO confirm the deployment layout.
    decoder_model = "decoder-epoch-45-avg-25.onnx"
    tokens = "tokens.txt"

    return sherpa_onnx.OfflineRecognizer.from_transducer(
        tokens=tokens,
        encoder=encoder_model,
        decoder=decoder_model,
        joiner=joiner_model,
        num_threads=2,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=decoding_method,
        # Previously dropped: without this the caller's beam width had no
        # effect, yet still produced separate cache entries.
        max_active_paths=num_active_paths,
    )
431
 
432
 
433
  @lru_cache(maxsize=10)