fnlp /

gyt1145028706 committed on
Commit c834337 · 1 Parent(s): 6171706
Files changed (4)
  1. config.json +0 -120
  2. examples/m1.wav +0 -0
  3. preprocessor_config.json +0 -13
  4. pytorch_model.bin +0 -3
config.json DELETED
@@ -1,120 +0,0 @@
- {
-     "model_type": "xy_tokenizer",
-     "input_sample_rate": 16000,
-     "output_sample_rate": 24000,
-     "encoder_downsample_rate": 1280,
-     "decoder_upsample_rate": 1920,
-     "code_dim": 3072,
-     "params": {
-         "feature_extractor_kwargs": {
-             "chunk_length": 30,
-             "feature_size": 80,
-             "hop_length": 160,
-             "n_fft": 400,
-             "n_samples": 480000,
-             "nb_max_frames": 3000,
-             "padding_side": "right",
-             "padding_value": 0.0,
-             "sampling_rate": 16000,
-             "return_attention_mask": true,
-             "return_tensors": "pt"
-         },
-         "semantic_encoder_kwargs": {
-             "num_mel_bins": 80,
-             "sampling_rate": 16000,
-             "hop_length": 160,
-             "stride_size": 2,
-             "kernel_size": 3,
-             "d_model": 768,
-             "scale_embedding": false,
-             "max_audio_seconds": 30,
-             "encoder_layers": 12,
-             "encoder_attention_heads": 12,
-             "encoder_ffn_dim": 3072,
-             "activation_function": "gelu"
-         },
-         "semantic_encoder_adapter_kwargs": {
-             "input_dim": 768,
-             "output_dim": 768,
-             "d_model": 768,
-             "max_source_positions": 1500,
-             "encoder_layers": 4,
-             "encoder_attention_heads": 12,
-             "encoder_ffn_dim": 3072
-         },
-         "acoustic_encoder_kwargs": {
-             "num_mel_bins": 80,
-             "sampling_rate": 16000,
-             "hop_length": 160,
-             "stride_size": 2,
-             "kernel_size": 3,
-             "d_model": 768,
-             "scale_embedding": false,
-             "max_audio_seconds": 30,
-             "encoder_layers": 12,
-             "encoder_attention_heads": 12,
-             "encoder_ffn_dim": 3072,
-             "activation_function": "gelu"
-         },
-         "pre_rvq_adapter_kwargs": {
-             "input_dim": 1536,
-             "output_dim": 768,
-             "d_model": 768,
-             "max_source_positions": 1500,
-             "encoder_layers": 4,
-             "encoder_attention_heads": 12,
-             "encoder_ffn_dim": 3072
-         },
-         "downsample_kwargs": {
-             "d_model": 768,
-             "avg_pooler": 4
-         },
-         "quantizer_kwargs": {
-             "input_dim": 3072,
-             "rvq_dim": 512,
-             "output_dim": 3072,
-             "num_quantizers": 8,
-             "codebook_size": 1024,
-             "codebook_dim": 512,
-             "quantizer_dropout": 0.0
-         },
-         "post_rvq_adapter_kwargs": {
-             "input_dim": 3072,
-             "output_dim": 3072,
-             "d_model": 768,
-             "max_source_positions": 375,
-             "encoder_layers": 4,
-             "encoder_attention_heads": 12,
-             "encoder_ffn_dim": 3072
-         },
-         "upsample_kwargs": {
-             "d_model": 768,
-             "stride": 4
-         },
-         "acoustic_decoder_kwargs": {
-             "num_mel_bins": 80,
-             "sampling_rate": 16000,
-             "hop_length": 160,
-             "stride_size": 2,
-             "kernel_size": 3,
-             "d_model": 768,
-             "scale_embedding": false,
-             "max_audio_seconds": 30,
-             "decoder_layers": 12,
-             "decoder_attention_heads": 12,
-             "decoder_ffn_dim": 3072,
-             "activation_function": "gelu"
-         },
-         "vocos_kwargs": {
-             "input_channels": 80,
-             "dim": 512,
-             "intermediate_dim": 4096,
-             "num_layers": 30,
-             "n_fft": 960,
-             "hop_size": 240,
-             "padding": "same"
-         }
-     },
-     "torch_dtype": "float32",
-     "transformers_version": "4.51.0"
- }
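For orientation, the deleted config fully determines the codec's rates: mel features arrive at 100 frames/s (hop 160 at 16 kHz), the encoder's convolutional stride halves that, and the avg_pooler of 4 brings it to 12.5 tokens/s, matching both the top-level encoder_downsample_rate (16000 / 1280) and decoder_upsample_rate (24000 / 1920). A minimal sketch in plain Python (illustrative only, not code from this repo) checking that arithmetic:

```python
import math

# Values copied verbatim from the deleted config.json above.
cfg = {
    "input_sample_rate": 16000,
    "output_sample_rate": 24000,
    "encoder_downsample_rate": 1280,
    "decoder_upsample_rate": 1920,
    "hop_length": 160,    # mel hop at 16 kHz -> 100 frames/s
    "stride_size": 2,     # conv stride in the encoders -> 50 frames/s
    "avg_pooler": 4,      # downsample module -> 12.5 frames/s
    "num_quantizers": 8,
    "codebook_size": 1024,
}

# Token rate from the top-level rates: 16000 / 1280 = 12.5 Hz.
token_rate = cfg["input_sample_rate"] / cfg["encoder_downsample_rate"]

# The same 12.5 Hz falls out of the per-stage factors: 16000 / 160 / 2 / 4.
stage_rate = (cfg["input_sample_rate"] / cfg["hop_length"]
              / cfg["stride_size"] / cfg["avg_pooler"])
assert token_rate == stage_rate == 12.5

# The decoder reconstructs 24 kHz audio at the same frame rate: 24000 / 1920.
assert cfg["output_sample_rate"] / cfg["decoder_upsample_rate"] == token_rate

# 8 RVQ codebooks of 1024 entries (10 bits each) at 12.5 Hz:
# 100 discrete codes per second, 8 * 10 * 12.5 = 1000 bits/s, i.e. 1 kbps.
bits_per_step = cfg["num_quantizers"] * math.log2(cfg["codebook_size"])
print(f"{token_rate} tokens/s, {bits_per_step * token_rate:.0f} bits/s")
```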
examples/m1.wav DELETED
Binary file (64.8 kB)
 
preprocessor_config.json DELETED
@@ -1,13 +0,0 @@
- {
-     "chunk_length": 30,
-     "feature_size": 80,
-     "hop_length": 160,
-     "n_fft": 400,
-     "n_samples": 480000,
-     "nb_max_frames": 3000,
-     "padding_side": "right",
-     "padding_value": 0.0,
-     "sampling_rate": 16000,
-     "return_attention_mask": true,
-     "return_tensors": "pt"
- }
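These fields line up with the constructor arguments of transformers' WhisperFeatureExtractor (80 mel bins, 10 ms hop, 30 s chunks of 480000 samples), which suggests the tokenizer's front end consumes Whisper-style log-mel features; whether the repo instantiates that exact class is an assumption here. A hedged sketch rebuilding an equivalent extractor from the deleted values:

```python
import numpy as np
from transformers import WhisperFeatureExtractor

# Parameter values taken verbatim from the deleted preprocessor_config.json;
# the choice of WhisperFeatureExtractor is an assumption, not confirmed by the repo.
feature_extractor = WhisperFeatureExtractor(
    feature_size=80,       # 80 mel bins
    sampling_rate=16000,
    hop_length=160,        # 10 ms hop -> 100 frames/s
    chunk_length=30,       # 30 s windows -> n_samples = 480000
    n_fft=400,
    padding_value=0.0,
    return_attention_mask=True,
)

# One second of silence, just to exercise the call.
audio = np.zeros(16000, dtype=np.float32)
features = feature_extractor(audio, sampling_rate=16000, return_tensors="pt")
print(features["input_features"].shape)  # (1, 80, 3000) after padding to 30 s
```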
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:fafbaf4ba0e6095be842230c4bd16ecf6d193b250718a5775f1ac7aa528d9110
- size 2137279502
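The deleted entry is a Git LFS pointer, not the weights themselves: the ~2.1 GB pytorch_model.bin lives in LFS storage and is addressed by its SHA-256. A small illustrative check (not a repo script) that a locally downloaded copy matches the pointer:

```python
import hashlib
from pathlib import Path

# Copied from the LFS pointer above.
EXPECTED_OID = "fafbaf4ba0e6095be842230c4bd16ecf6d193b250718a5775f1ac7aa528d9110"
EXPECTED_SIZE = 2137279502  # bytes, ~2.1 GB

def verify_lfs_object(path: str) -> bool:
    """Return True if the file's size and sha256 match the LFS pointer."""
    p = Path(path)
    if p.stat().st_size != EXPECTED_SIZE:
        return False
    h = hashlib.sha256()
    with p.open("rb") as f:
        # Stream in 1 MiB chunks; the file is too large to read at once.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == EXPECTED_OID

print(verify_lfs_object("pytorch_model.bin"))
```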