Spaces:
Running
Running
| # @package __global__ | |
| defaults: | |
| - ../default | |
| - override /dset: audio/default | |
| - _self_ | |
| solver: compression | |
| sample_rate: ??? | |
| channels: ??? | |
| # loss balancing | |
| losses: | |
| adv: 4. | |
| feat: 4. | |
| l1: 0.1 | |
| mel: 0. | |
| msspec: 2. | |
| sisnr: 0. | |
| balancer: | |
| balance_grads: true | |
| ema_decay: 0.999 | |
| per_batch_item: true | |
| total_norm: 1. | |
| adversarial: | |
| every: 1 | |
| adversaries: [msstftd] | |
| adv_loss: hinge | |
| feat_loss: l1 | |
| # losses hyperparameters | |
| l1: {} | |
| l2: {} | |
| mrstft: | |
| factor_sc: .5 | |
| factor_mag: .5 | |
| normalized: false | |
| mel: | |
| sample_rate: ${sample_rate} | |
| n_fft: 1024 | |
| hop_length: 256 | |
| win_length: 1024 | |
| n_mels: 64 | |
| f_min: 64 | |
| f_max: null | |
| normalized: false | |
| floor_level: 1e-5 | |
| sisnr: | |
| sample_rate: ${sample_rate} | |
| segment: 5. | |
| msspec: | |
| sample_rate: ${sample_rate} | |
| range_start: 6 | |
| range_end: 11 | |
| n_mels: 64 | |
| f_min: 64 | |
| f_max: null | |
| normalized: true | |
| alphas: false | |
| floor_level: 1e-5 | |
| # metrics | |
| metrics: | |
| visqol: | |
| mode: audio | |
| bin: null # path to visqol install | |
| model: tcdaudio14_aacvopus_coresv_svrnsim_n.68_g.01_c1.model # visqol v3 | |
| # adversaries hyperparameters | |
| msstftd: | |
| in_channels: 1 | |
| out_channels: 1 | |
| filters: 32 | |
| norm: weight_norm | |
| n_ffts: [1024, 2048, 512, 256, 128] | |
| hop_lengths: [256, 512, 128, 64, 32] | |
| win_lengths: [1024, 2048, 512, 256, 128] | |
| activation: LeakyReLU | |
| activation_params: {negative_slope: 0.3} | |
| msd: | |
| in_channels: 1 | |
| out_channels: 1 | |
| scale_norms: [spectral_norm, weight_norm, weight_norm] | |
| kernel_sizes: [5, 3] | |
| filters: 16 | |
| max_filters: 1024 | |
| downsample_scales: [4, 4, 4, 4] | |
| inner_kernel_sizes: null | |
| groups: [4, 4, 4, 4] | |
| strides: null | |
| paddings: null | |
| activation: LeakyReLU | |
| activation_params: {negative_slope: 0.3} | |
| mpd: | |
| in_channels: 1 | |
| out_channels: 1 | |
| periods: [2, 3, 5, 7, 11] | |
| n_layers: 5 | |
| kernel_size: 5 | |
| stride: 3 | |
| filters: 8 | |
| filter_scales: 4 | |
| max_filters: 1024 | |
| activation: LeakyReLU | |
| activation_params: {negative_slope: 0.3} | |
| norm: weight_norm | |
| # data hyperparameters | |
| dataset: | |
| batch_size: 64 | |
| num_workers: 10 | |
| segment_duration: 1 | |
| train: | |
| num_samples: 500000 | |
| valid: | |
| num_samples: 10000 | |
| evaluate: | |
| batch_size: 32 | |
| num_samples: 10000 | |
| generate: | |
| batch_size: 32 | |
| num_samples: 50 | |
| segment_duration: 10 | |
| # solver hyperparameters | |
| evaluate: | |
| every: 25 | |
| num_workers: 5 | |
| metrics: | |
| visqol: false | |
| sisnr: true | |
| generate: | |
| every: 25 | |
| num_workers: 5 | |
| audio: | |
| sample_rate: ${sample_rate} | |
| # checkpointing schedule | |
| checkpoint: | |
| save_last: true | |
| save_every: 25 | |
| keep_last: 10 | |
| keep_every_states: null | |
| # optimization hyperparameters | |
| optim: | |
| epochs: 200 | |
| updates_per_epoch: 2000 | |
| lr: 3e-4 | |
| max_norm: 0. | |
| optimizer: adam | |
| adam: | |
| betas: [0.5, 0.9] | |
| weight_decay: 0. | |
| ema: | |
| use: true # whether to use EMA or not | |
| updates: 1 # update at every step | |
| device: ${device} # device for EMA; can be put on GPU if updates are more frequent | |
| decay: 0.99 # EMA decay value; if null, no EMA is used | |