# Spaces: Runtime error
# @package __global__

# Conditioning configuration that attaches a CLAP-based `description`
# conditioner and routes it through the fuser's `cross` list.
# NOTE(review): the nesting below was reconstructed from a flattened listing
# (indentation had been stripped) -- confirm against the consuming code before
# relying on the exact hierarchy.

# Classifier-free guidance settings.
# presumably `training_dropout` is applied at train time and `inference_coef`
# at generation time -- TODO confirm against the consumer of these keys.
classifier_free_guidance:
  training_dropout: 0.3
  inference_coef: 3.0

# Per-attribute dropout; both groups are empty mappings here.
attribute_dropout:
  text: {}
  wav: {}

# Fuser settings: each list names the condition attributes handled by that
# fusing mode. Only `description` is listed, under `cross`.
fuser:
  cross_attention_pos_emb: false
  cross_attention_pos_emb_scale: 1
  sum: []
  prepend: []
  cross: [description]
  input_interpolate: []

# Conditioner definitions, keyed by attribute name.
conditioners:
  description:
    model: clap
    # Settings for the `clap` model selected above.
    clap:
      checkpoint: //reference/clap/music_audioset_epoch_15_esc_90.14.pt
      model_arch: 'HTSAT-base'
      enable_fusion: false
      sample_rate: 48000
      max_audio_length: 10
      audio_stride: 1
      dim: 512
      attribute: description
      normalize: true
      quantize: true  # use RVQ quantization
      # presumably `n_q`, `bins` and `kmeans_iters` parameterize the RVQ
      # quantizer enabled by `quantize` -- TODO confirm.
      n_q: 12
      bins: 1024
      kmeans_iters: 50
      text_p: 0.  # probability of using text embed at train time
      cache_path: null

# Dataset-side settings for the `description` attribute.
dataset:
  joint_embed_attributes: [description]
  train:
    # NOTE(review): the three `*_p` values look like probabilities for
    # train-time metadata augmentation -- confirm semantics with the loader.
    merge_text_p: 0.25
    drop_desc_p: 0.5
    drop_other_p: 0.5