LFM2-Audio-1.5B / config.json
{
  "architectures": [
    "Lfm2AudioForConditionalGeneration"
  ],
  "codebooks": 8,
  "tie_audio_embeddings": false,
  "semantic_codebook_factor": 100,
  "codebook_weight": "log",
  "interleaved_n_text": 6,
  "interleaved_n_audio": 12,
  "preprocessor": {
    "sample_rate": 16000,
    "normalize": "per_feature",
    "window_size": 0.025,
    "window_stride": 0.01,
    "window": "hann",
    "features": 128,
    "n_fft": 512,
    "log": true,
    "frame_splicing": 1,
    "dither": 1.0e-05,
    "pad_to": 0,
    "pad_value": 0.0
  },
  "encoder": {
    "feat_in": 128,
    "feat_out": -1,
    "n_layers": 17,
    "d_model": 512,
    "subsampling": "dw_striding",
    "subsampling_factor": 8,
    "subsampling_conv_channels": 256,
    "causal_downsampling": false,
    "reduction": null,
    "reduction_position": null,
    "reduction_factor": 1,
    "ff_expansion_factor": 4,
    "self_attention_model": "rel_pos",
    "n_heads": 8,
    "att_context_size": [
      -1,
      -1
    ],
    "xscaling": false,
    "untie_biases": true,
    "pos_emb_max_len": 5000,
    "conv_kernel_size": 9,
    "conv_norm_type": "batch_norm",
    "conv_context_size": null,
    "dropout": 0.1,
    "dropout_pre_encoder": 0.1,
    "dropout_emb": 0,
    "dropout_att": 0.1
  },
  "lfm": {
    "_name_or_path": "LiquidAI/LFM2-1.2B",
    "architectures": [
      "Lfm2ForCausalLM"
    ],
    "block_auto_adjust_ff_dim": true,
    "block_dim": 2048,
    "block_ff_dim": 12288,
    "block_ffn_dim_multiplier": 1,
    "block_mlp_init_scale": 1,
    "block_multiple_of": 256,
    "block_norm_eps": 1e-05,
    "block_out_init_scale": 1,
    "block_use_swiglu": true,
    "block_use_xavier_init": true,
    "conv_L_cache": 3,
    "conv_bias": false,
    "conv_dim": 2048,
    "conv_dim_out": 2048,
    "conv_use_xavier_init": true,
    "eos_token_id": 7,
    "hidden_size": 2048,
    "initializer_range": 0.02,
    "intermediate_size": 12288,
    "layer_types": [
      "conv",
      "conv",
      "full_attention",
      "conv",
      "conv",
      "full_attention",
      "conv",
      "conv",
      "full_attention",
      "conv",
      "full_attention",
      "conv",
      "full_attention",
      "conv",
      "full_attention",
      "conv"
    ],
    "max_position_embeddings": 128000,
    "model_type": "lfm2",
    "norm_eps": 1e-05,
    "num_attention_heads": 32,
    "num_heads": 32,
    "num_hidden_layers": 16,
    "num_key_value_heads": 8,
    "rope_theta": 1000000,
    "torch_dtype": "bfloat16",
    "use_cache": true,
    "use_pos_enc": true,
    "vocab_size": 65536
  },
  "depthformer": {
    "layers": 6,
    "dim": 1024,
    "tie": true
  }
}
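
For orientation, the minimal Python sketch below (not part of the repository) loads this file with the standard json module and prints a few quantities implied by the values above: a window_stride of 0.01 s gives 100 mel frames per second, the encoder's subsampling_factor of 8 reduces that to 12.5 frames per second at the language-model input, and generation interleaves text and audio tokens in blocks of interleaved_n_text to interleaved_n_audio. The file path and variable names are illustrative assumptions, not an official API.

# Sketch only: reads the config above and derives rates from its fields.
# Assumes config.json has been downloaded to the working directory.
import json

with open("config.json") as f:
    cfg = json.load(f)

pre = cfg["preprocessor"]
enc = cfg["encoder"]

# One 128-dim mel feature vector is produced every window_stride seconds.
mel_frames_per_sec = 1.0 / pre["window_stride"]                        # 1 / 0.01 = 100 frames/s

# The audio encoder downsamples by subsampling_factor before the LFM backbone.
enc_frames_per_sec = mel_frames_per_sec / enc["subsampling_factor"]    # 100 / 8 = 12.5 frames/s

print(f"mel frames/s:     {mel_frames_per_sec}")
print(f"encoder frames/s: {enc_frames_per_sec}")
print(f"interleaving:     {cfg['interleaved_n_text']} text : {cfg['interleaved_n_audio']} audio tokens")
print(f"audio codebooks:  {cfg['codebooks']} parallel codebook streams per audio step")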