{ "hidden_size": 2048, "intermediate_size": 8192, "hidden_act": "silu", "n_layer": 16, "mla_layers": [ 0, 5, 10, 14 ], "rms_norm_eps": 1e-05, "num_attention_heads": 32, "num_key_value_heads": 8, "kv_lora_rank": 128, "q_lora_rank": 1344, "use_lora_layer_norm": false, "use_full_kv_head": false, "qk_rope_head_dim": 32, "v_head_dim": 64, "qk_nope_head_dim": 32, "qkv_rank_divisor": 8, "max_position_embeddings": 131072, "rope_theta": 500000.0, "rope_scaling": { "factor": 32.0, "original_max_position_embeddings": 8192, "rope_type": "yarn" }, "attention_bias": false, "attention_dropout": 0.0, "d_model": 2048, "ssm_cfg": { "expand": 1, "ngroups": 32, "d_state": 64, "repeat_kv_before_conv": false }, "d_inner": 2048, "d_xb": 512 }