# Modular definition of GLM: each class inherits from an existing model
# implementation and overrides only what differs.
from torch import nn

from transformers.models.llama.modeling_llama import LlamaAttention, LlamaForCausalLM
from transformers.models.phi3.modeling_phi3 import Phi3MLP


class GlmMLP(Phi3MLP):
    # GLM's MLP is identical to Phi-3's gated MLP, so nothing is overridden.
    pass


class GlmAttention(LlamaAttention):
    def __init__(self, config, layer_idx=None):
        super().__init__(config, layer_idx)
        # Override only the output projection to drop its bias; the QKV
        # projections keep whatever bias `config.attention_bias` sets.
        self.o_proj = nn.Linear(
            config.num_attention_heads * self.head_dim, config.hidden_size, bias=False
        )


class GlmForCausalLM(LlamaForCausalLM):
    # The causal-LM head and generation logic are reused from Llama unchanged.
    pass
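
A minimal usage sketch of the attention override is below. It assumes a `GlmConfig` is importable from `transformers` (recent versions ship one); the tiny hidden size and head count are illustrative values, not GLM's real hyperparameters.

if __name__ == "__main__":
    from transformers import GlmConfig  # assumed available in your transformers version

    # Tiny illustrative config; real GLM checkpoints use much larger values.
    config = GlmConfig(hidden_size=64, num_attention_heads=4, head_dim=16)
    attn = GlmAttention(config, layer_idx=0)

    # The overridden projection maps the concatenated heads (4 * 16 = 64)
    # back to hidden_size, with no bias term.
    print(attn.o_proj)       # Linear(in_features=64, out_features=64, bias=False)
    print(attn.o_proj.bias)  # None

Because the subclass only swaps out `o_proj` after calling `super().__init__`, everything else (QKV projections, rotary embedding handling, dropout) is inherited from `LlamaAttention` as-is.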