Changed back to bidirectional attention
llama_diffusion_model.py +1 -1
llama_diffusion_model.py
CHANGED
@@ -192,7 +192,7 @@ class CustomTransformerModel(PreTrainedModel):
         self.llama.resize_token_embeddings(config.vocab_size)

         for i, layer in enumerate(self.llama.model.layers):
-            layer.self_attn = BidirectionalLlamaAttention(layer.self_attn, masking='
+            layer.self_attn = BidirectionalLlamaAttention(layer.self_attn, masking='bidirectional')

         # Freeze Llama to retain pre-trained knowledge
         for param in self.llama.parameters():
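
The BidirectionalLlamaAttention class itself is not part of this diff, so the sketch below is an assumption about how such a wrapper might work, not the repo's actual implementation. It reuses the wrapped attention layer's pre-trained q/k/v/o projections (attribute names q_proj, k_proj, v_proj, o_proj follow standard transformers LlamaAttention conventions) and toggles between a causal and a full bidirectional mask via the masking argument; rotary position embeddings and KV caching are omitted for brevity. The class name and forward signature are hypothetical.

import torch
import torch.nn as nn
import torch.nn.functional as F


class BidirectionalAttentionSketch(nn.Module):
    """Hypothetical wrapper: reuses the wrapped module's pre-trained
    projections and applies either a causal or a bidirectional mask."""

    def __init__(self, old_attn: nn.Module, num_heads: int,
                 masking: str = "bidirectional"):
        super().__init__()
        # Reuse the pre-trained projections so no weights are lost.
        self.q_proj = old_attn.q_proj
        self.k_proj = old_attn.k_proj
        self.v_proj = old_attn.v_proj
        self.o_proj = old_attn.o_proj
        self.num_heads = num_heads
        self.masking = masking

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        bsz, seq_len, dim = hidden_states.shape
        head_dim = dim // self.num_heads

        def shape(x):  # (bsz, seq, dim) -> (bsz, heads, seq, head_dim)
            return x.view(bsz, seq_len, self.num_heads, head_dim).transpose(1, 2)

        q = shape(self.q_proj(hidden_states))
        k = shape(self.k_proj(hidden_states))
        v = shape(self.v_proj(hidden_states))

        # The only difference between the two modes is the mask:
        # is_causal=True blocks attention to future tokens; with
        # is_causal=False every token attends to the full sequence.
        out = F.scaled_dot_product_attention(
            q, k, v, is_causal=(self.masking != "bidirectional")
        )
        out = out.transpose(1, 2).reshape(bsz, seq_len, dim)
        return self.o_proj(out)


# Hypothetical usage mirroring the loop in the diff:
# for i, layer in enumerate(model.layers):
#     layer.self_attn = BidirectionalAttentionSketch(
#         layer.self_attn, num_heads=32, masking='bidirectional')

Bidirectional masking is a natural fit for a diffusion-style language model: the denoiser predicts all positions jointly rather than left-to-right, so no position needs to be hidden from any other, which is presumably why this commit switches the masking argument back to 'bidirectional'.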