Spaces: Running on Zero
Remove autocast
llama_diffusion_model.py (+8 -8)
@@ -137,14 +137,14 @@ class CustomTransformerModel(PreTrainedModel):
         attention_mask = attention_mask.to(dtype=torch.float32)  # required for SDPA and Flash attention


-        with autocast("cuda", dtype=torch.float16):
-            outputs = self.llama(
-                input_ids,
-                attention_mask=attention_mask,
-                output_hidden_states=True,
-                use_cache=False,
-                **kwargs
-            )
+        # with autocast("cuda", dtype=torch.float16):
+        outputs = self.llama(
+            input_ids,
+            attention_mask=attention_mask,
+            output_hidden_states=True,
+            use_cache=False,
+            **kwargs
+        )

         logits = outputs.logits[:, :, :self.config.vocab_size].view(batch_size, seq_len, self.config.vocab_size)

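Note: commenting out the autocast wrapper means the forward pass now runs in the model's native weight dtype instead of float16 mixed precision. A minimal sketch of the behavioral difference, using a stand-in nn.Linear rather than this repo's CustomTransformerModel (the layer and tensor names below are illustrative only, not from the repo):

import torch
import torch.nn as nn

# Hypothetical stand-in for the model's forward pass.
layer = nn.Linear(16, 16).cuda()
x = torch.randn(2, 16, device="cuda")

# Before this commit: under autocast, eligible ops (e.g. the matmul inside
# Linear) compute in float16 even though the weights stay float32.
with torch.autocast("cuda", dtype=torch.float16):
    y_amp = layer(x)
print(y_amp.dtype)    # torch.float16

# After this commit: no autocast context, so the same call runs entirely
# in the weights' dtype.
y_plain = layer(x)
print(y_plain.dtype)  # torch.float32

Since the autocast line is only commented out rather than deleted, restoring mixed precision later just means uncommenting the with-block and re-indenting the call.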