Spaces: Running on Zero
Remove autocast
llama_diffusion_model.py (+8 -8)
@@ -137,14 +137,14 @@ class CustomTransformerModel(PreTrainedModel):
         attention_mask = attention_mask.to(dtype=torch.float32)  # required for SDPA and Flash attention


-        with autocast("cuda", dtype=torch.float16):
-            outputs = self.llama(
-                input_ids,
-                attention_mask=attention_mask,
-                output_hidden_states=True,
-                use_cache=False,
-                **kwargs
-            )
+        # with autocast("cuda", dtype=torch.float16):
+        outputs = self.llama(
+            input_ids,
+            attention_mask=attention_mask,
+            output_hidden_states=True,
+            use_cache=False,
+            **kwargs
+        )

         logits = outputs.logits[:, :, :self.config.vocab_size].view(batch_size, seq_len, self.config.vocab_size)

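Note: commenting out the autocast wrapper means the forward pass now runs in the model's native weight dtype instead of float16 mixed precision. A minimal sketch of the behavioral difference, using a stand-in nn.Linear rather than this repo's CustomTransformerModel (the layer and tensor names below are illustrative only, not from the repo):

import torch
import torch.nn as nn

# Hypothetical stand-in for the model's forward pass.
layer = nn.Linear(16, 16).cuda()
x = torch.randn(2, 16, device="cuda")

# Before this commit: under autocast, eligible ops (e.g. the matmul inside
# Linear) compute in float16 even though the weights stay float32.
with torch.autocast("cuda", dtype=torch.float16):
    y_amp = layer(x)
print(y_amp.dtype)    # torch.float16

# After this commit: no autocast context, so the same call runs entirely
# in the weights' dtype.
y_plain = layer(x)
print(y_plain.dtype)  # torch.float32

Since the autocast line is only commented out rather than deleted, restoring mixed precision later just means uncommenting the with-block and re-indenting the call.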