Upload folder using huggingface_hub
- generation_config.json +6 -1
- modeling_intern_vit.py +1 -1
generation_config.json
CHANGED
@@ -1,4 +1,9 @@
 {
   "_from_model_config": true,
-  "transformers_version": "4.37.2"
+  "transformers_version": "4.37.2",
+  "eos_token_id": [
+    128001,
+    128002,
+    128003
+  ]
 }
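The added `eos_token_id` list registers three stop tokens, so `generate()` halts as soon as any one of them is produced. A minimal sketch of how the updated file is picked up, assuming the checkpoint has been downloaded locally (the `./checkpoint` path is a placeholder):

from transformers import GenerationConfig

# Load the generation config from a local checkpoint directory (placeholder path).
gen_config = GenerationConfig.from_pretrained("./checkpoint")
print(gen_config.eos_token_id)   # expected: [128001, 128002, 128003]

# A list-valued eos_token_id means generation stops on any of the listed ids, e.g.:
# outputs = model.generate(**inputs, generation_config=gen_config)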
modeling_intern_vit.py
CHANGED
@@ -287,7 +287,7 @@ class InternVisionEncoderLayer(nn.Module):
         Args:
             hidden_states (`Tuple[torch.FloatTensor, Optional[torch.FloatTensor]]`): input to the layer of shape `(batch, seq_len, embed_dim)`
         """
-        hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states)) * self.ls1)
+        hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states).to(hidden_states.dtype)) * self.ls1)
 
         hidden_states = hidden_states + self.drop_path2(self.mlp(self.norm2(hidden_states)) * self.ls2)
 
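The one-line change casts the output of `norm1` back to the dtype of `hidden_states` before it reaches the attention block, avoiding a mismatch when the norm computes in full precision while the attention weights are in half precision. An illustrative sketch of that situation, using a hypothetical float32 norm wrapper and arbitrary shapes rather than the actual InternViT modules:

import torch
import torch.nn as nn

# Toy stand-in for a norm layer that computes (and returns) in float32,
# as can happen when norm parameters are kept in full precision.
class Fp32LayerNorm(nn.LayerNorm):
    def forward(self, x):
        return super().forward(x.float())

hidden_states = torch.randn(1, 4, 8, dtype=torch.bfloat16)
norm1 = Fp32LayerNorm(8)                             # float32 norm
attn_proj = nn.Linear(8, 8, dtype=torch.bfloat16)    # attention weights in bf16

normed = norm1(hidden_states)                        # float32 output
# attn_proj(normed)                                  # would fail: float32 input vs bf16 weights
out = attn_proj(normed.to(hidden_states.dtype))      # cast back, mirroring the patch
print(normed.dtype, out.dtype)                       # torch.float32 torch.bfloat16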