Update README.md
README.md

````diff
@@ -44,9 +44,11 @@ pip install -r requirements.txt
 
 Then you can enter the directory to run the following command.
 ```python
-from transformers import MllamaForConditionalGeneration, AutoProcessor
 import torch
+import requests
+
 from PIL import Image
+from transformers import MllamaForConditionalGeneration, AutoProcessor
 
 # Pooling and Normalization
 def last_pooling(last_hidden_state, attention_mask, normalize=True):
@@ -70,8 +72,8 @@ model = MllamaForConditionalGeneration.from_pretrained(
 model.eval()
 
 # Image + Text -> Text
-
-
+image = Image.open(requests.get('https://github.com/haon-chen/mmE5/blob/main/figures/example.jpg?raw=true', stream=True).raw)
+inputs = processor(text='<|image|><|begin_of_text|> Represent the given image with the following question: What is in the image', images=[image], return_tensors="pt").to("cuda")
 qry_output = last_pooling(model(**inputs, return_dict=True, output_hidden_states=True).hidden_states[-1], inputs['attention_mask'])
 
 string = 'A cat and a dog'
````
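The diff shows only the signature of `last_pooling`. As a reference, here is a minimal sketch of what a helper with this name and signature typically does, assuming standard last-token pooling with optional L2 normalization; the actual body is not part of this diff:

```python
import torch
import torch.nn.functional as F

def last_pooling(last_hidden_state, attention_mask, normalize=True):
    # Position of the last non-padded token in each sequence (assumes right padding).
    sequence_lengths = attention_mask.sum(dim=1) - 1
    batch_indices = torch.arange(last_hidden_state.shape[0], device=last_hidden_state.device)
    # Use that token's hidden state as the sequence embedding.
    reps = last_hidden_state[batch_indices, sequence_lengths]
    if normalize:
        # L2-normalize so that dot products become cosine similarities.
        reps = F.normalize(reps, p=2, dim=-1)
    return reps
```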
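The second hunk's context line shows the model being created with `MllamaForConditionalGeneration.from_pretrained(` before `model.eval()`. A sketch of that setup under stated assumptions; the checkpoint identifier and `torch_dtype` below are placeholders, not values taken from the README:

```python
import torch
from transformers import MllamaForConditionalGeneration, AutoProcessor

# Placeholder checkpoint id; substitute the checkpoint named in the README.
CHECKPOINT = "path-or-hub-id-of-the-mmE5-checkpoint"

processor = AutoProcessor.from_pretrained(CHECKPOINT)
model = MllamaForConditionalGeneration.from_pretrained(
    CHECKPOINT,
    torch_dtype=torch.bfloat16,  # assumed; the README's actual arguments are not visible in this hunk
).to("cuda")
model.eval()
```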
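The snippet ends at `string = 'A cat and a dog'`. A hypothetical continuation, showing how the pooled query embedding might be scored against that text candidate; the text-only call to `processor` and the dot-product scoring are assumptions, not lines from the diff:

```python
# Continuation of the snippet above: embed the candidate text with the same
# model and pooling, then score it against the query embedding.
string = 'A cat and a dog'
tgt_inputs = processor(text=string, return_tensors="pt").to("cuda")
tgt_output = last_pooling(
    model(**tgt_inputs, return_dict=True, output_hidden_states=True).hidden_states[-1],
    tgt_inputs['attention_mask'],
)

# Both embeddings are L2-normalized, so the dot product equals cosine similarity.
score = (qry_output * tgt_output).sum(dim=-1)
print(string, '=', score.item())
```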