Commit 6ba6dce
Parent(s): bb834c6
support fused backbones and update MODEL_ID_TO_NAME
Changed files:
- interactive_demo.py (+7 -1)
- serve/__init__.py (+13 -13)
interactive_demo.py
CHANGED

@@ -152,7 +152,13 @@ class ModelWorker:
         # Assume `image_transform` is a HF ImageProcessor...
         pixel_values = self.image_processor(images[0].convert("RGB"), return_tensors="pt")["pixel_values"][0]
 
-        generated_text = self.vlm.generate_answer(torch.unsqueeze(pixel_values.cuda(), 0), question_prompt)[0]
+        if type(pixel_values) is dict:
+            for k in pixel_values.keys():
+                pixel_values[k] = torch.unsqueeze(pixel_values[k].cuda(), 0)
+        else:
+            pixel_values = torch.unsqueeze(pixel_values.cuda(), 0)
+
+        generated_text = self.vlm.generate_answer(pixel_values, question_prompt)[0]
         generated_text = generated_text.split("USER")[0].split("ASSISTANT")[0]
         yield json.dumps({"text": ori_prompt + generated_text, "error_code": 0}).encode() + b"\0"
 
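With a fused vision backbone (e.g., the DINO + SigLIP pairing implied by the `dinosiglip` model IDs below), the image transform returns one pixel tensor per constituent encoder, so `pixel_values` arrives as a dict of tensors rather than a single tensor; the new branch batches each entry and moves it to the GPU before calling `generate_answer`. A minimal sketch of the same dispatch, runnable on CPU; the `dinov2`/`siglip` keys and the 384px shapes are illustrative assumptions, not values taken from the repo:

import torch

def batch_and_move(pixel_values, device="cpu"):
    # Fused backbones yield one tensor per vision encoder, keyed by name;
    # single backbones yield a bare tensor. Handle both uniformly.
    if isinstance(pixel_values, dict):
        return {k: v.to(device).unsqueeze(0) for k, v in pixel_values.items()}
    return pixel_values.to(device).unsqueeze(0)

# Illustrative inputs only (keys and shapes are assumptions).
fused = {"dinov2": torch.randn(3, 384, 384), "siglip": torch.randn(3, 384, 384)}
single = torch.randn(3, 384, 384)

print({k: tuple(v.shape) for k, v in batch_and_move(fused).items()})  # each entry -> (1, 3, 384, 384)
print(tuple(batch_and_move(single).shape))                            # (1, 3, 384, 384)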
serve/__init__.py
CHANGED

@@ -5,31 +5,31 @@ from collections import OrderedDict
 MODEL_ID_TO_NAME = OrderedDict(
     [
         (
-            "llava-lvis4v-lrv+lvis4v-lrv-resize-naive-…",
-            "…",
+            "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align+13b+stage-finetune+x7",
+            "PrismaticVLM 13B - Chat",
         ),
         (
-            "llava-lvis4v-lrv+lvis4v-lrv-resize-naive-…",
-            "…",
+            "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align+7b+stage-finetune+x7",
+            "PrismaticVLM 7B - Chat",
         ),
         (
-            "resize-naive-…",
-            "…",
+            "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+13b+stage-finetune+x7",
+            "PrismaticVLM 13B",
         ),
         (
-            "resize-naive-…",
-            "…",
+            "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+7b+stage-finetune+x7",
+            "PrismaticVLM 7B",
         ),
         (
-            "resize-naive-…",
-            "…",
+            "redux-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+13b+stage-finetune+x7",
+            "PrismaticVLM 13B (Controlled)",
         ),
         (
-            "resize-naive-…",
-            "…",
+            "redux-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+7b+stage-finetune+x7",
+            "PrismaticVLM 7B (Controlled)",
         ),
-        ("llava-v1.5-7b", "LLaVA 1.5: 7B"),
         ("llava-v1.5-13b", "LLaVA 1.5: 13B"),
+        ("llava-v1.5-7b", "LLaVA 1.5: 7B"),
     ]
 )
 
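MODEL_ID_TO_NAME is an OrderedDict, so the demo can iterate it to populate a model picker in a fixed order while translating run IDs into human-readable labels. A hedged sketch of how such a mapping might be consumed; the `display_name` helper is hypothetical, not part of serve/__init__.py:

from collections import OrderedDict

# Mirrors the structure of serve.MODEL_ID_TO_NAME (two entries shown for brevity).
MODEL_ID_TO_NAME = OrderedDict(
    [
        (
            "llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align+13b+stage-finetune+x7",
            "PrismaticVLM 13B - Chat",
        ),
        ("llava-v1.5-13b", "LLaVA 1.5: 13B"),
    ]
)

def display_name(model_id: str) -> str:
    # Hypothetical helper: fall back to the raw run ID for unlisted checkpoints.
    return MODEL_ID_TO_NAME.get(model_id, model_id)

for mid in MODEL_ID_TO_NAME:  # iteration preserves insertion order
    print(f"{display_name(mid)} -> {mid}")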
|