update
app.py CHANGED

@@ -28,13 +28,15 @@ repo_id = "microsoft/OmniParser-v2.0" # HF repo
 local_dir = "weights" # Target local directory
 
 dtype = torch.bfloat16
+DEVICE = torch.device('cuda')
+
 som_generator = MarkHelper()
 magma_som_prompt = "<image>\nIn this view I need to click a button to \"{}\"? Provide the coordinates and the mark index of the containing bounding box if applicable."
 magma_qa_prompt = "<image>\n{} Answer the question briefly."
 magma_model_id = "microsoft/Magma-8B"
 magam_model = AutoModelForCausalLM.from_pretrained(magma_model_id, trust_remote_code=True, torch_dtype=dtype)
 magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_code=True)
-magam_model.to(
+magam_model.to(DEVICE)
 
 # Download the entire repository
 # snapshot_download(repo_id=repo_id, local_dir=local_dir)
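
Read together with the removal hunk below, this is an ordering fix: the previous revision defined `DEVICE` only around line 74, after `magam_model.to(` had already been written to reference it (the removed line is truncated in the diff view), which matches the Space's runtime-error status. The commit defines the device right after `dtype`, before the model is moved onto it. A minimal sketch of the corrected ordering, using the names from the diff; the CUDA-availability fallback is an added assumption, not in the original:

```python
import torch
from transformers import AutoModelForCausalLM, AutoProcessor

dtype = torch.bfloat16
# Assumption: fall back to CPU when CUDA is absent; the Space hardcodes 'cuda'.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

magma_model_id = "microsoft/Magma-8B"
magam_model = AutoModelForCausalLM.from_pretrained(
    magma_model_id, trust_remote_code=True, torch_dtype=dtype
)
magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_code=True)
magam_model.to(DEVICE)  # DEVICE is now defined before this call; previously it was not
```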

@@ -71,8 +73,6 @@ This demo is powered by [Gradio](https://gradio.app/) and uses [OmniParserv2](ht
 </div>
 """
 
-DEVICE = torch.device('cuda')
-
 @spaces.GPU
 @torch.inference_mode()
 def get_som_response(instruction, image_som):
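
This hunk is the counterpart of the move above: the late module-level `DEVICE = torch.device('cuda')` is deleted, so the name now has exactly one definition, at the top of the file and ahead of its first use in `magam_model.to(DEVICE)`.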

@@ -92,7 +92,7 @@ def get_som_response(instruction, image_som):
     inputs = magma_processor(images=[image_som], texts=prompt, return_tensors="pt")
     inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
     inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-    inputs = inputs.to(dtype)
+    inputs = inputs.to(dtype).to(DEVICE)
 
     magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
     with torch.inference_mode():
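
This is the second half of the fix: `magma_processor` returns CPU tensors, and `.to(dtype)` alone casts them to bfloat16 without moving them, so once the model lives on the GPU, `generate()` would fail with a CPU/CUDA device mismatch. A hedged sketch of the same cast-then-move pattern over a plain tensor dict, so it does not depend on the exact `.to()` semantics of the processor's return type; `prepare_inputs` is a hypothetical helper, not in the diff:

```python
import torch

dtype = torch.bfloat16
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def prepare_inputs(inputs: dict) -> dict:
    """Cast floating-point tensors to `dtype`, then move every tensor to `DEVICE`."""
    out = {}
    for name, value in inputs.items():
        if torch.is_tensor(value):
            if value.is_floating_point():
                value = value.to(dtype)   # e.g. pixel_values -> bfloat16
            value = value.to(DEVICE)      # input_ids stay integer; they only change device
        out[name] = value
    return out
```

Depending on the container, a single `.to(device, dtype)`-style call may fuse both steps; the chained `.to(dtype).to(DEVICE)` form in the diff is the more conservative choice.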

@@ -129,7 +129,7 @@ def get_qa_response(instruction, image):
     inputs = magma_processor(images=[image], texts=prompt, return_tensors="pt")
     inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
     inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
-    inputs = inputs.to(dtype)
+    inputs = inputs.to(dtype).to(DEVICE)
 
     magam_model.generation_config.pad_token_id = magma_processor.tokenizer.pad_token_id
     with torch.inference_mode():
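
The identical one-line change lands in `get_qa_response`: both paths feed the same model, so fixing only one of them would leave the other still crashing. A hypothetical guard, not in the diff, that would catch a missed call site before `generate()` runs:

```python
# Assumed sanity check: every tensor handed to generate() sits on the model's device.
model_device = next(magam_model.parameters()).device
for name, value in inputs.items():
    if torch.is_tensor(value):
        assert value.device == model_device, f"{name} on {value.device}, model on {model_device}"
```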