[martin-dev] fix gpu device

demo/launch_gradio.py  (+15 −12)
@@ -22,16 +22,6 @@ models_cache: Dict[str, Any] = {}
 current_model_selection: Optional[ModelSelection] = None
 
 
-def example1():
-    return "What is in this image? Describe in one word.", None, None
-
-def example2():
-    return "Describe the main object in the picture in one word.", None, None
-
-def example3():
-    return "What color is the dominant object? Describe in one word.", None, None
-
-
 def read_layer_spec(spec_file_path: str) -> List[str]:
     """Read available layers from the model spec file.
 
@@ -171,8 +161,12 @@ def get_single_image_probabilities(
         Tuple containing list of top tokens and their probabilities.
     """
     # Generate prompt and process inputs
+    vlm.model.eval()
     text = vlm._generate_prompt(instruction, has_images=True)
     inputs = vlm._generate_processor_output(text, image)
+    for key in inputs:
+        if isinstance(inputs[key], torch.Tensor):
+            inputs[key] = inputs[key].to(vlm.config.device)
 
     with torch.no_grad():
         outputs = vlm.model.generate(
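The added lines are the actual device fix: `_generate_processor_output` evidently returns CPU tensors, so every tensor in the mapping is moved onto the model's device before `generate` runs, and `eval()` additionally disables dropout and other train-time behavior. A minimal standalone sketch of the same pattern; the helper name `move_to_device` is mine, not the repo's:

import torch

def move_to_device(inputs: dict, device) -> dict:
    # Move tensor values to the target device; pass non-tensor entries through.
    return {
        key: value.to(device) if isinstance(value, torch.Tensor) else value
        for key, value in inputs.items()
    }

# usage, mirroring the diff: inputs = move_to_device(inputs, vlm.config.device)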
@@ -366,9 +360,13 @@ def get_module_similarity_pooled(
         raise ValueError(f"Module '{module_name}' not found in model")
 
     try:
+        vlm.model.eval()
         # Extract embedding for image1
         text = vlm._generate_prompt(instruction, has_images=True)
         inputs1 = vlm._generate_processor_output(text, image1)
+        for key in inputs1:
+            if isinstance(inputs1[key], torch.Tensor):
+                inputs1[key] = inputs1[key].to(vlm.config.device)
 
         embeddings.clear()
         with torch.no_grad():
@@ -381,6 +379,9 @@ def get_module_similarity_pooled(
 
         # Extract embedding for image2
         inputs2 = vlm._generate_processor_output(text, image2)
+        for key in inputs2:
+            if isinstance(inputs2[key], torch.Tensor):
+                inputs2[key] = inputs2[key].to(vlm.config.device)
 
         embeddings.clear()
         with torch.no_grad():
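For context on the unchanged lines around these hunks: `embeddings.clear()` and `hook_handle.remove()` suggest `get_module_similarity_pooled` captures intermediate activations with a PyTorch forward hook and pools them. A self-contained sketch of that capture/pool/remove pattern; the model and shapes here are illustrative only:

import torch
import torch.nn as nn

embeddings = []

def capture(module, inputs, output):
    # Detach so stored activations do not keep the autograd graph alive.
    embeddings.append(output.detach())

model = nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2))
hook_handle = model[0].register_forward_hook(capture)

embeddings.clear()
with torch.no_grad():
    model(torch.randn(3, 8))

pooled = embeddings[0].mean(dim=0)  # mean-pool the captured activation
hook_handle.remove()                # detach the hook once done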
@@ -441,7 +442,7 @@ def get_module_similarity_pooled(
     hook_handle.remove()
 
 
-@GPU(duration=
+@GPU(duration=120)
 def process_dual_inputs(
     model_choice: str,
     selected_layer: str,
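`GPU` here is presumably the ZeroGPU decorator from Hugging Face's `spaces` package (the import is outside this diff), which allocates a GPU to the decorated handler for up to `duration` seconds per call. Under that assumption, the wiring looks like:

from spaces import GPU  # assumed import; not shown in this diff

@GPU(duration=120)  # hold the allocated GPU for up to 120 s per call
def process_dual_inputs(*args, **kwargs):
    ...  # CUDA is available inside the allocated window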
@@ -638,7 +639,9 @@ def create_demo() -> gr.Blocks:
     # Add examples
     gr.Examples(
         examples=[
-            [
+            ['What is in this image? Describe in one word.', None, None],
+            ['Describe the main object in the picture in one word.', None, None],
+            ['What color is the dominant object? Describe in one word.', None, None],
         ],
         inputs=[instruction_input, image1_input, image2_input]
     )
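Inlining the rows replaces the `example1`–`example3` helpers deleted in the first hunk: `gr.Examples` takes one list per example, matched positionally to `inputs`, so each row fills the instruction textbox and leaves both image inputs empty. A minimal runnable sketch of the same wiring; the component labels are illustrative:

import gradio as gr

with gr.Blocks() as demo:
    instruction_input = gr.Textbox(label="Instruction")
    image1_input = gr.Image(label="Image 1")
    image2_input = gr.Image(label="Image 2")
    gr.Examples(
        examples=[["What is in this image? Describe in one word.", None, None]],
        inputs=[instruction_input, image1_input, image2_input],
    )

demo.launch()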