Visual-Reasoning-Agent

Paused

App Files Community

Dhruv-Ty committed on Apr 19

Commit

0ffa584

1 Parent(s): 4000a69

gpu to cpu

Browse files

Files changed (5) hide show

medrax/tools/classification.py +8 -3
medrax/tools/generation.py +5 -2
medrax/tools/grounding.py +5 -3
medrax/tools/llava_med.py +14 -5
medrax/tools/report_generation.py +4 -4

medrax/tools/classification.py CHANGED Viewed

@@ -47,14 +47,19 @@ class ChestXRayClassifierTool(BaseTool):
     )
     args_schema: Type[BaseModel] = ChestXRayInput
     model: xrv.models.DenseNet = None
-    device: Optional[str] = "cuda"
     transform: torchvision.transforms.Compose = None
-    def __init__(self, model_name: str = "densenet121-res224-all", device: Optional[str] = "cuda"):
         super().__init__()
         self.model = xrv.models.DenseNet(weights=model_name)
         self.model.eval()
-        self.device = torch.device(device) if device else "cuda"
         self.model = self.model.to(self.device)
         self.transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop()])

     )
     args_schema: Type[BaseModel] = ChestXRayInput
     model: xrv.models.DenseNet = None
+    device: Optional[torch.device] = torch.device("cpu")  # Default to CPU
     transform: torchvision.transforms.Compose = None
+    def __init__(self, model_name: str = "densenet121-res224-all", device: Optional[str] = None):
         super().__init__()
+        # If device is not specified, use CUDA if available, else fallback to CPU
+        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
         self.model = xrv.models.DenseNet(weights=model_name)
         self.model.eval()
+        # Assign device based on the passed or auto-detected option
+        self.device = torch.device(device)
         self.model = self.model.to(self.device)
         self.transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop()])

medrax/tools/generation.py CHANGED Viewed

@@ -61,7 +61,10 @@ class ChestXRayGeneratorTool(BaseTool):
         """Initialize the chest X-ray generator tool."""
         super().__init__()
-        self.device = torch.device(device) if device else "cuda"
         self.model = StableDiffusionPipeline.from_pretrained(model_path, cache_dir=cache_dir)
         self.model = self.model.to(torch.float32).to(self.device)
@@ -121,7 +124,7 @@ class ChestXRayGeneratorTool(BaseTool):
         except Exception as e:
             return (
-                {"error": str(e)},
                 {
                     "prompt": prompt,
                     "analysis_status": "failed",

         """Initialize the chest X-ray generator tool."""
         super().__init__()
+        # Automatically detect device (cuda if available, else cpu)
+        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = torch.device(device)
         self.model = StableDiffusionPipeline.from_pretrained(model_path, cache_dir=cache_dir)
         self.model = self.model.to(torch.float32).to(self.device)
         except Exception as e:
             return (
+                {"error": str(e)} ,
                 {
                     "prompt": prompt,
                     "analysis_status": "failed",

medrax/tools/grounding.py CHANGED Viewed

@@ -50,7 +50,7 @@ class XRayPhraseGroundingTool(BaseTool):
     model: Any = None
     processor: Any = None
-    device: str = "cuda"
     temp_dir: Path = None
     def __init__(
@@ -64,7 +64,10 @@ class XRayPhraseGroundingTool(BaseTool):
     ):
         """Initialize the XRay Phrase Grounding Tool."""
         super().__init__()
-        self.device = torch.device(device) if device else "cuda"
         # Setup quantization config
         if load_in_4bit:
@@ -93,7 +96,6 @@ class XRayPhraseGroundingTool(BaseTool):
             model_path, cache_dir=cache_dir, trust_remote_code=True
         )
         self.model = self.model.eval()
         self.temp_dir = Path(temp_dir if temp_dir else tempfile.mkdtemp())

     model: Any = None
     processor: Any = None
+    device: torch.device = None
     temp_dir: Path = None
     def __init__(
     ):
         """Initialize the XRay Phrase Grounding Tool."""
         super().__init__()
+        # Automatically detect device (cuda if available, else cpu)
+        device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = torch.device(device)
         # Setup quantization config
         if load_in_4bit:
             model_path, cache_dir=cache_dir, trust_remote_code=True
         )
         self.model = self.model.eval()
         self.temp_dir = Path(temp_dir if temp_dir else tempfile.mkdtemp())

medrax/tools/llava_med.py CHANGED Viewed

@@ -11,7 +11,6 @@ from langchain_core.tools import BaseTool
 from PIL import Image
 from medrax.llava.conversation import conv_templates
 from medrax.llava.model.builder import load_pretrained_model
 from medrax.llava.mm_utils import tokenizer_image_token, process_images
@@ -65,6 +64,11 @@ class LlavaMedTool(BaseTool):
         **kwargs,
     ):
         super().__init__()
         self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
             model_path=model_path,
             model_base=None,
@@ -77,6 +81,9 @@ class LlavaMedTool(BaseTool):
             device=device,
             **kwargs,
         )
         self.model.eval()
     def _process_input(
@@ -101,14 +108,14 @@ class LlavaMedTool(BaseTool):
         input_ids = (
             tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
             .unsqueeze(0)
-            .cuda()
         )
         image_tensor = None
         if image_path:
             image = Image.open(image_path)
             image_tensor = process_images([image], self.image_processor, self.model.config)[0]
-            image_tensor = image_tensor.unsqueeze(0).half().cuda()
         return input_ids, image_tensor
@@ -133,8 +140,10 @@ class LlavaMedTool(BaseTool):
         """
         try:
             input_ids, image_tensor = self._process_input(question, image_path)
-            input_ids = input_ids.to(device=self.model.device)
-            image_tensor = image_tensor.to(device=self.model.device, dtype=self.model.dtype)
             with torch.inference_mode():
                 output_ids = self.model.generate(

 from PIL import Image
 from medrax.llava.conversation import conv_templates
 from medrax.llava.model.builder import load_pretrained_model
 from medrax.llava.mm_utils import tokenizer_image_token, process_images
         **kwargs,
     ):
         super().__init__()
+        # Set the device (cuda or cpu)
+        self.device = torch.device(device) if device else torch.device("cuda")
+        # Load the model and tokenizer
         self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(
             model_path=model_path,
             model_base=None,
             device=device,
             **kwargs,
         )
+        # Move the model to the desired device
+        self.model.to(self.device)
         self.model.eval()
     def _process_input(
         input_ids = (
             tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
             .unsqueeze(0)
+            .to(self.device)  # Move to the correct device
         )
         image_tensor = None
         if image_path:
             image = Image.open(image_path)
             image_tensor = process_images([image], self.image_processor, self.model.config)[0]
+            image_tensor = image_tensor.unsqueeze(0).to(self.device, dtype=self.model.dtype)  # Move to device
         return input_ids, image_tensor
         """
         try:
             input_ids, image_tensor = self._process_input(question, image_path)
+            # Ensure that inputs are on the same device as the model
+            input_ids = input_ids.to(self.device)
+            image_tensor = image_tensor.to(self.device, dtype=self.model.dtype)
             with torch.inference_mode():
                 output_ids = self.model.generate(

medrax/tools/report_generation.py CHANGED Viewed

@@ -47,7 +47,7 @@ class ChestXRayReportGeneratorTool(BaseTool):
         "to a chest X-ray image file. Output is a structured report with both detailed "
         "observations and key clinical conclusions."
     )
-    device: Optional[str] = "cuda"
     args_schema: Type[BaseModel] = ChestXRayInput
     findings_model: VisionEncoderDecoderModel = None
     impression_model: VisionEncoderDecoderModel = None
@@ -57,10 +57,10 @@ class ChestXRayReportGeneratorTool(BaseTool):
     impression_processor: ViTImageProcessor = None
     generation_args: Dict[str, Any] = None
-    def __init__(self, cache_dir: str = "/model-weights", device: Optional[str] = "cuda"):
         """Initialize the ChestXRayReportGeneratorTool with both findings and impression models."""
         super().__init__()
-        self.device = torch.device(device) if device else "cuda"
         # Initialize findings model
         self.findings_model = VisionEncoderDecoderModel.from_pretrained(
@@ -84,7 +84,7 @@ class ChestXRayReportGeneratorTool(BaseTool):
             "IAMJB/chexpert-mimic-cxr-impression-baseline", cache_dir=cache_dir
         )
-        # Move models to device
         self.findings_model = self.findings_model.to(self.device)
         self.impression_model = self.impression_model.to(self.device)

         "to a chest X-ray image file. Output is a structured report with both detailed "
         "observations and key clinical conclusions."
     )
+    device: Optional[str] = "cpu"  # Change the device to "cpu"
     args_schema: Type[BaseModel] = ChestXRayInput
     findings_model: VisionEncoderDecoderModel = None
     impression_model: VisionEncoderDecoderModel = None
     impression_processor: ViTImageProcessor = None
     generation_args: Dict[str, Any] = None
+    def __init__(self, cache_dir: str = "/model-weights", device: Optional[str] = "cpu"):
         """Initialize the ChestXRayReportGeneratorTool with both findings and impression models."""
         super().__init__()
+        self.device = torch.device(device) if device else torch.device("cpu")  # Ensure CPU is used
         # Initialize findings model
         self.findings_model = VisionEncoderDecoderModel.from_pretrained(
             "IAMJB/chexpert-mimic-cxr-impression-baseline", cache_dir=cache_dir
         )
+        # Move models to device (CPU)
         self.findings_model = self.findings_model.to(self.device)
         self.impression_model = self.impression_model.to(self.device)