vaibhavm29 committed on
Commit
7d4bd7e
·
1 Parent(s): 917983f

included medgemma tool

Files changed (4)
  1. app.py +2 -0
  2. medrax/tools/__init__.py +1 -0
  3. medrax/tools/medgemma.py +170 -0
  4. pyproject.toml +1 -1
app.py CHANGED
@@ -54,6 +54,7 @@ def initialize_agent(
         "ChestXRaySegmentationTool": lambda: ChestXRaySegmentationTool(device=device),
         "LlavaMedTool": lambda: LlavaMedTool(cache_dir=model_dir, device=device, load_in_8bit=True),
         "XRayVQATool": lambda: XRayVQATool(cache_dir=model_dir, device=device),
+        "MedGemmaXRayTool": lambda: MedGemmaXRayTool(cache_dir=model_dir, device=device),
         "ChestXRayReportGeneratorTool": lambda: ChestXRayReportGeneratorTool(
             cache_dir=model_dir, device=device
         ),
@@ -107,6 +108,7 @@ if __name__ == "__main__":
         "XRayVQATool",
         "LlavaMedTool",
         "XRayPhraseGroundingTool",
+        "MedGemmaXRayTool",
         # "ChestXRayGeneratorTool",
     ]
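For context, a hedged sketch (helper names and values assumed, not taken from app.py) of how a name-keyed factory dict and an enabled-tool list like the ones above are typically resolved; this is why the registry key and the list entry must both be spelled "MedGemmaXRayTool":

# Hypothetical illustration only, not code from app.py: enabled names are looked up
# in the factory dict, so a spelling mismatch ("MedgemmaXRayTool" vs. "MedGemmaXRayTool")
# would raise a KeyError at startup.
from medrax.tools import MedGemmaXRayTool, XRayVQATool

model_dir, device = "/model-weights", "cuda"  # assumed values
tool_factories = {
    "XRayVQATool": lambda: XRayVQATool(cache_dir=model_dir, device=device),
    "MedGemmaXRayTool": lambda: MedGemmaXRayTool(cache_dir=model_dir, device=device),
}
enabled_tools = ["XRayVQATool", "MedGemmaXRayTool"]
tools = [tool_factories[name]() for name in enabled_tools]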
 
medrax/tools/__init__.py CHANGED
@@ -9,3 +9,4 @@ from .grounding import *
 from .generation import *
 from .dicom import *
 from .utils import *
+from .medgemma import *
medrax/tools/medgemma.py ADDED
@@ -0,0 +1,170 @@
+# medrax/tools/medgemma.py
+from typing import Any, Dict, Optional, Tuple, Type
+
+from pathlib import Path
+from pydantic import BaseModel, Field
+
+import torch
+from PIL import Image
+from transformers import (
+    AutoModelForImageTextToText,
+    AutoProcessor,
+)
+
+from langchain_core.tools import BaseTool
+from langchain_core.callbacks import (
+    CallbackManagerForToolRun,
+    AsyncCallbackManagerForToolRun,
+)
+
+
+class MedGemmaInput(BaseModel):
+    """Input schema for the MedGemma X-ray tool."""
+
+    image_path: str = Field(..., description="Path to a chest X-ray image")
+    prompt: str = Field(..., description="Question or instruction for the image")
+    max_new_tokens: int = Field(
+        300,
+        description="Maximum number of tokens to generate in the answer",
+    )
+
+
+class MedGemmaXRayTool(BaseTool):
+    """A tool that uses MedGemma to answer questions about chest X-ray images."""
+
+    name: str = "medgemma_xray_expert"
+    description: str = (
+        "The first tool the agent should use to answer questions about X-ray images. "
+        "It is specialized in multiple tasks, including visual question answering, "
+        "report generation, abnormality detection, anatomical localization, clinical interpretation, "
+        "comparative analysis, and identification and explanation of imaging signs. Input should be the path to an "
+        "X-ray image and a natural language prompt describing the task to be carried out."
+    )
+    args_schema: Type[BaseModel] = MedGemmaInput
+    return_direct: bool = True
+
+    # Model handles
+    model: Optional[AutoModelForImageTextToText] = None
+    processor: Optional[AutoProcessor] = None
+
+    # Config
+    model_name: str = "google/medgemma-4b-it"
+    device: str = "cuda" if torch.cuda.is_available() else "cpu"
+    dtype: torch.dtype = torch.bfloat16
+
+    def __init__(
+        self,
+        model_name: str = "google/medgemma-4b-it",
+        device: Optional[str] = None,
+        dtype: torch.dtype = torch.bfloat16,
+        cache_dir: Optional[str] = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(**kwargs)
+
+        self.model_name = model_name
+        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
+        self.dtype = dtype
+
+        # Load model & processor
+        self.model = AutoModelForImageTextToText.from_pretrained(
+            model_name,
+            device_map="auto",
+            torch_dtype=dtype,
+            trust_remote_code=True,
+            cache_dir=cache_dir,
+        )
+        self.processor = AutoProcessor.from_pretrained(
+            model_name, trust_remote_code=True, cache_dir=cache_dir
+        )
+        self.model.eval()
+
+    def _generate(
+        self,
+        image_path: str,
+        prompt: str,
+        max_new_tokens: int,
+    ) -> str:
+        """Run MedGemma and return the decoded answer."""
+        img = Image.open(image_path).convert("RGB")
+
+        # Build a chat-style conversation: system instruction plus the user's prompt and image
+        messages = [
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": "You are an expert radiologist. Provide a detailed response to the user's query."}],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": prompt},
+                    {"type": "image", "image": img},
+                ],
+            },
+        ]
+
+        # Tokenise with the chat template
+        inputs = self.processor.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+        ).to(self.model.device, dtype=self.dtype)
+
+        start_len = inputs["input_ids"].shape[-1]
+
+        # Generate, then decode only the newly generated tokens
+        with torch.inference_mode():
+            gens = self.model.generate(
+                **inputs,
+                max_new_tokens=max_new_tokens,
+                do_sample=False,
+            )
+        decoded = self.processor.decode(
+            gens[0][start_len:], skip_special_tokens=True
+        )
+        return decoded.strip()
+
+    def _run(
+        self,
+        image_path: str,
+        prompt: str,
+        max_new_tokens: int = 300,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> Tuple[Dict[str, Any], Dict]:
+        """Validate the input, invoke the model, and return the output plus metadata."""
+        try:
+            if not Path(image_path).is_file():
+                raise FileNotFoundError(f"Image not found: {image_path}")
+
+            answer = self._generate(image_path, prompt, max_new_tokens)
+
+            return (
+                {"response": answer},
+                {
+                    "image_path": image_path,
+                    "prompt": prompt,
+                    "max_new_tokens": max_new_tokens,
+                    "status": "completed",
+                },
+            )
+
+        except Exception as e:
+            return (
+                {"error": str(e)},
+                {
+                    "image_path": image_path,
+                    "prompt": prompt,
+                    "max_new_tokens": max_new_tokens,
+                    "status": "failed",
+                    "error": str(e),
+                },
+            )
+
+    async def _arun(
+        self,
+        image_path: str,
+        prompt: str,
+        max_new_tokens: int = 300,
+        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
+    ) -> Tuple[Dict[str, Any], Dict]:
+        """Asynchronous wrapper (delegates to the sync implementation)."""
+        return self._run(image_path, prompt, max_new_tokens)
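For orientation, a minimal usage sketch of the new tool outside the agent loop; the cache directory and image path are hypothetical, and the call simply mirrors the `_run` signature and return value defined above (in MedRAX the tool is normally invoked by the agent rather than called directly):

from medrax.tools import MedGemmaXRayTool

# Assumed paths; any chest X-ray image on disk works.
tool = MedGemmaXRayTool(cache_dir="/model-weights", device="cuda")
output, metadata = tool._run(
    image_path="samples/chest_xray.png",
    prompt="Is there evidence of pleural effusion?",
    max_new_tokens=200,
)
print(output.get("response") or output.get("error"))
print(metadata["status"])  # "completed" or "failed"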
pyproject.toml CHANGED
@@ -24,7 +24,7 @@ dependencies = [
     "pydantic>=1.8.0",
     "Pillow>=8.0.0",
     "torchxrayvision>=0.0.37",
-    "transformers @ git+https://github.com/huggingface/transformers.git@88d960937c81a32bfb63356a2e8ecf7999619681",
+    "transformers>=4.46.3",
     "tokenizers>=0.10.0",
     "sentencepiece>=0.1.95",
     "shortuuid>=1.0.0",