edeler committed
Commit 9d1ea3d · verified · 1 Parent(s): 7629476

Upload 5 files

Files changed (2):
  1. README.md +21 -17
  2. app.py +28 -9
README.md CHANGED
@@ -51,27 +51,31 @@ This application is designed to run on Hugging Face Spaces. The following files
 
  ## Model Loading
 
- **MedGemma Models (Automatic):**
- - Models download automatically from Hugging Face Hub on first use
- - No manual installation required
- - Choose between 4B (faster) or 27B (more accurate) models
-
- **RF-DETR Model (Your Custom Model):**
- You have two options for uploading your custom RF-DETR model:
-
- ### Option 1: Direct Upload (Simple)
- 1. Upload your `rf-detr-medium.pth` file directly to your Space
- 2. The app will automatically find and use it
-
- ### Option 2: Model Repository (Recommended)
- 1. Create a separate Hugging Face model repository (e.g., `your-username/rf-detr-medical`)
- 2. Upload your model files there
- 3. Set the environment variable `RFDETR_HF_REPO` to your repository ID
-
- **To set the environment variable:**
- - Go to your Space settings
- - Add `RFDETR_HF_REPO` with your model repository ID (e.g., `your-username/rf-detr-medical`)
- - The app will download from your repository automatically
+ ### 🔑 Required: Hugging Face Token (for MedGemma)
+
+ **MedGemma is a gated model.** To use AI-powered text analysis, you must:
+
+ 1. Go to your **Space Settings** → **Repository secrets**
+ 2. Add a new secret:
+    - **Name**: `HF_TOKEN`
+    - **Value**: your Hugging Face token (get it from https://huggingface.co/settings/tokens)
+ 3. **Important**: accept the model license at https://huggingface.co/google/medgemma-4b-it
+ 4. Save and restart your Space
+
+ **Without the token:** object detection will still work, but AI text analysis will be disabled.
+
+ ---
+
+ **MedGemma Models (Automatic):**
+ - Models download automatically from Hugging Face Hub on first use (with a valid token)
+ - Uses MedGemma 4B for efficient AI-powered analysis
+ - 4-bit quantization for reduced memory usage
+
+ **RF-DETR Model (Automatic from HF Model Repo):**
+ - The model downloads automatically from `edeler/lorai` on Hugging Face
+ - No manual upload needed; the repository is configured in the app
+ - Cached locally after the first download for faster subsequent runs
+ - Model file: `lorai.pth` (135 MB)
 
  ## Space Configuration
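The token and download flow the README now describes can be smoke-tested outside the app with `huggingface_hub`. A minimal sketch, not part of this commit, assuming `huggingface_hub` is installed and `HF_TOKEN` is the Space secret named above:

```python
# Hedged sketch: verify the HF token and the two downloads the README describes.
import os
from huggingface_hub import hf_hub_download, whoami

token = os.environ.get("HF_TOKEN")

if token:
    # Confirms the token itself is valid
    print("Logged in as:", whoami(token=token)["name"])
    # Gated MedGemma: succeeds only after accepting the license at
    # https://huggingface.co/google/medgemma-4b-it
    cfg = hf_hub_download("google/medgemma-4b-it", "config.json", token=token)
    print("MedGemma reachable:", cfg)

# Public RF-DETR checkpoint: no token needed; cached under ~/.cache/huggingface
ckpt = hf_hub_download(repo_id="edeler/lorai", filename="lorai.pth")
print("RF-DETR checkpoint:", ckpt)
```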
app.py CHANGED
@@ -46,11 +46,15 @@ class SpacesConfig:
      """Configuration optimized for Hugging Face Spaces."""
 
      def __init__(self):
+         # Get HF token from environment
+         hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')
+
          self.settings = {
              'results_dir': '/tmp/results',
              'checkpoint': None,
              'hf_model_repo': 'edeler/lorai',  # Hugging Face model repository
              'hf_model_filename': 'lorai.pth',
+             'hf_token': hf_token,
              'resolution': 576,
              'threshold': 0.7,
              'use_llm': True,
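Spaces exposes repository secrets as environment variables, which is why the `or` fallback above works. A quick illustration with hypothetical values:

```python
# Illustration of the fallback chain in the new __init__ (hypothetical values)
import os

os.environ.pop('HF_TOKEN', None)            # primary variable unset
os.environ['HUGGINGFACE_TOKEN'] = 'hf_xxx'  # alternate name set

token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')
assert token == 'hf_xxx'                    # falls back to the second variable
```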
@@ -165,7 +169,7 @@ class TextGenerator:
          self.processor = None
          self.is_multimodal = False
 
-     def load_model(self):
+     def load_model(self, hf_token: Optional[str] = None):
          """Load the LLM model."""
          if self.model is not None:
              return
@@ -182,6 +186,10 @@ class TextGenerator:
              "device_map": "auto",
              "low_cpu_mem_usage": True,
          }
+
+         # Add token if provided
+         if hf_token:
+             model_kwargs["token"] = hf_token
 
          if torch and torch.cuda.is_available():
              model_kwargs["torch_dtype"] = torch.bfloat16
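The README advertises 4-bit quantization, which this hunk does not show being configured. With `transformers` plus `bitsandbytes` it is typically added to the same `model_kwargs` dict; a hedged sketch, not necessarily how this commit wires it elsewhere:

```python
# Hedged sketch: one common way to get the 4-bit loading the README mentions.
# Requires the bitsandbytes package and a CUDA device; settings are illustrative.
import torch
from transformers import BitsAndBytesConfig

model_kwargs = {
    "device_map": "auto",
    "low_cpu_mem_usage": True,
    "quantization_config": BitsAndBytesConfig(
        load_in_4bit=True,                      # store weights in 4 bits
        bnb_4bit_quant_type="nf4",              # NF4 quantization
        bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bf16
    ),
}
```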
@@ -204,11 +212,11 @@
          is_multimodal = "medgemma" in self.model_id.lower()
 
          if is_multimodal and AutoModelForImageTextToText is not None and AutoProcessor is not None:
-             self.processor = AutoProcessor.from_pretrained(self.model_id)
+             self.processor = AutoProcessor.from_pretrained(self.model_id, token=hf_token)
              self.model = AutoModelForImageTextToText.from_pretrained(self.model_id, **model_kwargs)
              self.is_multimodal = True
          elif AutoModelForCausalLM is not None and AutoTokenizer is not None:
-             self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
+             self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=hf_token)
              self.model = AutoModelForCausalLM.from_pretrained(self.model_id, **model_kwargs)
              self.is_multimodal = False
          else:
@@ -216,9 +224,9 @@
 
          print("✓ Model loaded successfully")
 
-     def generate(self, text: str, image: Optional[Image.Image] = None) -> str:
+     def generate(self, text: str, image: Optional[Image.Image] = None, hf_token: Optional[str] = None) -> str:
          """Generate text using the loaded model."""
-         self.load_model()
+         self.load_model(hf_token)
 
          if self.model is None:
              return f"[Model not loaded: {text}]"
@@ -447,10 +455,13 @@ def create_detection_interface():
          if app_state.config.get('use_llm'):
              try:
                  generator = app_state.get_text_generator(model_size)
-                 llm_description = generator.generate(description, image=annotated)
+                 hf_token = app_state.config.get('hf_token')
+                 llm_description = generator.generate(description, image=annotated, hf_token=hf_token)
                  description = llm_description
              except Exception as e:
-                 description = f"[LLM error: {e}]\n\n{description}"
+                 print(f"LLM generation failed: {e}")
+                 # Just use the basic description if LLM fails
+                 pass
      else:
          description += "No objects detected above the confidence threshold."
 
@@ -465,6 +476,13 @@
      with gr.Blocks(title="Medical Image Analysis", theme=gr.themes.Soft()) as demo:
          gr.Markdown("# 🏥 Medical Image Analysis")
          gr.Markdown("Upload a medical image to detect and analyze findings using AI.")
+
+         # Check if HF token is available
+         hf_token = app_state.config.get('hf_token')
+         if not hf_token:
+             gr.Markdown("⚠️ **Note:** HF_TOKEN not set. AI text generation will be disabled. Detection will still work.")
+         else:
+             gr.Markdown("✅ **AI-powered analysis enabled** using MedGemma 4B")
 
          with gr.Row():
              with gr.Column():
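The conditional banner above boils down to a pattern that runs standalone. A minimal sketch, with a plain `HF_TOKEN` environment check standing in for `app_state`:

```python
# Minimal standalone version of the banner logic above (not the app itself)
import os
import gradio as gr

with gr.Blocks() as demo:
    if os.environ.get("HF_TOKEN"):
        gr.Markdown("✅ **AI-powered analysis enabled** using MedGemma 4B")
    else:
        gr.Markdown("⚠️ **Note:** HF_TOKEN not set. AI text generation will be disabled.")

demo.launch()
```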
@@ -479,10 +497,11 @@
              )
 
              model_size_radio = gr.Radio(
-                 choices=["4B", "27B"],
+                 choices=["4B"],
                  value="4B",
                  label="MedGemma Model Size",
-                 info="4B: Faster, less memory | 27B: More accurate, more memory"
+                 info="Using MedGemma 4B for AI-generated analysis",
+                 visible=False  # Hide since only one option
              )
 
              analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")
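Taken together, the changes thread the token from the Space secret through `generate()` and `load_model()` down to `from_pretrained(token=...)`. A hedged end-to-end sketch of that flow; the `TextGenerator` constructor signature is an assumption (it is not shown in the diff), the rest mirrors the hunks above:

```python
# Hedged sketch of the full token flow introduced by this commit.
# Assumes app.py exposes SpacesConfig and TextGenerator, and that
# TextGenerator takes a model_id keyword argument (assumed, not in the diff).
import os
from app import SpacesConfig, TextGenerator

config = SpacesConfig()
token = config.settings.get('hf_token')  # populated from HF_TOKEN in __init__

gen = TextGenerator(model_id="google/medgemma-4b-it")  # assumed signature
if token:
    # generate() lazily calls load_model(hf_token), which passes the token
    # through to from_pretrained for the gated MedGemma download
    print(gen.generate("Summarize the detected findings.", hf_token=token))
else:
    print("HF_TOKEN not set; AI text analysis disabled (detection still works).")
```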