edeler committed
Commit 9d1ea3d · verified · 1 Parent(s): 7629476

Upload 5 files

Files changed (2):
  1. README.md +21 -17
  2. app.py +28 -9
README.md CHANGED
@@ -51,27 +51,31 @@ This application is designed to run on Hugging Face Spaces. The following files
 
  ## Model Loading
 
- **MedGemma Models (Automatic):**
- - Models download automatically from Hugging Face Hub on first use
- - No manual installation required
- - Choose between 4B (faster) or 27B (more accurate) models
-
- **RF-DETR Model (Your Custom Model):**
- You have two options for uploading your custom RF-DETR model:
-
- ### Option 1: Direct Upload (Simple)
- 1. Upload your `rf-detr-medium.pth` file directly to your Space
- 2. The app will automatically find and use it
-
- ### Option 2: Model Repository (Recommended)
- 1. Create a separate Hugging Face model repository (e.g., `your-username/rf-detr-medical`)
- 2. Upload your model files there
- 3. Set the environment variable `RFDETR_HF_REPO` to your repository ID
-
- **To set the environment variable:**
- - Go to your Space settings
- - Add `RFDETR_HF_REPO` with your model repository ID (e.g., `your-username/rf-detr-medical`)
- - The app will download from your repository automatically
+ ### 🔑 Required: Hugging Face Token (for MedGemma)
+
+ **MedGemma is a gated model.** To use AI-powered text analysis, you must:
+
+ 1. Go to your **Space Settings** → **Repository secrets**
+ 2. Add a new secret:
+    - **Name**: `HF_TOKEN`
+    - **Value**: your Hugging Face token (get it from https://huggingface.co/settings/tokens)
+ 3. **Important**: accept the model license at https://huggingface.co/google/medgemma-4b-it
+ 4. Save and restart your Space
+
+ **Without the token:** object detection will still work, but AI text analysis will be disabled.
+
+ ---
+
+ **MedGemma Models (Automatic):**
+ - Models download automatically from Hugging Face Hub on first use (with a valid token)
+ - Uses MedGemma 4B for efficient AI-powered analysis
+ - 4-bit quantization for reduced memory usage
+
+ **RF-DETR Model (Automatic from HF Model Repo):**
+ - The model downloads automatically from `edeler/lorai` on Hugging Face
+ - No manual upload needed; the repository is configured in the app
+ - Cached locally after the first download for faster subsequent runs
+ - Model file: `lorai.pth` (135 MB)
 
  ## Space Configuration
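The token and download flow the README now describes can be smoke-tested outside the app with `huggingface_hub`. A minimal sketch, not part of this commit, assuming `huggingface_hub` is installed and `HF_TOKEN` is the Space secret named above:

```python
# Hedged sketch: verify the HF token and the two downloads the README describes.
import os
from huggingface_hub import hf_hub_download, whoami

token = os.environ.get("HF_TOKEN")

if token:
    # Confirms the token itself is valid
    print("Logged in as:", whoami(token=token)["name"])
    # Gated MedGemma: succeeds only after accepting the license at
    # https://huggingface.co/google/medgemma-4b-it
    cfg = hf_hub_download("google/medgemma-4b-it", "config.json", token=token)
    print("MedGemma reachable:", cfg)

# Public RF-DETR checkpoint: no token needed; cached under ~/.cache/huggingface
ckpt = hf_hub_download(repo_id="edeler/lorai", filename="lorai.pth")
print("RF-DETR checkpoint:", ckpt)
```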
app.py CHANGED
@@ -46,11 +46,15 @@ class SpacesConfig:
      """Configuration optimized for Hugging Face Spaces."""
 
      def __init__(self):
+         # Get HF token from environment
+         hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')
+
          self.settings = {
              'results_dir': '/tmp/results',
              'checkpoint': None,
              'hf_model_repo': 'edeler/lorai',  # Hugging Face model repository
              'hf_model_filename': 'lorai.pth',
+             'hf_token': hf_token,
              'resolution': 576,
              'threshold': 0.7,
              'use_llm': True,
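Spaces exposes repository secrets as environment variables, which is why the `or` fallback above works. A quick illustration with hypothetical values:

```python
# Illustration of the fallback chain in the new __init__ (hypothetical values)
import os

os.environ.pop('HF_TOKEN', None)            # primary variable unset
os.environ['HUGGINGFACE_TOKEN'] = 'hf_xxx'  # alternate name set

token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')
assert token == 'hf_xxx'                    # falls back to the second variable
```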
@@ -165,7 +169,7 @@ class TextGenerator:
          self.processor = None
          self.is_multimodal = False
 
-     def load_model(self):
+     def load_model(self, hf_token: Optional[str] = None):
          """Load the LLM model."""
          if self.model is not None:
              return
@@ -182,6 +186,10 @@ class TextGenerator:
              "device_map": "auto",
              "low_cpu_mem_usage": True,
          }
+
+         # Add token if provided
+         if hf_token:
+             model_kwargs["token"] = hf_token
 
          if torch and torch.cuda.is_available():
              model_kwargs["torch_dtype"] = torch.bfloat16
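The README advertises 4-bit quantization, which this hunk does not show being configured. With `transformers` plus `bitsandbytes` it is typically added to the same `model_kwargs` dict; a hedged sketch, not necessarily how this commit wires it elsewhere:

```python
# Hedged sketch: one common way to get the 4-bit loading the README mentions.
# Requires the bitsandbytes package and a CUDA device; settings are illustrative.
import torch
from transformers import BitsAndBytesConfig

model_kwargs = {
    "device_map": "auto",
    "low_cpu_mem_usage": True,
    "quantization_config": BitsAndBytesConfig(
        load_in_4bit=True,                      # store weights in 4 bits
        bnb_4bit_quant_type="nf4",              # NF4 quantization
        bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bf16
    ),
}
```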
@@ -204,11 +212,11 @@
          is_multimodal = "medgemma" in self.model_id.lower()
 
          if is_multimodal and AutoModelForImageTextToText is not None and AutoProcessor is not None:
-             self.processor = AutoProcessor.from_pretrained(self.model_id)
+             self.processor = AutoProcessor.from_pretrained(self.model_id, token=hf_token)
              self.model = AutoModelForImageTextToText.from_pretrained(self.model_id, **model_kwargs)
              self.is_multimodal = True
          elif AutoModelForCausalLM is not None and AutoTokenizer is not None:
-             self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
+             self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=hf_token)
              self.model = AutoModelForCausalLM.from_pretrained(self.model_id, **model_kwargs)
              self.is_multimodal = False
          else:
@@ -216,9 +224,9 @@
 
          print("✓ Model loaded successfully")
 
-     def generate(self, text: str, image: Optional[Image.Image] = None) -> str:
+     def generate(self, text: str, image: Optional[Image.Image] = None, hf_token: Optional[str] = None) -> str:
          """Generate text using the loaded model."""
-         self.load_model()
+         self.load_model(hf_token)
 
          if self.model is None:
              return f"[Model not loaded: {text}]"
@@ -447,10 +455,13 @@ def create_detection_interface():
          if app_state.config.get('use_llm'):
              try:
                  generator = app_state.get_text_generator(model_size)
-                 llm_description = generator.generate(description, image=annotated)
+                 hf_token = app_state.config.get('hf_token')
+                 llm_description = generator.generate(description, image=annotated, hf_token=hf_token)
                  description = llm_description
              except Exception as e:
-                 description = f"[LLM error: {e}]\n\n{description}"
+                 print(f"LLM generation failed: {e}")
+                 # Just use the basic description if LLM fails
+                 pass
      else:
          description += "No objects detected above the confidence threshold."
 
@@ -465,6 +476,13 @@
      with gr.Blocks(title="Medical Image Analysis", theme=gr.themes.Soft()) as demo:
          gr.Markdown("# 🏥 Medical Image Analysis")
          gr.Markdown("Upload a medical image to detect and analyze findings using AI.")
+
+         # Check if HF token is available
+         hf_token = app_state.config.get('hf_token')
+         if not hf_token:
+             gr.Markdown("⚠️ **Note:** HF_TOKEN not set. AI text generation will be disabled. Detection will still work.")
+         else:
+             gr.Markdown("✅ **AI-powered analysis enabled** using MedGemma 4B")
 
          with gr.Row():
              with gr.Column():
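The conditional banner above boils down to a pattern that runs standalone. A minimal sketch, with a plain `HF_TOKEN` environment check standing in for `app_state`:

```python
# Minimal standalone version of the banner logic above (not the app itself)
import os
import gradio as gr

with gr.Blocks() as demo:
    if os.environ.get("HF_TOKEN"):
        gr.Markdown("✅ **AI-powered analysis enabled** using MedGemma 4B")
    else:
        gr.Markdown("⚠️ **Note:** HF_TOKEN not set. AI text generation will be disabled.")

demo.launch()
```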
@@ -479,10 +497,11 @@
              )
 
              model_size_radio = gr.Radio(
-                 choices=["4B", "27B"],
+                 choices=["4B"],
                  value="4B",
                  label="MedGemma Model Size",
-                 info="4B: Faster, less memory | 27B: More accurate, more memory"
+                 info="Using MedGemma 4B for AI-generated analysis",
+                 visible=False  # Hide since only one option
              )
 
              analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")
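Taken together, the changes thread the token from the Space secret through `generate()` and `load_model()` down to `from_pretrained(token=...)`. A hedged end-to-end sketch of that flow; the `TextGenerator` constructor signature is an assumption (it is not shown in the diff), the rest mirrors the hunks above:

```python
# Hedged sketch of the full token flow introduced by this commit.
# Assumes app.py exposes SpacesConfig and TextGenerator, and that
# TextGenerator takes a model_id keyword argument (assumed, not in the diff).
import os
from app import SpacesConfig, TextGenerator

config = SpacesConfig()
token = config.settings.get('hf_token')  # populated from HF_TOKEN in __init__

gen = TextGenerator(model_id="google/medgemma-4b-it")  # assumed signature
if token:
    # generate() lazily calls load_model(hf_token), which passes the token
    # through to from_pretrained for the gated MedGemma download
    print(gen.generate("Summarize the detected findings.", hf_token=token))
else:
    print("HF_TOKEN not set; AI text analysis disabled (detection still works).")
```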