Upload 5 files
README.md
CHANGED
@@ -51,27 +51,31 @@ This application is designed to run on Hugging Face Spaces. The following files
 
 ## Model Loading
 
-
-- Models download automatically from Hugging Face Hub on first use
-- No manual installation required
-- Choose between 4B (faster) or 27B (more accurate) models
+### 🔑 Required: Hugging Face Token (for MedGemma)
 
-**
-You have two options for uploading your custom RF-DETR model:
+**MedGemma is a gated model**. To use AI-powered text analysis, you must:
 
-
-
-
+1. Go to your **Space Settings** → **Repository secrets**
+2. Add a new secret:
+   - **Name**: `HF_TOKEN`
+   - **Value**: Your Hugging Face token (get it from https://huggingface.co/settings/tokens)
+3. **Important**: Accept the model license at https://huggingface.co/google/medgemma-4b-it
+4. Save and restart your Space
 
-
-1. Create a separate Hugging Face model repository (e.g., `your-username/rf-detr-medical`)
-2. Upload your model files there
-3. Set the environment variable `RFDETR_HF_REPO` to your repository ID
+**Without the token:** Object detection will still work, but AI text analysis will be disabled.
 
-
-
-
-
+---
+
+**MedGemma Models (Automatic):**
+- Models download automatically from Hugging Face Hub on first use (with valid token)
+- Uses MedGemma 4B for efficient AI-powered analysis
+- 4-bit quantization for reduced memory usage
+
+**RF-DETR Model (Automatic from HF Model Repo):**
+- Model automatically downloads from `edeler/lorai` on Hugging Face
+- No manual upload needed - configured in the app
+- Cached locally after first download for faster subsequent runs
+- Model file: `lorai.pth` (135MB)
 
 ## Space Configuration
 
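To sanity-check the README steps above before restarting the Space, the snippet below is a minimal sketch (not part of this commit) using `huggingface_hub`. The helper name `check_model_access` is illustrative; the repo IDs, filename, and environment variable names come from the README and the app.py changes that follow.

```python
import os

from huggingface_hub import hf_hub_download, model_info, whoami


def check_model_access() -> None:
    """Illustrative only: verify the token, gated-model access, and the RF-DETR download."""
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
    if not token:
        print("HF_TOKEN not set: detection still works, AI text analysis is disabled.")
        return

    # Raises if the token is invalid.
    print("Authenticated as:", whoami(token=token)["name"])

    # Raises an access error until the MedGemma license has been accepted.
    model_info("google/medgemma-4b-it", token=token)
    print("MedGemma access OK")

    # The RF-DETR checkpoint comes from the public repo and is cached locally (~135 MB).
    path = hf_hub_download(repo_id="edeler/lorai", filename="lorai.pth")
    print("RF-DETR checkpoint cached at:", path)


if __name__ == "__main__":
    check_model_access()
```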
app.py
CHANGED
@@ -46,11 +46,15 @@ class SpacesConfig:
     """Configuration optimized for Hugging Face Spaces."""
 
     def __init__(self):
+        # Get HF token from environment
+        hf_token = os.environ.get('HF_TOKEN') or os.environ.get('HUGGINGFACE_TOKEN')
+
         self.settings = {
             'results_dir': '/tmp/results',
             'checkpoint': None,
             'hf_model_repo': 'edeler/lorai',  # Hugging Face model repository
             'hf_model_filename': 'lorai.pth',
+            'hf_token': hf_token,
             'resolution': 576,
             'threshold': 0.7,
             'use_llm': True,
@@ -165,7 +169,7 @@ class TextGenerator:
         self.processor = None
         self.is_multimodal = False
 
-    def load_model(self):
+    def load_model(self, hf_token: Optional[str] = None):
         """Load the LLM model."""
         if self.model is not None:
             return
@@ -182,6 +186,10 @@ class TextGenerator:
             "device_map": "auto",
             "low_cpu_mem_usage": True,
         }
+
+        # Add token if provided
+        if hf_token:
+            model_kwargs["token"] = hf_token
 
         if torch and torch.cuda.is_available():
             model_kwargs["torch_dtype"] = torch.bfloat16
@@ -204,11 +212,11 @@ class TextGenerator:
         is_multimodal = "medgemma" in self.model_id.lower()
 
         if is_multimodal and AutoModelForImageTextToText is not None and AutoProcessor is not None:
-            self.processor = AutoProcessor.from_pretrained(self.model_id)
+            self.processor = AutoProcessor.from_pretrained(self.model_id, token=hf_token)
             self.model = AutoModelForImageTextToText.from_pretrained(self.model_id, **model_kwargs)
             self.is_multimodal = True
         elif AutoModelForCausalLM is not None and AutoTokenizer is not None:
-            self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
+            self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, token=hf_token)
             self.model = AutoModelForCausalLM.from_pretrained(self.model_id, **model_kwargs)
             self.is_multimodal = False
         else:
@@ -216,9 +224,9 @@ class TextGenerator:
 
         print("✓ Model loaded successfully")
 
-    def generate(self, text: str, image: Optional[Image.Image] = None) -> str:
+    def generate(self, text: str, image: Optional[Image.Image] = None, hf_token: Optional[str] = None) -> str:
         """Generate text using the loaded model."""
-        self.load_model()
+        self.load_model(hf_token)
 
         if self.model is None:
             return f"[Model not loaded: {text}]"
@@ -447,10 +455,13 @@ def create_detection_interface():
         if app_state.config.get('use_llm'):
             try:
                 generator = app_state.get_text_generator(model_size)
-
+                hf_token = app_state.config.get('hf_token')
+                llm_description = generator.generate(description, image=annotated, hf_token=hf_token)
                 description = llm_description
             except Exception as e:
-
+                print(f"LLM generation failed: {e}")
+                # Just use the basic description if LLM fails
+                pass
         else:
             description += "No objects detected above the confidence threshold."
 
@@ -465,6 +476,13 @@ def create_detection_interface():
     with gr.Blocks(title="Medical Image Analysis", theme=gr.themes.Soft()) as demo:
         gr.Markdown("# 🏥 Medical Image Analysis")
         gr.Markdown("Upload a medical image to detect and analyze findings using AI.")
+
+        # Check if HF token is available
+        hf_token = app_state.config.get('hf_token')
+        if not hf_token:
+            gr.Markdown("⚠️ **Note:** HF_TOKEN not set. AI text generation will be disabled. Detection will still work.")
+        else:
+            gr.Markdown("✅ **AI-powered analysis enabled** using MedGemma 4B")
 
         with gr.Row():
             with gr.Column():
@@ -479,10 +497,11 @@ def create_detection_interface():
             )
 
             model_size_radio = gr.Radio(
-                choices=["4B"
+                choices=["4B"],
                 value="4B",
                 label="MedGemma Model Size",
-                info="
+                info="Using MedGemma 4B for AI-generated analysis",
+                visible=False  # Hide since only one option
             )
 
             analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")
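The README mentions 4-bit quantization for MedGemma, which is not visible in the hunks above. As a rough sketch of how that is commonly configured with `transformers` and `bitsandbytes` (the actual settings in app.py may differ; `quant_config` and this standalone loading snippet are illustrative, and 4-bit loading assumes a CUDA-backed Space):

```python
import os

import torch
from transformers import AutoModelForImageTextToText, AutoProcessor, BitsAndBytesConfig

model_id = "google/medgemma-4b-it"
hf_token = os.environ.get("HF_TOKEN")

# NF4 4-bit weights with bfloat16 compute keep the 4B model within a small GPU memory budget.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

processor = AutoProcessor.from_pretrained(model_id, token=hf_token)
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    token=hf_token,
    device_map="auto",
    low_cpu_mem_usage=True,
    quantization_config=quant_config,
)
```

When `quantization_config` is passed, an explicit `torch_dtype` is not required; the compute dtype is taken from `bnb_4bit_compute_dtype`.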