first push
- app.py +127 -0
- requirements.txt +7 -0
- train.py +92 -0
app.py
ADDED
@@ -0,0 +1,127 @@
import gradio as gr
from datasets import load_dataset
import torch
from torchvision import transforms, models
from PIL import Image
import numpy as np
import random


# ---- 1. Load dataset ----
# gymprathap/Breast-Cancer-Ultrasound-Images-Dataset has a 'train' split with 'image' and 'label'
dataset = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset", split="train")

# Map numeric labels to readable classes
label_names = dataset.features["label"].names  # ['benign', 'malignant', 'normal']

# ---- 2. Define model (pretrained backbone, untrained head) ----
# For the MVP, load an ImageNet-pretrained ResNet18 and adapt its head
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, len(label_names))
# For demo purposes the new classification head keeps its random initialization
# (no fine-tuning), so predictions are not medically meaningful
model.eval()

# Transform for inference (ImageNet normalization, to match the pretrained backbone)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Lambda(lambda img: img.convert("RGB")),  # ultrasound images may be grayscale
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


# ---- 3. Utility: get image + run prediction ----
def predict_from_sample(sample_idx):
    """Return the selected image and a caption comparing prediction and true label."""
    row = dataset[int(sample_idx)]
    image = row["image"]
    true_label = label_names[row["label"]]

    image_t = transform(image).unsqueeze(0)  # add batch dimension

    with torch.no_grad():
        logits = model(image_t)
    probs = torch.nn.functional.softmax(logits, dim=1).numpy().flatten()
    pred_idx = int(np.argmax(probs))
    pred_label = label_names[pred_idx]
    conf = probs[pred_idx]

    # Build output caption (two trailing spaces force a Markdown line break)
    status = "✅ **Correct**" if pred_label == true_label else "❌ **Incorrect**"
    caption = (
        f"**Predicted:** {pred_label} (confidence: {conf:.2f})  \n"
        f"**True Label:** {true_label}  \n"
        f"{status}"
    )

    return image, caption


# ---- 4. Build Gradio UI ----
N_SAMPLES = 10  # number of random samples to show
random_indices = random.sample(range(len(dataset)), N_SAMPLES)

# Show the human-readable class name next to each sample index
sample_options = [f"{i}: {label_names[dataset[i]['label']]}" for i in random_indices]
# sample_options = [f"{i}: {label_names[dataset[i]['label']]}" for i in range(10)]  # first 10 samples instead

with gr.Blocks(title="Women's Longevity Hack") as demo:
    gr.Markdown("## 🩺 Women's Longevity Hack")

    with gr.Tabs():
        with gr.Tab("Getting Started"):
            gr.Markdown(
                "## Getting Started\n"
            )

        with gr.Tab("More Datasets"):
            gr.Markdown(
                """
## 📚 Dataset Inspiration

| Dataset | Modalities / Type | Description & Use Cases |
|---|---|---|
| [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset](https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset) | Images (ultrasound) + labels | Ultrasound images labeled benign / malignant / normal. Useful for image classification, explainability (e.g., Grad-CAM), or multimodal fusion if metadata is available. |
| [altaidevorg/women-health-mini](https://huggingface.co/datasets/altaidevorg/women-health-mini) | Mixed / tabular / survey (small) | A small women's-health dataset for quick prototyping; a good starting point for longevity-related feature exploration. |
| [HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p](https://huggingface.co/datasets/HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p) | Tabular / survey | U.S. behavioral risk factor data (demographics, behaviors, chronic-disease prevalence). Ideal for risk dashboards or feature-importance demos. |
| [nguyenvy/cleaned_nhanes_1988_2018](https://huggingface.co/datasets/nguyenvy/cleaned_nhanes_1988_2018) | Tabular / biomarker + demographic | Cleaned NHANES data (1988-2018) with lab values, anthropometrics, and demographics. Useful for biological-age or biomarker-based longevity models. |
| [BoneMet/BoneMet](https://huggingface.co/datasets/BoneMet/BoneMet) | Biomedical / genomic / imaging | Bone-metastasis research dataset; can support multimodal modeling combining clinical, imaging, and molecular data. |
| [AIBIC/MLOmics](https://huggingface.co/datasets/AIBIC/MLOmics) | Multi-omics / biomedical | Multi-omics resource (genomic, transcriptomic, proteomic) for biomedical discovery and precision-health modeling. |
"""
            )

        with gr.Tab("Classifier Demo"):
            with gr.Row():
                sample_selector = gr.Dropdown(
                    label="Select sample image",
                    choices=sample_options,
                    value=sample_options[0],
                )
                predict_btn = gr.Button("Run Prediction")

            image_output = gr.Image(label="Ultrasound Image")
            text_output = gr.Markdown(label="Prediction")

            predict_btn.click(
                fn=lambda s: predict_from_sample(s.split(":")[0]),  # dropdown values look like "42: benign"
                inputs=sample_selector,
                outputs=[image_output, text_output],
            )

            gr.Markdown(
                "Dataset: [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset]"
                "(https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset)  \n"
                "Note: Model weights here are for demonstration only."
            )

# ---- 5. Launch app ----
if __name__ == "__main__":
    demo.launch()
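Once train.py has finished and pushed its checkpoint, the demo no longer needs the randomly initialized ResNet head. Below is a minimal sketch of the swap, assuming the hub_model_id from train.py (hugging-science/sample-breast-cancer-classification) exists on the Hub after training; the id2label mapping set in train.py is what lets the pipeline return readable class names. The hypothetical predict_from_sample_finetuned would then replace predict_from_sample in the predict_btn.click handler.

from transformers import pipeline

# Fine-tuned checkpoint pushed by train.py (assumed to exist after training)
classifier = pipeline(
    "image-classification",
    model="hugging-science/sample-breast-cancer-classification",
)

def predict_from_sample_finetuned(sample_idx):
    """Drop-in replacement for predict_from_sample, backed by the fine-tuned model."""
    row = dataset[int(sample_idx)]
    true_label = label_names[row["label"]]
    scores = classifier(row["image"].convert("RGB"))  # list of {'label', 'score'} dicts, best first
    top = scores[0]
    status = "✅ **Correct**" if top["label"] == true_label else "❌ **Incorrect**"
    caption = (
        f"**Predicted:** {top['label']} (confidence: {top['score']:.2f})  \n"
        f"**True Label:** {true_label}  \n"
        f"{status}"
    )
    return row["image"], caption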
requirements.txt
ADDED
@@ -0,0 +1,7 @@
torch
torchvision
datasets
gradio
pillow
transformers  # needed by train.py
evaluate      # needed by train.py
train.py
ADDED
@@ -0,0 +1,92 @@
"""
train.py — Fine-tune a Hugging Face vision model (e.g., ViT) on breast ultrasound images
"""

from datasets import load_dataset
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    TrainingArguments,
    Trainer,
)
import evaluate
import numpy as np
import torch

# ---- 1. Load dataset ----
dataset = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset")

# Dataset info
labels = dataset["train"].features["label"].names
num_labels = len(labels)
print(f"Classes: {labels}")

# ---- 2. Preprocessing ----
checkpoint = "google/vit-base-patch16-224-in21k"  # choose your model
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

def transform_examples(examples):
    images = [img.convert("RGB") for img in examples["image"]]  # ensure 3-channel input
    inputs = image_processor(images, return_tensors="pt")
    inputs["labels"] = examples["label"]
    return inputs

# Split first, then attach the on-the-fly transform to each split
splits = dataset["train"].train_test_split(test_size=0.2, seed=42)
train_ds = splits["train"].with_transform(transform_examples)
val_ds = splits["test"].with_transform(transform_examples)

# ---- 3. Load model ----
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label=dict(enumerate(labels)),                     # readable class names at inference time
    label2id={name: i for i, name in enumerate(labels)},
    ignore_mismatched_sizes=True,  # handles final-layer shape mismatch
)

# ---- 4. Metrics ----
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy.compute(predictions=preds, references=labels)["accuracy"]
    f1_score = f1.compute(predictions=preds, references=labels, average="macro")["f1"]
    return {"accuracy": acc, "f1": f1_score}

# ---- 5. Training setup ----
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    learning_rate=5e-5,
    logging_dir="./logs",
    load_best_model_at_end=True,
    remove_unused_columns=False,  # keep the 'image' column so the transform can see it
    push_to_hub=True,  # requires a prior `huggingface-cli login`
    hub_model_id="hugging-science/sample-breast-cancer-classification",
    report_to="none",
)

# ---- 6. Trainer ----
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=image_processor,  # saved with checkpoints so preprocessing travels with the model
    compute_metrics=compute_metrics,
)

# ---- 7. Train ----
trainer.train()

# ---- 8. Save locally ----
model.save_pretrained("./finetuned-ultrasound-model")
image_processor.save_pretrained("./finetuned-ultrasound-model")

print("✅ Training complete. Model saved to ./finetuned-ultrasound-model")
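A sanity check that could follow the save, sketched below using the objects already in scope: score the held-out split (reporting the accuracy and macro-F1 defined in compute_metrics), then reload the checkpoint from disk and classify one image to confirm the saved artifacts work end to end.

# ---- 9. (Optional) Evaluate and smoke-test the saved checkpoint ----
from transformers import pipeline

metrics = trainer.evaluate(val_ds)
print(metrics)  # includes eval_accuracy and eval_f1 from compute_metrics

clf = pipeline("image-classification", model="./finetuned-ultrasound-model")
sample_image = dataset["train"][0]["image"].convert("RGB")
print(clf(sample_image))  # list of {'label', 'score'} dicts, highest score first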