cgeorgiaw (HF Staff) committed
Commit 49e2b29 · 1 Parent(s): 764fa80

first push

Files changed (3)
  1. app.py +127 -0
  2. requirements.txt +5 -0
  3. train.py +92 -0
app.py ADDED
@@ -0,0 +1,127 @@
import gradio as gr
from datasets import load_dataset
import torch
from torchvision import transforms, models
from PIL import Image
import numpy as np
import random


# ---- 1. Load dataset ----
# gymprathap/Breast-Cancer-Ultrasound-Images-Dataset has a 'train' split with 'image' and 'label'
dataset = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset", split="train")

# Map numeric labels to readable classes
label_names = dataset.features["label"].names  # ['benign', 'malignant', 'normal']

# ---- 2. Define model (mock pretrained or real CNN) ----
# For the MVP, load an ImageNet-pretrained ResNet18 and adapt its head
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, len(label_names))
# Note: only the backbone is pretrained; the new classification head is
# randomly initialized (no fine-tuning), so predictions are for demo purposes only
model.eval()

# Transform for inference
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Lambda(lambda img: img.convert("RGB")),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],   # ImageNet normalization statistics
                         std=[0.229, 0.224, 0.225])
])

# ---- 3. Utility: get image + run prediction ----
def predict_from_sample(sample_idx):
    """Return image, prediction info, and true label."""
    row = dataset[int(sample_idx)]
    image = row["image"]
    true_label_idx = row["label"]
    true_label = label_names[true_label_idx]

    image_t = transform(image).unsqueeze(0)

    with torch.no_grad():
        logits = model(image_t)
        probs = torch.nn.functional.softmax(logits, dim=1).numpy().flatten()
    pred_idx = int(np.argmax(probs))
    pred_label = label_names[pred_idx]
    conf = probs[pred_idx]

    # Build output caption
    if pred_label == true_label:
        status = "✅ **Correct**"
    else:
        status = "❌ **Incorrect**"

    caption = (
        f"**Predicted:** {pred_label} (confidence: {conf:.2f})  \n"
        f"**True Label:** {true_label}  \n"
        f"{status}"
    )

    return image, caption
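
# Quick sanity check outside the UI (assuming the dataset and model above are loaded):
#     img, caption = predict_from_sample(0)
#     print(caption)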


# ---- 4. Build Gradio UI ----
N_SAMPLES = 10  # number of random samples to show
total = len(dataset)
random_indices = random.sample(range(total), N_SAMPLES)

sample_options = [f"{i}: {label_names[dataset[i]['label']]}" for i in random_indices]
# sample_options = [f"{i}: {label_names[dataset[i]['label']]}" for i in range(10)]  # first 10 samples

with gr.Blocks(title="Women's Longevity Hack") as demo:
    gr.Markdown("## 🩺 Women's Longevity Hack")

    with gr.Tabs():
        with gr.Tab("Getting Started"):
            gr.Markdown(
                "## Getting Started\n"
            )

        with gr.Tab("More Datasets"):
            gr.Markdown(
                """
## 📚 Dataset Inspiration

| Dataset | Modalities / Type | Description & Use Cases |
|---|---|---|
| [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset](https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset) | Images (ultrasound) + labels | Ultrasound images labeled as benign / malignant / normal. Useful for image classification, explainability (e.g., Grad-CAM), or multimodal fusion if metadata is available. |
| [altaidevorg/women-health-mini](https://huggingface.co/datasets/altaidevorg/women-health-mini) | Mixed / tabular / survey (small) | A small women's-health dataset for quick prototyping; a good starting point for longevity-related feature exploration. |
| [HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p](https://huggingface.co/datasets/HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p) | Tabular / survey | U.S. behavioral risk factor data (demographics, behaviors, chronic-disease prevalence). Ideal for risk dashboards or feature-importance demos. |
| [nguyenvy/cleaned_nhanes_1988_2018](https://huggingface.co/datasets/nguyenvy/cleaned_nhanes_1988_2018) | Tabular / biomarker + demographic | Cleaned NHANES dataset (1988-2018) with lab values, anthropometrics, and demographics. Useful for biological-age or biomarker-based longevity models. |
| [BoneMet/BoneMet](https://huggingface.co/datasets/BoneMet/BoneMet) | Biomedical / genomic / imaging | Dataset focused on bone-metastasis research; can support multimodal modeling combining clinical, imaging, and molecular data. |
| [AIBIC/MLOmics](https://huggingface.co/datasets/AIBIC/MLOmics) | Multi-omics / biomedical | Multi-omics resource (genomic, transcriptomic, proteomic) for biomedical discovery and precision-health modeling. |
                """
            )

        with gr.Tab("Classifier Demo"):
            with gr.Row():
                sample_selector = gr.Dropdown(
                    label="Select sample image",
                    choices=sample_options,
                    value=sample_options[0],
                )
                predict_btn = gr.Button("Run Prediction")

            image_output = gr.Image(label="Ultrasound Image")
            text_output = gr.Markdown(label="Prediction")

            predict_btn.click(
                fn=lambda s: predict_from_sample(s.split(":")[0]),
                inputs=sample_selector,
                outputs=[image_output, text_output]
            )

    gr.Markdown(
        "Dataset: [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset]"
        "(https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset)\n"
        "Note: Model weights here are for demonstration only."
    )

# ---- 5. Launch app ----
if __name__ == "__main__":
    demo.launch()
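Since the app's classification head is randomly initialized, a natural follow-up is to load the checkpoint that train.py (below) pushes to the Hub. A sketch of what the model setup in app.py could become, assuming the training run has completed and the repo `hugging-science/sample-breast-cancer-classification` (train.py's `hub_model_id`) is accessible:

from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch

ckpt = "hugging-science/sample-breast-cancer-classification"  # pushed by train.py
processor = AutoImageProcessor.from_pretrained(ckpt)
ft_model = AutoModelForImageClassification.from_pretrained(ckpt)
ft_model.eval()

def predict_probs(image):
    """Replacement for the torchvision transform + ResNet forward pass above."""
    inputs = processor(image.convert("RGB"), return_tensors="pt")
    with torch.no_grad():
        logits = ft_model(**inputs).logits
    return torch.nn.functional.softmax(logits, dim=1).numpy().flatten()

The rest of predict_from_sample would stay the same, since the class order matches label_names.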
requirements.txt ADDED
@@ -0,0 +1,5 @@
torch
torchvision
datasets
gradio
pillow
numpy
transformers  # needed by train.py
evaluate      # needed by train.py
train.py ADDED
@@ -0,0 +1,92 @@
"""
train.py — Fine-tune a Hugging Face vision model (e.g., ViT) on breast ultrasound images
"""

from datasets import load_dataset
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    TrainingArguments,
    Trainer,
)
import evaluate
import numpy as np
import torch

# ---- 1. Load dataset ----
dataset = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset")

# Dataset info
labels = dataset["train"].features["label"].names
num_labels = len(labels)
print(f"Classes: {labels}")

# ---- 2. Preprocessing ----
checkpoint = "google/vit-base-patch16-224-in21k"  # choose your model
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

def transform_examples(examples):
    images = [img.convert("RGB") for img in examples["image"]]  # ensure 3-channel input
    inputs = image_processor(images, return_tensors="pt")
    inputs["labels"] = examples["label"]
    return inputs

prepared_ds = dataset.with_transform(transform_examples)

# Split dataset
splits = prepared_ds["train"].train_test_split(test_size=0.2, seed=42)
train_ds, val_ds = splits["train"], splits["test"]

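# (Optional) `with_transform` applies `transform_examples` lazily at access time. If the
# default collator fails to batch these examples, the common fine-tuning recipe is an
# explicit collate function passed as `data_collator` — a sketch, not required if
# training runs as-is:
#
#     def collate_fn(batch):
#         return {
#             "pixel_values": torch.stack([x["pixel_values"] for x in batch]),
#             "labels": torch.tensor([x["labels"] for x in batch]),
#         }
#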
# ---- 3. Load model ----
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label={i: name for i, name in enumerate(labels)},  # store readable class names in the config
    label2id={name: i for i, name in enumerate(labels)},
    ignore_mismatched_sizes=True,  # the new classification head's shape differs from the checkpoint's
)

# ---- 4. Metrics ----
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, refs = eval_pred  # `refs`, to avoid shadowing the module-level `labels`
    preds = np.argmax(logits, axis=-1)
    acc = accuracy.compute(predictions=preds, references=refs)["accuracy"]
    f1_score = f1.compute(predictions=preds, references=refs, average="macro")["f1"]
    return {"accuracy": acc, "f1": f1_score}

# ---- 5. Training setup ----
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    learning_rate=5e-5,
    logging_dir="./logs",
    load_best_model_at_end=True,
    remove_unused_columns=False,  # keep the 'image' column for the on-the-fly transform
    push_to_hub=True,  # requires being logged in to the Hub with a write token
    hub_model_id="hugging-science/sample-breast-cancer-classification",
    report_to="none",
)

# ---- 6. Trainer ----
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=image_processor,  # deprecated in newer transformers; use processing_class=image_processor there
    compute_metrics=compute_metrics,
)

# ---- 7. Train ----
trainer.train()

# ---- 8. Save locally ----
model.save_pretrained("./finetuned-ultrasound-model")
image_processor.save_pretrained("./finetuned-ultrasound-model")

print("✅ Training complete. Model saved to ./finetuned-ultrasound-model")
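After `pip install -r requirements.txt` and `huggingface-cli login` with a write token (needed because of `push_to_hub=True`), running `python train.py` fine-tunes the model and uploads it. A minimal smoke test of the saved checkpoint, assuming train.py has finished and `./finetuned-ultrasound-model` exists:

from transformers import AutoImageProcessor, AutoModelForImageClassification
from datasets import load_dataset
import torch

model_dir = "./finetuned-ultrasound-model"  # written by train.py above
processor = AutoImageProcessor.from_pretrained(model_dir)
model = AutoModelForImageClassification.from_pretrained(model_dir)
model.eval()

ds = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset", split="train")
sample = ds[0]
inputs = processor(sample["image"].convert("RGB"), return_tensors="pt")
with torch.no_grad():
    pred = model(**inputs).logits.argmax(-1).item()
print("predicted:", model.config.id2label[pred],
      "| true:", ds.features["label"].names[sample["label"]])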