cgeorgiaw (HF Staff) committed
Commit 49e2b29 · 1 Parent(s): 764fa80

first push

Files changed (3)
  1. app.py +127 -0
  2. requirements.txt +5 -0
  3. train.py +92 -0
app.py ADDED
@@ -0,0 +1,127 @@
import gradio as gr
from datasets import load_dataset
import torch
from torchvision import transforms, models
from PIL import Image
import numpy as np
import random


# ---- 1. Load dataset ----
# gymprathap/Breast-Cancer-Ultrasound-Images-Dataset has a 'train' split with 'image' and 'label'
dataset = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset", split="train")

# Map numeric labels to readable classes
label_names = dataset.features["label"].names  # ['benign', 'malignant', 'normal']

# ---- 2. Define model (mock pretrained or real CNN) ----
# For the MVP, load an ImageNet-pretrained ResNet18 and adapt its head
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = torch.nn.Linear(model.fc.in_features, len(label_names))
# Note: only the backbone is pretrained; the new classification head is
# randomly initialized (no fine-tuning), so predictions are for demo purposes only
model.eval()

# Transform for inference
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Lambda(lambda img: img.convert("RGB")),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],   # ImageNet normalization statistics
                         std=[0.229, 0.224, 0.225])
])

# ---- 3. Utility: get image + run prediction ----
def predict_from_sample(sample_idx):
    """Return image, prediction info, and true label."""
    row = dataset[int(sample_idx)]
    image = row["image"]
    true_label_idx = row["label"]
    true_label = label_names[true_label_idx]

    image_t = transform(image).unsqueeze(0)

    with torch.no_grad():
        logits = model(image_t)
        probs = torch.nn.functional.softmax(logits, dim=1).numpy().flatten()
    pred_idx = int(np.argmax(probs))
    pred_label = label_names[pred_idx]
    conf = probs[pred_idx]

    # Build output caption
    if pred_label == true_label:
        status = "✅ **Correct**"
    else:
        status = "❌ **Incorrect**"

    caption = (
        f"**Predicted:** {pred_label} (confidence: {conf:.2f})  \n"
        f"**True Label:** {true_label}  \n"
        f"{status}"
    )

    return image, caption
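
# Quick sanity check outside the UI (assuming the dataset and model above are loaded):
#     img, caption = predict_from_sample(0)
#     print(caption)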


# ---- 4. Build Gradio UI ----
N_SAMPLES = 10  # number of random samples to show
total = len(dataset)
random_indices = random.sample(range(total), N_SAMPLES)

sample_options = [f"{i}: {label_names[dataset[i]['label']]}" for i in random_indices]
# sample_options = [f"{i}: {label_names[dataset[i]['label']]}" for i in range(10)]  # first 10 samples

with gr.Blocks(title="Women's Longevity Hack") as demo:
    gr.Markdown("## 🩺 Women's Longevity Hack")

    with gr.Tabs():
        with gr.Tab("Getting Started"):
            gr.Markdown(
                "## Getting Started\n"
            )

        with gr.Tab("More Datasets"):
            gr.Markdown(
                """
## 📚 Dataset Inspiration

| Dataset | Modalities / Type | Description & Use Cases |
|---|---|---|
| [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset](https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset) | Images (ultrasound) + labels | Ultrasound images labeled as benign / malignant / normal. Useful for image classification, explainability (e.g., Grad-CAM), or multimodal fusion if metadata is available. |
| [altaidevorg/women-health-mini](https://huggingface.co/datasets/altaidevorg/women-health-mini) | Mixed / tabular / survey (small) | A small women's-health dataset for quick prototyping; a good starting point for longevity-related feature exploration. |
| [HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p](https://huggingface.co/datasets/HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p) | Tabular / survey | U.S. behavioral risk factor data (demographics, behaviors, chronic-disease prevalence). Ideal for risk dashboards or feature-importance demos. |
| [nguyenvy/cleaned_nhanes_1988_2018](https://huggingface.co/datasets/nguyenvy/cleaned_nhanes_1988_2018) | Tabular / biomarker + demographic | Cleaned NHANES dataset (1988-2018) with lab values, anthropometrics, and demographics. Useful for biological-age or biomarker-based longevity models. |
| [BoneMet/BoneMet](https://huggingface.co/datasets/BoneMet/BoneMet) | Biomedical / genomic / imaging | Dataset focused on bone-metastasis research; can support multimodal modeling combining clinical, imaging, and molecular data. |
| [AIBIC/MLOmics](https://huggingface.co/datasets/AIBIC/MLOmics) | Multi-omics / biomedical | Multi-omics resource (genomic, transcriptomic, proteomic) for biomedical discovery and precision-health modeling. |
                """
            )

        with gr.Tab("Classifier Demo"):
            with gr.Row():
                sample_selector = gr.Dropdown(
                    label="Select sample image",
                    choices=sample_options,
                    value=sample_options[0],
                )
                predict_btn = gr.Button("Run Prediction")

            image_output = gr.Image(label="Ultrasound Image")
            text_output = gr.Markdown(label="Prediction")

            predict_btn.click(
                fn=lambda s: predict_from_sample(s.split(":")[0]),
                inputs=sample_selector,
                outputs=[image_output, text_output]
            )

    gr.Markdown(
        "Dataset: [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset]"
        "(https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset)\n"
        "Note: Model weights here are for demonstration only."
    )

# ---- 5. Launch app ----
if __name__ == "__main__":
    demo.launch()
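Since the app's classification head is randomly initialized, a natural follow-up is to load the checkpoint that train.py (below) pushes to the Hub. A sketch of what the model setup in app.py could become, assuming the training run has completed and the repo `hugging-science/sample-breast-cancer-classification` (train.py's `hub_model_id`) is accessible:

from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch

ckpt = "hugging-science/sample-breast-cancer-classification"  # pushed by train.py
processor = AutoImageProcessor.from_pretrained(ckpt)
ft_model = AutoModelForImageClassification.from_pretrained(ckpt)
ft_model.eval()

def predict_probs(image):
    """Replacement for the torchvision transform + ResNet forward pass above."""
    inputs = processor(image.convert("RGB"), return_tensors="pt")
    with torch.no_grad():
        logits = ft_model(**inputs).logits
    return torch.nn.functional.softmax(logits, dim=1).numpy().flatten()

The rest of predict_from_sample would stay the same, since the class order matches label_names.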
requirements.txt ADDED
@@ -0,0 +1,5 @@
torch
torchvision
datasets
gradio
pillow
numpy
transformers  # needed by train.py
evaluate      # needed by train.py
train.py ADDED
@@ -0,0 +1,92 @@
"""
train.py — Fine-tune a Hugging Face vision model (e.g., ViT) on breast ultrasound images
"""

from datasets import load_dataset
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    TrainingArguments,
    Trainer,
)
import evaluate
import numpy as np
import torch

# ---- 1. Load dataset ----
dataset = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset")

# Dataset info
labels = dataset["train"].features["label"].names
num_labels = len(labels)
print(f"Classes: {labels}")

# ---- 2. Preprocessing ----
checkpoint = "google/vit-base-patch16-224-in21k"  # choose your model
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

def transform_examples(examples):
    images = [img.convert("RGB") for img in examples["image"]]  # ensure 3-channel input
    inputs = image_processor(images, return_tensors="pt")
    inputs["labels"] = examples["label"]
    return inputs

prepared_ds = dataset.with_transform(transform_examples)

# Split dataset
splits = prepared_ds["train"].train_test_split(test_size=0.2, seed=42)
train_ds, val_ds = splits["train"], splits["test"]

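# (Optional) `with_transform` applies `transform_examples` lazily at access time. If the
# default collator fails to batch these examples, the common fine-tuning recipe is an
# explicit collate function passed as `data_collator` — a sketch, not required if
# training runs as-is:
#
#     def collate_fn(batch):
#         return {
#             "pixel_values": torch.stack([x["pixel_values"] for x in batch]),
#             "labels": torch.tensor([x["labels"] for x in batch]),
#         }
#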
# ---- 3. Load model ----
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label={i: name for i, name in enumerate(labels)},  # store readable class names in the config
    label2id={name: i for i, name in enumerate(labels)},
    ignore_mismatched_sizes=True,  # the new classification head's shape differs from the checkpoint's
)

# ---- 4. Metrics ----
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

def compute_metrics(eval_pred):
    logits, refs = eval_pred  # `refs`, to avoid shadowing the module-level `labels`
    preds = np.argmax(logits, axis=-1)
    acc = accuracy.compute(predictions=preds, references=refs)["accuracy"]
    f1_score = f1.compute(predictions=preds, references=refs, average="macro")["f1"]
    return {"accuracy": acc, "f1": f1_score}

# ---- 5. Training setup ----
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    learning_rate=5e-5,
    logging_dir="./logs",
    load_best_model_at_end=True,
    remove_unused_columns=False,  # keep the 'image' column for the on-the-fly transform
    push_to_hub=True,  # requires being logged in to the Hub with a write token
    hub_model_id="hugging-science/sample-breast-cancer-classification",
    report_to="none",
)

# ---- 6. Trainer ----
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=image_processor,  # deprecated in newer transformers; use processing_class=image_processor there
    compute_metrics=compute_metrics,
)

# ---- 7. Train ----
trainer.train()

# ---- 8. Save locally ----
model.save_pretrained("./finetuned-ultrasound-model")
image_processor.save_pretrained("./finetuned-ultrasound-model")

print("✅ Training complete. Model saved to ./finetuned-ultrasound-model")
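After `pip install -r requirements.txt` and `huggingface-cli login` with a write token (needed because of `push_to_hub=True`), running `python train.py` fine-tunes the model and uploads it. A minimal smoke test of the saved checkpoint, assuming train.py has finished and `./finetuned-ultrasound-model` exists:

from transformers import AutoImageProcessor, AutoModelForImageClassification
from datasets import load_dataset
import torch

model_dir = "./finetuned-ultrasound-model"  # written by train.py above
processor = AutoImageProcessor.from_pretrained(model_dir)
model = AutoModelForImageClassification.from_pretrained(model_dir)
model.eval()

ds = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset", split="train")
sample = ds[0]
inputs = processor(sample["image"].convert("RGB"), return_tensors="pt")
with torch.no_grad():
    pred = model(**inputs).logits.argmax(-1).item()
print("predicted:", model.config.id2label[pred],
      "| true:", ds.features["label"].names[sample["label"]])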