"""Gradio demo app for the Women's Longevity Hack.

Loads a breast-ultrasound image dataset and an image-classification pipeline
from the Hugging Face Hub, then serves a small tabbed UI: an introduction, a
list of related datasets, and a classifier demo that runs the model on
randomly chosen dataset samples.
"""
import gradio as gr
from datasets import load_dataset
import torch
from torchvision import transforms, models
from PIL import Image
from transformers import pipeline
import numpy as np
import random


# ---- 1. Load dataset ----
# Only the "train" split is loaded; rows are later read through their
# "image" and "label" fields.
DATASET_ID = "gymprathap/Breast-Cancer-Ultrasound-Images-Dataset"
dataset = load_dataset(DATASET_ID, split="train")

# Class names in label-index order, used to turn a row's integer label into
# readable text.
# NOTE(review): presumably ['benign', 'malignant', 'normal'] -- confirm
# against the dataset card.
label_feature = dataset.features["label"]
label_names = label_feature.names

# ---- 2. Define model (pretrained or real CNN) ----
MODEL_ID = "hugging-science/sample-breast-cancer-classification"
clf = pipeline(task="image-classification", model=MODEL_ID)

# Transform for inference: resize to 224x224, force three channels, convert to
# a tensor, then normalize each channel with the mean/std below.
# NOTE(review): `transform` is not referenced anywhere else in the visible
# file -- predictions go through `clf` directly. Confirm whether it is still
# needed.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]
_to_rgb = transforms.Lambda(lambda img: img.convert("RGB"))

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    _to_rgb,
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

# ---- 3. Utility: get image + run prediction ----
# Translates the generic "LABEL_<n>" names the pipeline may emit into class
# names. Labels that are not listed here are shown unchanged.
_PIPELINE_LABEL_TO_NAME = {
    "LABEL_0": "benign",
    "LABEL_1": "malignant",
    "LABEL_2": "normal",
}


def predict_from_sample(sample_idx):
    """Classify one dataset sample and describe the result.

    Args:
        sample_idx: Position of the sample in ``dataset``. The value is
            passed through ``int()``, so numeric strings such as ``"12"``
            are accepted.

    Returns:
        A ``(image, caption)`` tuple: the sample image converted to RGB, and
        a Markdown string showing the first prediction returned by the
        pipeline, its score, the true label, and whether the two match.
    """
    row = dataset[int(sample_idx)]
    true_label = label_names[row["label"]]

    # Convert once and reuse the same RGB image for inference and display.
    image = row["image"].convert("RGB")

    # Only the first entry of the pipeline output is reported.
    top = clf(image)[0]
    raw_label = top["label"]
    pred_label = _PIPELINE_LABEL_TO_NAME.get(raw_label, raw_label)
    conf = top["score"]

    # Case-insensitive comparison so "Benign" and "benign" count as equal.
    correct = pred_label.lower() == true_label.lower()
    caption = (
        f"**Predicted:** {pred_label} (confidence: {conf:.2f})  \n"
        f"**True Label:** {true_label}  \n"
        f"{'✅ Correct' if correct else '❌ Incorrect'}"
    )

    return image, caption

# ---- 4. Build Gradio UI ----
N_SAMPLES = 30  # maximum number of random samples to show
total = len(dataset)
# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request at the dataset size.
random_indices = random.sample(range(total), min(N_SAMPLES, total))
# For a fixed, reproducible list use e.g. random_indices = list(range(10)).

# Dropdown entries look like "<index>: <class name>". The click handler reads
# the index back from the text before the first colon, so the text after it is
# purely informational.
sample_options = [
    f"{i}: {label_names[dataset[i]['label']]}" for i in random_indices
]

def _run_selected_sample(selection):
    """Run the classifier for the entry chosen in the sample dropdown.

    Args:
        selection: Dropdown value of the form ``"<index>: <label>"``, or
            ``None``/empty when nothing is selected.

    Returns:
        The ``(image, caption)`` pair from ``predict_from_sample``, or
        ``(None, message)`` when there is no selection to run.
    """
    if not selection:
        # Nothing selected: return a hint instead of failing on None.
        return None, "Please select a sample image first."
    sample_idx, _, _ = selection.partition(":")
    return predict_from_sample(sample_idx)


with gr.Blocks(title="Women's Longevity Hack") as demo:
    gr.Markdown("## 🩺 Women's Longevity Hack")

    with gr.Tabs():
        # Tab 1: introduction and pointers to the files in the repo.
        with gr.Tab("Getting Started"):
            gr.Markdown(
                """## Getting Started
                
                Welcome to the Women's Longevity Hack demo! This app showcases a simple image classification model trained to identify breast cancer from ultrasound images. It also provides resources for exploring related datasets.

                To build something like this yourself, check out the files in this repo:
                - `train.py`: Code to fine-tune a Hugging Face vision model (e.g., ViT) on breast ultrasound images.
                - `app.py`: Code for this Gradio app, which loads the model and dataset, and provides an interactive demo.

                Don't be afraid to modify and experiment with the code! Or, ask ChatGPT to help you customize it for your own use case. If a generative model gives you something that doesn't work (especially if a flag is not recognized), I recommend telling the model you think it's hallucinating and checking its suggestions against the most recent documentation.
                """
            )

        # Tab 2: static table of related datasets.
        with gr.Tab("More Datasets"):
            gr.Markdown(
                """
                ## 📚 Dataset Inspiration


                If you want to explore other potentially interesting datasets, please check these out:

                | Dataset | Modalities / Type | Description & Use Cases |
                |---|---|---|
                | [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset](https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset) | Images (ultrasound) + labels | Ultrasound images labeled as benign / malignant / normal. Useful for image classification, explainability (e.g., Grad-CAM), or multimodal fusion if metadata available. |
                | [altaidevorg/women-health-mini](https://huggingface.co/datasets/altaidevorg/women-health-mini) | Mixed / tabular / survey (small) | A small women’s-health dataset for quick prototyping; good starting point for longevity-related feature exploration. |
                | [HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p](https://huggingface.co/datasets/HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p) | Tabular / survey | U.S. behavioral risk factor data (demographics, behaviors, chronic-disease prevalence). Ideal for risk dashboards or feature importance demos. |
                | [nguyenvy/cleaned_nhanes_1988_2018](https://huggingface.co/datasets/nguyenvy/cleaned_nhanes_1988_2018) | Tabular / biomarker + demographic | Cleaned NHANES dataset (1988-2018) with lab values, anthropometrics, and demographics. Useful for biological-age or biomarker-based longevity models. |
                | [BoneMet/BoneMet](https://huggingface.co/datasets/BoneMet/BoneMet) | Biomedical / genomic / imaging | Dataset focused on bone-metastasis research; can support multimodal modeling combining clinical, imaging, and molecular data. |
                | [AIBIC/MLOmics](https://huggingface.co/datasets/AIBIC/MLOmics) | Multi-omics / biomedical | Multi-omics resource (genomic, transcriptomic, proteomic) for biomedical discovery and precision-health modeling. |

                
                If none of these is quite what you're looking for, you can also explore the [Datasets Semantic Search](https://huggingface.co/spaces/librarian-bots/huggingface-semantic-search) to find something more your speed.
                """
            )

        # Tab 3: pick a sample, run the classifier, show image and verdict.
        with gr.Tab("Classifier Demo"):
            with gr.Row():
                sample_selector = gr.Dropdown(
                    label="Select sample image",
                    choices=sample_options,
                    # Guard against an empty option list instead of raising
                    # IndexError while the UI is being built.
                    value=sample_options[0] if sample_options else None,
                )
                predict_btn = gr.Button("Run Prediction")

            image_output = gr.Image(label="Ultrasound Image")
            text_output = gr.Markdown(label="Prediction")

            predict_btn.click(
                fn=_run_selected_sample,
                inputs=sample_selector,
                outputs=[image_output, text_output],
            )

            gr.Markdown(
                "Dataset: [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset]"
                "(https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset)\n"
            )

# ---- 5. Launch app ----
# Start the Gradio server only when this file is executed as a script, so
# importing the module does not launch the UI.
if __name__ == "__main__":
    demo.launch()