import gradio as gr
from datasets import load_dataset
import torch
from torchvision import transforms, models
from PIL import Image
from transformers import pipeline
import numpy as np
import random

# ---- 1. Load dataset ----
# gymprathap/Breast-Cancer-Ultrasound-Images-Dataset has 'train' split with 'image' and 'label'
dataset = load_dataset("gymprathap/Breast-Cancer-Ultrasound-Images-Dataset", split="train")

# Map numeric labels to readable classes
label_names = dataset.features["label"].names  # ['benign', 'malignant', 'normal']

# ---- 2. Define model (pretrained or real CNN) ----
clf = pipeline(
    "image-classification",
    model="hugging-science/sample-breast-cancer-classification",
)

# Transform for inference
# NOTE(review): nothing in the visible code calls `transform`; the pipeline
# above does its own preprocessing. Kept in case other code relies on it.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Lambda(lambda img: img.convert("RGB")),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Generic pipeline label ids -> readable class names. Built once at import
# time instead of on every prediction.
_LABEL_MAP = {
    "LABEL_0": "benign",
    "LABEL_1": "malignant",
    "LABEL_2": "normal",
}


# ---- 3. Utility: get image + run prediction ----
def predict_from_sample(sample_idx):
    """Classify one dataset row and describe the result.

    Args:
        sample_idx: Row index into ``dataset``; anything ``int()`` accepts
            (an ``int`` or a numeric string such as ``"12"``).

    Returns:
        A ``(image, caption)`` tuple: the row's image converted to RGB, and a
        Markdown string with the predicted label, its confidence score, the
        true label, and whether the two match.

    Raises:
        ValueError: If ``sample_idx`` cannot be converted to ``int``.
        IndexError: If the index is outside the dataset.
    """
    row = dataset[int(sample_idx)]
    # Convert once and reuse the same RGB image for inference and display.
    image = row["image"].convert("RGB")
    true_label = label_names[row["label"]]

    # Run Hugging Face pipeline; the first entry is used as the prediction
    # (presumably the highest-scoring class -- confirm against pipeline docs).
    top = clf(image)[0]
    raw_label = top["label"]
    pred_label = _LABEL_MAP.get(raw_label, raw_label)  # fallback to raw if missing
    conf = top["score"]

    # Format output
    correct = pred_label.lower() == true_label.lower()
    verdict = "✅ Correct" if correct else "❌ Incorrect"
    # Two trailing spaces before "\n" form a Markdown hard line break; with a
    # single space the three lines are rendered as one paragraph.
    caption = (
        f"**Predicted:** {pred_label} (confidence: {conf:.2f})  \n"
        f"**True Label:** {true_label}  \n"
        f"{verdict}"
    )
    return image, caption


# ---- 4.
# Build Gradio UI ----
N_SAMPLES = 30  # number of random samples to show
total = len(dataset)

# Cap the request at the dataset size: random.sample raises ValueError when
# asked for more items than the population holds.
random_indices = random.sample(range(total), min(N_SAMPLES, total))

# Read the label column directly so building the dropdown does not decode one
# image per sample (indexing a full row materialises the image as well).
_labels = dataset["label"]
# Each option is "<row index>: <class name>"; only the part before ":" is
# parsed back when a prediction is requested.
# To show the first 10 samples instead, use `random_indices = list(range(10))`.
sample_options = [f"{i}: {label_names[_labels[i]]}" for i in random_indices]

# Markdown bodies live at column 0 so no source indentation leaks into the
# rendered text (indented Markdown can turn into code blocks).
GETTING_STARTED_MD = """## Getting Started

Welcome to the Women's Longevity Hack demo! This app showcases a simple image classification model trained to identify breast cancer from ultrasound images. It also provides resources for exploring related datasets.

To build something like this yourself, check out the files in this repo:

- `train.py`: Code to fine-tune a Hugging Face vision model (e.g., ViT) on breast ultrasound images.
- `app.py`: Code for this Gradio app, which loads the model and dataset, and provides an interactive demo.

Don't be afraid to modify and experiment with the code! Or, ask ChatGPT to help you customize it for your own use case. If a generative model gives you something that doesn't work (especially if a flag is not recognized), I recommend telling the model you think it's hallucinating and checking its suggestions against the most recent documentation.
"""

MORE_DATASETS_MD = """
## 📚 Dataset Inspiration

If you want to explore other potentially interesting datasets, please check these out:

| Dataset | Modalities / Type | Description & Use Cases |
|---|---|---|
| [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset](https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset) | Images (ultrasound) + labels | Ultrasound images labeled as benign / malignant / normal. Useful for image classification, explainability (e.g., Grad-CAM), or multimodal fusion if metadata available. |
| [altaidevorg/women-health-mini](https://huggingface.co/datasets/altaidevorg/women-health-mini) | Mixed / tabular / survey (small) | A small women’s-health dataset for quick prototyping; good starting point for longevity-related feature exploration. |
| [HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p](https://huggingface.co/datasets/HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p) | Tabular / survey | U.S. behavioral risk factor data (demographics, behaviors, chronic-disease prevalence). Ideal for risk dashboards or feature importance demos. |
| [nguyenvy/cleaned_nhanes_1988_2018](https://huggingface.co/datasets/nguyenvy/cleaned_nhanes_1988_2018) | Tabular / biomarker + demographic | Cleaned NHANES dataset (1988-2018) with lab values, anthropometrics, and demographics. Useful for biological-age or biomarker-based longevity models. |
| [BoneMet/BoneMet](https://huggingface.co/datasets/BoneMet/BoneMet) | Biomedical / genomic / imaging | Dataset focused on bone-metastasis research; can support multimodal modeling combining clinical, imaging, and molecular data. |
| [AIBIC/MLOmics](https://huggingface.co/datasets/AIBIC/MLOmics) | Multi-omics / biomedical | Multi-omics resource (genomic, transcriptomic, proteomic) for biomedical discovery and precision-health modeling. |

If none of these is quite what you're looking for, you can also explore the [Datasets Semantic Search](https://huggingface.co/spaces/librarian-bots/huggingface-semantic-search) to find something more your speed.
"""


def _predict_selected(choice):
    """Run a prediction for the dropdown entry ``choice`` ("<index>: <label>").

    Returns an empty image and a hint instead of raising when nothing is
    selected (the dropdown value is ``None`` or empty).
    """
    if not choice:
        return None, "Please select a sample image first."
    return predict_from_sample(choice.split(":")[0])


# NOTE(review): the widget nesting below (which components sit inside the Row
# and inside the tab) was reconstructed from whitespace-collapsed source --
# confirm against the intended layout.
with gr.Blocks(title="Women's Longevity Hack") as demo:
    gr.Markdown("## 🩺 Women's Longevity Hack")

    with gr.Tabs():
        with gr.Tab("Getting Started"):
            gr.Markdown(GETTING_STARTED_MD)

        with gr.Tab("More Datasets"):
            gr.Markdown(MORE_DATASETS_MD)

        with gr.Tab("Classifier Demo"):
            with gr.Row():
                sample_selector = gr.Dropdown(
                    label="Select sample image",
                    choices=sample_options,
                    # Guard against an empty dataset, where there is no first option.
                    value=sample_options[0] if sample_options else None,
                )
                predict_btn = gr.Button("Run Prediction")

            image_output = gr.Image(label="Ultrasound Image")
            text_output = gr.Markdown(label="Prediction")

            predict_btn.click(
                fn=_predict_selected,
                inputs=sample_selector,
                outputs=[image_output, text_output],
            )

            gr.Markdown(
                "Dataset: [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset]"
                "(https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset)\n"
            )

# ---- 5. Launch app ----
if __name__ == "__main__":
    demo.launch()