import gradio as gr
from datasets import load_dataset
import torch
from torchvision import transforms, models
from PIL import Image
from transformers import pipeline
import numpy as np
import random


# ---- 1. Load dataset ----
# Only the 'train' split is loaded; its rows are read through the 'image'
# and 'label' columns further down in this file.
DATASET_ID = "gymprathap/Breast-Cancer-Ultrasound-Images-Dataset"
dataset = load_dataset(DATASET_ID, split="train")

# Class names indexed by the numeric label stored in each row
# (expected: ['benign', 'malignant', 'normal']).
label_feature = dataset.features["label"]
label_names = label_feature.names

# ---- 2. Define model (pretrained or real CNN) ----
MODEL_ID = "hugging-science/sample-breast-cancer-classification"
clf = pipeline(task="image-classification", model=MODEL_ID)


def _to_rgb(img):
    """Return *img* converted to three-channel RGB."""
    return img.convert("RGB")


# Transform for inference: resize to 224x224, force RGB, convert to a tensor,
# then normalize each channel.
# NOTE(review): nothing in the visible code calls `transform`; the pipeline
# above is given PIL images directly.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.Lambda(_to_rgb),
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

# ---- 3. Utility: get image + run prediction ----
def predict_from_sample(sample_idx):
    """Classify one dataset sample and compare the result with its true label.

    Args:
        sample_idx: Row index into the module-level ``dataset``. Anything
            ``int()`` accepts works; the UI passes the numeric prefix of the
            selected dropdown entry as a string.

    Returns:
        A ``(image, caption)`` tuple: the sample image converted to RGB, and
        a Markdown caption giving the predicted label with its confidence,
        the true label, and whether the two match.
    """
    row = dataset[int(sample_idx)]
    # Convert once; the same RGB image feeds the classifier and is returned
    # for display.
    image = row["image"].convert("RGB")
    true_label = label_names[row["label"]]

    # Run Hugging Face pipeline; its first entry is used as the prediction.
    top_pred = clf(image)[0]
    raw_label = top_pred["label"]
    conf = top_pred["score"]

    # Translate generic "LABEL_<n>" ids into class names. A label that is
    # missing from the map is shown as-is.
    # NOTE(review): assumes the model's label ids follow the same
    # benign/malignant/normal order as the dataset -- confirm.
    label_map = {
        "LABEL_0": "benign",
        "LABEL_1": "malignant",
        "LABEL_2": "normal",
    }
    pred_label = label_map.get(raw_label, raw_label)

    # Format output
    correct = pred_label.lower() == true_label.lower()
    caption = (
        f"**Predicted:** {pred_label} (confidence: {conf:.2f})  \n"
        f"**True Label:** {true_label}  \n"
        f"{'✅ Correct' if correct else '❌ Incorrect'}"
    )

    return image, caption

# ---- 4. Build Gradio UI ----
N_SAMPLES = 30  # maximum number of random samples to offer in the dropdown
total = len(dataset)
# Clamp the request: random.sample raises ValueError when asked for more
# items than the population holds.
random_indices = random.sample(range(total), min(N_SAMPLES, total))

# Read the label column on its own so that building the option list does not
# have to load each sampled row's image.
all_labels = dataset["label"]

# Each option reads "<dataset index>: <class name>". The click handler parses
# the text before the first colon back into the dataset index.
sample_options = [f"{i}: {label_names[all_labels[i]]}" for i in random_indices]

# The app has three tabs: a welcome page, a list of related datasets, and the
# interactive classifier demo.
with gr.Blocks(title="Women's Longevity Hack") as demo:
    gr.Markdown("## 🩺 Women's Longevity Hack")
    
    with gr.Tabs():
        # Tab 1: static introduction pointing at the repo's train.py / app.py.
        with gr.Tab("Getting Started"):
            gr.Markdown(
                """## Getting Started
                
                Welcome to the Women's Longevity Hack demo! This app showcases a simple image classification model trained to identify breast cancer from ultrasound images. It also provides resources for exploring related datasets.

                To build something like this yourself, check out the files in this repo:
                - `train.py`: Code to fine-tune a Hugging Face vision model (e.g., ViT) on breast ultrasound images.
                - `app.py`: Code for this Gradio app, which loads the model and dataset, and provides an interactive demo.

                Don't be afraid to modify and experiment with the code! Or, ask ChatGPT to help you customize it for your own use case. If a generative model gives you something that doesn't work (especially if a flag is not recognized), I recommend telling the model you think it's hallucinating and checking its suggestions against the most recent documentation.
                """
            )

        # Tab 2: static Markdown table of other datasets worth exploring.
        with gr.Tab("More Datasets"):
            gr.Markdown(
                """
                ## 📚 Dataset Inspiration


                If you want to explore other potentially interesting datasets, please check these out:

                | Dataset | Modalities / Type | Description & Use Cases |
                |---|---|---|
                | [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset](https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset) | Images (ultrasound) + labels | Ultrasound images labeled as benign / malignant / normal. Useful for image classification, explainability (e.g., Grad-CAM), or multimodal fusion if metadata available. |
                | [altaidevorg/women-health-mini](https://huggingface.co/datasets/altaidevorg/women-health-mini) | Mixed / tabular / survey (small) | A small women’s-health dataset for quick prototyping; good starting point for longevity-related feature exploration. |
                | [HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p](https://huggingface.co/datasets/HHS-Official/behavioral-risk-factor-surveillance-system-brfss-p) | Tabular / survey | U.S. behavioral risk factor data (demographics, behaviors, chronic-disease prevalence). Ideal for risk dashboards or feature importance demos. |
                | [nguyenvy/cleaned_nhanes_1988_2018](https://huggingface.co/datasets/nguyenvy/cleaned_nhanes_1988_2018) | Tabular / biomarker + demographic | Cleaned NHANES dataset (1988-2018) with lab values, anthropometrics, and demographics. Useful for biological-age or biomarker-based longevity models. |
                | [BoneMet/BoneMet](https://huggingface.co/datasets/BoneMet/BoneMet) | Biomedical / genomic / imaging | Dataset focused on bone-metastasis research; can support multimodal modeling combining clinical, imaging, and molecular data. |
                | [AIBIC/MLOmics](https://huggingface.co/datasets/AIBIC/MLOmics) | Multi-omics / biomedical | Multi-omics resource (genomic, transcriptomic, proteomic) for biomedical discovery and precision-health modeling. |

                
                If none of these is quite what you're looking for, you can also explore the [Datasets Semantic Search](https://huggingface.co/spaces/librarian-bots/huggingface-semantic-search) to find something more your speed.
                """
            )


        # Tab 3: pick one of the pre-sampled rows and run the classifier on it.
        with gr.Tab("Classifier Demo"):
            with gr.Row():
                # Choices come from `sample_options`; the first one is
                # preselected.
                sample_selector = gr.Dropdown(
                    label="Select sample image",
                    choices=sample_options,
                    value=sample_options[0],
                )
                predict_btn = gr.Button("Run Prediction")

            # Output slots filled by the (image, caption) pair returned from
            # predict_from_sample.
            image_output = gr.Image(label="Ultrasound Image")
            text_output = gr.Markdown(label="Prediction")

            # Only the text before the first ':' of the selected option (the
            # dataset index) is handed to predict_from_sample.
            predict_btn.click(
                fn=lambda s: predict_from_sample(s.split(":")[0]),
                inputs=sample_selector,
                outputs=[image_output, text_output]
            )

            # The two adjacent string literals are concatenated into a single
            # Markdown link to the dataset page.
            gr.Markdown(
                "Dataset: [gymprathap/Breast-Cancer-Ultrasound-Images-Dataset]"
                "(https://huggingface.co/datasets/gymprathap/Breast-Cancer-Ultrasound-Images-Dataset)\n"
            )

# ---- 5. Launch app ----
# Launch only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()