"""Submission and evaluation helpers for the leaderboard Gradio app.

Uploads participant prediction CSVs (plus a sidecar metadata JSON) to the
submissions dataset, scores them against the held-out test set, and writes
per-endpoint metrics to the results dataset.
"""

import datetime
import io
import json
import tempfile
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download
from scipy.stats import kendalltau, spearmanr
from sklearn.metrics import mean_absolute_error, r2_score

from about import API, ENDPOINTS, results_repo, submissions_repo, test_repo


def _compact_dict(d: dict) -> dict:
    """Drop None/empty-string values; strip whitespace for strings."""
    out = {}
    for k, v in d.items():
        if isinstance(v, str):
            v = v.strip()
        if v not in (None, "", []):
            out[k] = v
    return out


def submit_data(predictions_file: str,
                user_state,
                *,
                participant_name: str = "",
                discord_username: str = "",
                email: str = "",
                affiliation: str = ""):
    """Validate a predictions CSV and upload it plus a metadata JSON.

    Parameters
    ----------
    predictions_file:
        Path to the uploaded CSV. Must contain a prediction column for every
        endpoint in ``ENDPOINTS``.
    user_state:
        Username/alias of the submitter; required.
    participant_name, discord_username, email, affiliation:
        Optional contact fields; empty values are dropped from the metadata.

    Returns
    -------
    tuple[str, str]
        A success message and the in-repo path of the uploaded CSV.

    Raises
    ------
    gr.Error
        On any validation or read failure. (The original returned a plain
        "❌ ..." string on CSV read errors, which was inconsistent with every
        other failure path and broke callers expecting a 2-tuple.)
    """
    if user_state is None:
        raise gr.Error("Username or alias is required for submission.")

    file_path = Path(predictions_file).resolve()
    if not file_path.exists():
        raise gr.Error("Uploaded file object does not have a valid file path.")

    # Read results file; raise gr.Error for consistency with the checks above.
    try:
        results_df = pd.read_csv(file_path)
    except Exception as e:
        raise gr.Error(f"Error reading results file: {e}")

    if results_df.empty:
        raise gr.Error("The uploaded file is empty.")

    missing = set(ENDPOINTS) - set(results_df.columns)
    if missing:
        raise gr.Error(
            "The uploaded file must contain all endpoint predictions; "
            f"missing columns: {sorted(missing)}"
        )

    # Build the destination filename. Colons are not valid in repo paths, so
    # replace them in the ISO timestamp; sanitize the username the same way.
    ts = (datetime.datetime.now(datetime.timezone.utc)
          .isoformat(timespec="seconds")
          .replace(":", "-"))
    # str() before strip(): user_state may not already be a str.
    safe_user = str(user_state).strip().replace("/", "_").replace(" ", "_")
    destination_csv = f"submissions/{safe_user}_{ts}.csv"
    destination_json = destination_csv.replace(".csv", ".json")

    # Upload the raw predictions CSV.
    API.upload_file(
        path_or_fileobj=str(file_path),
        path_in_repo=destination_csv,
        repo_id=submissions_repo,
        repo_type="dataset",
        commit_message=f"Add submission for {safe_user} at {ts}",
    )

    # Optional participant record (empty fields dropped).
    participant_fields = _compact_dict({
        "participant_name": participant_name,
        "discord_username": discord_username,
        "email": email,
        "affiliation": affiliation,
    })

    # Sidecar metadata JSON; ``evaluated`` is presumably flipped later by the
    # evaluation job (not visible here).
    meta = {
        "submission_time_utc": ts,
        "user": user_state,
        "original_filename": file_path.name,
        "evaluated": False,
        **participant_fields,  # merged here
    }
    meta_bytes = io.BytesIO(json.dumps(meta, indent=2).encode("utf-8"))
    API.upload_file(
        path_or_fileobj=meta_bytes,
        path_in_repo=destination_json,
        repo_id=submissions_repo,
        repo_type="dataset",
        commit_message=f"Add metadata for {user_state} submission at {ts}",
    )

    return ("✅ Your submission has been received! Your scores will appear on the leaderboard shortly.",
            destination_csv)


def evaluate_data(filename: str) -> None:
    """Score a submission against the test set and upload the results CSV.

    Downloads the submission CSV and its sibling metadata JSON from the
    submissions dataset, computes per-endpoint metrics, tags the result rows
    with the submitting user, and uploads them to the results dataset.

    Raises
    ------
    gr.Error
        If any download, the evaluation, or the metadata load fails; in that
        case nothing is written to the results dataset.
    """
    # Load the submission csv
    try:
        local_path = hf_hub_download(
            repo_id=submissions_repo,
            repo_type="dataset",
            filename=filename,
        )
    except Exception as e:
        raise gr.Error(f"Failed to download submission file: {e}")

    # Load the test set
    try:
        test_path = hf_hub_download(
            repo_id=test_repo,
            repo_type="dataset",
            filename="data/test_dataset.csv",
        )
    except Exception as e:
        raise gr.Error(f"Failed to download test file: {e}")

    data_df = pd.read_csv(local_path)
    test_df = pd.read_csv(test_path)

    try:
        results_df = calculate_metrics(data_df, test_df)
        if not isinstance(results_df, pd.DataFrame) or results_df.empty:
            raise gr.Error("Evaluation produced no results.")
    except Exception as e:
        raise gr.Error(f'Evaluation failed: {e}. No results written to results dataset.')

    # Load the metadata file to recover the submitting user and timestamp.
    meta_filename = filename.replace(".csv", ".json")
    try:
        meta_path = hf_hub_download(
            repo_id=submissions_repo,
            repo_type="dataset",
            filename=meta_filename,
        )
        with open(meta_path, "r", encoding="utf-8") as f:
            meta = json.load(f)
        username = meta.get("user")
        timestamp = meta.get("submission_time_utc")
    except Exception as e:
        raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")

    # Write results to results dataset
    results_df['user'] = username
    safe_user = str(username).replace("/", "_").replace(" ", "_")
    destination_path = f"results/{safe_user}_{timestamp}_results.csv"

    # Remove the temp file even if the upload raises (the original leaked it
    # on upload failure because unlink was not in a finally block).
    tmp_name = None
    try:
        with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as tmp:
            results_df.to_csv(tmp, index=False)
            tmp.flush()
            tmp_name = tmp.name
        API.upload_file(
            path_or_fileobj=tmp_name,
            path_in_repo=destination_path,
            repo_id=results_repo,
            repo_type="dataset",
            commit_message=f"Add result data for {username}",
        )
    finally:
        if tmp_name is not None:
            Path(tmp_name).unlink(missing_ok=True)


def calculate_metrics(
        results_dataframe: pd.DataFrame,
        test_dataframe: pd.DataFrame
):
    """Compute per-endpoint regression metrics for a submission.

    For each endpoint in ``ENDPOINTS``, predictions and ground truth are
    aligned by an inner merge on 'Molecule Name'. (The original dropped NaNs
    from each frame independently and sorted both by name, which misaligns or
    fails outright whenever the two frames cover different molecule sets.)

    Returns a DataFrame with one row per endpoint and columns
    ["endpoint", "MAE", "R2", "Spearman R", "Kendall's Tau"].
    """
    def _metrics_per_ep(pred, true):
        # MAE/R2 from sklearn; rank correlations from scipy (p-values unused).
        mae = mean_absolute_error(true, pred)
        r2 = r2_score(true, pred)
        spr, _ = spearmanr(true, pred)
        ktau, _ = kendalltau(true, pred)
        return mae, r2, spr, ktau

    rows = []
    for measurement in ENDPOINTS:
        df_pred = results_dataframe[['Molecule Name', measurement]].dropna()
        df_true = test_dataframe[['Molecule Name', measurement]].dropna()
        # Inner merge guarantees pred/true refer to the same molecules in the
        # same order, regardless of row order or coverage of either input.
        merged = df_pred.merge(df_true, on='Molecule Name',
                               suffixes=("_pred", "_true"))
        pred = merged[f"{measurement}_pred"]
        true = merged[f"{measurement}_true"]
        mae, r2, spearman, ktau = _metrics_per_ep(pred, true)
        rows.append({
            "endpoint": measurement,
            "MAE": mae,
            "R2": r2,
            "Spearman R": spearman,
            "Kendall's Tau": ktau,
        })
    return pd.DataFrame(rows, columns=["endpoint", "MAE", "R2",
                                       "Spearman R", "Kendall's Tau"])