import pandas as pd
import json
import os
import numpy as np
import uuid
from utils.constants import (NEW_SUBMISSION_FOLDER, CSV_FILE, JSON_FILE, DIMENSIONS, 
                        NEW_SUBMISSION_COLUMN_INFO, NEW_SUBMISSION_COLUMN_NAMES,
                        JSON_FORMAT, MODEL_INFO_FILE, RESULTS_DIR)


def check_correct_file_type(folder_contents) -> bool:
    """ 
        checks that folder has 2 items: a csv file and a json file
    """
    contains_correct_files = (len(folder_contents) == 2) and (CSV_FILE in folder_contents) and (JSON_FILE in folder_contents)
    if not contains_correct_files:
        print("\nInput Validation Error: Please check that the {NEW_SUBMISSION_FOLDER} contains the files: \
              {CSV_FILE} and {JSON_FILE}")
        return False
    return True


def check_csv_columns_datatypes() -> bool:
    """ 
        checks that csv file has only required columns and columns have correct data types
    """
    #check for correct columns
    csv_data = pd.read_csv(f"{NEW_SUBMISSION_FOLDER}/{CSV_FILE}")
    submitted_csv_column_names = set(csv_data.columns)
    expected_column_names = set(NEW_SUBMISSION_COLUMN_NAMES)
    for item in expected_column_names:
        if item not in submitted_csv_column_names:
            print(f"The following column is missing: {item}")
    correct_columns = expected_column_names.issubset(submitted_csv_column_names)
    if not correct_columns:
        print(f"\nInput Validation Error: Please ensure that the csv file contains the following columns: {NEW_SUBMISSION_COLUMN_NAMES}")
    
    #check for correct dtype
    correct_dtypes = []
    for col in NEW_SUBMISSION_COLUMN_INFO["string_cols"]:
        if col in csv_data.columns:
            print(f"{col} is string/object:{pd.api.types.is_object_dtype(csv_data[col])}")
            correct_dtypes.append(pd.api.types.is_object_dtype(csv_data[col]))
    for col in NEW_SUBMISSION_COLUMN_INFO["integer_cols"]:
        if col in csv_data.columns:
            print(f"{col} is numeric: {pd.api.types.is_numeric_dtype(csv_data[col])}")
            correct_dtypes.append(pd.api.types.is_numeric_dtype(csv_data[col]))
    for col in NEW_SUBMISSION_COLUMN_INFO["float_cols"]:
        if col in csv_data.columns:
            print(f"{col} is numeric: {pd.api.types.is_numeric_dtype(csv_data[col])}")
            correct_dtypes.append(pd.api.types.is_numeric_dtype(csv_data[col]))
    correct_dtypes = all(correct_dtypes)
    if not correct_dtypes:
        print(f"\nInput Validation Error: Please ensure that the csv columns have the correct datatypes as follows: \n\
                string/object type columns: {NEW_SUBMISSION_COLUMN_INFO['string_cols']} \n\
                numeric/integer type columns: {NEW_SUBMISSION_COLUMN_INFO['integer_cols']}\
                                                {NEW_SUBMISSION_COLUMN_INFO['float_cols']}")
    return correct_columns, correct_dtypes 


def check_correct_entries_per_dataset(required_seeds: int = 10) -> bool:
    """
        checks for correct number of runs per backbone/dataset combination
        checks for required number of unique seeds
    """
    csv_data = pd.read_csv(f"{NEW_SUBMISSION_FOLDER}/{CSV_FILE}")
    count_values = csv_data.groupby(["backbone", "dataset"]).count()
    count_values = list(set(count_values["test metric"].tolist()))
    correct_num_values = (len(count_values) == 1) and (count_values[0] == required_seeds)
    if not correct_num_values:
        print(f"\nInput Validation Error: Please ensure that each backbone/dataset combination has {required_seeds} entries")

    count_seeds = csv_data.groupby(["backbone", "dataset"]).nunique()
    count_seeds = list(set(count_seeds["Seed"].tolist()))
    correct_num_seeds = (len(count_seeds) == 1) and (count_seeds[0] == required_seeds)
    if not correct_num_seeds:
        print(f"\nInput Validation Warning: Please ensure that each backbone/dataset combination has {required_seeds} unique seeds")
    return correct_num_values, correct_num_seeds


def check_json_keys() -> bool:
    """ 
        checks json file has required keys and subkeys, 
        check json file values have correct data type
    """
    with open(f"{NEW_SUBMISSION_FOLDER}/{JSON_FILE}") as f:
        json_submission_data = json.load(f)
    #TBD: check json file nested values have correct data type
    all_required_keys = []
    for key, value in JSON_FORMAT.items():
        if (key in json_submission_data) and (type(value) == type(json_submission_data[key])): 
            all_required_keys.append(True)
        else:
            all_required_keys.append(False)
    all_required_keys = all(all_required_keys)
    if not all_required_keys:
        print(f"\nInput Validation Error: Please ensure that json file has the correct keys and datatypes")
    
    return all_required_keys


def check_has_atleast_one_dimension() -> bool:
    """
        check that submission contains datasets required for at least one submission
    """
    csv_data = pd.read_csv(f"{NEW_SUBMISSION_FOLDER}/{CSV_FILE}")
    submitted_csv_datasets = set(csv_data["dataset"].tolist())
    contains_atleast_one_dimension = []
    for dimension, datasets in DIMENSIONS.items():
        datasets = set(datasets)
        contains_atleast_one_dimension.append(datasets.issubset(submitted_csv_datasets))
    contains_atleast_one_dimension = any(contains_atleast_one_dimension)
    if not contains_atleast_one_dimension:
        print("\nInput Validation Error: Please check that the submission contains all datasets for one or more dimensions")
    return contains_atleast_one_dimension


def update_new_backbones_and_models():
    """
        checks if backbone exists in model_info.json (used to display results)
        if not, information on the new model is added to the json file
    """
    with open(f"{NEW_SUBMISSION_FOLDER}/{JSON_FILE}") as f:
        json_submission_data = json.load(f)

    #read model info
    with open(MODEL_INFO_FILE) as f:
        existing_model_info = json.load(f)
    for item in json_submission_data["New model info"]:
        submitted_backbone = item["unique_backbone_key"]
        if submitted_backbone not in existing_model_info["BACKBONE_NAMES"]:
            existing_model_info["BACKBONE_NAMES"][submitted_backbone] = item["model_display_name"]
            existing_model_info["MODEL_SIZE"][submitted_backbone] = item["model_size"]
    
    #save new information
    with open(MODEL_INFO_FILE, 'w') as fp:
        json.dump(existing_model_info, fp)


def validate_new_submission() -> bool:
    """
    
    """
    #get folder contents
    if not os.path.exists(NEW_SUBMISSION_FOLDER): return
    folder_contents = os.listdir(NEW_SUBMISSION_FOLDER)
    items_to_ignore = ['.DS_Store']
    for item in items_to_ignore:
        if item in folder_contents: folder_contents.remove(item)
    if len(folder_contents) == 0:
        print("no new submissions")
        return
    
    #check all conditions
    correct_file_type = check_correct_file_type(folder_contents)
    correct_columns, correct_dtypes = check_csv_columns_datatypes()
    correct_num_values, correct_num_seeds = check_correct_entries_per_dataset()
    correct_json_keys = check_json_keys()
    contains_atleast_one_dimension = check_has_atleast_one_dimension()
    all_checks_passed = all([correct_file_type, correct_columns, correct_dtypes,
                            correct_json_keys, correct_num_values, #correct_num_seeds,
                            contains_atleast_one_dimension])
    
    if all_checks_passed:
        submission_id = uuid.uuid4()
        os.makedirs(f"{RESULTS_DIR}/{submission_id}")

        #copy only required keys in json file to new submission folder
        with open(f"{NEW_SUBMISSION_FOLDER}/{JSON_FILE}") as f:
            json_submission_data = json.load(f)
        new_dict = {}
        for key, value in JSON_FORMAT.items():
            if value == "TBD": continue
            new_dict[key] = json_submission_data[key]
        with open(f"{RESULTS_DIR}/{submission_id}/{JSON_FILE}", 'w') as fp:
            json.dump(new_dict, fp)

        #copy only required columns in csv file to new submission folder
        csv_data = pd.read_csv(f"{NEW_SUBMISSION_FOLDER}/{CSV_FILE}")
        csv_data = csv_data[NEW_SUBMISSION_COLUMN_NAMES]
        csv_data.to_csv(f"{RESULTS_DIR}/{submission_id}/{CSV_FILE}", index=False)

        #add any new model info to model_info.json
        update_new_backbones_and_models()

        #reset NEW_SUBMISSION_FOLDER
        os.system(f"rm -r {NEW_SUBMISSION_FOLDER}/")
        os.makedirs(NEW_SUBMISSION_FOLDER)
        return
    else:
        print("\nThe new sumbission has not been formatted correctly. Please fix the errors above")
        raise ValueError

    
if __name__ == "__main__":
    validate_new_submission()