#!/usr/bin/env python3
"""
Advanced helper script to download the int4 model files using HfFileSystem
"""

import os
import sys
import logging
from pathlib import Path

from tqdm import tqdm

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration
MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
INT4_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft/int4"
LOCAL_MODEL_PATH = "./int4"


def get_file_info(fs, repo_path):
    """Get detailed information about files in the repository"""
    try:
        files = fs.ls(repo_path, detail=True)
        return [f for f in files if f['type'] == 'file']
    except Exception as e:
        logger.error(f"Error listing files in {repo_path}: {e}")
        return []


def download_with_progress(fs, remote_path, local_path, file_size):
    """Download a file with a progress bar"""
    try:
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        # Download with progress bar
        with tqdm(total=file_size, unit='B', unit_scale=True,
                  desc=os.path.basename(local_path)) as pbar:
            with fs.open(remote_path, 'rb') as remote_file:
                with open(local_path, 'wb') as local_file:
                    chunk_size = 8192
                    while True:
                        chunk = remote_file.read(chunk_size)
                        if not chunk:
                            break
                        local_file.write(chunk)
                        pbar.update(len(chunk))

        return True
    except Exception as e:
        logger.error(f"Error downloading {remote_path}: {e}")
        return False


def download_model_advanced():
    """Download the int4 model files using advanced HfFileSystem features"""
    try:
        logger.info(f"Downloading int4 model from {INT4_MODEL_ID}")

        # Create local directory if it doesn't exist
        os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)

        # Use HfFileSystem for downloading
        from huggingface_hub import HfFileSystem

        # Initialize the file system
        fs = HfFileSystem()

        # Check if the repository path exists
        if not fs.exists(INT4_MODEL_ID):
            logger.error(f"Repository path {INT4_MODEL_ID} does not exist")
            return False

        # Get file information
        files = get_file_info(fs, INT4_MODEL_ID)
        if not files:
            logger.error("No files found in repository")
            return False

        # Filter essential model files
        essential_files = [
            'config.json',
            'pytorch_model.bin',
            'tokenizer.json',
            'tokenizer_config.json',
            'special_tokens_map.json',
            'generation_config.json'
        ]

        files_to_download = []
        for file_info in files:
            file_name = os.path.basename(file_info['name'])
            if file_name in essential_files:
                files_to_download.append(file_info)

        logger.info(f"Found {len(files_to_download)} essential files to download")

        # Download each file
        successful_downloads = 0
        for file_info in files_to_download:
            file_path = file_info['name']
            file_name = os.path.basename(file_path)
            local_file_path = os.path.join(LOCAL_MODEL_PATH, file_name)
            file_size = file_info.get('size', 0)

            logger.info(f"Downloading {file_name} ({file_size} bytes)...")

            # Download the file with progress
            if download_with_progress(fs, file_path, local_file_path, file_size):
                successful_downloads += 1
                logger.info(f"Successfully downloaded {file_name}")
            else:
                logger.error(f"Failed to download {file_name}")

        logger.info(f"Downloaded {successful_downloads}/{len(files_to_download)} files")
        return successful_downloads == len(files_to_download)

    except Exception as e:
        logger.error(f"Error downloading model: {e}")
        return False


def verify_download_advanced():
    """Advanced verification of downloaded model files"""
    try:
        logger.info("Verifying downloaded model files...")

        # Expected file sizes: (min_size, max_size) in bytes (approximate)
        expected_files = {
            "config.json": (1000, 10000),
            "pytorch_model.bin": (1000000, 5000000000),  # Should be several MB or more
            "tokenizer.json": (10000, 1000000),          # Should be several KB
            "tokenizer_config.json": (100, 10000),
            "special_tokens_map.json": (100, 10000),
            "generation_config.json": (100, 10000)
        }

        verification_results = []
        for file_name, (min_size, max_size) in expected_files.items():
            file_path = os.path.join(LOCAL_MODEL_PATH, file_name)
            if os.path.exists(file_path):
                actual_size = os.path.getsize(file_path)
                if min_size <= actual_size <= max_size:
                    logger.info(f"✅ {file_name} verified ({actual_size} bytes)")
                    verification_results.append(True)
                else:
                    logger.warning(f"⚠️ {file_name} size unexpected ({actual_size} bytes)")
                    verification_results.append(False)
            else:
                logger.error(f"❌ Missing {file_name}")
                verification_results.append(False)

        success_rate = sum(verification_results) / len(verification_results)
        logger.info(
            f"Verification complete: {sum(verification_results)}/{len(verification_results)} files valid"
        )

        return success_rate >= 0.8  # Allow 20% tolerance
    except Exception as e:
        logger.error(f"Error verifying files: {e}")
        return False


def check_model_files():
    """Check if required model files exist"""
    required_files = [
        "config.json",
        "pytorch_model.bin",
        "tokenizer.json",
        "tokenizer_config.json"
    ]

    missing_files = []
    for file in required_files:
        file_path = os.path.join(LOCAL_MODEL_PATH, file)
        if not os.path.exists(file_path):
            missing_files.append(file)

    if missing_files:
        logger.error(f"Missing model files: {missing_files}")
        return False

    logger.info("All required model files found")
    return True


def main():
    """Main function to download the model at build time"""
    logger.info("Starting advanced model download for Hugging Face Space...")

    # Check if model files already exist
    if check_model_files():
        logger.info("Model files already exist, skipping download")
        return True

    # Download the model using the advanced method
    if download_model_advanced():
        # Verify the download
        if verify_download_advanced():
            logger.info("Model download and verification completed successfully")
            return True
        else:
            logger.error("Model verification failed")
            return False
    else:
        logger.error("Model download failed")
        return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)