Spaces: Running on Zero
#!/usr/bin/env python3
"""
Helper script to download the int4 model files at build time for Hugging Face Spaces
"""
import os
import sys
import subprocess
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration
MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
INT4_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft/int4"
# Target directory for the downloaded int4 artifacts, relative to the CWD.
LOCAL_MODEL_PATH = "./int4"
def download_model():
    """Download the int4 model snapshot into LOCAL_MODEL_PATH.

    Returns:
        bool: True when the snapshot download completed, False on any error
        (the error is logged rather than raised so the build can decide
        what to do with a failed download).
    """
    try:
        logger.info(f"Downloading int4 model from {INT4_MODEL_ID}")
        # Create local directory if it doesn't exist
        os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)

        # Imported lazily so the module can be imported even when
        # huggingface_hub is not installed; the failure surfaces here
        # as a logged error instead of an import-time crash.
        from huggingface_hub import snapshot_download

        # NOTE(review): HF repo ids are "namespace/name"; the trailing
        # "/int4" in INT4_MODEL_ID looks like a subfolder and is rejected
        # by snapshot_download as an invalid repo id. If the int4 weights
        # live in a subfolder of the main repo, this should be
        # repo_id=MAIN_MODEL_ID with allow_patterns=["int4/*"] —
        # TODO confirm the actual repo layout.
        # (Removed local_dir_use_symlinks=False: deprecated and ignored
        # by current huggingface_hub; local_dir now always copies files.)
        snapshot_download(
            repo_id=INT4_MODEL_ID,
            local_dir=LOCAL_MODEL_PATH,
            # Skip docs, notebooks and code — only model artifacts needed.
            ignore_patterns=["*.md", "*.txt", "*.git*", "*.ipynb", "*.py"],
        )
        logger.info(f"Model downloaded successfully to {LOCAL_MODEL_PATH}")
        return True
    except Exception as e:
        logger.error(f"Error downloading model: {e}")
        return False
def check_model_files():
    """Check that every required model artifact exists locally.

    Returns:
        bool: True when all required files are present under
        LOCAL_MODEL_PATH, False otherwise (missing names are logged).
    """
    required_files = (
        "config.json",
        "pytorch_model.bin",
        "tokenizer.json",
        "tokenizer_config.json",
    )
    # Gather every absent file so the log shows the full picture at once.
    missing_files = [
        name
        for name in required_files
        if not os.path.exists(os.path.join(LOCAL_MODEL_PATH, name))
    ]
    if missing_files:
        logger.error(f"Missing model files: {missing_files}")
        return False
    logger.info("All required model files found")
    return True
def verify_model_integrity():
    """Sanity-check the local model by loading its tokenizer and config.

    Returns:
        bool: True when both the tokenizer and the model config load from
        LOCAL_MODEL_PATH, False on any failure (logged, not raised).
    """
    try:
        # Lazy import keeps transformers off the critical path until
        # verification is actually requested.
        from transformers import AutoConfig, AutoTokenizer

        # Loading is the check itself — the objects are not used further.
        AutoTokenizer.from_pretrained(LOCAL_MODEL_PATH)
        logger.info("Tokenizer loaded successfully from local files")

        AutoConfig.from_pretrained(LOCAL_MODEL_PATH)
        logger.info("Model config loaded successfully from local files")
        return True
    except Exception as e:
        logger.error(f"Error verifying model integrity: {e}")
        return False
def main():
    """Ensure a verified local copy of the int4 model exists.

    Reuses an existing intact copy when possible; otherwise downloads
    and re-verifies.

    Returns:
        bool: True when the model is present and passes verification.
    """
    logger.info("Starting model download for Hugging Face Space...")

    # Fast path: an existing local copy that passes verification.
    if check_model_files():
        logger.info("Model files already exist, verifying integrity...")
        if verify_model_integrity():
            logger.info("Model files verified successfully")
            return True
        logger.warning("Model files exist but failed integrity check, re-downloading...")

    # Slow path: (re-)download, then verify what landed on disk.
    if not download_model():
        logger.error("Model download failed")
        return False
    logger.info("Model download completed successfully")

    if check_model_files() and verify_model_integrity():
        logger.info("Model download and verification completed successfully")
        return True
    logger.error("Model download completed but verification failed")
    return False
if __name__ == "__main__":
    # Exit status signals build success/failure to the Space build pipeline.
    sys.exit(0 if main() else 1)