Spaces: Running on Zero
#!/usr/bin/env python3
"""
Helper script to download the int4 model files at build time for Hugging Face Spaces
"""
import os
import sys
import subprocess
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model configuration
MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
INT4_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft/int4"
# Target directory for the downloaded int4 artifacts, relative to the CWD.
LOCAL_MODEL_PATH = "./int4"
def download_model():
    """Download the int4 model snapshot into LOCAL_MODEL_PATH.

    Returns:
        bool: True when the snapshot download completed, False on any error
        (the error is logged rather than raised so the build can decide
        what to do with a failed download).
    """
    try:
        logger.info(f"Downloading int4 model from {INT4_MODEL_ID}")
        # Create local directory if it doesn't exist
        os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)

        # Imported lazily so the module can be imported even when
        # huggingface_hub is not installed; the failure surfaces here
        # as a logged error instead of an import-time crash.
        from huggingface_hub import snapshot_download

        # NOTE(review): HF repo ids are "namespace/name"; the trailing
        # "/int4" in INT4_MODEL_ID looks like a subfolder and is rejected
        # by snapshot_download as an invalid repo id. If the int4 weights
        # live in a subfolder of the main repo, this should be
        # repo_id=MAIN_MODEL_ID with allow_patterns=["int4/*"] —
        # TODO confirm the actual repo layout.
        # (Removed local_dir_use_symlinks=False: deprecated and ignored
        # by current huggingface_hub; local_dir now always copies files.)
        snapshot_download(
            repo_id=INT4_MODEL_ID,
            local_dir=LOCAL_MODEL_PATH,
            # Skip docs, notebooks and code — only model artifacts needed.
            ignore_patterns=["*.md", "*.txt", "*.git*", "*.ipynb", "*.py"],
        )
        logger.info(f"Model downloaded successfully to {LOCAL_MODEL_PATH}")
        return True
    except Exception as e:
        logger.error(f"Error downloading model: {e}")
        return False
def check_model_files():
    """Check that every required model artifact exists locally.

    Returns:
        bool: True when all required files are present under
        LOCAL_MODEL_PATH, False otherwise (missing names are logged).
    """
    required_files = (
        "config.json",
        "pytorch_model.bin",
        "tokenizer.json",
        "tokenizer_config.json",
    )
    # Gather every absent file so the log shows the full picture at once.
    missing_files = [
        name
        for name in required_files
        if not os.path.exists(os.path.join(LOCAL_MODEL_PATH, name))
    ]
    if missing_files:
        logger.error(f"Missing model files: {missing_files}")
        return False
    logger.info("All required model files found")
    return True
def verify_model_integrity():
    """Sanity-check the local model by loading its tokenizer and config.

    Returns:
        bool: True when both the tokenizer and the model config load from
        LOCAL_MODEL_PATH, False on any failure (logged, not raised).
    """
    try:
        # Lazy import keeps transformers off the critical path until
        # verification is actually requested.
        from transformers import AutoConfig, AutoTokenizer

        # Loading is the check itself — the objects are not used further.
        AutoTokenizer.from_pretrained(LOCAL_MODEL_PATH)
        logger.info("Tokenizer loaded successfully from local files")

        AutoConfig.from_pretrained(LOCAL_MODEL_PATH)
        logger.info("Model config loaded successfully from local files")
        return True
    except Exception as e:
        logger.error(f"Error verifying model integrity: {e}")
        return False
def main():
    """Ensure a verified local copy of the int4 model exists.

    Reuses an existing intact copy when possible; otherwise downloads
    and re-verifies.

    Returns:
        bool: True when the model is present and passes verification.
    """
    logger.info("Starting model download for Hugging Face Space...")

    # Fast path: an existing local copy that passes verification.
    if check_model_files():
        logger.info("Model files already exist, verifying integrity...")
        if verify_model_integrity():
            logger.info("Model files verified successfully")
            return True
        logger.warning("Model files exist but failed integrity check, re-downloading...")

    # Slow path: (re-)download, then verify what landed on disk.
    if not download_model():
        logger.error("Model download failed")
        return False
    logger.info("Model download completed successfully")

    if check_model_files() and verify_model_integrity():
        logger.info("Model download and verification completed successfully")
        return True
    logger.error("Model download completed but verification failed")
    return False
if __name__ == "__main__":
    # Exit status signals build success/failure to the Space build pipeline.
    sys.exit(0 if main() else 1)