#!/usr/bin/env python3
"""
Helper script to download the full fine-tuned model files at build time for Hugging Face Spaces
"""
import os
import sys
import subprocess
import logging
from pathlib import Path
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Model configuration
MAIN_MODEL_ID = "Tonic/petite-elle-L-aime-3-sft"
LOCAL_MODEL_PATH = "./model"
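# The repo ID and target directory are hard-coded for this Space; point these at a
# different repository or path if the model or layout changes.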


def download_model():
    """Download the full fine-tuned model files to the local directory."""
    try:
        logger.info(f"Downloading full fine-tuned model from {MAIN_MODEL_ID}")

        # Create the local directory if it doesn't exist
        os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)

        # Use huggingface_hub to download the model files
        from huggingface_hub import hf_hub_download, list_repo_files

        # List all files in the repository
        all_files = list_repo_files(MAIN_MODEL_ID)

        # Skip the int4/ subfolder; everything else belongs to the main repository
        main_files = [f for f in all_files if not f.startswith("int4/")]
        logger.info(f"Found {len(main_files)} files in main repository")

        # Download each required file
        # NOTE: this list assumes the repo ships a single pytorch_model.bin at the
        # root (not sharded weights or *.safetensors); adjust it if the layout differs.
        required_files = [
            "config.json",
            "pytorch_model.bin",
            "tokenizer.json",
            "tokenizer_config.json",
            "special_tokens_map.json",
            "generation_config.json",
            "chat_template.jinja",
        ]

        downloaded_count = 0
        for file_name in required_files:
            if file_name in all_files:
                logger.info(f"Downloading {file_name}...")
                hf_hub_download(
                    repo_id=MAIN_MODEL_ID,
                    filename=file_name,
                    local_dir=LOCAL_MODEL_PATH,
                    # Ignored by recent huggingface_hub releases; kept for older versions
                    local_dir_use_symlinks=False,
                )
                logger.info(f"Downloaded {file_name}")
                downloaded_count += 1
            else:
                logger.warning(f"File {file_name} not found in main repository")

        logger.info(f"Downloaded {downloaded_count} out of {len(required_files)} required files")
        logger.info(f"Model downloaded successfully to {LOCAL_MODEL_PATH}")
        return True
    except Exception as e:
        logger.error(f"Error downloading model: {e}")
        return False


def check_model_files():
    """Check if required model files exist"""
    required_files = [
        "config.json",
        "pytorch_model.bin",
        "tokenizer.json",
        "tokenizer_config.json",
    ]

    missing_files = []
    for file in required_files:
        file_path = os.path.join(LOCAL_MODEL_PATH, file)
        if not os.path.exists(file_path):
            missing_files.append(file)

    if missing_files:
        logger.error(f"Missing model files: {missing_files}")
        return False

    logger.info("All required model files found")
    return True


def verify_model_integrity():
    """Verify that the downloaded model files are valid"""
    try:
        # Try to load the tokenizer to verify it's working
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL_PATH)
        logger.info("Tokenizer loaded successfully from local files")

        # Try to load the model config
        from transformers import AutoConfig
        config = AutoConfig.from_pretrained(LOCAL_MODEL_PATH)
        logger.info("Model config loaded successfully from local files")

        return True
    except Exception as e:
        logger.error(f"Error verifying model integrity: {e}")
        return False


def main():
    """Main function to download model at build time"""
    logger.info("Starting model download for Hugging Face Space...")

    # Check if model files already exist
    if check_model_files():
        logger.info("Model files already exist, verifying integrity...")
        if verify_model_integrity():
            logger.info("Model files verified successfully")
            return True
        else:
            logger.warning("Model files exist but failed integrity check, re-downloading...")

    # Download the model
    if download_model():
        logger.info("Model download completed successfully")

        # Verify the downloaded files
        if check_model_files() and verify_model_integrity():
            logger.info("Model download and verification completed successfully")
            return True
        else:
            logger.error("Model download completed but verification failed")
            return False
    else:
        logger.error("Model download failed")
        return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)
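
# Example build-time invocation (a sketch; the actual Space build configuration is
# not part of this file):
#   RUN python download_model.py        # e.g. in a Docker Space's Dockerfile
# so the model files are cached locally before the app starts.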