#!/usr/bin/env python3
"""
Quantize and Push Script

Quantizes the uploaded model and pushes quantized versions to the same repository.
"""

import os
import sys
import logging
from pathlib import Path

# Add src to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def main():
    """Quantize and push the model"""
    # Configuration
    MODEL_PATH = "/output-checkpoint"
    REPO_NAME = "Tonic/smollm3-finetuned"
    HF_TOKEN = os.getenv('HF_TOKEN')

    if not HF_TOKEN:
        logger.error("HF_TOKEN not set")
        return 1

    if not Path(MODEL_PATH).exists():
        logger.error(f"Model path not found: {MODEL_PATH}")
        return 1

    logger.info("Model files validated")

    # Import and run quantization
    try:
        from scripts.model_tonic.quantize_model import ModelQuantizer

        # Quantization types to process
        quant_types = ["int8_weight_only", "int4_weight_only"]
        success_count = 0
        total_count = len(quant_types)

        for quant_type in quant_types:
            logger.info(f"Processing quantization type: {quant_type}")

            # Initialize quantizer
            quantizer = ModelQuantizer(
                model_path=MODEL_PATH,
                repo_name=REPO_NAME,
                token=HF_TOKEN,
                private=False,
                hf_token=HF_TOKEN
            )

            # Perform quantization and push
            success = quantizer.quantize_and_push(
                quant_type=quant_type,
                device="auto",
                group_size=128
            )

            if success:
                logger.info(f"{quant_type} quantization and push completed")
                success_count += 1
            else:
                logger.error(f"{quant_type} quantization and push failed")

        logger.info(f"Quantization summary: {success_count}/{total_count} successful")

        if success_count > 0:
            logger.info("Quantization completed successfully!")
            logger.info(f"View your models at: https://huggingface.co/{REPO_NAME}")
            logger.info("Quantized models available at:")
            logger.info(f"  - https://huggingface.co/{REPO_NAME}/int8 (GPU optimized)")
            logger.info(f"  - https://huggingface.co/{REPO_NAME}/int4 (CPU optimized)")
            return 0
        else:
            logger.error("All quantization attempts failed!")
            return 1

    except Exception as e:
        logger.error(f"Error during quantization: {e}")
        return 1
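
# ---------------------------------------------------------------------------
# Reference sketch (an assumption, not part of the original script): the
# quant_type strings used above, "int8_weight_only" and "int4_weight_only",
# match torchao's weight-only quantization configs, so a single
# quantize-and-push step could also be expressed directly with transformers'
# TorchAoConfig. The real ModelQuantizer in scripts/model_tonic may behave
# differently; the helper below is illustrative only and is never called.
def _reference_quantize_and_push(model_path: str, repo_name: str,
                                 quant_type: str = "int8_weight_only") -> None:
    """Hedged example: quantize a local checkpoint with torchao and push it to the Hub."""
    # Local import: torchao and a recent transformers release are assumed to be installed.
    from transformers import AutoModelForCausalLM, AutoTokenizer, TorchAoConfig

    # "int4_weight_only" also accepts a group_size argument (e.g. group_size=128).
    quant_config = TorchAoConfig(quant_type)

    # Loading with quantization_config quantizes the weights as they are loaded.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype="auto",
        device_map="auto",
        quantization_config=quant_config,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # torchao-quantized weights currently require non-safetensors serialization.
    model.push_to_hub(repo_name, safe_serialization=False)
    tokenizer.push_to_hub(repo_name)
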
if __name__ == "__main__":
    sys.exit(main())
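
# Usage note (the filename "quantize_and_push.py" is assumed here; the script
# only requires HF_TOKEN to be set in the environment):
#   export HF_TOKEN=hf_xxx
#   python quantize_and_push.py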