#!/usr/bin/env python3
"""
SmolLM3 Fine-tuning Script for FlexAI Console
Based on the nanoGPT structure but adapted for the SmolLM3 model
"""
import os
import sys
import argparse
import json
import torch
import logging
from pathlib import Path
from typing import Optional, Dict, Any

# Add the current directory to the path for imports
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from config import get_config
from model import SmolLM3Model
from data import SmolLM3Dataset
from trainer import SmolLM3Trainer

def setup_logging():
    """Setup logging configuration"""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.StreamHandler(sys.stdout),
            logging.FileHandler('training.log')
        ]
    )
    return logging.getLogger(__name__)

def parse_args():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(description='SmolLM3 Fine-tuning Script')

    # Configuration file
    parser.add_argument('config', type=str, help='Path to configuration file')

    # Dataset arguments
    parser.add_argument('--dataset_dir', type=str, default='my_dataset',
                        help='Path to dataset directory within /input')

    # Checkpoint arguments
    parser.add_argument('--out_dir', type=str, default='/output-checkpoint',
                        help='Output directory for checkpoints')
    parser.add_argument('--init_from', type=str, default='scratch',
                        choices=['scratch', 'resume', 'pretrained'],
                        help='Initialization method')

    # Training arguments
    parser.add_argument('--max_iters', type=int, default=None,
                        help='Maximum number of training iterations')
    parser.add_argument('--batch_size', type=int, default=None,
                        help='Batch size for training')
    parser.add_argument('--learning_rate', type=float, default=None,
                        help='Learning rate')
    parser.add_argument('--gradient_accumulation_steps', type=int, default=None,
                        help='Gradient accumulation steps')

    # Model arguments
    parser.add_argument('--model_name', type=str,
                        default='HuggingFaceTB/SmolLM3-3B',
                        help='Model name or path')
    parser.add_argument('--max_seq_length', type=int, default=4096,
                        help='Maximum sequence length')

    # Logging and saving
    parser.add_argument('--save_steps', type=int, default=500,
                        help='Save checkpoint every N steps')
    parser.add_argument('--eval_steps', type=int, default=100,
                        help='Evaluate every N steps')
    parser.add_argument('--logging_steps', type=int, default=10,
                        help='Log every N steps')

    # Trackio monitoring arguments
    # With action='store_true' and default=True the flag could never be turned
    # off; BooleanOptionalAction (Python 3.9+) also provides --no-enable_tracking.
    parser.add_argument('--enable_tracking', action=argparse.BooleanOptionalAction,
                        default=True,
                        help='Enable Trackio experiment tracking')
    parser.add_argument('--trackio_url', type=str, default=None,
                        help='Trackio server URL')
    parser.add_argument('--trackio_token', type=str, default=None,
                        help='Trackio authentication token')
    parser.add_argument('--experiment_name', type=str, default=None,
                        help='Custom experiment name for tracking')

    return parser.parse_args()

def main():
    """Main training function"""
    args = parse_args()
    logger = setup_logging()

    logger.info("Starting SmolLM3 fine-tuning...")
    logger.info(f"Arguments: {vars(args)}")

    # Load configuration
    config = get_config(args.config)

    # Override config with command line arguments
    if args.max_iters is not None:
        config.max_iters = args.max_iters
    if args.batch_size is not None:
        config.batch_size = args.batch_size
    if args.learning_rate is not None:
        config.learning_rate = args.learning_rate
    if args.gradient_accumulation_steps is not None:
        config.gradient_accumulation_steps = args.gradient_accumulation_steps

    # Override Trackio configuration
    # enable_tracking is always a bool (see parse_args), so apply it directly
    config.enable_tracking = args.enable_tracking
    if args.trackio_url is not None:
        config.trackio_url = args.trackio_url
    if args.trackio_token is not None:
        config.trackio_token = args.trackio_token
    if args.experiment_name is not None:
        config.experiment_name = args.experiment_name

    # Setup paths
    output_path = args.out_dir

    # Ensure output directory exists
    os.makedirs(output_path, exist_ok=True)
    logger.info(f"Output path: {output_path}")

    # Initialize model
    model = SmolLM3Model(
        model_name=args.model_name,
        max_seq_length=args.max_seq_length,
        config=config
    )

    # Determine dataset path
    if hasattr(config, 'dataset_name') and config.dataset_name:
        # Use Hugging Face dataset
        dataset_path = config.dataset_name
        logger.info(f"Using Hugging Face dataset: {dataset_path}")
    else:
        # Use local dataset
        dataset_path = os.path.join('/input', args.dataset_dir)
        logger.info(f"Using local dataset: {dataset_path}")

    # Load dataset with filtering options
    dataset = SmolLM3Dataset(
        data_path=dataset_path,
        tokenizer=model.tokenizer,
        max_seq_length=args.max_seq_length,
        filter_bad_entries=getattr(config, 'filter_bad_entries', False),
        bad_entry_field=getattr(config, 'bad_entry_field', 'bad_entry')
    )

    # Initialize trainer
    trainer = SmolLM3Trainer(
        model=model,
        dataset=dataset,
        config=config,
        output_dir=output_path,
        init_from=args.init_from
    )

    # Start training
    try:
        trainer.train()
        logger.info("Training completed successfully!")
    except Exception as e:
        logger.error(f"Training failed: {e}")
        raise


if __name__ == '__main__':
    main()