Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Test monitoring integration for real experiment | |
| """ | |
| import os | |
| import sys | |
| from pathlib import Path | |
| # Add the current directory to the path for imports | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
def test_monitoring_setup():
    """Run the monitoring integration checks in order, stopping at the first failure.

    Five checks are performed and each reports its outcome on stdout:
    importing the ``monitoring`` module, importing the Trackio API client,
    loading the balanced A100 training configuration, building a monitor
    from that configuration, and requesting a monitoring callback.

    Returns:
        bool: ``False`` as soon as a check raises an exception it handles
        (``ImportError`` for the two import checks, any ``Exception`` for the
        remaining ones); ``True`` when all five checks ran to completion.
    """
    # NOTE(review): the leading glyphs in the messages below look like
    # mis-encoded emoji; they are reproduced exactly as found -- confirm
    # against the original file's encoding before changing them.
    divider = "=" * 50

    print("π Testing Monitoring Integration")
    print(divider)

    # Check 1: the monitoring module must be importable. SmolLM3Monitor is
    # imported only to prove it exists; create_monitor_from_config is used
    # further down.
    try:
        from monitoring import SmolLM3Monitor, create_monitor_from_config
        print("β Monitoring module imported successfully")
    except ImportError as err:
        print(f"β Failed to import monitoring module: {err}")
        return False

    # Check 2: the Trackio API client must be importable (import-only probe).
    try:
        from trackio_api_client import TrackioAPIClient
        print("β Trackio API client imported successfully")
    except ImportError as err:
        print(f"β Failed to import Trackio API client: {err}")
        return False

    # Check 3: load the training configuration and echo its key fields.
    # The attribute reads stay inside the try so a missing field is reported
    # as a configuration failure rather than escaping as an exception.
    try:
        from config.train_smollm3_openhermes_fr_a100_balanced import get_config
        config = get_config("config/train_smollm3_openhermes_fr_a100_balanced.py")
        print("β Configuration loaded successfully")
        print(f" Model: {config.model_name}")
        print(f" Batch size: {config.batch_size}")
        print(f" Max iterations: {config.max_iters}")
        print(f" Enable tracking: {config.enable_tracking}")
        print(f" Trackio URL: {config.trackio_url}")
    except Exception as err:
        print(f"β Failed to load configuration: {err}")
        return False

    # Check 4: build a monitor from the configuration.
    try:
        # Override the tracking target and experiment name for this test run.
        config.trackio_url = "https://tonic-test-trackio-test.hf.space"
        config.experiment_name = "test_monitoring_integration"
        monitor = create_monitor_from_config(config)
        print("β Monitor created successfully")
        print(f" Experiment name: {monitor.experiment_name}")
        print(f" Enable tracking: {monitor.enable_tracking}")
        print(f" Log metrics: {monitor.log_metrics}")
        print(f" Log artifacts: {monitor.log_artifacts}")

        tracking_ready = monitor.enable_tracking and monitor.trackio_client
        if not tracking_ready:
            print(" β οΈ Trackio client not initialized")
        else:
            print("β Trackio client initialized")
            if not monitor.experiment_id:
                print(" β οΈ No experiment ID (will be created during training)")
            else:
                print(f" Experiment ID: {monitor.experiment_id}")
    except Exception as err:
        print(f"β Failed to create monitor: {err}")
        return False

    # Check 5: ask the monitor for its callback; a falsy result is reported
    # as "tracking disabled" and is not treated as a failure.
    try:
        hook = monitor.create_monitoring_callback()
        outcome = (
            "β Monitoring callback created successfully"
            if hook
            else " β οΈ No monitoring callback (tracking disabled)"
        )
        print(outcome)
    except Exception as err:
        print(f"β Failed to create callback: {err}")
        return False

    print("\n" + divider)
    print("π― Monitoring Integration Test Complete")
    print(divider)
    return True
def test_real_experiment_command():
    """Print the command line for the real experiment and what to expect from it.

    Nothing is executed: the function only writes the assembled command,
    a summary of what it will do, and the expected training parameters
    to stdout. Returns ``None``.
    """
    # NOTE(review): the leading glyphs in several messages look like
    # mis-encoded emoji; they are reproduced exactly as found -- confirm
    # against the original file's encoding before changing them.
    divider = "=" * 50

    print("\nπ Testing Real Experiment Command")
    print(divider)

    # Argument vector for the training launcher, shown space-joined below.
    command_parts = [
        "python", "run_a100_large_experiment.py",
        "--config", "config/train_smollm3_openhermes_fr_a100_balanced.py",
        "--experiment-name", "petit-elle-l-aime-3-balanced-real",
        "--output-dir", "./outputs/balanced-real",
        "--trackio-url", "https://tonic-test-trackio-test.hf.space",
    ]
    print("Command to run:")
    print(" ".join(command_parts))

    effects = (
        "β Load the balanced A100 configuration",
        "β Create a real experiment in Trackio",
        "β Log real training metrics every 25 steps",
        "β Save checkpoints every 2000 steps",
        "β Monitor progress in real-time",
    )
    print("\nThis command will:")
    for effect in effects:
        print(effect)

    expected_parameters = (
        " Model: HuggingFaceTB/SmolLM3-3B",
        " Batch size: 8",
        " Gradient accumulation: 16",
        " Effective batch size: 128",
        " Learning rate: 3.5e-6",
        " Max iterations: 18000",
        " Mixed precision: bf16",
        " Max sequence length: 12288",
    )
    print("\nExpected training parameters:")
    for parameter in expected_parameters:
        print(parameter)

    print("\n" + divider)
    print("π― Ready to run real experiment!")
    print(divider)
if __name__ == "__main__":
    # Script entry point: run the monitoring checks first and only show the
    # real-experiment command when they all pass.
    if test_monitoring_setup():
        test_real_experiment_command()
    else:
        print("\nβ Monitoring integration test failed. Please fix issues before running real experiment.")
        # Exit with a non-zero status so shells and CI pipelines can detect
        # the failure; previously the script always exited with status 0.
        sys.exit(1)