| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.05790622665655238, | |
| "eval_steps": 500, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002895311332827619, | |
| "grad_norm": 1.7338896989822388, | |
| "learning_rate": 4.985523443335862e-05, | |
| "loss": 3.1507, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.005790622665655238, | |
| "grad_norm": 1.2579742670059204, | |
| "learning_rate": 4.9710468866717244e-05, | |
| "loss": 2.6357, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.008685933998482857, | |
| "grad_norm": 0.9996505975723267, | |
| "learning_rate": 4.956570330007586e-05, | |
| "loss": 2.4288, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.011581245331310476, | |
| "grad_norm": 0.9657206535339355, | |
| "learning_rate": 4.942093773343448e-05, | |
| "loss": 2.2119, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.014476556664138095, | |
| "grad_norm": 1.044640064239502, | |
| "learning_rate": 4.92761721667931e-05, | |
| "loss": 2.1225, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.017371867996965714, | |
| "grad_norm": 0.8428456783294678, | |
| "learning_rate": 4.913140660015172e-05, | |
| "loss": 2.0409, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.02026717932979333, | |
| "grad_norm": 1.2827749252319336, | |
| "learning_rate": 4.898664103351033e-05, | |
| "loss": 1.9031, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.023162490662620952, | |
| "grad_norm": 0.9291247129440308, | |
| "learning_rate": 4.884187546686895e-05, | |
| "loss": 1.8438, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.02605780199544857, | |
| "grad_norm": 0.7757951617240906, | |
| "learning_rate": 4.8697109900227575e-05, | |
| "loss": 1.7833, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.02895311332827619, | |
| "grad_norm": 1.520269751548767, | |
| "learning_rate": 4.855234433358619e-05, | |
| "loss": 1.6942, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.03184842466110381, | |
| "grad_norm": 1.2558553218841553, | |
| "learning_rate": 4.840757876694481e-05, | |
| "loss": 1.6669, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.03474373599393143, | |
| "grad_norm": 1.3526251316070557, | |
| "learning_rate": 4.826281320030343e-05, | |
| "loss": 1.6115, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.037639047326759045, | |
| "grad_norm": 2.5751099586486816, | |
| "learning_rate": 4.811804763366205e-05, | |
| "loss": 1.5629, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.04053435865958666, | |
| "grad_norm": 1.3569105863571167, | |
| "learning_rate": 4.797328206702067e-05, | |
| "loss": 1.5258, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.04342966999241429, | |
| "grad_norm": 1.7734428644180298, | |
| "learning_rate": 4.782851650037929e-05, | |
| "loss": 1.475, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.046324981325241904, | |
| "grad_norm": 0.7975415587425232, | |
| "learning_rate": 4.7683750933737905e-05, | |
| "loss": 1.437, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.04922029265806952, | |
| "grad_norm": 0.8590123057365417, | |
| "learning_rate": 4.753898536709653e-05, | |
| "loss": 1.4263, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.05211560399089714, | |
| "grad_norm": 1.1225152015686035, | |
| "learning_rate": 4.739421980045515e-05, | |
| "loss": 1.3528, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.05501091532372476, | |
| "grad_norm": 1.1342971324920654, | |
| "learning_rate": 4.7249454233813765e-05, | |
| "loss": 1.3576, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.05790622665655238, | |
| "grad_norm": 0.9445364475250244, | |
| "learning_rate": 4.710468866717238e-05, | |
| "loss": 1.3283, | |
| "step": 10000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 172693, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.180672512e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |