{
  "best_global_step": 100,
  "best_metric": 0.4209205210208893,
  "best_model_checkpoint": "results/Qwen2.5-3B-Instruct-SFT/checkpoint-100",
  "epoch": 0.9941582580987786,
  "eval_steps": 50,
  "global_step": 117,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0,
      "eval_loss": 1.0300904512405396,
      "eval_num_tokens": 0.0,
      "eval_runtime": 5.1949,
      "eval_samples_per_second": 3.85,
      "eval_steps_per_second": 1.925,
      "step": 0
    },
    {
      "epoch": 0,
      "eval_reward_final_answer": 0.0,
      "step": 0
    },
    {
      "epoch": 0,
      "eval_reward_response_format": 0.0,
      "step": 0
    },
    {
      "epoch": 0,
      "eval_reward_interaction": 0.0,
      "step": 0
    },
    {
      "epoch": 0.008497079129049389,
      "grad_norm": 2.155670404434204,
      "learning_rate": 0.0,
      "loss": 0.9504,
      "num_tokens": 221701.0,
      "step": 1
    },
    {
      "epoch": 0.016994158258098777,
      "grad_norm": 2.175981044769287,
      "learning_rate": 5e-06,
      "loss": 0.9609,
      "num_tokens": 443374.0,
      "step": 2
    },
    {
      "epoch": 0.025491237387148168,
      "grad_norm": 2.1068813800811768,
      "learning_rate": 1e-05,
      "loss": 0.9455,
      "num_tokens": 667777.0,
      "step": 3
    },
    {
      "epoch": 0.033988316516197555,
      "grad_norm": 2.1638681888580322,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.9521,
      "num_tokens": 887347.0,
      "step": 4
    },
    {
      "epoch": 0.04248539564524695,
      "grad_norm": 2.1004698276519775,
      "learning_rate": 2e-05,
      "loss": 0.9523,
      "num_tokens": 1111957.0,
      "step": 5
    },
    {
      "epoch": 0.050982474774296335,
      "grad_norm": 2.0184059143066406,
      "learning_rate": 1.9996582763224565e-05,
      "loss": 0.9102,
      "num_tokens": 1340874.0,
      "step": 6
    },
    {
      "epoch": 0.05947955390334572,
      "grad_norm": 2.0365166664123535,
      "learning_rate": 1.9986333647899847e-05,
      "loss": 0.9047,
      "num_tokens": 1553400.0,
      "step": 7
    },
    {
      "epoch": 0.06797663303239511,
      "grad_norm": 1.7037795782089233,
      "learning_rate": 1.9969260437060028e-05,
      "loss": 0.8468,
      "num_tokens": 1779751.0,
      "step": 8
    },
    {
      "epoch": 0.07647371216144451,
      "grad_norm": 1.5123393535614014,
      "learning_rate": 1.9945376095861546e-05,
      "loss": 0.818,
      "num_tokens": 1996995.0,
      "step": 9
    },
    {
      "epoch": 0.0849707912904939,
      "grad_norm": 1.132192611694336,
      "learning_rate": 1.991469876173753e-05,
      "loss": 0.7753,
      "num_tokens": 2228097.0,
      "step": 10
    },
    {
      "epoch": 0.09346787041954328,
      "grad_norm": 0.8642380237579346,
      "learning_rate": 1.9877251730624504e-05,
      "loss": 0.7495,
      "num_tokens": 2451216.0,
      "step": 11
    },
    {
      "epoch": 0.10196494954859267,
      "grad_norm": 0.6592623591423035,
      "learning_rate": 1.983306343927176e-05,
      "loss": 0.7353,
      "num_tokens": 2671449.0,
      "step": 12
    },
    {
      "epoch": 0.11046202867764206,
      "grad_norm": 0.5095056295394897,
      "learning_rate": 1.978216744364692e-05,
      "loss": 0.7199,
      "num_tokens": 2887598.0,
      "step": 13
    },
    {
      "epoch": 0.11895910780669144,
      "grad_norm": 0.43245795369148254,
      "learning_rate": 1.9724602393453976e-05,
      "loss": 0.7074,
      "num_tokens": 3102297.0,
      "step": 14
    },
    {
      "epoch": 0.12745618693574085,
      "grad_norm": 0.3649689853191376,
      "learning_rate": 1.9660412002783254e-05,
      "loss": 0.6916,
      "num_tokens": 3322488.0,
      "step": 15
    },
    {
      "epoch": 0.13595326606479022,
      "grad_norm": 0.32840588688850403,
      "learning_rate": 1.958964501691557e-05,
      "loss": 0.6907,
      "num_tokens": 3552428.0,
      "step": 16
    },
    {
      "epoch": 0.14445034519383962,
      "grad_norm": 0.30777254700660706,
      "learning_rate": 1.9512355175305713e-05,
      "loss": 0.6805,
      "num_tokens": 3779989.0,
      "step": 17
    },
    {
      "epoch": 0.15294742432288902,
      "grad_norm": 0.2842954695224762,
      "learning_rate": 1.9428601170773492e-05,
      "loss": 0.6681,
      "num_tokens": 4019860.0,
      "step": 18
    },
    {
      "epoch": 0.1614445034519384,
      "grad_norm": 0.28378936648368835,
      "learning_rate": 1.9338446604933218e-05,
      "loss": 0.6591,
      "num_tokens": 4243770.0,
      "step": 19
    },
    {
      "epoch": 0.1699415825809878,
      "grad_norm": 0.2757631242275238,
      "learning_rate": 1.9241959939895518e-05,
      "loss": 0.6637,
      "num_tokens": 4471904.0,
      "step": 20
    },
    {
      "epoch": 0.17843866171003717,
      "grad_norm": 0.2628677487373352,
      "learning_rate": 1.91392144462782e-05,
      "loss": 0.6455,
      "num_tokens": 4707414.0,
      "step": 21
    },
    {
      "epoch": 0.18693574083908657,
      "grad_norm": 0.2680025100708008,
      "learning_rate": 1.9030288147565547e-05,
      "loss": 0.6415,
      "num_tokens": 4934312.0,
      "step": 22
    },
    {
      "epoch": 0.19543281996813594,
      "grad_norm": 0.26622337102890015,
      "learning_rate": 1.89152637608584e-05,
      "loss": 0.635,
      "num_tokens": 5154540.0,
      "step": 23
    },
    {
      "epoch": 0.20392989909718534,
      "grad_norm": 0.25727367401123047,
      "learning_rate": 1.879422863405995e-05,
      "loss": 0.6281,
      "num_tokens": 5381802.0,
      "step": 24
    },
    {
      "epoch": 0.21242697822623474,
      "grad_norm": 0.2600906491279602,
      "learning_rate": 1.8667274679544944e-05,
      "loss": 0.6198,
      "num_tokens": 5600149.0,
      "step": 25
    },
    {
      "epoch": 0.22092405735528411,
      "grad_norm": 0.24757230281829834,
      "learning_rate": 1.8534498304362758e-05,
      "loss": 0.6131,
      "num_tokens": 5827367.0,
      "step": 26
    },
    {
      "epoch": 0.22942113648433352,
      "grad_norm": 0.2472054362297058,
      "learning_rate": 1.8396000337027208e-05,
      "loss": 0.6062,
      "num_tokens": 6047147.0,
      "step": 27
    },
    {
      "epoch": 0.2379182156133829,
      "grad_norm": 0.23867134749889374,
      "learning_rate": 1.8251885950948805e-05,
      "loss": 0.5917,
      "num_tokens": 6267160.0,
      "step": 28
    },
    {
      "epoch": 0.2464152947424323,
      "grad_norm": 0.23080606758594513,
      "learning_rate": 1.8102264584567543e-05,
      "loss": 0.588,
      "num_tokens": 6501872.0,
      "step": 29
    },
    {
      "epoch": 0.2549123738714817,
      "grad_norm": 0.22549813985824585,
      "learning_rate": 1.7947249858246888e-05,
      "loss": 0.5868,
      "num_tokens": 6739822.0,
      "step": 30
    },
    {
      "epoch": 0.2634094530005311,
      "grad_norm": 0.22806097567081451,
      "learning_rate": 1.7786959487992068e-05,
      "loss": 0.5726,
      "num_tokens": 6969353.0,
      "step": 31
    },
    {
      "epoch": 0.27190653212958044,
      "grad_norm": 0.23609699308872223,
      "learning_rate": 1.7621515196058188e-05,
      "loss": 0.5695,
      "num_tokens": 7196669.0,
      "step": 32
    },
    {
      "epoch": 0.28040361125862984,
      "grad_norm": 0.2319885790348053,
      "learning_rate": 1.7451042618516063e-05,
      "loss": 0.5592,
      "num_tokens": 7427812.0,
      "step": 33
    },
    {
      "epoch": 0.28890069038767924,
      "grad_norm": 0.2277105748653412,
      "learning_rate": 1.727567120984596e-05,
      "loss": 0.5453,
      "num_tokens": 7664616.0,
      "step": 34
    },
    {
      "epoch": 0.29739776951672864,
      "grad_norm": 0.24143275618553162,
      "learning_rate": 1.709553414463167e-05,
      "loss": 0.5381,
      "num_tokens": 7892289.0,
      "step": 35
    },
    {
      "epoch": 0.30589484864577804,
      "grad_norm": 0.23762071132659912,
      "learning_rate": 1.6910768216429613e-05,
      "loss": 0.5447,
      "num_tokens": 8125714.0,
      "step": 36
    },
    {
      "epoch": 0.3143919277748274,
      "grad_norm": 0.2455436736345291,
      "learning_rate": 1.6721513733889716e-05,
      "loss": 0.5234,
      "num_tokens": 8354957.0,
      "step": 37
    },
    {
      "epoch": 0.3228890069038768,
      "grad_norm": 0.25508517026901245,
      "learning_rate": 1.6527914414207012e-05,
      "loss": 0.5173,
      "num_tokens": 8575672.0,
      "step": 38
    },
    {
      "epoch": 0.3313860860329262,
      "grad_norm": 0.256209135055542,
      "learning_rate": 1.6330117273984822e-05,
      "loss": 0.5232,
      "num_tokens": 8795680.0,
      "step": 39
    },
    {
      "epoch": 0.3398831651619756,
      "grad_norm": 0.2554221451282501,
      "learning_rate": 1.6128272517592397e-05,
      "loss": 0.5019,
      "num_tokens": 9017987.0,
      "step": 40
    },
    {
      "epoch": 0.348380244291025,
      "grad_norm": 0.24708497524261475,
      "learning_rate": 1.5922533423101843e-05,
      "loss": 0.4874,
      "num_tokens": 9251233.0,
      "step": 41
    },
    {
      "epoch": 0.35687732342007433,
      "grad_norm": 0.2547791302204132,
      "learning_rate": 1.5713056225890904e-05,
      "loss": 0.4929,
      "num_tokens": 9483146.0,
      "step": 42
    },
    {
      "epoch": 0.36537440254912373,
      "grad_norm": 0.26357918977737427,
      "learning_rate": 1.55e-05,
      "loss": 0.4735,
      "num_tokens": 9707559.0,
      "step": 43
    },
    {
      "epoch": 0.37387148167817313,
      "grad_norm": 0.272932767868042,
      "learning_rate": 1.5283526537333664e-05,
      "loss": 0.4728,
      "num_tokens": 9927096.0,
      "step": 44
    },
    {
      "epoch": 0.38236856080722254,
      "grad_norm": 0.2784619629383087,
      "learning_rate": 1.5063800224798007e-05,
      "loss": 0.4537,
      "num_tokens": 10146036.0,
      "step": 45
    },
    {
      "epoch": 0.3908656399362719,
      "grad_norm": 0.28873109817504883,
      "learning_rate": 1.4840987919467634e-05,
      "loss": 0.4438,
      "num_tokens": 10360134.0,
      "step": 46
    },
    {
      "epoch": 0.3993627190653213,
      "grad_norm": 0.2874409854412079,
      "learning_rate": 1.4615258821876728e-05,
      "loss": 0.4411,
      "num_tokens": 10580887.0,
      "step": 47
    },
    {
      "epoch": 0.4078597981943707,
      "grad_norm": 0.28669029474258423,
      "learning_rate": 1.4386784347530522e-05,
      "loss": 0.4284,
      "num_tokens": 10803846.0,
      "step": 48
    },
    {
      "epoch": 0.4163568773234201,
      "grad_norm": 0.28335171937942505,
      "learning_rate": 1.4155737996734791e-05,
      "loss": 0.4254,
      "num_tokens": 11035018.0,
      "step": 49
    },
    {
      "epoch": 0.4248539564524695,
      "grad_norm": 0.29582643508911133,
      "learning_rate": 1.3922295222842153e-05,
      "loss": 0.4183,
      "num_tokens": 11259327.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_loss": 0.44191795587539673,
      "eval_num_tokens": 11259327.0,
      "eval_runtime": 6.3535,
      "eval_samples_per_second": 3.148,
      "eval_steps_per_second": 1.574,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_reward_final_answer": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_reward_response_format": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_reward_interaction": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_loss": 0.44191795587539673,
      "eval_num_tokens": 0.0,
      "eval_runtime": 5.1228,
      "eval_samples_per_second": 3.904,
      "eval_steps_per_second": 1.952,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_reward_final_answer": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_reward_response_format": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_reward_interaction": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_diagnostic/format_valid_ratio": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_diagnostic/tool_parse_success_ratio": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_diagnostic/answer_attempted_ratio": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_diagnostic/avg_turns_taken": 1.0,
      "step": 50
    },
    {
      "epoch": 0.4248539564524695,
      "eval_diagnostic/stop_reason/extraction_failed": 1.0,
      "step": 50
    },
    {
      "epoch": 0.43335103558151883,
      "grad_norm": 0.29690828919410706,
      "learning_rate": 1.3686633299015254e-05,
      "loss": 0.4135,
      "num_tokens": 227214.0,
      "step": 51
    },
    {
      "epoch": 0.44184811471056823,
      "grad_norm": 0.3105921745300293,
      "learning_rate": 1.3448931183608016e-05,
      "loss": 0.4018,
      "num_tokens": 448367.0,
      "step": 52
    },
    {
      "epoch": 0.45034519383961763,
      "grad_norm": 0.30604615807533264,
      "learning_rate": 1.3209369384267194e-05,
      "loss": 0.396,
      "num_tokens": 675175.0,
      "step": 53
    },
    {
      "epoch": 0.45884227296866703,
      "grad_norm": 0.3152115046977997,
      "learning_rate": 1.2968129820857384e-05,
      "loss": 0.3767,
      "num_tokens": 900784.0,
      "step": 54
    },
    {
      "epoch": 0.46733935209771643,
      "grad_norm": 0.3190907835960388,
      "learning_rate": 1.2725395687313646e-05,
      "loss": 0.3731,
      "num_tokens": 1128575.0,
      "step": 55
    },
    {
      "epoch": 0.4758364312267658,
      "grad_norm": 0.3275493383407593,
      "learning_rate": 1.2481351312526606e-05,
      "loss": 0.3668,
      "num_tokens": 1347830.0,
      "step": 56
    },
    {
      "epoch": 0.4843335103558152,
      "grad_norm": 0.30037736892700195,
      "learning_rate": 1.2236182020365675e-05,
      "loss": 0.368,
      "num_tokens": 1584214.0,
      "step": 57
    },
    {
      "epoch": 0.4928305894848646,
      "grad_norm": 0.30363041162490845,
      "learning_rate": 1.1990073988946716e-05,
      "loss": 0.3584,
      "num_tokens": 1806122.0,
      "step": 58
    },
    {
      "epoch": 0.501327668613914,
      "grad_norm": 0.27165499329566956,
      "learning_rate": 1.1743214109250994e-05,
      "loss": 0.3474,
      "num_tokens": 2038933.0,
      "step": 59
    },
    {
      "epoch": 0.5098247477429634,
      "grad_norm": 0.26867446303367615,
      "learning_rate": 1.1495789843202792e-05,
      "loss": 0.3432,
      "num_tokens": 2257233.0,
      "step": 60
    },
    {
      "epoch": 0.5183218268720128,
      "grad_norm": 0.22571073472499847,
      "learning_rate": 1.124798908131346e-05,
      "loss": 0.3497,
      "num_tokens": 2502206.0,
      "step": 61
    },
    {
      "epoch": 0.5268189060010622,
      "grad_norm": 0.22096557915210724,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.3357,
      "num_tokens": 2733328.0,
      "step": 62
    },
    {
      "epoch": 0.5353159851301115,
      "grad_norm": 0.21603769063949585,
      "learning_rate": 1.0752010918686544e-05,
      "loss": 0.3323,
      "num_tokens": 2958525.0,
      "step": 63
    },
    {
      "epoch": 0.5438130642591609,
      "grad_norm": 0.2027578502893448,
      "learning_rate": 1.050421015679721e-05,
      "loss": 0.3346,
      "num_tokens": 3189000.0,
      "step": 64
    },
    {
      "epoch": 0.5523101433882103,
      "grad_norm": 0.20277895033359528,
      "learning_rate": 1.0256785890749011e-05,
      "loss": 0.3126,
      "num_tokens": 3407547.0,
      "step": 65
    },
    {
      "epoch": 0.5608072225172597,
      "grad_norm": 0.187447190284729,
      "learning_rate": 1.0009926011053289e-05,
      "loss": 0.3175,
      "num_tokens": 3638073.0,
      "step": 66
    },
    {
      "epoch": 0.5693043016463091,
      "grad_norm": 0.18870113790035248,
      "learning_rate": 9.763817979634327e-06,
      "loss": 0.3053,
      "num_tokens": 3857870.0,
      "step": 67
    },
    {
      "epoch": 0.5778013807753585,
      "grad_norm": 0.18314893543720245,
      "learning_rate": 9.518648687473396e-06,
      "loss": 0.3087,
      "num_tokens": 4081907.0,
      "step": 68
    },
    {
      "epoch": 0.5862984599044079,
      "grad_norm": 0.17281264066696167,
      "learning_rate": 9.274604312686356e-06,
      "loss": 0.3167,
      "num_tokens": 4318189.0,
      "step": 69
    },
    {
      "epoch": 0.5947955390334573,
      "grad_norm": 0.1761389523744583,
      "learning_rate": 9.031870179142619e-06,
      "loss": 0.2964,
      "num_tokens": 4542756.0,
      "step": 70
    },
    {
      "epoch": 0.6032926181625067,
      "grad_norm": 0.17463359236717224,
      "learning_rate": 8.790630615732809e-06,
      "loss": 0.2915,
      "num_tokens": 4764901.0,
      "step": 71
    },
    {
      "epoch": 0.6117896972915561,
      "grad_norm": 0.1684163361787796,
      "learning_rate": 8.551068816391984e-06,
      "loss": 0.292,
      "num_tokens": 4989831.0,
      "step": 72
    },
    {
      "epoch": 0.6202867764206054,
      "grad_norm": 0.17157095670700073,
      "learning_rate": 8.313366700984753e-06,
      "loss": 0.2779,
      "num_tokens": 5202719.0,
      "step": 73
    },
    {
      "epoch": 0.6287838555496548,
      "grad_norm": 0.16972655057907104,
      "learning_rate": 8.07770477715785e-06,
      "loss": 0.285,
      "num_tokens": 5424906.0,
      "step": 74
    },
    {
      "epoch": 0.6372809346787042,
      "grad_norm": 0.16237466037273407,
      "learning_rate": 7.844262003265214e-06,
      "loss": 0.2903,
      "num_tokens": 5653610.0,
      "step": 75
    },
    {
      "epoch": 0.6457780138077536,
      "grad_norm": 0.1564113050699234,
      "learning_rate": 7.613215652469481e-06,
      "loss": 0.298,
      "num_tokens": 5886275.0,
      "step": 76
    },
    {
      "epoch": 0.654275092936803,
      "grad_norm": 0.1518831104040146,
      "learning_rate": 7.384741178123278e-06,
      "loss": 0.29,
      "num_tokens": 6127331.0,
      "step": 77
    },
    {
      "epoch": 0.6627721720658524,
      "grad_norm": 0.1514195203781128,
      "learning_rate": 7.159012080532368e-06,
      "loss": 0.2808,
      "num_tokens": 6350243.0,
      "step": 78
    },
    {
      "epoch": 0.6712692511949018,
      "grad_norm": 0.14839230477809906,
      "learning_rate": 6.936199775201998e-06,
      "loss": 0.2759,
      "num_tokens": 6574494.0,
      "step": 79
    },
    {
      "epoch": 0.6797663303239512,
      "grad_norm": 0.1434764415025711,
      "learning_rate": 6.7164734626663384e-06,
      "loss": 0.2812,
      "num_tokens": 6805646.0,
      "step": 80
    },
    {
      "epoch": 0.6882634094530006,
      "grad_norm": 0.14427417516708374,
      "learning_rate": 6.500000000000003e-06,
      "loss": 0.2708,
      "num_tokens": 7029262.0,
      "step": 81
    },
    {
      "epoch": 0.69676048858205,
      "grad_norm": 0.14127525687217712,
      "learning_rate": 6.2869437741091e-06,
      "loss": 0.2813,
      "num_tokens": 7257816.0,
      "step": 82
    },
    {
      "epoch": 0.7052575677110993,
      "grad_norm": 0.1428772211074829,
      "learning_rate": 6.077466576898161e-06,
      "loss": 0.2711,
      "num_tokens": 7481399.0,
      "step": 83
    },
    {
      "epoch": 0.7137546468401487,
      "grad_norm": 0.14086274802684784,
      "learning_rate": 5.871727482407605e-06,
      "loss": 0.2679,
      "num_tokens": 7703455.0,
      "step": 84
    },
    {
      "epoch": 0.7222517259691981,
      "grad_norm": 0.13953223824501038,
      "learning_rate": 5.669882726015181e-06,
      "loss": 0.2692,
      "num_tokens": 7924077.0,
      "step": 85
    },
    {
      "epoch": 0.7307488050982475,
      "grad_norm": 0.14064429700374603,
      "learning_rate": 5.47208558579299e-06,
      "loss": 0.259,
      "num_tokens": 8143604.0,
      "step": 86
    },
    {
      "epoch": 0.7392458842272969,
      "grad_norm": 0.13626375794410706,
      "learning_rate": 5.27848626611029e-06,
      "loss": 0.2674,
      "num_tokens": 8370430.0,
      "step": 87
    },
    {
      "epoch": 0.7477429633563463,
      "grad_norm": 0.13688842952251434,
      "learning_rate": 5.089231783570392e-06,
      "loss": 0.2763,
      "num_tokens": 8603383.0,
      "step": 88
    },
    {
      "epoch": 0.7562400424853957,
      "grad_norm": 0.13918966054916382,
      "learning_rate": 4.904465855368333e-06,
      "loss": 0.2742,
      "num_tokens": 8830677.0,
      "step": 89
    },
    {
      "epoch": 0.7647371216144451,
      "grad_norm": 0.14204055070877075,
      "learning_rate": 4.724328790154042e-06,
      "loss": 0.2525,
      "num_tokens": 9049384.0,
      "step": 90
    },
    {
      "epoch": 0.7732342007434945,
      "grad_norm": 0.13645566999912262,
      "learning_rate": 4.548957381483941e-06,
      "loss": 0.2752,
      "num_tokens": 9283178.0,
      "step": 91
    },
    {
      "epoch": 0.7817312798725438,
      "grad_norm": 0.14060115814208984,
      "learning_rate": 4.378484803941816e-06,
      "loss": 0.2561,
      "num_tokens": 9507128.0,
      "step": 92
    },
    {
      "epoch": 0.7902283590015932,
      "grad_norm": 0.140924334526062,
      "learning_rate": 4.2130405120079356e-06,
      "loss": 0.2615,
      "num_tokens": 9733410.0,
      "step": 93
    },
    {
      "epoch": 0.7987254381306426,
      "grad_norm": 0.1394304782152176,
      "learning_rate": 4.052750141753112e-06,
      "loss": 0.2616,
      "num_tokens": 9960712.0,
      "step": 94
    },
    {
      "epoch": 0.807222517259692,
      "grad_norm": 0.1327814757823944,
      "learning_rate": 3.89773541543246e-06,
      "loss": 0.2618,
      "num_tokens": 10189564.0,
      "step": 95
    },
    {
      "epoch": 0.8157195963887414,
      "grad_norm": 0.13691458106040955,
      "learning_rate": 3.748114049051197e-06,
      "loss": 0.2528,
      "num_tokens": 10412249.0,
      "step": 96
    },
    {
      "epoch": 0.8242166755177908,
      "grad_norm": 0.13488516211509705,
      "learning_rate": 3.603999662972795e-06,
      "loss": 0.2524,
      "num_tokens": 10633903.0,
      "step": 97
    },
    {
      "epoch": 0.8327137546468402,
      "grad_norm": 0.12742717564105988,
      "learning_rate": 3.4655016956372432e-06,
      "loss": 0.2646,
      "num_tokens": 10864574.0,
      "step": 98
    },
    {
      "epoch": 0.8412108337758896,
      "grad_norm": 0.12795040011405945,
      "learning_rate": 3.332725320455058e-06,
      "loss": 0.2442,
      "num_tokens": 11083990.0,
      "step": 99
    },
    {
      "epoch": 0.849707912904939,
      "grad_norm": 0.12140467017889023,
      "learning_rate": 3.205771365940052e-06,
      "loss": 0.2523,
      "num_tokens": 11315846.0,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_loss": 0.4209205210208893,
      "eval_num_tokens": 11315846.0,
      "eval_runtime": 4.5682,
      "eval_samples_per_second": 4.378,
      "eval_steps_per_second": 2.189,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_reward_final_answer": 0.0,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_reward_response_format": 0.0,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_reward_interaction": 0.0,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_diagnostic/format_valid_ratio": 0.1,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_diagnostic/tool_parse_success_ratio": 0.0,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_diagnostic/answer_attempted_ratio": 0.0,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_diagnostic/avg_turns_taken": 1.0,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_diagnostic/stop_reason/extraction_failed": 0.9,
      "step": 100
    },
    {
      "epoch": 0.849707912904939,
      "eval_diagnostic/stop_reason/no_action": 0.1,
      "step": 100
    },
    {
      "epoch": 0.8582049920339884,
      "grad_norm": 0.12342186272144318,
      "learning_rate": 3.0847362391415995e-06,
      "loss": 0.2586,
      "num_tokens": 11540477.0,
      "step": 101
    },
    {
      "epoch": 0.8667020711630377,
      "grad_norm": 0.1200730949640274,
      "learning_rate": 2.969711852434454e-06,
      "loss": 0.2604,
      "num_tokens": 11771582.0,
      "step": 102
    },
    {
      "epoch": 0.8751991502920871,
      "grad_norm": 0.12449000030755997,
      "learning_rate": 2.860785553721803e-06,
      "loss": 0.2453,
      "num_tokens": 11992209.0,
      "step": 103
    },
    {
      "epoch": 0.8836962294211365,
      "grad_norm": 0.11838784068822861,
      "learning_rate": 2.7580400601044825e-06,
      "loss": 0.2523,
      "num_tokens": 12214354.0,
      "step": 104
    },
    {
      "epoch": 0.8921933085501859,
      "grad_norm": 0.11962386220693588,
      "learning_rate": 2.6615533950667844e-06,
      "loss": 0.2456,
      "num_tokens": 12433134.0,
      "step": 105
    },
    {
      "epoch": 0.9006903876792353,
      "grad_norm": 0.11446674913167953,
      "learning_rate": 2.5713988292265084e-06,
      "loss": 0.2656,
      "num_tokens": 12672477.0,
      "step": 106
    },
    {
      "epoch": 0.9091874668082847,
      "grad_norm": 0.1163739487528801,
      "learning_rate": 2.4876448246942884e-06,
      "loss": 0.2462,
      "num_tokens": 12894135.0,
      "step": 107
    },
    {
      "epoch": 0.9176845459373341,
      "grad_norm": 0.11075320094823837,
      "learning_rate": 2.4103549830844328e-06,
      "loss": 0.2641,
      "num_tokens": 13129073.0,
      "step": 108
    },
    {
      "epoch": 0.9261816250663835,
      "grad_norm": 0.11537513881921768,
      "learning_rate": 2.3395879972167464e-06,
      "loss": 0.2474,
      "num_tokens": 13348255.0,
      "step": 109
    },
    {
      "epoch": 0.9346787041954329,
      "grad_norm": 0.10961325466632843,
      "learning_rate": 2.275397606546027e-06,
      "loss": 0.2697,
      "num_tokens": 13592454.0,
      "step": 110
    },
    {
      "epoch": 0.9431757833244823,
      "grad_norm": 0.11164919286966324,
      "learning_rate": 2.21783255635308e-06,
      "loss": 0.2552,
      "num_tokens": 13819727.0,
      "step": 111
    },
    {
      "epoch": 0.9516728624535316,
      "grad_norm": 0.10987438261508942,
      "learning_rate": 2.1669365607282396e-06,
      "loss": 0.2548,
      "num_tokens": 14051729.0,
      "step": 112
    },
    {
      "epoch": 0.960169941582581,
      "grad_norm": 0.11316878348588943,
      "learning_rate": 2.1227482693754995e-06,
      "loss": 0.2492,
      "num_tokens": 14274432.0,
      "step": 113
    },
    {
      "epoch": 0.9686670207116304,
      "grad_norm": 0.10757338255643845,
      "learning_rate": 2.085301238262471e-06,
      "loss": 0.2531,
      "num_tokens": 14498986.0,
      "step": 114
    },
    {
      "epoch": 0.9771640998406798,
      "grad_norm": 0.10984344780445099,
      "learning_rate": 2.054623904138455e-06,
      "loss": 0.2535,
      "num_tokens": 14724914.0,
      "step": 115
    },
    {
      "epoch": 0.9856611789697292,
      "grad_norm": 0.10751090943813324,
      "learning_rate": 2.0307395629399716e-06,
      "loss": 0.2517,
      "num_tokens": 14957559.0,
      "step": 116
    },
    {
      "epoch": 0.9941582580987786,
      "grad_norm": 0.10235580801963806,
      "learning_rate": 2.0136663521001547e-06,
      "loss": 0.2592,
      "num_tokens": 15193005.0,
      "step": 117
    },
    {
      "epoch": 0.9941582580987786,
      "step": 117,
      "total_flos": 5.5251981578153165e+17,
      "train_loss": 0.16806216436064142,
      "train_runtime": 6592.4366,
      "train_samples_per_second": 1.142,
      "train_steps_per_second": 0.018
    }
  ],
  "logging_steps": 1,
  "max_steps": 118,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.5251981578153165e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}