{
"best_global_step": 100,
"best_metric": 0.4209205210208893,
"best_model_checkpoint": "results/Qwen2.5-3B-Instruct-SFT/checkpoint-100",
"epoch": 0.9941582580987786,
"eval_steps": 50,
"global_step": 117,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_loss": 1.0300904512405396,
"eval_num_tokens": 0.0,
"eval_runtime": 5.1949,
"eval_samples_per_second": 3.85,
"eval_steps_per_second": 1.925,
"step": 0
},
{
"epoch": 0,
"eval_reward_final_answer": 0.0,
"step": 0
},
{
"epoch": 0,
"eval_reward_response_format": 0.0,
"step": 0
},
{
"epoch": 0,
"eval_reward_interaction": 0.0,
"step": 0
},
{
"epoch": 0.008497079129049389,
"grad_norm": 2.155670404434204,
"learning_rate": 0.0,
"loss": 0.9504,
"num_tokens": 221701.0,
"step": 1
},
{
"epoch": 0.016994158258098777,
"grad_norm": 2.175981044769287,
"learning_rate": 5e-06,
"loss": 0.9609,
"num_tokens": 443374.0,
"step": 2
},
{
"epoch": 0.025491237387148168,
"grad_norm": 2.1068813800811768,
"learning_rate": 1e-05,
"loss": 0.9455,
"num_tokens": 667777.0,
"step": 3
},
{
"epoch": 0.033988316516197555,
"grad_norm": 2.1638681888580322,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.9521,
"num_tokens": 887347.0,
"step": 4
},
{
"epoch": 0.04248539564524695,
"grad_norm": 2.1004698276519775,
"learning_rate": 2e-05,
"loss": 0.9523,
"num_tokens": 1111957.0,
"step": 5
},
{
"epoch": 0.050982474774296335,
"grad_norm": 2.0184059143066406,
"learning_rate": 1.9996582763224565e-05,
"loss": 0.9102,
"num_tokens": 1340874.0,
"step": 6
},
{
"epoch": 0.05947955390334572,
"grad_norm": 2.0365166664123535,
"learning_rate": 1.9986333647899847e-05,
"loss": 0.9047,
"num_tokens": 1553400.0,
"step": 7
},
{
"epoch": 0.06797663303239511,
"grad_norm": 1.7037795782089233,
"learning_rate": 1.9969260437060028e-05,
"loss": 0.8468,
"num_tokens": 1779751.0,
"step": 8
},
{
"epoch": 0.07647371216144451,
"grad_norm": 1.5123393535614014,
"learning_rate": 1.9945376095861546e-05,
"loss": 0.818,
"num_tokens": 1996995.0,
"step": 9
},
{
"epoch": 0.0849707912904939,
"grad_norm": 1.132192611694336,
"learning_rate": 1.991469876173753e-05,
"loss": 0.7753,
"num_tokens": 2228097.0,
"step": 10
},
{
"epoch": 0.09346787041954328,
"grad_norm": 0.8642380237579346,
"learning_rate": 1.9877251730624504e-05,
"loss": 0.7495,
"num_tokens": 2451216.0,
"step": 11
},
{
"epoch": 0.10196494954859267,
"grad_norm": 0.6592623591423035,
"learning_rate": 1.983306343927176e-05,
"loss": 0.7353,
"num_tokens": 2671449.0,
"step": 12
},
{
"epoch": 0.11046202867764206,
"grad_norm": 0.5095056295394897,
"learning_rate": 1.978216744364692e-05,
"loss": 0.7199,
"num_tokens": 2887598.0,
"step": 13
},
{
"epoch": 0.11895910780669144,
"grad_norm": 0.43245795369148254,
"learning_rate": 1.9724602393453976e-05,
"loss": 0.7074,
"num_tokens": 3102297.0,
"step": 14
},
{
"epoch": 0.12745618693574085,
"grad_norm": 0.3649689853191376,
"learning_rate": 1.9660412002783254e-05,
"loss": 0.6916,
"num_tokens": 3322488.0,
"step": 15
},
{
"epoch": 0.13595326606479022,
"grad_norm": 0.32840588688850403,
"learning_rate": 1.958964501691557e-05,
"loss": 0.6907,
"num_tokens": 3552428.0,
"step": 16
},
{
"epoch": 0.14445034519383962,
"grad_norm": 0.30777254700660706,
"learning_rate": 1.9512355175305713e-05,
"loss": 0.6805,
"num_tokens": 3779989.0,
"step": 17
},
{
"epoch": 0.15294742432288902,
"grad_norm": 0.2842954695224762,
"learning_rate": 1.9428601170773492e-05,
"loss": 0.6681,
"num_tokens": 4019860.0,
"step": 18
},
{
"epoch": 0.1614445034519384,
"grad_norm": 0.28378936648368835,
"learning_rate": 1.9338446604933218e-05,
"loss": 0.6591,
"num_tokens": 4243770.0,
"step": 19
},
{
"epoch": 0.1699415825809878,
"grad_norm": 0.2757631242275238,
"learning_rate": 1.9241959939895518e-05,
"loss": 0.6637,
"num_tokens": 4471904.0,
"step": 20
},
{
"epoch": 0.17843866171003717,
"grad_norm": 0.2628677487373352,
"learning_rate": 1.91392144462782e-05,
"loss": 0.6455,
"num_tokens": 4707414.0,
"step": 21
},
{
"epoch": 0.18693574083908657,
"grad_norm": 0.2680025100708008,
"learning_rate": 1.9030288147565547e-05,
"loss": 0.6415,
"num_tokens": 4934312.0,
"step": 22
},
{
"epoch": 0.19543281996813594,
"grad_norm": 0.26622337102890015,
"learning_rate": 1.89152637608584e-05,
"loss": 0.635,
"num_tokens": 5154540.0,
"step": 23
},
{
"epoch": 0.20392989909718534,
"grad_norm": 0.25727367401123047,
"learning_rate": 1.879422863405995e-05,
"loss": 0.6281,
"num_tokens": 5381802.0,
"step": 24
},
{
"epoch": 0.21242697822623474,
"grad_norm": 0.2600906491279602,
"learning_rate": 1.8667274679544944e-05,
"loss": 0.6198,
"num_tokens": 5600149.0,
"step": 25
},
{
"epoch": 0.22092405735528411,
"grad_norm": 0.24757230281829834,
"learning_rate": 1.8534498304362758e-05,
"loss": 0.6131,
"num_tokens": 5827367.0,
"step": 26
},
{
"epoch": 0.22942113648433352,
"grad_norm": 0.2472054362297058,
"learning_rate": 1.8396000337027208e-05,
"loss": 0.6062,
"num_tokens": 6047147.0,
"step": 27
},
{
"epoch": 0.2379182156133829,
"grad_norm": 0.23867134749889374,
"learning_rate": 1.8251885950948805e-05,
"loss": 0.5917,
"num_tokens": 6267160.0,
"step": 28
},
{
"epoch": 0.2464152947424323,
"grad_norm": 0.23080606758594513,
"learning_rate": 1.8102264584567543e-05,
"loss": 0.588,
"num_tokens": 6501872.0,
"step": 29
},
{
"epoch": 0.2549123738714817,
"grad_norm": 0.22549813985824585,
"learning_rate": 1.7947249858246888e-05,
"loss": 0.5868,
"num_tokens": 6739822.0,
"step": 30
},
{
"epoch": 0.2634094530005311,
"grad_norm": 0.22806097567081451,
"learning_rate": 1.7786959487992068e-05,
"loss": 0.5726,
"num_tokens": 6969353.0,
"step": 31
},
{
"epoch": 0.27190653212958044,
"grad_norm": 0.23609699308872223,
"learning_rate": 1.7621515196058188e-05,
"loss": 0.5695,
"num_tokens": 7196669.0,
"step": 32
},
{
"epoch": 0.28040361125862984,
"grad_norm": 0.2319885790348053,
"learning_rate": 1.7451042618516063e-05,
"loss": 0.5592,
"num_tokens": 7427812.0,
"step": 33
},
{
"epoch": 0.28890069038767924,
"grad_norm": 0.2277105748653412,
"learning_rate": 1.727567120984596e-05,
"loss": 0.5453,
"num_tokens": 7664616.0,
"step": 34
},
{
"epoch": 0.29739776951672864,
"grad_norm": 0.24143275618553162,
"learning_rate": 1.709553414463167e-05,
"loss": 0.5381,
"num_tokens": 7892289.0,
"step": 35
},
{
"epoch": 0.30589484864577804,
"grad_norm": 0.23762071132659912,
"learning_rate": 1.6910768216429613e-05,
"loss": 0.5447,
"num_tokens": 8125714.0,
"step": 36
},
{
"epoch": 0.3143919277748274,
"grad_norm": 0.2455436736345291,
"learning_rate": 1.6721513733889716e-05,
"loss": 0.5234,
"num_tokens": 8354957.0,
"step": 37
},
{
"epoch": 0.3228890069038768,
"grad_norm": 0.25508517026901245,
"learning_rate": 1.6527914414207012e-05,
"loss": 0.5173,
"num_tokens": 8575672.0,
"step": 38
},
{
"epoch": 0.3313860860329262,
"grad_norm": 0.256209135055542,
"learning_rate": 1.6330117273984822e-05,
"loss": 0.5232,
"num_tokens": 8795680.0,
"step": 39
},
{
"epoch": 0.3398831651619756,
"grad_norm": 0.2554221451282501,
"learning_rate": 1.6128272517592397e-05,
"loss": 0.5019,
"num_tokens": 9017987.0,
"step": 40
},
{
"epoch": 0.348380244291025,
"grad_norm": 0.24708497524261475,
"learning_rate": 1.5922533423101843e-05,
"loss": 0.4874,
"num_tokens": 9251233.0,
"step": 41
},
{
"epoch": 0.35687732342007433,
"grad_norm": 0.2547791302204132,
"learning_rate": 1.5713056225890904e-05,
"loss": 0.4929,
"num_tokens": 9483146.0,
"step": 42
},
{
"epoch": 0.36537440254912373,
"grad_norm": 0.26357918977737427,
"learning_rate": 1.55e-05,
"loss": 0.4735,
"num_tokens": 9707559.0,
"step": 43
},
{
"epoch": 0.37387148167817313,
"grad_norm": 0.272932767868042,
"learning_rate": 1.5283526537333664e-05,
"loss": 0.4728,
"num_tokens": 9927096.0,
"step": 44
},
{
"epoch": 0.38236856080722254,
"grad_norm": 0.2784619629383087,
"learning_rate": 1.5063800224798007e-05,
"loss": 0.4537,
"num_tokens": 10146036.0,
"step": 45
},
{
"epoch": 0.3908656399362719,
"grad_norm": 0.28873109817504883,
"learning_rate": 1.4840987919467634e-05,
"loss": 0.4438,
"num_tokens": 10360134.0,
"step": 46
},
{
"epoch": 0.3993627190653213,
"grad_norm": 0.2874409854412079,
"learning_rate": 1.4615258821876728e-05,
"loss": 0.4411,
"num_tokens": 10580887.0,
"step": 47
},
{
"epoch": 0.4078597981943707,
"grad_norm": 0.28669029474258423,
"learning_rate": 1.4386784347530522e-05,
"loss": 0.4284,
"num_tokens": 10803846.0,
"step": 48
},
{
"epoch": 0.4163568773234201,
"grad_norm": 0.28335171937942505,
"learning_rate": 1.4155737996734791e-05,
"loss": 0.4254,
"num_tokens": 11035018.0,
"step": 49
},
{
"epoch": 0.4248539564524695,
"grad_norm": 0.29582643508911133,
"learning_rate": 1.3922295222842153e-05,
"loss": 0.4183,
"num_tokens": 11259327.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_loss": 0.44191795587539673,
"eval_num_tokens": 11259327.0,
"eval_runtime": 6.3535,
"eval_samples_per_second": 3.148,
"eval_steps_per_second": 1.574,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_reward_final_answer": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_reward_response_format": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_reward_interaction": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_loss": 0.44191795587539673,
"eval_num_tokens": 0.0,
"eval_runtime": 5.1228,
"eval_samples_per_second": 3.904,
"eval_steps_per_second": 1.952,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_reward_final_answer": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_reward_response_format": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_reward_interaction": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_diagnostic/format_valid_ratio": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_diagnostic/tool_parse_success_ratio": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_diagnostic/answer_attempted_ratio": 0.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_diagnostic/avg_turns_taken": 1.0,
"step": 50
},
{
"epoch": 0.4248539564524695,
"eval_diagnostic/stop_reason/extraction_failed": 1.0,
"step": 50
},
{
"epoch": 0.43335103558151883,
"grad_norm": 0.29690828919410706,
"learning_rate": 1.3686633299015254e-05,
"loss": 0.4135,
"num_tokens": 227214.0,
"step": 51
},
{
"epoch": 0.44184811471056823,
"grad_norm": 0.3105921745300293,
"learning_rate": 1.3448931183608016e-05,
"loss": 0.4018,
"num_tokens": 448367.0,
"step": 52
},
{
"epoch": 0.45034519383961763,
"grad_norm": 0.30604615807533264,
"learning_rate": 1.3209369384267194e-05,
"loss": 0.396,
"num_tokens": 675175.0,
"step": 53
},
{
"epoch": 0.45884227296866703,
"grad_norm": 0.3152115046977997,
"learning_rate": 1.2968129820857384e-05,
"loss": 0.3767,
"num_tokens": 900784.0,
"step": 54
},
{
"epoch": 0.46733935209771643,
"grad_norm": 0.3190907835960388,
"learning_rate": 1.2725395687313646e-05,
"loss": 0.3731,
"num_tokens": 1128575.0,
"step": 55
},
{
"epoch": 0.4758364312267658,
"grad_norm": 0.3275493383407593,
"learning_rate": 1.2481351312526606e-05,
"loss": 0.3668,
"num_tokens": 1347830.0,
"step": 56
},
{
"epoch": 0.4843335103558152,
"grad_norm": 0.30037736892700195,
"learning_rate": 1.2236182020365675e-05,
"loss": 0.368,
"num_tokens": 1584214.0,
"step": 57
},
{
"epoch": 0.4928305894848646,
"grad_norm": 0.30363041162490845,
"learning_rate": 1.1990073988946716e-05,
"loss": 0.3584,
"num_tokens": 1806122.0,
"step": 58
},
{
"epoch": 0.501327668613914,
"grad_norm": 0.27165499329566956,
"learning_rate": 1.1743214109250994e-05,
"loss": 0.3474,
"num_tokens": 2038933.0,
"step": 59
},
{
"epoch": 0.5098247477429634,
"grad_norm": 0.26867446303367615,
"learning_rate": 1.1495789843202792e-05,
"loss": 0.3432,
"num_tokens": 2257233.0,
"step": 60
},
{
"epoch": 0.5183218268720128,
"grad_norm": 0.22571073472499847,
"learning_rate": 1.124798908131346e-05,
"loss": 0.3497,
"num_tokens": 2502206.0,
"step": 61
},
{
"epoch": 0.5268189060010622,
"grad_norm": 0.22096557915210724,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.3357,
"num_tokens": 2733328.0,
"step": 62
},
{
"epoch": 0.5353159851301115,
"grad_norm": 0.21603769063949585,
"learning_rate": 1.0752010918686544e-05,
"loss": 0.3323,
"num_tokens": 2958525.0,
"step": 63
},
{
"epoch": 0.5438130642591609,
"grad_norm": 0.2027578502893448,
"learning_rate": 1.050421015679721e-05,
"loss": 0.3346,
"num_tokens": 3189000.0,
"step": 64
},
{
"epoch": 0.5523101433882103,
"grad_norm": 0.20277895033359528,
"learning_rate": 1.0256785890749011e-05,
"loss": 0.3126,
"num_tokens": 3407547.0,
"step": 65
},
{
"epoch": 0.5608072225172597,
"grad_norm": 0.187447190284729,
"learning_rate": 1.0009926011053289e-05,
"loss": 0.3175,
"num_tokens": 3638073.0,
"step": 66
},
{
"epoch": 0.5693043016463091,
"grad_norm": 0.18870113790035248,
"learning_rate": 9.763817979634327e-06,
"loss": 0.3053,
"num_tokens": 3857870.0,
"step": 67
},
{
"epoch": 0.5778013807753585,
"grad_norm": 0.18314893543720245,
"learning_rate": 9.518648687473396e-06,
"loss": 0.3087,
"num_tokens": 4081907.0,
"step": 68
},
{
"epoch": 0.5862984599044079,
"grad_norm": 0.17281264066696167,
"learning_rate": 9.274604312686356e-06,
"loss": 0.3167,
"num_tokens": 4318189.0,
"step": 69
},
{
"epoch": 0.5947955390334573,
"grad_norm": 0.1761389523744583,
"learning_rate": 9.031870179142619e-06,
"loss": 0.2964,
"num_tokens": 4542756.0,
"step": 70
},
{
"epoch": 0.6032926181625067,
"grad_norm": 0.17463359236717224,
"learning_rate": 8.790630615732809e-06,
"loss": 0.2915,
"num_tokens": 4764901.0,
"step": 71
},
{
"epoch": 0.6117896972915561,
"grad_norm": 0.1684163361787796,
"learning_rate": 8.551068816391984e-06,
"loss": 0.292,
"num_tokens": 4989831.0,
"step": 72
},
{
"epoch": 0.6202867764206054,
"grad_norm": 0.17157095670700073,
"learning_rate": 8.313366700984753e-06,
"loss": 0.2779,
"num_tokens": 5202719.0,
"step": 73
},
{
"epoch": 0.6287838555496548,
"grad_norm": 0.16972655057907104,
"learning_rate": 8.07770477715785e-06,
"loss": 0.285,
"num_tokens": 5424906.0,
"step": 74
},
{
"epoch": 0.6372809346787042,
"grad_norm": 0.16237466037273407,
"learning_rate": 7.844262003265214e-06,
"loss": 0.2903,
"num_tokens": 5653610.0,
"step": 75
},
{
"epoch": 0.6457780138077536,
"grad_norm": 0.1564113050699234,
"learning_rate": 7.613215652469481e-06,
"loss": 0.298,
"num_tokens": 5886275.0,
"step": 76
},
{
"epoch": 0.654275092936803,
"grad_norm": 0.1518831104040146,
"learning_rate": 7.384741178123278e-06,
"loss": 0.29,
"num_tokens": 6127331.0,
"step": 77
},
{
"epoch": 0.6627721720658524,
"grad_norm": 0.1514195203781128,
"learning_rate": 7.159012080532368e-06,
"loss": 0.2808,
"num_tokens": 6350243.0,
"step": 78
},
{
"epoch": 0.6712692511949018,
"grad_norm": 0.14839230477809906,
"learning_rate": 6.936199775201998e-06,
"loss": 0.2759,
"num_tokens": 6574494.0,
"step": 79
},
{
"epoch": 0.6797663303239512,
"grad_norm": 0.1434764415025711,
"learning_rate": 6.7164734626663384e-06,
"loss": 0.2812,
"num_tokens": 6805646.0,
"step": 80
},
{
"epoch": 0.6882634094530006,
"grad_norm": 0.14427417516708374,
"learning_rate": 6.500000000000003e-06,
"loss": 0.2708,
"num_tokens": 7029262.0,
"step": 81
},
{
"epoch": 0.69676048858205,
"grad_norm": 0.14127525687217712,
"learning_rate": 6.2869437741091e-06,
"loss": 0.2813,
"num_tokens": 7257816.0,
"step": 82
},
{
"epoch": 0.7052575677110993,
"grad_norm": 0.1428772211074829,
"learning_rate": 6.077466576898161e-06,
"loss": 0.2711,
"num_tokens": 7481399.0,
"step": 83
},
{
"epoch": 0.7137546468401487,
"grad_norm": 0.14086274802684784,
"learning_rate": 5.871727482407605e-06,
"loss": 0.2679,
"num_tokens": 7703455.0,
"step": 84
},
{
"epoch": 0.7222517259691981,
"grad_norm": 0.13953223824501038,
"learning_rate": 5.669882726015181e-06,
"loss": 0.2692,
"num_tokens": 7924077.0,
"step": 85
},
{
"epoch": 0.7307488050982475,
"grad_norm": 0.14064429700374603,
"learning_rate": 5.47208558579299e-06,
"loss": 0.259,
"num_tokens": 8143604.0,
"step": 86
},
{
"epoch": 0.7392458842272969,
"grad_norm": 0.13626375794410706,
"learning_rate": 5.27848626611029e-06,
"loss": 0.2674,
"num_tokens": 8370430.0,
"step": 87
},
{
"epoch": 0.7477429633563463,
"grad_norm": 0.13688842952251434,
"learning_rate": 5.089231783570392e-06,
"loss": 0.2763,
"num_tokens": 8603383.0,
"step": 88
},
{
"epoch": 0.7562400424853957,
"grad_norm": 0.13918966054916382,
"learning_rate": 4.904465855368333e-06,
"loss": 0.2742,
"num_tokens": 8830677.0,
"step": 89
},
{
"epoch": 0.7647371216144451,
"grad_norm": 0.14204055070877075,
"learning_rate": 4.724328790154042e-06,
"loss": 0.2525,
"num_tokens": 9049384.0,
"step": 90
},
{
"epoch": 0.7732342007434945,
"grad_norm": 0.13645566999912262,
"learning_rate": 4.548957381483941e-06,
"loss": 0.2752,
"num_tokens": 9283178.0,
"step": 91
},
{
"epoch": 0.7817312798725438,
"grad_norm": 0.14060115814208984,
"learning_rate": 4.378484803941816e-06,
"loss": 0.2561,
"num_tokens": 9507128.0,
"step": 92
},
{
"epoch": 0.7902283590015932,
"grad_norm": 0.140924334526062,
"learning_rate": 4.2130405120079356e-06,
"loss": 0.2615,
"num_tokens": 9733410.0,
"step": 93
},
{
"epoch": 0.7987254381306426,
"grad_norm": 0.1394304782152176,
"learning_rate": 4.052750141753112e-06,
"loss": 0.2616,
"num_tokens": 9960712.0,
"step": 94
},
{
"epoch": 0.807222517259692,
"grad_norm": 0.1327814757823944,
"learning_rate": 3.89773541543246e-06,
"loss": 0.2618,
"num_tokens": 10189564.0,
"step": 95
},
{
"epoch": 0.8157195963887414,
"grad_norm": 0.13691458106040955,
"learning_rate": 3.748114049051197e-06,
"loss": 0.2528,
"num_tokens": 10412249.0,
"step": 96
},
{
"epoch": 0.8242166755177908,
"grad_norm": 0.13488516211509705,
"learning_rate": 3.603999662972795e-06,
"loss": 0.2524,
"num_tokens": 10633903.0,
"step": 97
},
{
"epoch": 0.8327137546468402,
"grad_norm": 0.12742717564105988,
"learning_rate": 3.4655016956372432e-06,
"loss": 0.2646,
"num_tokens": 10864574.0,
"step": 98
},
{
"epoch": 0.8412108337758896,
"grad_norm": 0.12795040011405945,
"learning_rate": 3.332725320455058e-06,
"loss": 0.2442,
"num_tokens": 11083990.0,
"step": 99
},
{
"epoch": 0.849707912904939,
"grad_norm": 0.12140467017889023,
"learning_rate": 3.205771365940052e-06,
"loss": 0.2523,
"num_tokens": 11315846.0,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_loss": 0.4209205210208893,
"eval_num_tokens": 11315846.0,
"eval_runtime": 4.5682,
"eval_samples_per_second": 4.378,
"eval_steps_per_second": 2.189,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_reward_final_answer": 0.0,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_reward_response_format": 0.0,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_reward_interaction": 0.0,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_diagnostic/format_valid_ratio": 0.1,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_diagnostic/tool_parse_success_ratio": 0.0,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_diagnostic/answer_attempted_ratio": 0.0,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_diagnostic/avg_turns_taken": 1.0,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_diagnostic/stop_reason/extraction_failed": 0.9,
"step": 100
},
{
"epoch": 0.849707912904939,
"eval_diagnostic/stop_reason/no_action": 0.1,
"step": 100
},
{
"epoch": 0.8582049920339884,
"grad_norm": 0.12342186272144318,
"learning_rate": 3.0847362391415995e-06,
"loss": 0.2586,
"num_tokens": 11540477.0,
"step": 101
},
{
"epoch": 0.8667020711630377,
"grad_norm": 0.1200730949640274,
"learning_rate": 2.969711852434454e-06,
"loss": 0.2604,
"num_tokens": 11771582.0,
"step": 102
},
{
"epoch": 0.8751991502920871,
"grad_norm": 0.12449000030755997,
"learning_rate": 2.860785553721803e-06,
"loss": 0.2453,
"num_tokens": 11992209.0,
"step": 103
},
{
"epoch": 0.8836962294211365,
"grad_norm": 0.11838784068822861,
"learning_rate": 2.7580400601044825e-06,
"loss": 0.2523,
"num_tokens": 12214354.0,
"step": 104
},
{
"epoch": 0.8921933085501859,
"grad_norm": 0.11962386220693588,
"learning_rate": 2.6615533950667844e-06,
"loss": 0.2456,
"num_tokens": 12433134.0,
"step": 105
},
{
"epoch": 0.9006903876792353,
"grad_norm": 0.11446674913167953,
"learning_rate": 2.5713988292265084e-06,
"loss": 0.2656,
"num_tokens": 12672477.0,
"step": 106
},
{
"epoch": 0.9091874668082847,
"grad_norm": 0.1163739487528801,
"learning_rate": 2.4876448246942884e-06,
"loss": 0.2462,
"num_tokens": 12894135.0,
"step": 107
},
{
"epoch": 0.9176845459373341,
"grad_norm": 0.11075320094823837,
"learning_rate": 2.4103549830844328e-06,
"loss": 0.2641,
"num_tokens": 13129073.0,
"step": 108
},
{
"epoch": 0.9261816250663835,
"grad_norm": 0.11537513881921768,
"learning_rate": 2.3395879972167464e-06,
"loss": 0.2474,
"num_tokens": 13348255.0,
"step": 109
},
{
"epoch": 0.9346787041954329,
"grad_norm": 0.10961325466632843,
"learning_rate": 2.275397606546027e-06,
"loss": 0.2697,
"num_tokens": 13592454.0,
"step": 110
},
{
"epoch": 0.9431757833244823,
"grad_norm": 0.11164919286966324,
"learning_rate": 2.21783255635308e-06,
"loss": 0.2552,
"num_tokens": 13819727.0,
"step": 111
},
{
"epoch": 0.9516728624535316,
"grad_norm": 0.10987438261508942,
"learning_rate": 2.1669365607282396e-06,
"loss": 0.2548,
"num_tokens": 14051729.0,
"step": 112
},
{
"epoch": 0.960169941582581,
"grad_norm": 0.11316878348588943,
"learning_rate": 2.1227482693754995e-06,
"loss": 0.2492,
"num_tokens": 14274432.0,
"step": 113
},
{
"epoch": 0.9686670207116304,
"grad_norm": 0.10757338255643845,
"learning_rate": 2.085301238262471e-06,
"loss": 0.2531,
"num_tokens": 14498986.0,
"step": 114
},
{
"epoch": 0.9771640998406798,
"grad_norm": 0.10984344780445099,
"learning_rate": 2.054623904138455e-06,
"loss": 0.2535,
"num_tokens": 14724914.0,
"step": 115
},
{
"epoch": 0.9856611789697292,
"grad_norm": 0.10751090943813324,
"learning_rate": 2.0307395629399716e-06,
"loss": 0.2517,
"num_tokens": 14957559.0,
"step": 116
},
{
"epoch": 0.9941582580987786,
"grad_norm": 0.10235580801963806,
"learning_rate": 2.0136663521001547e-06,
"loss": 0.2592,
"num_tokens": 15193005.0,
"step": 117
},
{
"epoch": 0.9941582580987786,
"step": 117,
"total_flos": 5.5251981578153165e+17,
"train_loss": 0.16806216436064142,
"train_runtime": 6592.4366,
"train_samples_per_second": 1.142,
"train_steps_per_second": 0.018
}
],
"logging_steps": 1,
"max_steps": 118,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.5251981578153165e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}