{ "best_global_step": 100, "best_metric": 0.4209205210208893, "best_model_checkpoint": "results/Qwen2.5-3B-Instruct-SFT/checkpoint-100", "epoch": 0.9941582580987786, "eval_steps": 50, "global_step": 117, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 1.0300904512405396, "eval_num_tokens": 0.0, "eval_runtime": 5.1949, "eval_samples_per_second": 3.85, "eval_steps_per_second": 1.925, "step": 0 }, { "epoch": 0, "eval_reward_final_answer": 0.0, "step": 0 }, { "epoch": 0, "eval_reward_response_format": 0.0, "step": 0 }, { "epoch": 0, "eval_reward_interaction": 0.0, "step": 0 }, { "epoch": 0.008497079129049389, "grad_norm": 2.155670404434204, "learning_rate": 0.0, "loss": 0.9504, "num_tokens": 221701.0, "step": 1 }, { "epoch": 0.016994158258098777, "grad_norm": 2.175981044769287, "learning_rate": 5e-06, "loss": 0.9609, "num_tokens": 443374.0, "step": 2 }, { "epoch": 0.025491237387148168, "grad_norm": 2.1068813800811768, "learning_rate": 1e-05, "loss": 0.9455, "num_tokens": 667777.0, "step": 3 }, { "epoch": 0.033988316516197555, "grad_norm": 2.1638681888580322, "learning_rate": 1.5000000000000002e-05, "loss": 0.9521, "num_tokens": 887347.0, "step": 4 }, { "epoch": 0.04248539564524695, "grad_norm": 2.1004698276519775, "learning_rate": 2e-05, "loss": 0.9523, "num_tokens": 1111957.0, "step": 5 }, { "epoch": 0.050982474774296335, "grad_norm": 2.0184059143066406, "learning_rate": 1.9996582763224565e-05, "loss": 0.9102, "num_tokens": 1340874.0, "step": 6 }, { "epoch": 0.05947955390334572, "grad_norm": 2.0365166664123535, "learning_rate": 1.9986333647899847e-05, "loss": 0.9047, "num_tokens": 1553400.0, "step": 7 }, { "epoch": 0.06797663303239511, "grad_norm": 1.7037795782089233, "learning_rate": 1.9969260437060028e-05, "loss": 0.8468, "num_tokens": 1779751.0, "step": 8 }, { "epoch": 0.07647371216144451, "grad_norm": 1.5123393535614014, "learning_rate": 1.9945376095861546e-05, "loss": 0.818, "num_tokens": 1996995.0, "step": 9 }, { "epoch": 0.0849707912904939, "grad_norm": 1.132192611694336, "learning_rate": 1.991469876173753e-05, "loss": 0.7753, "num_tokens": 2228097.0, "step": 10 }, { "epoch": 0.09346787041954328, "grad_norm": 0.8642380237579346, "learning_rate": 1.9877251730624504e-05, "loss": 0.7495, "num_tokens": 2451216.0, "step": 11 }, { "epoch": 0.10196494954859267, "grad_norm": 0.6592623591423035, "learning_rate": 1.983306343927176e-05, "loss": 0.7353, "num_tokens": 2671449.0, "step": 12 }, { "epoch": 0.11046202867764206, "grad_norm": 0.5095056295394897, "learning_rate": 1.978216744364692e-05, "loss": 0.7199, "num_tokens": 2887598.0, "step": 13 }, { "epoch": 0.11895910780669144, "grad_norm": 0.43245795369148254, "learning_rate": 1.9724602393453976e-05, "loss": 0.7074, "num_tokens": 3102297.0, "step": 14 }, { "epoch": 0.12745618693574085, "grad_norm": 0.3649689853191376, "learning_rate": 1.9660412002783254e-05, "loss": 0.6916, "num_tokens": 3322488.0, "step": 15 }, { "epoch": 0.13595326606479022, "grad_norm": 0.32840588688850403, "learning_rate": 1.958964501691557e-05, "loss": 0.6907, "num_tokens": 3552428.0, "step": 16 }, { "epoch": 0.14445034519383962, "grad_norm": 0.30777254700660706, "learning_rate": 1.9512355175305713e-05, "loss": 0.6805, "num_tokens": 3779989.0, "step": 17 }, { "epoch": 0.15294742432288902, "grad_norm": 0.2842954695224762, "learning_rate": 1.9428601170773492e-05, "loss": 0.6681, "num_tokens": 4019860.0, "step": 18 }, { "epoch": 0.1614445034519384, "grad_norm": 0.28378936648368835, "learning_rate": 1.9338446604933218e-05, "loss": 0.6591, "num_tokens": 4243770.0, "step": 19 }, { "epoch": 0.1699415825809878, "grad_norm": 0.2757631242275238, "learning_rate": 1.9241959939895518e-05, "loss": 0.6637, "num_tokens": 4471904.0, "step": 20 }, { "epoch": 0.17843866171003717, "grad_norm": 0.2628677487373352, "learning_rate": 1.91392144462782e-05, "loss": 0.6455, "num_tokens": 4707414.0, "step": 21 }, { "epoch": 0.18693574083908657, "grad_norm": 0.2680025100708008, "learning_rate": 1.9030288147565547e-05, "loss": 0.6415, "num_tokens": 4934312.0, "step": 22 }, { "epoch": 0.19543281996813594, "grad_norm": 0.26622337102890015, "learning_rate": 1.89152637608584e-05, "loss": 0.635, "num_tokens": 5154540.0, "step": 23 }, { "epoch": 0.20392989909718534, "grad_norm": 0.25727367401123047, "learning_rate": 1.879422863405995e-05, "loss": 0.6281, "num_tokens": 5381802.0, "step": 24 }, { "epoch": 0.21242697822623474, "grad_norm": 0.2600906491279602, "learning_rate": 1.8667274679544944e-05, "loss": 0.6198, "num_tokens": 5600149.0, "step": 25 }, { "epoch": 0.22092405735528411, "grad_norm": 0.24757230281829834, "learning_rate": 1.8534498304362758e-05, "loss": 0.6131, "num_tokens": 5827367.0, "step": 26 }, { "epoch": 0.22942113648433352, "grad_norm": 0.2472054362297058, "learning_rate": 1.8396000337027208e-05, "loss": 0.6062, "num_tokens": 6047147.0, "step": 27 }, { "epoch": 0.2379182156133829, "grad_norm": 0.23867134749889374, "learning_rate": 1.8251885950948805e-05, "loss": 0.5917, "num_tokens": 6267160.0, "step": 28 }, { "epoch": 0.2464152947424323, "grad_norm": 0.23080606758594513, "learning_rate": 1.8102264584567543e-05, "loss": 0.588, "num_tokens": 6501872.0, "step": 29 }, { "epoch": 0.2549123738714817, "grad_norm": 0.22549813985824585, "learning_rate": 1.7947249858246888e-05, "loss": 0.5868, "num_tokens": 6739822.0, "step": 30 }, { "epoch": 0.2634094530005311, "grad_norm": 0.22806097567081451, "learning_rate": 1.7786959487992068e-05, "loss": 0.5726, "num_tokens": 6969353.0, "step": 31 }, { "epoch": 0.27190653212958044, "grad_norm": 0.23609699308872223, "learning_rate": 1.7621515196058188e-05, "loss": 0.5695, "num_tokens": 7196669.0, "step": 32 }, { "epoch": 0.28040361125862984, "grad_norm": 0.2319885790348053, "learning_rate": 1.7451042618516063e-05, "loss": 0.5592, "num_tokens": 7427812.0, "step": 33 }, { "epoch": 0.28890069038767924, "grad_norm": 0.2277105748653412, "learning_rate": 1.727567120984596e-05, "loss": 0.5453, "num_tokens": 7664616.0, "step": 34 }, { "epoch": 0.29739776951672864, "grad_norm": 0.24143275618553162, "learning_rate": 1.709553414463167e-05, "loss": 0.5381, "num_tokens": 7892289.0, "step": 35 }, { "epoch": 0.30589484864577804, "grad_norm": 0.23762071132659912, "learning_rate": 1.6910768216429613e-05, "loss": 0.5447, "num_tokens": 8125714.0, "step": 36 }, { "epoch": 0.3143919277748274, "grad_norm": 0.2455436736345291, "learning_rate": 1.6721513733889716e-05, "loss": 0.5234, "num_tokens": 8354957.0, "step": 37 }, { "epoch": 0.3228890069038768, "grad_norm": 0.25508517026901245, "learning_rate": 1.6527914414207012e-05, "loss": 0.5173, "num_tokens": 8575672.0, "step": 38 }, { "epoch": 0.3313860860329262, "grad_norm": 0.256209135055542, "learning_rate": 1.6330117273984822e-05, "loss": 0.5232, "num_tokens": 8795680.0, "step": 39 }, { "epoch": 0.3398831651619756, "grad_norm": 0.2554221451282501, "learning_rate": 1.6128272517592397e-05, "loss": 0.5019, "num_tokens": 9017987.0, "step": 40 }, { "epoch": 0.348380244291025, "grad_norm": 0.24708497524261475, "learning_rate": 1.5922533423101843e-05, "loss": 0.4874, "num_tokens": 9251233.0, "step": 41 }, { "epoch": 0.35687732342007433, "grad_norm": 0.2547791302204132, "learning_rate": 1.5713056225890904e-05, "loss": 0.4929, "num_tokens": 9483146.0, "step": 42 }, { "epoch": 0.36537440254912373, "grad_norm": 0.26357918977737427, "learning_rate": 1.55e-05, "loss": 0.4735, "num_tokens": 9707559.0, "step": 43 }, { "epoch": 0.37387148167817313, "grad_norm": 0.272932767868042, "learning_rate": 1.5283526537333664e-05, "loss": 0.4728, "num_tokens": 9927096.0, "step": 44 }, { "epoch": 0.38236856080722254, "grad_norm": 0.2784619629383087, "learning_rate": 1.5063800224798007e-05, "loss": 0.4537, "num_tokens": 10146036.0, "step": 45 }, { "epoch": 0.3908656399362719, "grad_norm": 0.28873109817504883, "learning_rate": 1.4840987919467634e-05, "loss": 0.4438, "num_tokens": 10360134.0, "step": 46 }, { "epoch": 0.3993627190653213, "grad_norm": 0.2874409854412079, "learning_rate": 1.4615258821876728e-05, "loss": 0.4411, "num_tokens": 10580887.0, "step": 47 }, { "epoch": 0.4078597981943707, "grad_norm": 0.28669029474258423, "learning_rate": 1.4386784347530522e-05, "loss": 0.4284, "num_tokens": 10803846.0, "step": 48 }, { "epoch": 0.4163568773234201, "grad_norm": 0.28335171937942505, "learning_rate": 1.4155737996734791e-05, "loss": 0.4254, "num_tokens": 11035018.0, "step": 49 }, { "epoch": 0.4248539564524695, "grad_norm": 0.29582643508911133, "learning_rate": 1.3922295222842153e-05, "loss": 0.4183, "num_tokens": 11259327.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_loss": 0.44191795587539673, "eval_num_tokens": 11259327.0, "eval_runtime": 6.3535, "eval_samples_per_second": 3.148, "eval_steps_per_second": 1.574, "step": 50 }, { "epoch": 0.4248539564524695, "eval_reward_final_answer": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_reward_response_format": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_reward_interaction": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_loss": 0.44191795587539673, "eval_num_tokens": 0.0, "eval_runtime": 5.1228, "eval_samples_per_second": 3.904, "eval_steps_per_second": 1.952, "step": 50 }, { "epoch": 0.4248539564524695, "eval_reward_final_answer": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_reward_response_format": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_reward_interaction": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_diagnostic/format_valid_ratio": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_diagnostic/tool_parse_success_ratio": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_diagnostic/answer_attempted_ratio": 0.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_diagnostic/avg_turns_taken": 1.0, "step": 50 }, { "epoch": 0.4248539564524695, "eval_diagnostic/stop_reason/extraction_failed": 1.0, "step": 50 }, { "epoch": 0.43335103558151883, "grad_norm": 0.29690828919410706, "learning_rate": 1.3686633299015254e-05, "loss": 0.4135, "num_tokens": 227214.0, "step": 51 }, { "epoch": 0.44184811471056823, "grad_norm": 0.3105921745300293, "learning_rate": 1.3448931183608016e-05, "loss": 0.4018, "num_tokens": 448367.0, "step": 52 }, { "epoch": 0.45034519383961763, "grad_norm": 0.30604615807533264, "learning_rate": 1.3209369384267194e-05, "loss": 0.396, "num_tokens": 675175.0, "step": 53 }, { "epoch": 0.45884227296866703, "grad_norm": 0.3152115046977997, "learning_rate": 1.2968129820857384e-05, "loss": 0.3767, "num_tokens": 900784.0, "step": 54 }, { "epoch": 0.46733935209771643, "grad_norm": 0.3190907835960388, "learning_rate": 1.2725395687313646e-05, "loss": 0.3731, "num_tokens": 1128575.0, "step": 55 }, { "epoch": 0.4758364312267658, "grad_norm": 0.3275493383407593, "learning_rate": 1.2481351312526606e-05, "loss": 0.3668, "num_tokens": 1347830.0, "step": 56 }, { "epoch": 0.4843335103558152, "grad_norm": 0.30037736892700195, "learning_rate": 1.2236182020365675e-05, "loss": 0.368, "num_tokens": 1584214.0, "step": 57 }, { "epoch": 0.4928305894848646, "grad_norm": 0.30363041162490845, "learning_rate": 1.1990073988946716e-05, "loss": 0.3584, "num_tokens": 1806122.0, "step": 58 }, { "epoch": 0.501327668613914, "grad_norm": 0.27165499329566956, "learning_rate": 1.1743214109250994e-05, "loss": 0.3474, "num_tokens": 2038933.0, "step": 59 }, { "epoch": 0.5098247477429634, "grad_norm": 0.26867446303367615, "learning_rate": 1.1495789843202792e-05, "loss": 0.3432, "num_tokens": 2257233.0, "step": 60 }, { "epoch": 0.5183218268720128, "grad_norm": 0.22571073472499847, "learning_rate": 1.124798908131346e-05, "loss": 0.3497, "num_tokens": 2502206.0, "step": 61 }, { "epoch": 0.5268189060010622, "grad_norm": 0.22096557915210724, "learning_rate": 1.1000000000000001e-05, "loss": 0.3357, "num_tokens": 2733328.0, "step": 62 }, { "epoch": 0.5353159851301115, "grad_norm": 0.21603769063949585, "learning_rate": 1.0752010918686544e-05, "loss": 0.3323, "num_tokens": 2958525.0, "step": 63 }, { "epoch": 0.5438130642591609, "grad_norm": 0.2027578502893448, "learning_rate": 1.050421015679721e-05, "loss": 0.3346, "num_tokens": 3189000.0, "step": 64 }, { "epoch": 0.5523101433882103, "grad_norm": 0.20277895033359528, "learning_rate": 1.0256785890749011e-05, "loss": 0.3126, "num_tokens": 3407547.0, "step": 65 }, { "epoch": 0.5608072225172597, "grad_norm": 0.187447190284729, "learning_rate": 1.0009926011053289e-05, "loss": 0.3175, "num_tokens": 3638073.0, "step": 66 }, { "epoch": 0.5693043016463091, "grad_norm": 0.18870113790035248, "learning_rate": 9.763817979634327e-06, "loss": 0.3053, "num_tokens": 3857870.0, "step": 67 }, { "epoch": 0.5778013807753585, "grad_norm": 0.18314893543720245, "learning_rate": 9.518648687473396e-06, "loss": 0.3087, "num_tokens": 4081907.0, "step": 68 }, { "epoch": 0.5862984599044079, "grad_norm": 0.17281264066696167, "learning_rate": 9.274604312686356e-06, "loss": 0.3167, "num_tokens": 4318189.0, "step": 69 }, { "epoch": 0.5947955390334573, "grad_norm": 0.1761389523744583, "learning_rate": 9.031870179142619e-06, "loss": 0.2964, "num_tokens": 4542756.0, "step": 70 }, { "epoch": 0.6032926181625067, "grad_norm": 0.17463359236717224, "learning_rate": 8.790630615732809e-06, "loss": 0.2915, "num_tokens": 4764901.0, "step": 71 }, { "epoch": 0.6117896972915561, "grad_norm": 0.1684163361787796, "learning_rate": 8.551068816391984e-06, "loss": 0.292, "num_tokens": 4989831.0, "step": 72 }, { "epoch": 0.6202867764206054, "grad_norm": 0.17157095670700073, "learning_rate": 8.313366700984753e-06, "loss": 0.2779, "num_tokens": 5202719.0, "step": 73 }, { "epoch": 0.6287838555496548, "grad_norm": 0.16972655057907104, "learning_rate": 8.07770477715785e-06, "loss": 0.285, "num_tokens": 5424906.0, "step": 74 }, { "epoch": 0.6372809346787042, "grad_norm": 0.16237466037273407, "learning_rate": 7.844262003265214e-06, "loss": 0.2903, "num_tokens": 5653610.0, "step": 75 }, { "epoch": 0.6457780138077536, "grad_norm": 0.1564113050699234, "learning_rate": 7.613215652469481e-06, "loss": 0.298, "num_tokens": 5886275.0, "step": 76 }, { "epoch": 0.654275092936803, "grad_norm": 0.1518831104040146, "learning_rate": 7.384741178123278e-06, "loss": 0.29, "num_tokens": 6127331.0, "step": 77 }, { "epoch": 0.6627721720658524, "grad_norm": 0.1514195203781128, "learning_rate": 7.159012080532368e-06, "loss": 0.2808, "num_tokens": 6350243.0, "step": 78 }, { "epoch": 0.6712692511949018, "grad_norm": 0.14839230477809906, "learning_rate": 6.936199775201998e-06, "loss": 0.2759, "num_tokens": 6574494.0, "step": 79 }, { "epoch": 0.6797663303239512, "grad_norm": 0.1434764415025711, "learning_rate": 6.7164734626663384e-06, "loss": 0.2812, "num_tokens": 6805646.0, "step": 80 }, { "epoch": 0.6882634094530006, "grad_norm": 0.14427417516708374, "learning_rate": 6.500000000000003e-06, "loss": 0.2708, "num_tokens": 7029262.0, "step": 81 }, { "epoch": 0.69676048858205, "grad_norm": 0.14127525687217712, "learning_rate": 6.2869437741091e-06, "loss": 0.2813, "num_tokens": 7257816.0, "step": 82 }, { "epoch": 0.7052575677110993, "grad_norm": 0.1428772211074829, "learning_rate": 6.077466576898161e-06, "loss": 0.2711, "num_tokens": 7481399.0, "step": 83 }, { "epoch": 0.7137546468401487, "grad_norm": 0.14086274802684784, "learning_rate": 5.871727482407605e-06, "loss": 0.2679, "num_tokens": 7703455.0, "step": 84 }, { "epoch": 0.7222517259691981, "grad_norm": 0.13953223824501038, "learning_rate": 5.669882726015181e-06, "loss": 0.2692, "num_tokens": 7924077.0, "step": 85 }, { "epoch": 0.7307488050982475, "grad_norm": 0.14064429700374603, "learning_rate": 5.47208558579299e-06, "loss": 0.259, "num_tokens": 8143604.0, "step": 86 }, { "epoch": 0.7392458842272969, "grad_norm": 0.13626375794410706, "learning_rate": 5.27848626611029e-06, "loss": 0.2674, "num_tokens": 8370430.0, "step": 87 }, { "epoch": 0.7477429633563463, "grad_norm": 0.13688842952251434, "learning_rate": 5.089231783570392e-06, "loss": 0.2763, "num_tokens": 8603383.0, "step": 88 }, { "epoch": 0.7562400424853957, "grad_norm": 0.13918966054916382, "learning_rate": 4.904465855368333e-06, "loss": 0.2742, "num_tokens": 8830677.0, "step": 89 }, { "epoch": 0.7647371216144451, "grad_norm": 0.14204055070877075, "learning_rate": 4.724328790154042e-06, "loss": 0.2525, "num_tokens": 9049384.0, "step": 90 }, { "epoch": 0.7732342007434945, "grad_norm": 0.13645566999912262, "learning_rate": 4.548957381483941e-06, "loss": 0.2752, "num_tokens": 9283178.0, "step": 91 }, { "epoch": 0.7817312798725438, "grad_norm": 0.14060115814208984, "learning_rate": 4.378484803941816e-06, "loss": 0.2561, "num_tokens": 9507128.0, "step": 92 }, { "epoch": 0.7902283590015932, "grad_norm": 0.140924334526062, "learning_rate": 4.2130405120079356e-06, "loss": 0.2615, "num_tokens": 9733410.0, "step": 93 }, { "epoch": 0.7987254381306426, "grad_norm": 0.1394304782152176, "learning_rate": 4.052750141753112e-06, "loss": 0.2616, "num_tokens": 9960712.0, "step": 94 }, { "epoch": 0.807222517259692, "grad_norm": 0.1327814757823944, "learning_rate": 3.89773541543246e-06, "loss": 0.2618, "num_tokens": 10189564.0, "step": 95 }, { "epoch": 0.8157195963887414, "grad_norm": 0.13691458106040955, "learning_rate": 3.748114049051197e-06, "loss": 0.2528, "num_tokens": 10412249.0, "step": 96 }, { "epoch": 0.8242166755177908, "grad_norm": 0.13488516211509705, "learning_rate": 3.603999662972795e-06, "loss": 0.2524, "num_tokens": 10633903.0, "step": 97 }, { "epoch": 0.8327137546468402, "grad_norm": 0.12742717564105988, "learning_rate": 3.4655016956372432e-06, "loss": 0.2646, "num_tokens": 10864574.0, "step": 98 }, { "epoch": 0.8412108337758896, "grad_norm": 0.12795040011405945, "learning_rate": 3.332725320455058e-06, "loss": 0.2442, "num_tokens": 11083990.0, "step": 99 }, { "epoch": 0.849707912904939, "grad_norm": 0.12140467017889023, "learning_rate": 3.205771365940052e-06, "loss": 0.2523, "num_tokens": 11315846.0, "step": 100 }, { "epoch": 0.849707912904939, "eval_loss": 0.4209205210208893, "eval_num_tokens": 11315846.0, "eval_runtime": 4.5682, "eval_samples_per_second": 4.378, "eval_steps_per_second": 2.189, "step": 100 }, { "epoch": 0.849707912904939, "eval_reward_final_answer": 0.0, "step": 100 }, { "epoch": 0.849707912904939, "eval_reward_response_format": 0.0, "step": 100 }, { "epoch": 0.849707912904939, "eval_reward_interaction": 0.0, "step": 100 }, { "epoch": 0.849707912904939, "eval_diagnostic/format_valid_ratio": 0.1, "step": 100 }, { "epoch": 0.849707912904939, "eval_diagnostic/tool_parse_success_ratio": 0.0, "step": 100 }, { "epoch": 0.849707912904939, "eval_diagnostic/answer_attempted_ratio": 0.0, "step": 100 }, { "epoch": 0.849707912904939, "eval_diagnostic/avg_turns_taken": 1.0, "step": 100 }, { "epoch": 0.849707912904939, "eval_diagnostic/stop_reason/extraction_failed": 0.9, "step": 100 }, { "epoch": 0.849707912904939, "eval_diagnostic/stop_reason/no_action": 0.1, "step": 100 }, { "epoch": 0.8582049920339884, "grad_norm": 0.12342186272144318, "learning_rate": 3.0847362391415995e-06, "loss": 0.2586, "num_tokens": 11540477.0, "step": 101 }, { "epoch": 0.8667020711630377, "grad_norm": 0.1200730949640274, "learning_rate": 2.969711852434454e-06, "loss": 0.2604, "num_tokens": 11771582.0, "step": 102 }, { "epoch": 0.8751991502920871, "grad_norm": 0.12449000030755997, "learning_rate": 2.860785553721803e-06, "loss": 0.2453, "num_tokens": 11992209.0, "step": 103 }, { "epoch": 0.8836962294211365, "grad_norm": 0.11838784068822861, "learning_rate": 2.7580400601044825e-06, "loss": 0.2523, "num_tokens": 12214354.0, "step": 104 }, { "epoch": 0.8921933085501859, "grad_norm": 0.11962386220693588, "learning_rate": 2.6615533950667844e-06, "loss": 0.2456, "num_tokens": 12433134.0, "step": 105 }, { "epoch": 0.9006903876792353, "grad_norm": 0.11446674913167953, "learning_rate": 2.5713988292265084e-06, "loss": 0.2656, "num_tokens": 12672477.0, "step": 106 }, { "epoch": 0.9091874668082847, "grad_norm": 0.1163739487528801, "learning_rate": 2.4876448246942884e-06, "loss": 0.2462, "num_tokens": 12894135.0, "step": 107 }, { "epoch": 0.9176845459373341, "grad_norm": 0.11075320094823837, "learning_rate": 2.4103549830844328e-06, "loss": 0.2641, "num_tokens": 13129073.0, "step": 108 }, { "epoch": 0.9261816250663835, "grad_norm": 0.11537513881921768, "learning_rate": 2.3395879972167464e-06, "loss": 0.2474, "num_tokens": 13348255.0, "step": 109 }, { "epoch": 0.9346787041954329, "grad_norm": 0.10961325466632843, "learning_rate": 2.275397606546027e-06, "loss": 0.2697, "num_tokens": 13592454.0, "step": 110 }, { "epoch": 0.9431757833244823, "grad_norm": 0.11164919286966324, "learning_rate": 2.21783255635308e-06, "loss": 0.2552, "num_tokens": 13819727.0, "step": 111 }, { "epoch": 0.9516728624535316, "grad_norm": 0.10987438261508942, "learning_rate": 2.1669365607282396e-06, "loss": 0.2548, "num_tokens": 14051729.0, "step": 112 }, { "epoch": 0.960169941582581, "grad_norm": 0.11316878348588943, "learning_rate": 2.1227482693754995e-06, "loss": 0.2492, "num_tokens": 14274432.0, "step": 113 }, { "epoch": 0.9686670207116304, "grad_norm": 0.10757338255643845, "learning_rate": 2.085301238262471e-06, "loss": 0.2531, "num_tokens": 14498986.0, "step": 114 }, { "epoch": 0.9771640998406798, "grad_norm": 0.10984344780445099, "learning_rate": 2.054623904138455e-06, "loss": 0.2535, "num_tokens": 14724914.0, "step": 115 }, { "epoch": 0.9856611789697292, "grad_norm": 0.10751090943813324, "learning_rate": 2.0307395629399716e-06, "loss": 0.2517, "num_tokens": 14957559.0, "step": 116 }, { "epoch": 0.9941582580987786, "grad_norm": 0.10235580801963806, "learning_rate": 2.0136663521001547e-06, "loss": 0.2592, "num_tokens": 15193005.0, "step": 117 }, { "epoch": 0.9941582580987786, "step": 117, "total_flos": 5.5251981578153165e+17, "train_loss": 0.16806216436064142, "train_runtime": 6592.4366, "train_samples_per_second": 1.142, "train_steps_per_second": 0.018 } ], "logging_steps": 1, "max_steps": 118, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.5251981578153165e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }