{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 54789, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002737775830914965, "grad_norm": 200.228759765625, "learning_rate": 4.562876437306078e-08, "loss": 9.3162, "step": 5 }, { "epoch": 0.000547555166182993, "grad_norm": 236.3983917236328, "learning_rate": 9.125752874612156e-08, "loss": 9.2845, "step": 10 }, { "epoch": 0.0008213327492744894, "grad_norm": 234.62106323242188, "learning_rate": 1.3688629311918233e-07, "loss": 9.3598, "step": 15 }, { "epoch": 0.001095110332365986, "grad_norm": 219.67446899414062, "learning_rate": 1.8251505749224312e-07, "loss": 9.5057, "step": 20 }, { "epoch": 0.0013688879154574824, "grad_norm": 211.73594665527344, "learning_rate": 2.2814382186530392e-07, "loss": 9.154, "step": 25 }, { "epoch": 0.0016426654985489788, "grad_norm": 204.5263214111328, "learning_rate": 2.7377258623836466e-07, "loss": 9.2069, "step": 30 }, { "epoch": 0.0019164430816404753, "grad_norm": 71.06412506103516, "learning_rate": 3.194013506114255e-07, "loss": 8.7315, "step": 35 }, { "epoch": 0.002190220664731972, "grad_norm": 86.12408447265625, "learning_rate": 3.6503011498448625e-07, "loss": 8.6348, "step": 40 }, { "epoch": 0.002463998247823468, "grad_norm": 88.61984252929688, "learning_rate": 4.1065887935754696e-07, "loss": 7.438, "step": 45 }, { "epoch": 0.002737775830914965, "grad_norm": 88.58251190185547, "learning_rate": 4.5628764373060784e-07, "loss": 6.9658, "step": 50 }, { "epoch": 0.003011553414006461, "grad_norm": 47.61603927612305, "learning_rate": 5.019164081036686e-07, "loss": 4.9104, "step": 55 }, { "epoch": 0.0032853309970979575, "grad_norm": 35.96636962890625, "learning_rate": 5.475451724767293e-07, "loss": 2.7841, "step": 60 }, { "epoch": 0.0035591085801894543, "grad_norm": 26.4632568359375, "learning_rate": 5.931739368497901e-07, "loss": 2.3295, "step": 65 }, { "epoch": 0.0038328861632809506, "grad_norm": 25.339841842651367, "learning_rate": 6.38802701222851e-07, "loss": 1.8779, "step": 70 }, { "epoch": 0.004106663746372447, "grad_norm": 5.818326473236084, "learning_rate": 6.844314655959117e-07, "loss": 1.273, "step": 75 }, { "epoch": 0.004380441329463944, "grad_norm": 2.545865774154663, "learning_rate": 7.300602299689725e-07, "loss": 1.0659, "step": 80 }, { "epoch": 0.00465421891255544, "grad_norm": 1.8601058721542358, "learning_rate": 7.756889943420332e-07, "loss": 0.9729, "step": 85 }, { "epoch": 0.004927996495646936, "grad_norm": 1.5824148654937744, "learning_rate": 8.213177587150939e-07, "loss": 0.8916, "step": 90 }, { "epoch": 0.005201774078738433, "grad_norm": 1.1154818534851074, "learning_rate": 8.669465230881549e-07, "loss": 0.8324, "step": 95 }, { "epoch": 0.00547555166182993, "grad_norm": 0.9312515258789062, "learning_rate": 9.125752874612157e-07, "loss": 0.7798, "step": 100 }, { "epoch": 0.005749329244921426, "grad_norm": 0.6706961393356323, "learning_rate": 9.582040518342764e-07, "loss": 0.723, "step": 105 }, { "epoch": 0.006023106828012922, "grad_norm": 0.5829969644546509, "learning_rate": 1.0038328162073372e-06, "loss": 0.6807, "step": 110 }, { "epoch": 0.006296884411104419, "grad_norm": 0.48154789209365845, "learning_rate": 1.049461580580398e-06, "loss": 0.6587, "step": 115 }, { "epoch": 0.006570661994195915, "grad_norm": 0.4221213757991791, "learning_rate": 1.0950903449534586e-06, "loss": 0.6263, "step": 120 }, { "epoch": 0.006844439577287411, "grad_norm": 0.3536619246006012, "learning_rate": 1.1407191093265195e-06, "loss": 0.6448, "step": 125 }, { "epoch": 0.0071182171603789085, "grad_norm": 0.4364500343799591, "learning_rate": 1.1863478736995803e-06, "loss": 0.6092, "step": 130 }, { "epoch": 0.007391994743470405, "grad_norm": 0.33262965083122253, "learning_rate": 1.231976638072641e-06, "loss": 0.5989, "step": 135 }, { "epoch": 0.007665772326561901, "grad_norm": 0.30082330107688904, "learning_rate": 1.277605402445702e-06, "loss": 0.5809, "step": 140 }, { "epoch": 0.007939549909653398, "grad_norm": 0.2779456079006195, "learning_rate": 1.3232341668187625e-06, "loss": 0.5657, "step": 145 }, { "epoch": 0.008213327492744895, "grad_norm": 0.27062201499938965, "learning_rate": 1.3688629311918234e-06, "loss": 0.5553, "step": 150 }, { "epoch": 0.008487105075836391, "grad_norm": 0.3466357886791229, "learning_rate": 1.4144916955648842e-06, "loss": 0.5683, "step": 155 }, { "epoch": 0.008760882658927887, "grad_norm": 0.23494262993335724, "learning_rate": 1.460120459937945e-06, "loss": 0.5382, "step": 160 }, { "epoch": 0.009034660242019384, "grad_norm": 0.2563057243824005, "learning_rate": 1.5057492243110058e-06, "loss": 0.5236, "step": 165 }, { "epoch": 0.00930843782511088, "grad_norm": 0.26110002398490906, "learning_rate": 1.5513779886840664e-06, "loss": 0.5381, "step": 170 }, { "epoch": 0.009582215408202376, "grad_norm": 0.270365446805954, "learning_rate": 1.5970067530571272e-06, "loss": 0.5239, "step": 175 }, { "epoch": 0.009855992991293873, "grad_norm": 0.21463119983673096, "learning_rate": 1.6426355174301879e-06, "loss": 0.522, "step": 180 }, { "epoch": 0.010129770574385369, "grad_norm": 0.21974734961986542, "learning_rate": 1.6882642818032489e-06, "loss": 0.4901, "step": 185 }, { "epoch": 0.010403548157476865, "grad_norm": 0.21815474331378937, "learning_rate": 1.7338930461763097e-06, "loss": 0.5034, "step": 190 }, { "epoch": 0.010677325740568362, "grad_norm": 0.2236776202917099, "learning_rate": 1.7795218105493703e-06, "loss": 0.5079, "step": 195 }, { "epoch": 0.01095110332365986, "grad_norm": 0.21032094955444336, "learning_rate": 1.8251505749224313e-06, "loss": 0.5278, "step": 200 }, { "epoch": 0.011224880906751356, "grad_norm": 0.22678357362747192, "learning_rate": 1.870779339295492e-06, "loss": 0.4959, "step": 205 }, { "epoch": 0.011498658489842852, "grad_norm": 0.19817833602428436, "learning_rate": 1.9164081036685528e-06, "loss": 0.4777, "step": 210 }, { "epoch": 0.011772436072934349, "grad_norm": 0.21468302607536316, "learning_rate": 1.962036868041614e-06, "loss": 0.48, "step": 215 }, { "epoch": 0.012046213656025845, "grad_norm": 0.21804505586624146, "learning_rate": 2.0076656324146744e-06, "loss": 0.469, "step": 220 }, { "epoch": 0.012319991239117341, "grad_norm": 0.22068734467029572, "learning_rate": 2.053294396787735e-06, "loss": 0.4751, "step": 225 }, { "epoch": 0.012593768822208837, "grad_norm": 0.20763404667377472, "learning_rate": 2.098923161160796e-06, "loss": 0.4642, "step": 230 }, { "epoch": 0.012867546405300334, "grad_norm": 0.2129552662372589, "learning_rate": 2.1445519255338567e-06, "loss": 0.4678, "step": 235 }, { "epoch": 0.01314132398839183, "grad_norm": 0.1870199292898178, "learning_rate": 2.1901806899069173e-06, "loss": 0.4533, "step": 240 }, { "epoch": 0.013415101571483326, "grad_norm": 0.2371717244386673, "learning_rate": 2.235809454279978e-06, "loss": 0.4753, "step": 245 }, { "epoch": 0.013688879154574823, "grad_norm": 0.21068786084651947, "learning_rate": 2.281438218653039e-06, "loss": 0.4515, "step": 250 }, { "epoch": 0.013962656737666319, "grad_norm": 0.19745992124080658, "learning_rate": 2.3270669830260995e-06, "loss": 0.445, "step": 255 }, { "epoch": 0.014236434320757817, "grad_norm": 0.21025888621807098, "learning_rate": 2.3726957473991606e-06, "loss": 0.4561, "step": 260 }, { "epoch": 0.014510211903849313, "grad_norm": 0.1885746568441391, "learning_rate": 2.4183245117722216e-06, "loss": 0.4436, "step": 265 }, { "epoch": 0.01478398948694081, "grad_norm": 0.20961129665374756, "learning_rate": 2.463953276145282e-06, "loss": 0.4343, "step": 270 }, { "epoch": 0.015057767070032306, "grad_norm": 0.2048693150281906, "learning_rate": 2.509582040518343e-06, "loss": 0.4432, "step": 275 }, { "epoch": 0.015331544653123802, "grad_norm": 0.18323342502117157, "learning_rate": 2.555210804891404e-06, "loss": 0.4373, "step": 280 }, { "epoch": 0.015605322236215299, "grad_norm": 0.1912016123533249, "learning_rate": 2.6008395692644645e-06, "loss": 0.4382, "step": 285 }, { "epoch": 0.015879099819306797, "grad_norm": 0.1846287101507187, "learning_rate": 2.646468333637525e-06, "loss": 0.4255, "step": 290 }, { "epoch": 0.01615287740239829, "grad_norm": 0.1940842717885971, "learning_rate": 2.692097098010586e-06, "loss": 0.4265, "step": 295 }, { "epoch": 0.01642665498548979, "grad_norm": 0.19855505228042603, "learning_rate": 2.7377258623836467e-06, "loss": 0.4271, "step": 300 }, { "epoch": 0.016700432568581284, "grad_norm": 0.18777455389499664, "learning_rate": 2.7833546267567073e-06, "loss": 0.4243, "step": 305 }, { "epoch": 0.016974210151672782, "grad_norm": 0.1986641138792038, "learning_rate": 2.8289833911297683e-06, "loss": 0.4118, "step": 310 }, { "epoch": 0.017247987734764277, "grad_norm": 0.21131789684295654, "learning_rate": 2.874612155502829e-06, "loss": 0.4403, "step": 315 }, { "epoch": 0.017521765317855775, "grad_norm": 0.18199770152568817, "learning_rate": 2.92024091987589e-06, "loss": 0.4034, "step": 320 }, { "epoch": 0.01779554290094727, "grad_norm": 0.18342019617557526, "learning_rate": 2.965869684248951e-06, "loss": 0.41, "step": 325 }, { "epoch": 0.018069320484038767, "grad_norm": 0.21450960636138916, "learning_rate": 3.0114984486220116e-06, "loss": 0.4122, "step": 330 }, { "epoch": 0.018343098067130262, "grad_norm": 0.19499611854553223, "learning_rate": 3.0571272129950722e-06, "loss": 0.4172, "step": 335 }, { "epoch": 0.01861687565022176, "grad_norm": 0.2731406092643738, "learning_rate": 3.102755977368133e-06, "loss": 0.4015, "step": 340 }, { "epoch": 0.018890653233313258, "grad_norm": 0.18471704423427582, "learning_rate": 3.1483847417411935e-06, "loss": 0.4166, "step": 345 }, { "epoch": 0.019164430816404752, "grad_norm": 0.2153637558221817, "learning_rate": 3.1940135061142545e-06, "loss": 0.4221, "step": 350 }, { "epoch": 0.01943820839949625, "grad_norm": 0.20720428228378296, "learning_rate": 3.2396422704873155e-06, "loss": 0.4131, "step": 355 }, { "epoch": 0.019711985982587745, "grad_norm": 0.19870685040950775, "learning_rate": 3.2852710348603757e-06, "loss": 0.4142, "step": 360 }, { "epoch": 0.019985763565679243, "grad_norm": 0.18633286654949188, "learning_rate": 3.3308997992334367e-06, "loss": 0.3994, "step": 365 }, { "epoch": 0.020259541148770738, "grad_norm": 0.2026391476392746, "learning_rate": 3.3765285636064978e-06, "loss": 0.3957, "step": 370 }, { "epoch": 0.020533318731862236, "grad_norm": 0.18209725618362427, "learning_rate": 3.4221573279795584e-06, "loss": 0.3817, "step": 375 }, { "epoch": 0.02080709631495373, "grad_norm": 0.20284634828567505, "learning_rate": 3.4677860923526194e-06, "loss": 0.3897, "step": 380 }, { "epoch": 0.02108087389804523, "grad_norm": 0.18771933019161224, "learning_rate": 3.5134148567256804e-06, "loss": 0.4046, "step": 385 }, { "epoch": 0.021354651481136723, "grad_norm": 0.2059711515903473, "learning_rate": 3.5590436210987406e-06, "loss": 0.3909, "step": 390 }, { "epoch": 0.02162842906422822, "grad_norm": 0.2121465504169464, "learning_rate": 3.6046723854718017e-06, "loss": 0.3969, "step": 395 }, { "epoch": 0.02190220664731972, "grad_norm": 0.20246420800685883, "learning_rate": 3.6503011498448627e-06, "loss": 0.3954, "step": 400 }, { "epoch": 0.022175984230411214, "grad_norm": 0.22908546030521393, "learning_rate": 3.695929914217923e-06, "loss": 0.4317, "step": 405 }, { "epoch": 0.022449761813502712, "grad_norm": 0.2641874849796295, "learning_rate": 3.741558678590984e-06, "loss": 0.3905, "step": 410 }, { "epoch": 0.022723539396594206, "grad_norm": 0.2056393325328827, "learning_rate": 3.787187442964045e-06, "loss": 0.3736, "step": 415 }, { "epoch": 0.022997316979685704, "grad_norm": 0.2146635204553604, "learning_rate": 3.8328162073371056e-06, "loss": 0.3977, "step": 420 }, { "epoch": 0.0232710945627772, "grad_norm": 0.25253772735595703, "learning_rate": 3.878444971710167e-06, "loss": 0.3838, "step": 425 }, { "epoch": 0.023544872145868697, "grad_norm": 0.21368232369422913, "learning_rate": 3.924073736083228e-06, "loss": 0.3928, "step": 430 }, { "epoch": 0.02381864972896019, "grad_norm": 0.194202721118927, "learning_rate": 3.969702500456288e-06, "loss": 0.3884, "step": 435 }, { "epoch": 0.02409242731205169, "grad_norm": 0.29932790994644165, "learning_rate": 4.015331264829349e-06, "loss": 0.3809, "step": 440 }, { "epoch": 0.024366204895143184, "grad_norm": 0.22465236485004425, "learning_rate": 4.06096002920241e-06, "loss": 0.3948, "step": 445 }, { "epoch": 0.024639982478234682, "grad_norm": 0.2347760945558548, "learning_rate": 4.10658879357547e-06, "loss": 0.3756, "step": 450 }, { "epoch": 0.02491376006132618, "grad_norm": 0.18728160858154297, "learning_rate": 4.152217557948531e-06, "loss": 0.3895, "step": 455 }, { "epoch": 0.025187537644417675, "grad_norm": 0.1802617758512497, "learning_rate": 4.197846322321592e-06, "loss": 0.3712, "step": 460 }, { "epoch": 0.025461315227509173, "grad_norm": 0.23105181753635406, "learning_rate": 4.243475086694652e-06, "loss": 0.3706, "step": 465 }, { "epoch": 0.025735092810600668, "grad_norm": 0.20042307674884796, "learning_rate": 4.289103851067713e-06, "loss": 0.3654, "step": 470 }, { "epoch": 0.026008870393692166, "grad_norm": 0.23039017617702484, "learning_rate": 4.334732615440774e-06, "loss": 0.3944, "step": 475 }, { "epoch": 0.02628264797678366, "grad_norm": 0.2635555565357208, "learning_rate": 4.3803613798138346e-06, "loss": 0.3765, "step": 480 }, { "epoch": 0.026556425559875158, "grad_norm": 0.22533030807971954, "learning_rate": 4.425990144186896e-06, "loss": 0.3692, "step": 485 }, { "epoch": 0.026830203142966653, "grad_norm": 0.22466622292995453, "learning_rate": 4.471618908559956e-06, "loss": 0.388, "step": 490 }, { "epoch": 0.02710398072605815, "grad_norm": 0.19894878566265106, "learning_rate": 4.517247672933017e-06, "loss": 0.3745, "step": 495 }, { "epoch": 0.027377758309149645, "grad_norm": 0.20690177381038666, "learning_rate": 4.562876437306078e-06, "loss": 0.375, "step": 500 }, { "epoch": 0.027651535892241143, "grad_norm": 0.23020519316196442, "learning_rate": 4.608505201679138e-06, "loss": 0.3713, "step": 505 }, { "epoch": 0.027925313475332638, "grad_norm": 0.20380523800849915, "learning_rate": 4.654133966052199e-06, "loss": 0.3782, "step": 510 }, { "epoch": 0.028199091058424136, "grad_norm": 0.19383655488491058, "learning_rate": 4.69976273042526e-06, "loss": 0.3667, "step": 515 }, { "epoch": 0.028472868641515634, "grad_norm": 0.20563113689422607, "learning_rate": 4.745391494798321e-06, "loss": 0.3777, "step": 520 }, { "epoch": 0.02874664622460713, "grad_norm": 0.18626484274864197, "learning_rate": 4.791020259171382e-06, "loss": 0.3588, "step": 525 }, { "epoch": 0.029020423807698627, "grad_norm": 0.24076426029205322, "learning_rate": 4.836649023544443e-06, "loss": 0.3562, "step": 530 }, { "epoch": 0.02929420139079012, "grad_norm": 0.21886415779590607, "learning_rate": 4.882277787917503e-06, "loss": 0.3569, "step": 535 }, { "epoch": 0.02956797897388162, "grad_norm": 0.28214970231056213, "learning_rate": 4.927906552290564e-06, "loss": 0.3679, "step": 540 }, { "epoch": 0.029841756556973114, "grad_norm": 0.2456277310848236, "learning_rate": 4.9735353166636254e-06, "loss": 0.3719, "step": 545 }, { "epoch": 0.030115534140064612, "grad_norm": 0.24034559726715088, "learning_rate": 5.019164081036686e-06, "loss": 0.3668, "step": 550 }, { "epoch": 0.030389311723156107, "grad_norm": 0.2337094098329544, "learning_rate": 5.064792845409747e-06, "loss": 0.3691, "step": 555 }, { "epoch": 0.030663089306247605, "grad_norm": 0.2228941023349762, "learning_rate": 5.110421609782808e-06, "loss": 0.3577, "step": 560 }, { "epoch": 0.0309368668893391, "grad_norm": 0.2510257959365845, "learning_rate": 5.156050374155868e-06, "loss": 0.3475, "step": 565 }, { "epoch": 0.031210644472430597, "grad_norm": 0.2723871171474457, "learning_rate": 5.201679138528929e-06, "loss": 0.3424, "step": 570 }, { "epoch": 0.03148442205552209, "grad_norm": 0.20698121190071106, "learning_rate": 5.24730790290199e-06, "loss": 0.3689, "step": 575 }, { "epoch": 0.03175819963861359, "grad_norm": 0.2717188894748688, "learning_rate": 5.29293666727505e-06, "loss": 0.363, "step": 580 }, { "epoch": 0.03203197722170509, "grad_norm": 0.24291227757930756, "learning_rate": 5.338565431648111e-06, "loss": 0.369, "step": 585 }, { "epoch": 0.03230575480479658, "grad_norm": 0.2081000953912735, "learning_rate": 5.384194196021172e-06, "loss": 0.3499, "step": 590 }, { "epoch": 0.03257953238788808, "grad_norm": 0.23294711112976074, "learning_rate": 5.429822960394232e-06, "loss": 0.3596, "step": 595 }, { "epoch": 0.03285330997097958, "grad_norm": 0.22464661300182343, "learning_rate": 5.475451724767293e-06, "loss": 0.354, "step": 600 }, { "epoch": 0.03312708755407107, "grad_norm": 0.19763486087322235, "learning_rate": 5.5210804891403544e-06, "loss": 0.358, "step": 605 }, { "epoch": 0.03340086513716257, "grad_norm": 0.21214543282985687, "learning_rate": 5.566709253513415e-06, "loss": 0.3467, "step": 610 }, { "epoch": 0.03367464272025406, "grad_norm": 0.22304119169712067, "learning_rate": 5.612338017886476e-06, "loss": 0.3523, "step": 615 }, { "epoch": 0.033948420303345564, "grad_norm": 0.21422536671161652, "learning_rate": 5.657966782259537e-06, "loss": 0.3671, "step": 620 }, { "epoch": 0.03422219788643706, "grad_norm": 0.21790562570095062, "learning_rate": 5.703595546632597e-06, "loss": 0.3495, "step": 625 }, { "epoch": 0.03449597546952855, "grad_norm": 0.19820933043956757, "learning_rate": 5.749224311005658e-06, "loss": 0.3459, "step": 630 }, { "epoch": 0.034769753052620055, "grad_norm": 0.2547595202922821, "learning_rate": 5.794853075378719e-06, "loss": 0.3595, "step": 635 }, { "epoch": 0.03504353063571155, "grad_norm": 0.20341265201568604, "learning_rate": 5.84048183975178e-06, "loss": 0.3475, "step": 640 }, { "epoch": 0.035317308218803044, "grad_norm": 0.21111460030078888, "learning_rate": 5.886110604124841e-06, "loss": 0.3398, "step": 645 }, { "epoch": 0.03559108580189454, "grad_norm": 0.21191801130771637, "learning_rate": 5.931739368497902e-06, "loss": 0.3546, "step": 650 }, { "epoch": 0.03586486338498604, "grad_norm": 0.29510870575904846, "learning_rate": 5.977368132870962e-06, "loss": 0.3424, "step": 655 }, { "epoch": 0.036138640968077534, "grad_norm": 0.2693905532360077, "learning_rate": 6.022996897244023e-06, "loss": 0.3493, "step": 660 }, { "epoch": 0.03641241855116903, "grad_norm": 0.23110175132751465, "learning_rate": 6.068625661617084e-06, "loss": 0.3506, "step": 665 }, { "epoch": 0.036686196134260524, "grad_norm": 0.19595643877983093, "learning_rate": 6.1142544259901445e-06, "loss": 0.3528, "step": 670 }, { "epoch": 0.036959973717352025, "grad_norm": 0.23342353105545044, "learning_rate": 6.1598831903632055e-06, "loss": 0.3282, "step": 675 }, { "epoch": 0.03723375130044352, "grad_norm": 0.22334885597229004, "learning_rate": 6.205511954736266e-06, "loss": 0.3465, "step": 680 }, { "epoch": 0.037507528883535014, "grad_norm": 0.22564563155174255, "learning_rate": 6.2511407191093276e-06, "loss": 0.3534, "step": 685 }, { "epoch": 0.037781306466626516, "grad_norm": 0.21350613236427307, "learning_rate": 6.296769483482387e-06, "loss": 0.3409, "step": 690 }, { "epoch": 0.03805508404971801, "grad_norm": 0.2759123742580414, "learning_rate": 6.342398247855448e-06, "loss": 0.3479, "step": 695 }, { "epoch": 0.038328861632809505, "grad_norm": 0.24188661575317383, "learning_rate": 6.388027012228509e-06, "loss": 0.3458, "step": 700 }, { "epoch": 0.038602639215901, "grad_norm": 0.21330590546131134, "learning_rate": 6.43365577660157e-06, "loss": 0.343, "step": 705 }, { "epoch": 0.0388764167989925, "grad_norm": 0.24794210493564606, "learning_rate": 6.479284540974631e-06, "loss": 0.3334, "step": 710 }, { "epoch": 0.039150194382083996, "grad_norm": 0.22784093022346497, "learning_rate": 6.524913305347692e-06, "loss": 0.3363, "step": 715 }, { "epoch": 0.03942397196517549, "grad_norm": 0.21583162248134613, "learning_rate": 6.570542069720751e-06, "loss": 0.3562, "step": 720 }, { "epoch": 0.039697749548266985, "grad_norm": 0.2091892659664154, "learning_rate": 6.6161708340938124e-06, "loss": 0.3509, "step": 725 }, { "epoch": 0.039971527131358486, "grad_norm": 0.2383112907409668, "learning_rate": 6.6617995984668735e-06, "loss": 0.3515, "step": 730 }, { "epoch": 0.04024530471444998, "grad_norm": 0.21699918806552887, "learning_rate": 6.7074283628399345e-06, "loss": 0.3353, "step": 735 }, { "epoch": 0.040519082297541476, "grad_norm": 0.24418699741363525, "learning_rate": 6.7530571272129955e-06, "loss": 0.3438, "step": 740 }, { "epoch": 0.04079285988063298, "grad_norm": 0.2535061836242676, "learning_rate": 6.7986858915860566e-06, "loss": 0.3334, "step": 745 }, { "epoch": 0.04106663746372447, "grad_norm": 0.18727625906467438, "learning_rate": 6.844314655959117e-06, "loss": 0.345, "step": 750 }, { "epoch": 0.041340415046815966, "grad_norm": 0.23316045105457306, "learning_rate": 6.889943420332178e-06, "loss": 0.3291, "step": 755 }, { "epoch": 0.04161419262990746, "grad_norm": 0.2559155523777008, "learning_rate": 6.935572184705239e-06, "loss": 0.327, "step": 760 }, { "epoch": 0.04188797021299896, "grad_norm": 0.2015332728624344, "learning_rate": 6.9812009490783e-06, "loss": 0.3245, "step": 765 }, { "epoch": 0.04216174779609046, "grad_norm": 0.23822851479053497, "learning_rate": 7.026829713451361e-06, "loss": 0.3335, "step": 770 }, { "epoch": 0.04243552537918195, "grad_norm": 0.23886382579803467, "learning_rate": 7.07245847782442e-06, "loss": 0.327, "step": 775 }, { "epoch": 0.042709302962273446, "grad_norm": 0.2150401622056961, "learning_rate": 7.118087242197481e-06, "loss": 0.3323, "step": 780 }, { "epoch": 0.04298308054536495, "grad_norm": 0.23437072336673737, "learning_rate": 7.163716006570542e-06, "loss": 0.3354, "step": 785 }, { "epoch": 0.04325685812845644, "grad_norm": 0.20755285024642944, "learning_rate": 7.209344770943603e-06, "loss": 0.3327, "step": 790 }, { "epoch": 0.04353063571154794, "grad_norm": 0.23211613297462463, "learning_rate": 7.254973535316664e-06, "loss": 0.3369, "step": 795 }, { "epoch": 0.04380441329463944, "grad_norm": 0.222047358751297, "learning_rate": 7.300602299689725e-06, "loss": 0.3324, "step": 800 }, { "epoch": 0.04407819087773093, "grad_norm": 0.19857639074325562, "learning_rate": 7.346231064062785e-06, "loss": 0.3305, "step": 805 }, { "epoch": 0.04435196846082243, "grad_norm": 0.27519533038139343, "learning_rate": 7.391859828435846e-06, "loss": 0.3373, "step": 810 }, { "epoch": 0.04462574604391392, "grad_norm": 0.24311824142932892, "learning_rate": 7.437488592808907e-06, "loss": 0.3242, "step": 815 }, { "epoch": 0.044899523627005423, "grad_norm": 0.29422828555107117, "learning_rate": 7.483117357181968e-06, "loss": 0.3204, "step": 820 }, { "epoch": 0.04517330121009692, "grad_norm": 0.2185431867837906, "learning_rate": 7.528746121555029e-06, "loss": 0.3301, "step": 825 }, { "epoch": 0.04544707879318841, "grad_norm": 0.22312821447849274, "learning_rate": 7.57437488592809e-06, "loss": 0.3306, "step": 830 }, { "epoch": 0.04572085637627991, "grad_norm": 0.2104414850473404, "learning_rate": 7.620003650301149e-06, "loss": 0.3242, "step": 835 }, { "epoch": 0.04599463395937141, "grad_norm": 0.2025061696767807, "learning_rate": 7.665632414674211e-06, "loss": 0.3229, "step": 840 }, { "epoch": 0.0462684115424629, "grad_norm": 0.22620242834091187, "learning_rate": 7.711261179047271e-06, "loss": 0.3162, "step": 845 }, { "epoch": 0.0465421891255544, "grad_norm": 0.21440553665161133, "learning_rate": 7.756889943420333e-06, "loss": 0.3266, "step": 850 }, { "epoch": 0.0468159667086459, "grad_norm": 0.23834997415542603, "learning_rate": 7.802518707793393e-06, "loss": 0.3255, "step": 855 }, { "epoch": 0.047089744291737394, "grad_norm": 0.30302315950393677, "learning_rate": 7.848147472166455e-06, "loss": 0.3225, "step": 860 }, { "epoch": 0.04736352187482889, "grad_norm": 0.294956237077713, "learning_rate": 7.893776236539514e-06, "loss": 0.3418, "step": 865 }, { "epoch": 0.04763729945792038, "grad_norm": 0.27867528796195984, "learning_rate": 7.939405000912576e-06, "loss": 0.3374, "step": 870 }, { "epoch": 0.047911077041011885, "grad_norm": 0.2723163664340973, "learning_rate": 7.985033765285636e-06, "loss": 0.3254, "step": 875 }, { "epoch": 0.04818485462410338, "grad_norm": 0.2409539669752121, "learning_rate": 8.030662529658698e-06, "loss": 0.3206, "step": 880 }, { "epoch": 0.048458632207194874, "grad_norm": 0.22860880196094513, "learning_rate": 8.076291294031758e-06, "loss": 0.3221, "step": 885 }, { "epoch": 0.04873240979028637, "grad_norm": 0.2269233912229538, "learning_rate": 8.12192005840482e-06, "loss": 0.3267, "step": 890 }, { "epoch": 0.04900618737337787, "grad_norm": 0.2030406892299652, "learning_rate": 8.167548822777878e-06, "loss": 0.3151, "step": 895 }, { "epoch": 0.049279964956469365, "grad_norm": 0.23167534172534943, "learning_rate": 8.21317758715094e-06, "loss": 0.3197, "step": 900 }, { "epoch": 0.04955374253956086, "grad_norm": 0.26715925335884094, "learning_rate": 8.258806351524e-06, "loss": 0.3198, "step": 905 }, { "epoch": 0.04982752012265236, "grad_norm": 0.22016629576683044, "learning_rate": 8.304435115897062e-06, "loss": 0.3239, "step": 910 }, { "epoch": 0.050101297705743855, "grad_norm": 0.22596225142478943, "learning_rate": 8.350063880270122e-06, "loss": 0.3261, "step": 915 }, { "epoch": 0.05037507528883535, "grad_norm": 0.2504442632198334, "learning_rate": 8.395692644643184e-06, "loss": 0.325, "step": 920 }, { "epoch": 0.050648852871926844, "grad_norm": 0.20658543705940247, "learning_rate": 8.441321409016244e-06, "loss": 0.3233, "step": 925 }, { "epoch": 0.050922630455018346, "grad_norm": 0.28574660420417786, "learning_rate": 8.486950173389305e-06, "loss": 0.3225, "step": 930 }, { "epoch": 0.05119640803810984, "grad_norm": 0.2245403379201889, "learning_rate": 8.532578937762366e-06, "loss": 0.3162, "step": 935 }, { "epoch": 0.051470185621201335, "grad_norm": 0.23046043515205383, "learning_rate": 8.578207702135427e-06, "loss": 0.3233, "step": 940 }, { "epoch": 0.05174396320429283, "grad_norm": 0.23372115194797516, "learning_rate": 8.623836466508489e-06, "loss": 0.3306, "step": 945 }, { "epoch": 0.05201774078738433, "grad_norm": 0.271594375371933, "learning_rate": 8.669465230881549e-06, "loss": 0.3295, "step": 950 }, { "epoch": 0.052291518370475826, "grad_norm": 0.22017930448055267, "learning_rate": 8.715093995254609e-06, "loss": 0.3098, "step": 955 }, { "epoch": 0.05256529595356732, "grad_norm": 0.2109350860118866, "learning_rate": 8.760722759627669e-06, "loss": 0.3039, "step": 960 }, { "epoch": 0.05283907353665882, "grad_norm": 0.2323787659406662, "learning_rate": 8.806351524000731e-06, "loss": 0.3119, "step": 965 }, { "epoch": 0.053112851119750316, "grad_norm": 0.2756807804107666, "learning_rate": 8.851980288373791e-06, "loss": 0.3135, "step": 970 }, { "epoch": 0.05338662870284181, "grad_norm": 0.22024646401405334, "learning_rate": 8.897609052746853e-06, "loss": 0.3172, "step": 975 }, { "epoch": 0.053660406285933306, "grad_norm": 0.22836965322494507, "learning_rate": 8.943237817119912e-06, "loss": 0.3213, "step": 980 }, { "epoch": 0.05393418386902481, "grad_norm": 0.231476292014122, "learning_rate": 8.988866581492973e-06, "loss": 0.3163, "step": 985 }, { "epoch": 0.0542079614521163, "grad_norm": 0.257990300655365, "learning_rate": 9.034495345866034e-06, "loss": 0.3237, "step": 990 }, { "epoch": 0.054481739035207796, "grad_norm": 0.2624226212501526, "learning_rate": 9.080124110239095e-06, "loss": 0.315, "step": 995 }, { "epoch": 0.05475551661829929, "grad_norm": 0.2690432369709015, "learning_rate": 9.125752874612156e-06, "loss": 0.3066, "step": 1000 }, { "epoch": 0.05502929420139079, "grad_norm": 0.22080813348293304, "learning_rate": 9.171381638985218e-06, "loss": 0.3105, "step": 1005 }, { "epoch": 0.05530307178448229, "grad_norm": 0.22431889176368713, "learning_rate": 9.217010403358276e-06, "loss": 0.3136, "step": 1010 }, { "epoch": 0.05557684936757378, "grad_norm": 0.2292473167181015, "learning_rate": 9.262639167731338e-06, "loss": 0.3184, "step": 1015 }, { "epoch": 0.055850626950665276, "grad_norm": 0.20891442894935608, "learning_rate": 9.308267932104398e-06, "loss": 0.3075, "step": 1020 }, { "epoch": 0.05612440453375678, "grad_norm": 0.29056060314178467, "learning_rate": 9.35389669647746e-06, "loss": 0.3179, "step": 1025 }, { "epoch": 0.05639818211684827, "grad_norm": 0.2473951280117035, "learning_rate": 9.39952546085052e-06, "loss": 0.3212, "step": 1030 }, { "epoch": 0.05667195969993977, "grad_norm": 0.3191179931163788, "learning_rate": 9.445154225223582e-06, "loss": 0.3196, "step": 1035 }, { "epoch": 0.05694573728303127, "grad_norm": 0.22162701189517975, "learning_rate": 9.490782989596642e-06, "loss": 0.3186, "step": 1040 }, { "epoch": 0.05721951486612276, "grad_norm": 0.23137275874614716, "learning_rate": 9.536411753969702e-06, "loss": 0.3116, "step": 1045 }, { "epoch": 0.05749329244921426, "grad_norm": 0.23387910425662994, "learning_rate": 9.582040518342764e-06, "loss": 0.3074, "step": 1050 }, { "epoch": 0.05776707003230575, "grad_norm": 0.21096503734588623, "learning_rate": 9.627669282715824e-06, "loss": 0.3103, "step": 1055 }, { "epoch": 0.058040847615397254, "grad_norm": 0.2325180023908615, "learning_rate": 9.673298047088886e-06, "loss": 0.3004, "step": 1060 }, { "epoch": 0.05831462519848875, "grad_norm": 0.2201705276966095, "learning_rate": 9.718926811461947e-06, "loss": 0.3142, "step": 1065 }, { "epoch": 0.05858840278158024, "grad_norm": 0.2077445089817047, "learning_rate": 9.764555575835007e-06, "loss": 0.3038, "step": 1070 }, { "epoch": 0.05886218036467174, "grad_norm": 0.20754215121269226, "learning_rate": 9.810184340208067e-06, "loss": 0.3059, "step": 1075 }, { "epoch": 0.05913595794776324, "grad_norm": 0.3068656027317047, "learning_rate": 9.855813104581129e-06, "loss": 0.3127, "step": 1080 }, { "epoch": 0.05940973553085473, "grad_norm": 0.28077444434165955, "learning_rate": 9.901441868954189e-06, "loss": 0.319, "step": 1085 }, { "epoch": 0.05968351311394623, "grad_norm": 0.24593354761600494, "learning_rate": 9.947070633327251e-06, "loss": 0.3003, "step": 1090 }, { "epoch": 0.05995729069703773, "grad_norm": 0.22825887799263, "learning_rate": 9.992699397700311e-06, "loss": 0.3093, "step": 1095 }, { "epoch": 0.060231068280129224, "grad_norm": 0.25524628162384033, "learning_rate": 1.0038328162073371e-05, "loss": 0.3012, "step": 1100 }, { "epoch": 0.06050484586322072, "grad_norm": 0.23772338032722473, "learning_rate": 1.0083956926446431e-05, "loss": 0.304, "step": 1105 }, { "epoch": 0.06077862344631221, "grad_norm": 0.25054118037223816, "learning_rate": 1.0129585690819493e-05, "loss": 0.3092, "step": 1110 }, { "epoch": 0.061052401029403715, "grad_norm": 0.2500600814819336, "learning_rate": 1.0175214455192553e-05, "loss": 0.2891, "step": 1115 }, { "epoch": 0.06132617861249521, "grad_norm": 0.22989729046821594, "learning_rate": 1.0220843219565615e-05, "loss": 0.3101, "step": 1120 }, { "epoch": 0.061599956195586704, "grad_norm": 0.288083553314209, "learning_rate": 1.0266471983938676e-05, "loss": 0.2997, "step": 1125 }, { "epoch": 0.0618737337786782, "grad_norm": 0.2956428825855255, "learning_rate": 1.0312100748311736e-05, "loss": 0.3062, "step": 1130 }, { "epoch": 0.0621475113617697, "grad_norm": 0.2541584074497223, "learning_rate": 1.0357729512684796e-05, "loss": 0.3042, "step": 1135 }, { "epoch": 0.062421288944861195, "grad_norm": 0.23169410228729248, "learning_rate": 1.0403358277057858e-05, "loss": 0.2942, "step": 1140 }, { "epoch": 0.06269506652795269, "grad_norm": 0.2304466962814331, "learning_rate": 1.0448987041430918e-05, "loss": 0.2973, "step": 1145 }, { "epoch": 0.06296884411104418, "grad_norm": 0.1979750096797943, "learning_rate": 1.049461580580398e-05, "loss": 0.3092, "step": 1150 }, { "epoch": 0.06324262169413568, "grad_norm": 0.26116713881492615, "learning_rate": 1.054024457017704e-05, "loss": 0.311, "step": 1155 }, { "epoch": 0.06351639927722719, "grad_norm": 0.2638317048549652, "learning_rate": 1.05858733345501e-05, "loss": 0.3085, "step": 1160 }, { "epoch": 0.06379017686031868, "grad_norm": 0.2752329111099243, "learning_rate": 1.0631502098923162e-05, "loss": 0.3065, "step": 1165 }, { "epoch": 0.06406395444341018, "grad_norm": 0.26268911361694336, "learning_rate": 1.0677130863296222e-05, "loss": 0.2955, "step": 1170 }, { "epoch": 0.06433773202650167, "grad_norm": 0.23789165914058685, "learning_rate": 1.0722759627669284e-05, "loss": 0.305, "step": 1175 }, { "epoch": 0.06461150960959317, "grad_norm": 0.2275351732969284, "learning_rate": 1.0768388392042344e-05, "loss": 0.2987, "step": 1180 }, { "epoch": 0.06488528719268466, "grad_norm": 0.23834466934204102, "learning_rate": 1.0814017156415405e-05, "loss": 0.3047, "step": 1185 }, { "epoch": 0.06515906477577615, "grad_norm": 0.23719002306461334, "learning_rate": 1.0859645920788465e-05, "loss": 0.3092, "step": 1190 }, { "epoch": 0.06543284235886766, "grad_norm": 0.2385096698999405, "learning_rate": 1.0905274685161527e-05, "loss": 0.293, "step": 1195 }, { "epoch": 0.06570661994195916, "grad_norm": 0.296029657125473, "learning_rate": 1.0950903449534587e-05, "loss": 0.315, "step": 1200 }, { "epoch": 0.06598039752505065, "grad_norm": 0.346734881401062, "learning_rate": 1.0996532213907649e-05, "loss": 0.2989, "step": 1205 }, { "epoch": 0.06625417510814215, "grad_norm": 0.24207128584384918, "learning_rate": 1.1042160978280709e-05, "loss": 0.3037, "step": 1210 }, { "epoch": 0.06652795269123364, "grad_norm": 0.2380809783935547, "learning_rate": 1.1087789742653769e-05, "loss": 0.3061, "step": 1215 }, { "epoch": 0.06680173027432514, "grad_norm": 0.3192184865474701, "learning_rate": 1.113341850702683e-05, "loss": 0.297, "step": 1220 }, { "epoch": 0.06707550785741663, "grad_norm": 0.2661077678203583, "learning_rate": 1.1179047271399891e-05, "loss": 0.3063, "step": 1225 }, { "epoch": 0.06734928544050812, "grad_norm": 0.23735447227954865, "learning_rate": 1.1224676035772951e-05, "loss": 0.3073, "step": 1230 }, { "epoch": 0.06762306302359963, "grad_norm": 0.2576318383216858, "learning_rate": 1.1270304800146013e-05, "loss": 0.3171, "step": 1235 }, { "epoch": 0.06789684060669113, "grad_norm": 0.27198243141174316, "learning_rate": 1.1315933564519073e-05, "loss": 0.3036, "step": 1240 }, { "epoch": 0.06817061818978262, "grad_norm": 0.27176180481910706, "learning_rate": 1.1361562328892134e-05, "loss": 0.3018, "step": 1245 }, { "epoch": 0.06844439577287412, "grad_norm": 0.27241262793540955, "learning_rate": 1.1407191093265194e-05, "loss": 0.3047, "step": 1250 }, { "epoch": 0.06871817335596561, "grad_norm": 0.21193532645702362, "learning_rate": 1.1452819857638256e-05, "loss": 0.2891, "step": 1255 }, { "epoch": 0.0689919509390571, "grad_norm": 0.2571476101875305, "learning_rate": 1.1498448622011316e-05, "loss": 0.2954, "step": 1260 }, { "epoch": 0.0692657285221486, "grad_norm": 0.2241770178079605, "learning_rate": 1.1544077386384378e-05, "loss": 0.2982, "step": 1265 }, { "epoch": 0.06953950610524011, "grad_norm": 0.20185531675815582, "learning_rate": 1.1589706150757438e-05, "loss": 0.2978, "step": 1270 }, { "epoch": 0.0698132836883316, "grad_norm": 0.2709437608718872, "learning_rate": 1.1635334915130498e-05, "loss": 0.3085, "step": 1275 }, { "epoch": 0.0700870612714231, "grad_norm": 0.28422120213508606, "learning_rate": 1.168096367950356e-05, "loss": 0.3051, "step": 1280 }, { "epoch": 0.07036083885451459, "grad_norm": 0.2905484735965729, "learning_rate": 1.172659244387662e-05, "loss": 0.2998, "step": 1285 }, { "epoch": 0.07063461643760609, "grad_norm": 0.2673369348049164, "learning_rate": 1.1772221208249682e-05, "loss": 0.2913, "step": 1290 }, { "epoch": 0.07090839402069758, "grad_norm": 0.2631447911262512, "learning_rate": 1.1817849972622742e-05, "loss": 0.2939, "step": 1295 }, { "epoch": 0.07118217160378908, "grad_norm": 0.24854813516139984, "learning_rate": 1.1863478736995804e-05, "loss": 0.2949, "step": 1300 }, { "epoch": 0.07145594918688057, "grad_norm": 0.23220109939575195, "learning_rate": 1.1909107501368863e-05, "loss": 0.2902, "step": 1305 }, { "epoch": 0.07172972676997208, "grad_norm": 0.21286430954933167, "learning_rate": 1.1954736265741924e-05, "loss": 0.3069, "step": 1310 }, { "epoch": 0.07200350435306357, "grad_norm": 0.23694084584712982, "learning_rate": 1.2000365030114985e-05, "loss": 0.3138, "step": 1315 }, { "epoch": 0.07227728193615507, "grad_norm": 0.26456981897354126, "learning_rate": 1.2045993794488047e-05, "loss": 0.2921, "step": 1320 }, { "epoch": 0.07255105951924656, "grad_norm": 0.25777551531791687, "learning_rate": 1.2091622558861107e-05, "loss": 0.2969, "step": 1325 }, { "epoch": 0.07282483710233806, "grad_norm": 0.2802751362323761, "learning_rate": 1.2137251323234169e-05, "loss": 0.297, "step": 1330 }, { "epoch": 0.07309861468542955, "grad_norm": 0.2376076728105545, "learning_rate": 1.2182880087607227e-05, "loss": 0.2902, "step": 1335 }, { "epoch": 0.07337239226852105, "grad_norm": 0.2789604663848877, "learning_rate": 1.2228508851980289e-05, "loss": 0.2886, "step": 1340 }, { "epoch": 0.07364616985161256, "grad_norm": 0.2721031904220581, "learning_rate": 1.2274137616353349e-05, "loss": 0.2969, "step": 1345 }, { "epoch": 0.07391994743470405, "grad_norm": 0.2537602484226227, "learning_rate": 1.2319766380726411e-05, "loss": 0.2992, "step": 1350 }, { "epoch": 0.07419372501779554, "grad_norm": 0.2673051953315735, "learning_rate": 1.2365395145099471e-05, "loss": 0.2925, "step": 1355 }, { "epoch": 0.07446750260088704, "grad_norm": 0.2381039559841156, "learning_rate": 1.2411023909472531e-05, "loss": 0.2887, "step": 1360 }, { "epoch": 0.07474128018397853, "grad_norm": 0.24278239905834198, "learning_rate": 1.2456652673845593e-05, "loss": 0.2956, "step": 1365 }, { "epoch": 0.07501505776707003, "grad_norm": 0.25239109992980957, "learning_rate": 1.2502281438218655e-05, "loss": 0.302, "step": 1370 }, { "epoch": 0.07528883535016152, "grad_norm": 0.2259359210729599, "learning_rate": 1.2547910202591714e-05, "loss": 0.2954, "step": 1375 }, { "epoch": 0.07556261293325303, "grad_norm": 0.24145886301994324, "learning_rate": 1.2593538966964774e-05, "loss": 0.2843, "step": 1380 }, { "epoch": 0.07583639051634453, "grad_norm": 0.23517194390296936, "learning_rate": 1.2639167731337836e-05, "loss": 0.3059, "step": 1385 }, { "epoch": 0.07611016809943602, "grad_norm": 0.27311041951179504, "learning_rate": 1.2684796495710896e-05, "loss": 0.2903, "step": 1390 }, { "epoch": 0.07638394568252752, "grad_norm": 0.25049543380737305, "learning_rate": 1.2730425260083958e-05, "loss": 0.3062, "step": 1395 }, { "epoch": 0.07665772326561901, "grad_norm": 0.25819551944732666, "learning_rate": 1.2776054024457018e-05, "loss": 0.3014, "step": 1400 }, { "epoch": 0.0769315008487105, "grad_norm": 0.3225780129432678, "learning_rate": 1.2821682788830078e-05, "loss": 0.2939, "step": 1405 }, { "epoch": 0.077205278431802, "grad_norm": 0.34966033697128296, "learning_rate": 1.286731155320314e-05, "loss": 0.2978, "step": 1410 }, { "epoch": 0.0774790560148935, "grad_norm": 0.42469292879104614, "learning_rate": 1.29129403175762e-05, "loss": 0.2962, "step": 1415 }, { "epoch": 0.077752833597985, "grad_norm": 0.3147035539150238, "learning_rate": 1.2958569081949262e-05, "loss": 0.3099, "step": 1420 }, { "epoch": 0.0780266111810765, "grad_norm": 0.2855072319507599, "learning_rate": 1.3004197846322322e-05, "loss": 0.2979, "step": 1425 }, { "epoch": 0.07830038876416799, "grad_norm": 0.26425430178642273, "learning_rate": 1.3049826610695384e-05, "loss": 0.2888, "step": 1430 }, { "epoch": 0.07857416634725949, "grad_norm": 0.25852057337760925, "learning_rate": 1.3095455375068444e-05, "loss": 0.2982, "step": 1435 }, { "epoch": 0.07884794393035098, "grad_norm": 0.24423108994960785, "learning_rate": 1.3141084139441503e-05, "loss": 0.2959, "step": 1440 }, { "epoch": 0.07912172151344248, "grad_norm": 0.25945737957954407, "learning_rate": 1.3186712903814566e-05, "loss": 0.3041, "step": 1445 }, { "epoch": 0.07939549909653397, "grad_norm": 0.27346545457839966, "learning_rate": 1.3232341668187625e-05, "loss": 0.2978, "step": 1450 }, { "epoch": 0.07966927667962548, "grad_norm": 0.25872427225112915, "learning_rate": 1.3277970432560688e-05, "loss": 0.2936, "step": 1455 }, { "epoch": 0.07994305426271697, "grad_norm": 0.2772864103317261, "learning_rate": 1.3323599196933747e-05, "loss": 0.297, "step": 1460 }, { "epoch": 0.08021683184580847, "grad_norm": 0.32123076915740967, "learning_rate": 1.3369227961306807e-05, "loss": 0.2926, "step": 1465 }, { "epoch": 0.08049060942889996, "grad_norm": 0.24984332919120789, "learning_rate": 1.3414856725679869e-05, "loss": 0.2957, "step": 1470 }, { "epoch": 0.08076438701199146, "grad_norm": 0.25864213705062866, "learning_rate": 1.346048549005293e-05, "loss": 0.2888, "step": 1475 }, { "epoch": 0.08103816459508295, "grad_norm": 0.3200620114803314, "learning_rate": 1.3506114254425991e-05, "loss": 0.2965, "step": 1480 }, { "epoch": 0.08131194217817445, "grad_norm": 0.195997416973114, "learning_rate": 1.3551743018799051e-05, "loss": 0.2879, "step": 1485 }, { "epoch": 0.08158571976126595, "grad_norm": 0.25161421298980713, "learning_rate": 1.3597371783172113e-05, "loss": 0.2999, "step": 1490 }, { "epoch": 0.08185949734435745, "grad_norm": 0.24134337902069092, "learning_rate": 1.3643000547545173e-05, "loss": 0.2929, "step": 1495 }, { "epoch": 0.08213327492744894, "grad_norm": 0.23968924582004547, "learning_rate": 1.3688629311918234e-05, "loss": 0.2841, "step": 1500 }, { "epoch": 0.08240705251054044, "grad_norm": 0.2830949127674103, "learning_rate": 1.3734258076291295e-05, "loss": 0.2766, "step": 1505 }, { "epoch": 0.08268083009363193, "grad_norm": 0.27651041746139526, "learning_rate": 1.3779886840664356e-05, "loss": 0.3013, "step": 1510 }, { "epoch": 0.08295460767672343, "grad_norm": 0.2691957354545593, "learning_rate": 1.3825515605037417e-05, "loss": 0.2942, "step": 1515 }, { "epoch": 0.08322838525981492, "grad_norm": 0.2861427366733551, "learning_rate": 1.3871144369410478e-05, "loss": 0.2943, "step": 1520 }, { "epoch": 0.08350216284290642, "grad_norm": 0.3385035991668701, "learning_rate": 1.3916773133783536e-05, "loss": 0.296, "step": 1525 }, { "epoch": 0.08377594042599792, "grad_norm": 0.3797488808631897, "learning_rate": 1.39624018981566e-05, "loss": 0.2843, "step": 1530 }, { "epoch": 0.08404971800908942, "grad_norm": 0.27325746417045593, "learning_rate": 1.4008030662529658e-05, "loss": 0.2958, "step": 1535 }, { "epoch": 0.08432349559218091, "grad_norm": 0.26520445942878723, "learning_rate": 1.4053659426902722e-05, "loss": 0.2821, "step": 1540 }, { "epoch": 0.08459727317527241, "grad_norm": 0.19776509702205658, "learning_rate": 1.409928819127578e-05, "loss": 0.288, "step": 1545 }, { "epoch": 0.0848710507583639, "grad_norm": 0.2950904667377472, "learning_rate": 1.414491695564884e-05, "loss": 0.2855, "step": 1550 }, { "epoch": 0.0851448283414554, "grad_norm": 0.2500796914100647, "learning_rate": 1.4190545720021902e-05, "loss": 0.2797, "step": 1555 }, { "epoch": 0.08541860592454689, "grad_norm": 0.24570882320404053, "learning_rate": 1.4236174484394963e-05, "loss": 0.2865, "step": 1560 }, { "epoch": 0.0856923835076384, "grad_norm": 0.23947098851203918, "learning_rate": 1.4281803248768024e-05, "loss": 0.2853, "step": 1565 }, { "epoch": 0.0859661610907299, "grad_norm": 0.276030033826828, "learning_rate": 1.4327432013141085e-05, "loss": 0.298, "step": 1570 }, { "epoch": 0.08623993867382139, "grad_norm": 0.27081504464149475, "learning_rate": 1.4373060777514146e-05, "loss": 0.2847, "step": 1575 }, { "epoch": 0.08651371625691288, "grad_norm": 0.24168546497821808, "learning_rate": 1.4418689541887207e-05, "loss": 0.2842, "step": 1580 }, { "epoch": 0.08678749384000438, "grad_norm": 0.2776585519313812, "learning_rate": 1.4464318306260267e-05, "loss": 0.2846, "step": 1585 }, { "epoch": 0.08706127142309587, "grad_norm": 0.24711936712265015, "learning_rate": 1.4509947070633329e-05, "loss": 0.2927, "step": 1590 }, { "epoch": 0.08733504900618737, "grad_norm": 0.22233085334300995, "learning_rate": 1.4555575835006389e-05, "loss": 0.2871, "step": 1595 }, { "epoch": 0.08760882658927888, "grad_norm": 0.24501758813858032, "learning_rate": 1.460120459937945e-05, "loss": 0.2944, "step": 1600 }, { "epoch": 0.08788260417237037, "grad_norm": 0.24327433109283447, "learning_rate": 1.4646833363752511e-05, "loss": 0.2983, "step": 1605 }, { "epoch": 0.08815638175546187, "grad_norm": 0.24114979803562164, "learning_rate": 1.469246212812557e-05, "loss": 0.2829, "step": 1610 }, { "epoch": 0.08843015933855336, "grad_norm": 0.22532787919044495, "learning_rate": 1.4738090892498633e-05, "loss": 0.2981, "step": 1615 }, { "epoch": 0.08870393692164485, "grad_norm": 0.28646257519721985, "learning_rate": 1.4783719656871692e-05, "loss": 0.2898, "step": 1620 }, { "epoch": 0.08897771450473635, "grad_norm": 0.2139185070991516, "learning_rate": 1.4829348421244753e-05, "loss": 0.2816, "step": 1625 }, { "epoch": 0.08925149208782784, "grad_norm": 0.25196319818496704, "learning_rate": 1.4874977185617814e-05, "loss": 0.279, "step": 1630 }, { "epoch": 0.08952526967091934, "grad_norm": 0.2787584066390991, "learning_rate": 1.4920605949990875e-05, "loss": 0.274, "step": 1635 }, { "epoch": 0.08979904725401085, "grad_norm": 0.25464215874671936, "learning_rate": 1.4966234714363936e-05, "loss": 0.2799, "step": 1640 }, { "epoch": 0.09007282483710234, "grad_norm": 0.27166318893432617, "learning_rate": 1.5011863478736996e-05, "loss": 0.2859, "step": 1645 }, { "epoch": 0.09034660242019384, "grad_norm": 0.3100299537181854, "learning_rate": 1.5057492243110058e-05, "loss": 0.2819, "step": 1650 }, { "epoch": 0.09062038000328533, "grad_norm": 0.3013085722923279, "learning_rate": 1.5103121007483118e-05, "loss": 0.2829, "step": 1655 }, { "epoch": 0.09089415758637683, "grad_norm": 0.33084920048713684, "learning_rate": 1.514874977185618e-05, "loss": 0.2835, "step": 1660 }, { "epoch": 0.09116793516946832, "grad_norm": 0.27223315834999084, "learning_rate": 1.519437853622924e-05, "loss": 0.2831, "step": 1665 }, { "epoch": 0.09144171275255981, "grad_norm": 0.35748955607414246, "learning_rate": 1.5240007300602298e-05, "loss": 0.2945, "step": 1670 }, { "epoch": 0.09171549033565132, "grad_norm": 0.3209342658519745, "learning_rate": 1.5285636064975362e-05, "loss": 0.2814, "step": 1675 }, { "epoch": 0.09198926791874282, "grad_norm": 0.3107457458972931, "learning_rate": 1.5331264829348422e-05, "loss": 0.2814, "step": 1680 }, { "epoch": 0.09226304550183431, "grad_norm": 0.33080556988716125, "learning_rate": 1.5376893593721482e-05, "loss": 0.2909, "step": 1685 }, { "epoch": 0.0925368230849258, "grad_norm": 0.2824276387691498, "learning_rate": 1.5422522358094543e-05, "loss": 0.2924, "step": 1690 }, { "epoch": 0.0928106006680173, "grad_norm": 0.2526974380016327, "learning_rate": 1.5468151122467606e-05, "loss": 0.274, "step": 1695 }, { "epoch": 0.0930843782511088, "grad_norm": 0.2678331732749939, "learning_rate": 1.5513779886840666e-05, "loss": 0.2873, "step": 1700 }, { "epoch": 0.09335815583420029, "grad_norm": 0.2606250047683716, "learning_rate": 1.5559408651213727e-05, "loss": 0.2866, "step": 1705 }, { "epoch": 0.0936319334172918, "grad_norm": 0.27019408345222473, "learning_rate": 1.5605037415586787e-05, "loss": 0.2938, "step": 1710 }, { "epoch": 0.0939057110003833, "grad_norm": 0.27617931365966797, "learning_rate": 1.5650666179959847e-05, "loss": 0.2826, "step": 1715 }, { "epoch": 0.09417948858347479, "grad_norm": 0.27253827452659607, "learning_rate": 1.569629494433291e-05, "loss": 0.2917, "step": 1720 }, { "epoch": 0.09445326616656628, "grad_norm": 0.30582720041275024, "learning_rate": 1.574192370870597e-05, "loss": 0.2728, "step": 1725 }, { "epoch": 0.09472704374965778, "grad_norm": 0.2776755392551422, "learning_rate": 1.5787552473079027e-05, "loss": 0.3021, "step": 1730 }, { "epoch": 0.09500082133274927, "grad_norm": 0.28487449884414673, "learning_rate": 1.583318123745209e-05, "loss": 0.286, "step": 1735 }, { "epoch": 0.09527459891584077, "grad_norm": 0.2628595530986786, "learning_rate": 1.587881000182515e-05, "loss": 0.2867, "step": 1740 }, { "epoch": 0.09554837649893226, "grad_norm": 0.32692280411720276, "learning_rate": 1.592443876619821e-05, "loss": 0.2894, "step": 1745 }, { "epoch": 0.09582215408202377, "grad_norm": 0.2117467224597931, "learning_rate": 1.597006753057127e-05, "loss": 0.2715, "step": 1750 }, { "epoch": 0.09609593166511526, "grad_norm": 0.28370344638824463, "learning_rate": 1.6015696294944332e-05, "loss": 0.2812, "step": 1755 }, { "epoch": 0.09636970924820676, "grad_norm": 0.2782098352909088, "learning_rate": 1.6061325059317395e-05, "loss": 0.2856, "step": 1760 }, { "epoch": 0.09664348683129825, "grad_norm": 0.24737748503684998, "learning_rate": 1.6106953823690456e-05, "loss": 0.2919, "step": 1765 }, { "epoch": 0.09691726441438975, "grad_norm": 0.280889093875885, "learning_rate": 1.6152582588063516e-05, "loss": 0.276, "step": 1770 }, { "epoch": 0.09719104199748124, "grad_norm": 0.2452787309885025, "learning_rate": 1.6198211352436576e-05, "loss": 0.2906, "step": 1775 }, { "epoch": 0.09746481958057274, "grad_norm": 0.2575785219669342, "learning_rate": 1.624384011680964e-05, "loss": 0.2783, "step": 1780 }, { "epoch": 0.09773859716366425, "grad_norm": 0.22887082397937775, "learning_rate": 1.62894688811827e-05, "loss": 0.272, "step": 1785 }, { "epoch": 0.09801237474675574, "grad_norm": 0.3198131024837494, "learning_rate": 1.6335097645555756e-05, "loss": 0.2824, "step": 1790 }, { "epoch": 0.09828615232984723, "grad_norm": 0.26505860686302185, "learning_rate": 1.638072640992882e-05, "loss": 0.282, "step": 1795 }, { "epoch": 0.09855992991293873, "grad_norm": 0.32250458002090454, "learning_rate": 1.642635517430188e-05, "loss": 0.2814, "step": 1800 }, { "epoch": 0.09883370749603022, "grad_norm": 0.25023865699768066, "learning_rate": 1.6471983938674944e-05, "loss": 0.293, "step": 1805 }, { "epoch": 0.09910748507912172, "grad_norm": 0.23037517070770264, "learning_rate": 1.6517612703048e-05, "loss": 0.284, "step": 1810 }, { "epoch": 0.09938126266221321, "grad_norm": 0.30290552973747253, "learning_rate": 1.656324146742106e-05, "loss": 0.2837, "step": 1815 }, { "epoch": 0.09965504024530472, "grad_norm": 0.2870585024356842, "learning_rate": 1.6608870231794124e-05, "loss": 0.2782, "step": 1820 }, { "epoch": 0.09992881782839622, "grad_norm": 0.2840345501899719, "learning_rate": 1.6654498996167185e-05, "loss": 0.2874, "step": 1825 }, { "epoch": 0.10020259541148771, "grad_norm": 0.27660495042800903, "learning_rate": 1.6700127760540245e-05, "loss": 0.2732, "step": 1830 }, { "epoch": 0.1004763729945792, "grad_norm": 0.2163310945034027, "learning_rate": 1.6745756524913305e-05, "loss": 0.2759, "step": 1835 }, { "epoch": 0.1007501505776707, "grad_norm": 0.21137605607509613, "learning_rate": 1.679138528928637e-05, "loss": 0.2801, "step": 1840 }, { "epoch": 0.1010239281607622, "grad_norm": 0.27090054750442505, "learning_rate": 1.683701405365943e-05, "loss": 0.2735, "step": 1845 }, { "epoch": 0.10129770574385369, "grad_norm": 0.21483692526817322, "learning_rate": 1.688264281803249e-05, "loss": 0.2775, "step": 1850 }, { "epoch": 0.10157148332694518, "grad_norm": 0.26710212230682373, "learning_rate": 1.692827158240555e-05, "loss": 0.2753, "step": 1855 }, { "epoch": 0.10184526091003669, "grad_norm": 0.24761667847633362, "learning_rate": 1.697390034677861e-05, "loss": 0.2734, "step": 1860 }, { "epoch": 0.10211903849312819, "grad_norm": 0.23863814771175385, "learning_rate": 1.7019529111151673e-05, "loss": 0.2748, "step": 1865 }, { "epoch": 0.10239281607621968, "grad_norm": 0.29947417974472046, "learning_rate": 1.7065157875524733e-05, "loss": 0.2718, "step": 1870 }, { "epoch": 0.10266659365931118, "grad_norm": 0.2460521012544632, "learning_rate": 1.711078663989779e-05, "loss": 0.2832, "step": 1875 }, { "epoch": 0.10294037124240267, "grad_norm": 0.22989939153194427, "learning_rate": 1.7156415404270853e-05, "loss": 0.2842, "step": 1880 }, { "epoch": 0.10321414882549416, "grad_norm": 0.2372380793094635, "learning_rate": 1.7202044168643914e-05, "loss": 0.2816, "step": 1885 }, { "epoch": 0.10348792640858566, "grad_norm": 0.2870679199695587, "learning_rate": 1.7247672933016977e-05, "loss": 0.284, "step": 1890 }, { "epoch": 0.10376170399167717, "grad_norm": 0.30206143856048584, "learning_rate": 1.7293301697390034e-05, "loss": 0.2741, "step": 1895 }, { "epoch": 0.10403548157476866, "grad_norm": 0.24199837446212769, "learning_rate": 1.7338930461763097e-05, "loss": 0.276, "step": 1900 }, { "epoch": 0.10430925915786016, "grad_norm": 0.3291589915752411, "learning_rate": 1.7384559226136158e-05, "loss": 0.2813, "step": 1905 }, { "epoch": 0.10458303674095165, "grad_norm": 0.24891743063926697, "learning_rate": 1.7430187990509218e-05, "loss": 0.2682, "step": 1910 }, { "epoch": 0.10485681432404315, "grad_norm": 0.2551855146884918, "learning_rate": 1.7475816754882278e-05, "loss": 0.2768, "step": 1915 }, { "epoch": 0.10513059190713464, "grad_norm": 0.25044190883636475, "learning_rate": 1.7521445519255338e-05, "loss": 0.2765, "step": 1920 }, { "epoch": 0.10540436949022614, "grad_norm": 0.24438539147377014, "learning_rate": 1.7567074283628402e-05, "loss": 0.2741, "step": 1925 }, { "epoch": 0.10567814707331764, "grad_norm": 0.20583611726760864, "learning_rate": 1.7612703048001462e-05, "loss": 0.2751, "step": 1930 }, { "epoch": 0.10595192465640914, "grad_norm": 0.2575974464416504, "learning_rate": 1.7658331812374522e-05, "loss": 0.2721, "step": 1935 }, { "epoch": 0.10622570223950063, "grad_norm": 0.2801431715488434, "learning_rate": 1.7703960576747582e-05, "loss": 0.2809, "step": 1940 }, { "epoch": 0.10649947982259213, "grad_norm": 0.28349757194519043, "learning_rate": 1.7749589341120643e-05, "loss": 0.2827, "step": 1945 }, { "epoch": 0.10677325740568362, "grad_norm": 0.2664687931537628, "learning_rate": 1.7795218105493706e-05, "loss": 0.2706, "step": 1950 }, { "epoch": 0.10704703498877512, "grad_norm": 0.2310260385274887, "learning_rate": 1.7840846869866766e-05, "loss": 0.278, "step": 1955 }, { "epoch": 0.10732081257186661, "grad_norm": 0.2123335599899292, "learning_rate": 1.7886475634239823e-05, "loss": 0.2736, "step": 1960 }, { "epoch": 0.1075945901549581, "grad_norm": 0.28372251987457275, "learning_rate": 1.7932104398612887e-05, "loss": 0.2601, "step": 1965 }, { "epoch": 0.10786836773804961, "grad_norm": 0.2402605265378952, "learning_rate": 1.7977733162985947e-05, "loss": 0.275, "step": 1970 }, { "epoch": 0.10814214532114111, "grad_norm": 0.2355998456478119, "learning_rate": 1.802336192735901e-05, "loss": 0.288, "step": 1975 }, { "epoch": 0.1084159229042326, "grad_norm": 0.2741391658782959, "learning_rate": 1.8068990691732067e-05, "loss": 0.2856, "step": 1980 }, { "epoch": 0.1086897004873241, "grad_norm": 0.3681468069553375, "learning_rate": 1.811461945610513e-05, "loss": 0.2834, "step": 1985 }, { "epoch": 0.10896347807041559, "grad_norm": 0.3056526780128479, "learning_rate": 1.816024822047819e-05, "loss": 0.2773, "step": 1990 }, { "epoch": 0.10923725565350709, "grad_norm": 0.2899211049079895, "learning_rate": 1.820587698485125e-05, "loss": 0.2673, "step": 1995 }, { "epoch": 0.10951103323659858, "grad_norm": 0.32630014419555664, "learning_rate": 1.825150574922431e-05, "loss": 0.2689, "step": 2000 }, { "epoch": 0.10978481081969009, "grad_norm": 0.31507086753845215, "learning_rate": 1.829713451359737e-05, "loss": 0.2701, "step": 2005 }, { "epoch": 0.11005858840278158, "grad_norm": 0.3302938640117645, "learning_rate": 1.8342763277970435e-05, "loss": 0.2688, "step": 2010 }, { "epoch": 0.11033236598587308, "grad_norm": 0.3261253535747528, "learning_rate": 1.8388392042343495e-05, "loss": 0.2663, "step": 2015 }, { "epoch": 0.11060614356896457, "grad_norm": 0.2771484851837158, "learning_rate": 1.8434020806716552e-05, "loss": 0.2701, "step": 2020 }, { "epoch": 0.11087992115205607, "grad_norm": 0.24533522129058838, "learning_rate": 1.8479649571089616e-05, "loss": 0.2823, "step": 2025 }, { "epoch": 0.11115369873514756, "grad_norm": 0.25145503878593445, "learning_rate": 1.8525278335462676e-05, "loss": 0.2696, "step": 2030 }, { "epoch": 0.11142747631823906, "grad_norm": 0.23340903222560883, "learning_rate": 1.857090709983574e-05, "loss": 0.2653, "step": 2035 }, { "epoch": 0.11170125390133055, "grad_norm": 0.2602600157260895, "learning_rate": 1.8616535864208796e-05, "loss": 0.2693, "step": 2040 }, { "epoch": 0.11197503148442206, "grad_norm": 0.22735410928726196, "learning_rate": 1.866216462858186e-05, "loss": 0.2746, "step": 2045 }, { "epoch": 0.11224880906751356, "grad_norm": 0.2711876630783081, "learning_rate": 1.870779339295492e-05, "loss": 0.2775, "step": 2050 }, { "epoch": 0.11252258665060505, "grad_norm": 0.3087702989578247, "learning_rate": 1.875342215732798e-05, "loss": 0.2793, "step": 2055 }, { "epoch": 0.11279636423369654, "grad_norm": 0.291387677192688, "learning_rate": 1.879905092170104e-05, "loss": 0.2699, "step": 2060 }, { "epoch": 0.11307014181678804, "grad_norm": 0.2518584132194519, "learning_rate": 1.88446796860741e-05, "loss": 0.2738, "step": 2065 }, { "epoch": 0.11334391939987953, "grad_norm": 0.23948034644126892, "learning_rate": 1.8890308450447164e-05, "loss": 0.2749, "step": 2070 }, { "epoch": 0.11361769698297103, "grad_norm": 0.28412893414497375, "learning_rate": 1.8935937214820224e-05, "loss": 0.2731, "step": 2075 }, { "epoch": 0.11389147456606254, "grad_norm": 0.25779324769973755, "learning_rate": 1.8981565979193284e-05, "loss": 0.2784, "step": 2080 }, { "epoch": 0.11416525214915403, "grad_norm": 0.25822949409484863, "learning_rate": 1.9027194743566345e-05, "loss": 0.2703, "step": 2085 }, { "epoch": 0.11443902973224553, "grad_norm": 0.3233028054237366, "learning_rate": 1.9072823507939405e-05, "loss": 0.2837, "step": 2090 }, { "epoch": 0.11471280731533702, "grad_norm": 0.26903235912323, "learning_rate": 1.911845227231247e-05, "loss": 0.2779, "step": 2095 }, { "epoch": 0.11498658489842851, "grad_norm": 0.21941813826560974, "learning_rate": 1.916408103668553e-05, "loss": 0.2785, "step": 2100 }, { "epoch": 0.11526036248152001, "grad_norm": 0.24363915622234344, "learning_rate": 1.920970980105859e-05, "loss": 0.264, "step": 2105 }, { "epoch": 0.1155341400646115, "grad_norm": 0.3322632312774658, "learning_rate": 1.925533856543165e-05, "loss": 0.265, "step": 2110 }, { "epoch": 0.11580791764770301, "grad_norm": 0.3555445671081543, "learning_rate": 1.930096732980471e-05, "loss": 0.2755, "step": 2115 }, { "epoch": 0.11608169523079451, "grad_norm": 0.3081902265548706, "learning_rate": 1.9346596094177773e-05, "loss": 0.2814, "step": 2120 }, { "epoch": 0.116355472813886, "grad_norm": 0.32061120867729187, "learning_rate": 1.939222485855083e-05, "loss": 0.2648, "step": 2125 }, { "epoch": 0.1166292503969775, "grad_norm": 0.2428998053073883, "learning_rate": 1.9437853622923893e-05, "loss": 0.2683, "step": 2130 }, { "epoch": 0.11690302798006899, "grad_norm": 0.2723926901817322, "learning_rate": 1.9483482387296953e-05, "loss": 0.2577, "step": 2135 }, { "epoch": 0.11717680556316049, "grad_norm": 0.3193281888961792, "learning_rate": 1.9529111151670013e-05, "loss": 0.2869, "step": 2140 }, { "epoch": 0.11745058314625198, "grad_norm": 0.30740463733673096, "learning_rate": 1.9574739916043074e-05, "loss": 0.271, "step": 2145 }, { "epoch": 0.11772436072934347, "grad_norm": 0.31710001826286316, "learning_rate": 1.9620368680416134e-05, "loss": 0.2776, "step": 2150 }, { "epoch": 0.11799813831243498, "grad_norm": 0.2495526373386383, "learning_rate": 1.9665997444789197e-05, "loss": 0.2734, "step": 2155 }, { "epoch": 0.11827191589552648, "grad_norm": 0.23337216675281525, "learning_rate": 1.9711626209162258e-05, "loss": 0.2737, "step": 2160 }, { "epoch": 0.11854569347861797, "grad_norm": 0.24691453576087952, "learning_rate": 1.9757254973535318e-05, "loss": 0.2584, "step": 2165 }, { "epoch": 0.11881947106170947, "grad_norm": 0.25676101446151733, "learning_rate": 1.9802883737908378e-05, "loss": 0.2747, "step": 2170 }, { "epoch": 0.11909324864480096, "grad_norm": 0.2969067394733429, "learning_rate": 1.9848512502281438e-05, "loss": 0.2748, "step": 2175 }, { "epoch": 0.11936702622789246, "grad_norm": 0.2637587785720825, "learning_rate": 1.9894141266654502e-05, "loss": 0.2763, "step": 2180 }, { "epoch": 0.11964080381098395, "grad_norm": 0.3115275204181671, "learning_rate": 1.9939770031027562e-05, "loss": 0.2755, "step": 2185 }, { "epoch": 0.11991458139407546, "grad_norm": 0.2417454570531845, "learning_rate": 1.9985398795400622e-05, "loss": 0.2711, "step": 2190 }, { "epoch": 0.12018835897716695, "grad_norm": 0.22867712378501892, "learning_rate": 2.0031027559773682e-05, "loss": 0.2737, "step": 2195 }, { "epoch": 0.12046213656025845, "grad_norm": 0.24251528084278107, "learning_rate": 2.0076656324146742e-05, "loss": 0.2801, "step": 2200 }, { "epoch": 0.12073591414334994, "grad_norm": 0.28513944149017334, "learning_rate": 2.0122285088519806e-05, "loss": 0.2611, "step": 2205 }, { "epoch": 0.12100969172644144, "grad_norm": 0.34505000710487366, "learning_rate": 2.0167913852892863e-05, "loss": 0.2587, "step": 2210 }, { "epoch": 0.12128346930953293, "grad_norm": 0.3516639173030853, "learning_rate": 2.0213542617265926e-05, "loss": 0.2701, "step": 2215 }, { "epoch": 0.12155724689262443, "grad_norm": 0.32801106572151184, "learning_rate": 2.0259171381638987e-05, "loss": 0.272, "step": 2220 }, { "epoch": 0.12183102447571593, "grad_norm": 0.2889196574687958, "learning_rate": 2.0304800146012047e-05, "loss": 0.2812, "step": 2225 }, { "epoch": 0.12210480205880743, "grad_norm": 0.2695263624191284, "learning_rate": 2.0350428910385107e-05, "loss": 0.2747, "step": 2230 }, { "epoch": 0.12237857964189892, "grad_norm": 0.2253248691558838, "learning_rate": 2.0396057674758167e-05, "loss": 0.2665, "step": 2235 }, { "epoch": 0.12265235722499042, "grad_norm": 0.27726003527641296, "learning_rate": 2.044168643913123e-05, "loss": 0.2611, "step": 2240 }, { "epoch": 0.12292613480808191, "grad_norm": 0.25480055809020996, "learning_rate": 2.048731520350429e-05, "loss": 0.2757, "step": 2245 }, { "epoch": 0.12319991239117341, "grad_norm": 0.29205331206321716, "learning_rate": 2.053294396787735e-05, "loss": 0.2638, "step": 2250 }, { "epoch": 0.1234736899742649, "grad_norm": 0.37321922183036804, "learning_rate": 2.057857273225041e-05, "loss": 0.2729, "step": 2255 }, { "epoch": 0.1237474675573564, "grad_norm": 0.4094937741756439, "learning_rate": 2.062420149662347e-05, "loss": 0.2612, "step": 2260 }, { "epoch": 0.1240212451404479, "grad_norm": 0.24623741209506989, "learning_rate": 2.0669830260996535e-05, "loss": 0.2668, "step": 2265 }, { "epoch": 0.1242950227235394, "grad_norm": 0.30056825280189514, "learning_rate": 2.0715459025369592e-05, "loss": 0.2717, "step": 2270 }, { "epoch": 0.1245688003066309, "grad_norm": 0.24422918260097504, "learning_rate": 2.0761087789742655e-05, "loss": 0.2701, "step": 2275 }, { "epoch": 0.12484257788972239, "grad_norm": 0.262864351272583, "learning_rate": 2.0806716554115716e-05, "loss": 0.2675, "step": 2280 }, { "epoch": 0.12511635547281388, "grad_norm": 0.31539472937583923, "learning_rate": 2.0852345318488776e-05, "loss": 0.2675, "step": 2285 }, { "epoch": 0.12539013305590538, "grad_norm": 0.2945603132247925, "learning_rate": 2.0897974082861836e-05, "loss": 0.2702, "step": 2290 }, { "epoch": 0.12566391063899687, "grad_norm": 0.35206618905067444, "learning_rate": 2.0943602847234896e-05, "loss": 0.2714, "step": 2295 }, { "epoch": 0.12593768822208837, "grad_norm": 0.3096575140953064, "learning_rate": 2.098923161160796e-05, "loss": 0.2819, "step": 2300 }, { "epoch": 0.12621146580517986, "grad_norm": 0.266610711812973, "learning_rate": 2.103486037598102e-05, "loss": 0.2727, "step": 2305 }, { "epoch": 0.12648524338827136, "grad_norm": 0.2973441481590271, "learning_rate": 2.108048914035408e-05, "loss": 0.269, "step": 2310 }, { "epoch": 0.12675902097136285, "grad_norm": 0.32928702235221863, "learning_rate": 2.112611790472714e-05, "loss": 0.2707, "step": 2315 }, { "epoch": 0.12703279855445437, "grad_norm": 0.23319463431835175, "learning_rate": 2.11717466691002e-05, "loss": 0.2697, "step": 2320 }, { "epoch": 0.12730657613754587, "grad_norm": 0.3512718975543976, "learning_rate": 2.1217375433473264e-05, "loss": 0.2555, "step": 2325 }, { "epoch": 0.12758035372063736, "grad_norm": 0.3383982479572296, "learning_rate": 2.1263004197846324e-05, "loss": 0.2569, "step": 2330 }, { "epoch": 0.12785413130372886, "grad_norm": 0.30611130595207214, "learning_rate": 2.1308632962219384e-05, "loss": 0.2747, "step": 2335 }, { "epoch": 0.12812790888682035, "grad_norm": 0.31998494267463684, "learning_rate": 2.1354261726592445e-05, "loss": 0.2669, "step": 2340 }, { "epoch": 0.12840168646991185, "grad_norm": 0.27161601185798645, "learning_rate": 2.1399890490965505e-05, "loss": 0.2824, "step": 2345 }, { "epoch": 0.12867546405300334, "grad_norm": 0.4055858254432678, "learning_rate": 2.144551925533857e-05, "loss": 0.266, "step": 2350 }, { "epoch": 0.12894924163609484, "grad_norm": 0.3189844489097595, "learning_rate": 2.1491148019711625e-05, "loss": 0.2586, "step": 2355 }, { "epoch": 0.12922301921918633, "grad_norm": 0.25828826427459717, "learning_rate": 2.153677678408469e-05, "loss": 0.2649, "step": 2360 }, { "epoch": 0.12949679680227782, "grad_norm": 0.21762540936470032, "learning_rate": 2.158240554845775e-05, "loss": 0.268, "step": 2365 }, { "epoch": 0.12977057438536932, "grad_norm": 0.2626693844795227, "learning_rate": 2.162803431283081e-05, "loss": 0.2826, "step": 2370 }, { "epoch": 0.13004435196846081, "grad_norm": 0.31049373745918274, "learning_rate": 2.167366307720387e-05, "loss": 0.2703, "step": 2375 }, { "epoch": 0.1303181295515523, "grad_norm": 0.2677549421787262, "learning_rate": 2.171929184157693e-05, "loss": 0.2763, "step": 2380 }, { "epoch": 0.1305919071346438, "grad_norm": 0.29236871004104614, "learning_rate": 2.1764920605949993e-05, "loss": 0.2688, "step": 2385 }, { "epoch": 0.13086568471773533, "grad_norm": 0.29079946875572205, "learning_rate": 2.1810549370323053e-05, "loss": 0.2673, "step": 2390 }, { "epoch": 0.13113946230082682, "grad_norm": 0.29694706201553345, "learning_rate": 2.1856178134696113e-05, "loss": 0.2683, "step": 2395 }, { "epoch": 0.13141323988391831, "grad_norm": 0.25043541193008423, "learning_rate": 2.1901806899069174e-05, "loss": 0.2625, "step": 2400 }, { "epoch": 0.1316870174670098, "grad_norm": 0.23868800699710846, "learning_rate": 2.1947435663442234e-05, "loss": 0.2636, "step": 2405 }, { "epoch": 0.1319607950501013, "grad_norm": 0.271220862865448, "learning_rate": 2.1993064427815297e-05, "loss": 0.2636, "step": 2410 }, { "epoch": 0.1322345726331928, "grad_norm": 0.24823680520057678, "learning_rate": 2.2038693192188358e-05, "loss": 0.2594, "step": 2415 }, { "epoch": 0.1325083502162843, "grad_norm": 0.27774471044540405, "learning_rate": 2.2084321956561418e-05, "loss": 0.2535, "step": 2420 }, { "epoch": 0.1327821277993758, "grad_norm": 0.28112757205963135, "learning_rate": 2.2129950720934478e-05, "loss": 0.2785, "step": 2425 }, { "epoch": 0.13305590538246728, "grad_norm": 0.35596662759780884, "learning_rate": 2.2175579485307538e-05, "loss": 0.2646, "step": 2430 }, { "epoch": 0.13332968296555878, "grad_norm": 0.2831571400165558, "learning_rate": 2.22212082496806e-05, "loss": 0.2663, "step": 2435 }, { "epoch": 0.13360346054865027, "grad_norm": 0.28933197259902954, "learning_rate": 2.226683701405366e-05, "loss": 0.2688, "step": 2440 }, { "epoch": 0.13387723813174177, "grad_norm": 0.33129215240478516, "learning_rate": 2.2312465778426722e-05, "loss": 0.273, "step": 2445 }, { "epoch": 0.13415101571483326, "grad_norm": 0.2965438961982727, "learning_rate": 2.2358094542799782e-05, "loss": 0.2607, "step": 2450 }, { "epoch": 0.13442479329792476, "grad_norm": 0.2596689760684967, "learning_rate": 2.2403723307172846e-05, "loss": 0.2736, "step": 2455 }, { "epoch": 0.13469857088101625, "grad_norm": 0.3028768002986908, "learning_rate": 2.2449352071545903e-05, "loss": 0.2671, "step": 2460 }, { "epoch": 0.13497234846410777, "grad_norm": 0.42625534534454346, "learning_rate": 2.2494980835918963e-05, "loss": 0.2563, "step": 2465 }, { "epoch": 0.13524612604719927, "grad_norm": 0.2875621020793915, "learning_rate": 2.2540609600292026e-05, "loss": 0.2651, "step": 2470 }, { "epoch": 0.13551990363029076, "grad_norm": 0.2603064477443695, "learning_rate": 2.2586238364665087e-05, "loss": 0.2639, "step": 2475 }, { "epoch": 0.13579368121338226, "grad_norm": 0.24917280673980713, "learning_rate": 2.2631867129038147e-05, "loss": 0.2616, "step": 2480 }, { "epoch": 0.13606745879647375, "grad_norm": 0.2647268772125244, "learning_rate": 2.2677495893411207e-05, "loss": 0.269, "step": 2485 }, { "epoch": 0.13634123637956524, "grad_norm": 0.24410279095172882, "learning_rate": 2.2723124657784267e-05, "loss": 0.2686, "step": 2490 }, { "epoch": 0.13661501396265674, "grad_norm": 0.22951918840408325, "learning_rate": 2.276875342215733e-05, "loss": 0.2646, "step": 2495 }, { "epoch": 0.13688879154574823, "grad_norm": 0.2645512819290161, "learning_rate": 2.2814382186530387e-05, "loss": 0.2621, "step": 2500 }, { "epoch": 0.13716256912883973, "grad_norm": 0.3238069713115692, "learning_rate": 2.286001095090345e-05, "loss": 0.2917, "step": 2505 }, { "epoch": 0.13743634671193122, "grad_norm": 0.2530249357223511, "learning_rate": 2.290563971527651e-05, "loss": 0.2777, "step": 2510 }, { "epoch": 0.13771012429502272, "grad_norm": 0.2531275749206543, "learning_rate": 2.295126847964957e-05, "loss": 0.2632, "step": 2515 }, { "epoch": 0.1379839018781142, "grad_norm": 0.2471143752336502, "learning_rate": 2.299689724402263e-05, "loss": 0.2681, "step": 2520 }, { "epoch": 0.1382576794612057, "grad_norm": 0.26616302132606506, "learning_rate": 2.3042526008395692e-05, "loss": 0.2677, "step": 2525 }, { "epoch": 0.1385314570442972, "grad_norm": 0.29285505414009094, "learning_rate": 2.3088154772768755e-05, "loss": 0.2705, "step": 2530 }, { "epoch": 0.1388052346273887, "grad_norm": 0.27973148226737976, "learning_rate": 2.3133783537141816e-05, "loss": 0.2618, "step": 2535 }, { "epoch": 0.13907901221048022, "grad_norm": 0.2922056317329407, "learning_rate": 2.3179412301514876e-05, "loss": 0.266, "step": 2540 }, { "epoch": 0.1393527897935717, "grad_norm": 0.3131359815597534, "learning_rate": 2.3225041065887936e-05, "loss": 0.2728, "step": 2545 }, { "epoch": 0.1396265673766632, "grad_norm": 0.33375284075737, "learning_rate": 2.3270669830260996e-05, "loss": 0.2635, "step": 2550 }, { "epoch": 0.1399003449597547, "grad_norm": 0.26566609740257263, "learning_rate": 2.331629859463406e-05, "loss": 0.273, "step": 2555 }, { "epoch": 0.1401741225428462, "grad_norm": 0.23836486041545868, "learning_rate": 2.336192735900712e-05, "loss": 0.2599, "step": 2560 }, { "epoch": 0.1404479001259377, "grad_norm": 0.2666809856891632, "learning_rate": 2.340755612338018e-05, "loss": 0.2631, "step": 2565 }, { "epoch": 0.14072167770902919, "grad_norm": 0.2816884219646454, "learning_rate": 2.345318488775324e-05, "loss": 0.2791, "step": 2570 }, { "epoch": 0.14099545529212068, "grad_norm": 0.30078887939453125, "learning_rate": 2.34988136521263e-05, "loss": 0.2708, "step": 2575 }, { "epoch": 0.14126923287521218, "grad_norm": 0.2643229067325592, "learning_rate": 2.3544442416499364e-05, "loss": 0.2643, "step": 2580 }, { "epoch": 0.14154301045830367, "grad_norm": 0.29941216111183167, "learning_rate": 2.359007118087242e-05, "loss": 0.2687, "step": 2585 }, { "epoch": 0.14181678804139516, "grad_norm": 0.29582440853118896, "learning_rate": 2.3635699945245484e-05, "loss": 0.2728, "step": 2590 }, { "epoch": 0.14209056562448666, "grad_norm": 0.2946060001850128, "learning_rate": 2.3681328709618545e-05, "loss": 0.2559, "step": 2595 }, { "epoch": 0.14236434320757815, "grad_norm": 0.27332285046577454, "learning_rate": 2.3726957473991608e-05, "loss": 0.2659, "step": 2600 }, { "epoch": 0.14263812079066965, "grad_norm": 0.2589055895805359, "learning_rate": 2.3772586238364665e-05, "loss": 0.2685, "step": 2605 }, { "epoch": 0.14291189837376114, "grad_norm": 0.2729928493499756, "learning_rate": 2.3818215002737725e-05, "loss": 0.2703, "step": 2610 }, { "epoch": 0.14318567595685266, "grad_norm": 0.26781606674194336, "learning_rate": 2.386384376711079e-05, "loss": 0.261, "step": 2615 }, { "epoch": 0.14345945353994416, "grad_norm": 0.2931343913078308, "learning_rate": 2.390947253148385e-05, "loss": 0.2711, "step": 2620 }, { "epoch": 0.14373323112303565, "grad_norm": 0.23224301636219025, "learning_rate": 2.395510129585691e-05, "loss": 0.2614, "step": 2625 }, { "epoch": 0.14400700870612715, "grad_norm": 0.27723971009254456, "learning_rate": 2.400073006022997e-05, "loss": 0.2629, "step": 2630 }, { "epoch": 0.14428078628921864, "grad_norm": 0.2299395203590393, "learning_rate": 2.404635882460303e-05, "loss": 0.2541, "step": 2635 }, { "epoch": 0.14455456387231014, "grad_norm": 0.24797271192073822, "learning_rate": 2.4091987588976093e-05, "loss": 0.2535, "step": 2640 }, { "epoch": 0.14482834145540163, "grad_norm": 0.2982407212257385, "learning_rate": 2.4137616353349153e-05, "loss": 0.2649, "step": 2645 }, { "epoch": 0.14510211903849313, "grad_norm": 0.3289448618888855, "learning_rate": 2.4183245117722213e-05, "loss": 0.2634, "step": 2650 }, { "epoch": 0.14537589662158462, "grad_norm": 0.3169625699520111, "learning_rate": 2.4228873882095274e-05, "loss": 0.2529, "step": 2655 }, { "epoch": 0.14564967420467612, "grad_norm": 0.30877766013145447, "learning_rate": 2.4274502646468337e-05, "loss": 0.2676, "step": 2660 }, { "epoch": 0.1459234517877676, "grad_norm": 0.3337065577507019, "learning_rate": 2.4320131410841397e-05, "loss": 0.2589, "step": 2665 }, { "epoch": 0.1461972293708591, "grad_norm": 0.24164262413978577, "learning_rate": 2.4365760175214454e-05, "loss": 0.2484, "step": 2670 }, { "epoch": 0.1464710069539506, "grad_norm": 0.20701323449611664, "learning_rate": 2.4411388939587518e-05, "loss": 0.2637, "step": 2675 }, { "epoch": 0.1467447845370421, "grad_norm": 0.25436365604400635, "learning_rate": 2.4457017703960578e-05, "loss": 0.2683, "step": 2680 }, { "epoch": 0.14701856212013362, "grad_norm": 0.2263154238462448, "learning_rate": 2.450264646833364e-05, "loss": 0.2551, "step": 2685 }, { "epoch": 0.1472923397032251, "grad_norm": 0.2845243513584137, "learning_rate": 2.4548275232706698e-05, "loss": 0.2604, "step": 2690 }, { "epoch": 0.1475661172863166, "grad_norm": 0.24110570549964905, "learning_rate": 2.459390399707976e-05, "loss": 0.2695, "step": 2695 }, { "epoch": 0.1478398948694081, "grad_norm": 0.2738218605518341, "learning_rate": 2.4639532761452822e-05, "loss": 0.2643, "step": 2700 }, { "epoch": 0.1481136724524996, "grad_norm": 0.24598979949951172, "learning_rate": 2.4685161525825882e-05, "loss": 0.2707, "step": 2705 }, { "epoch": 0.1483874500355911, "grad_norm": 0.24260710179805756, "learning_rate": 2.4730790290198942e-05, "loss": 0.258, "step": 2710 }, { "epoch": 0.14866122761868258, "grad_norm": 0.3051038384437561, "learning_rate": 2.4776419054572003e-05, "loss": 0.2688, "step": 2715 }, { "epoch": 0.14893500520177408, "grad_norm": 0.251924604177475, "learning_rate": 2.4822047818945063e-05, "loss": 0.2639, "step": 2720 }, { "epoch": 0.14920878278486557, "grad_norm": 0.277418851852417, "learning_rate": 2.4867676583318126e-05, "loss": 0.2645, "step": 2725 }, { "epoch": 0.14948256036795707, "grad_norm": 0.38692542910575867, "learning_rate": 2.4913305347691187e-05, "loss": 0.2585, "step": 2730 }, { "epoch": 0.14975633795104856, "grad_norm": 0.2650293707847595, "learning_rate": 2.4958934112064247e-05, "loss": 0.2704, "step": 2735 }, { "epoch": 0.15003011553414006, "grad_norm": 0.2196091264486313, "learning_rate": 2.500456287643731e-05, "loss": 0.2556, "step": 2740 }, { "epoch": 0.15030389311723155, "grad_norm": 0.2727758586406708, "learning_rate": 2.5050191640810367e-05, "loss": 0.2685, "step": 2745 }, { "epoch": 0.15057767070032305, "grad_norm": 0.218942791223526, "learning_rate": 2.5095820405183427e-05, "loss": 0.2623, "step": 2750 }, { "epoch": 0.15085144828341454, "grad_norm": 0.2539045810699463, "learning_rate": 2.514144916955649e-05, "loss": 0.2624, "step": 2755 }, { "epoch": 0.15112522586650606, "grad_norm": 0.28998616337776184, "learning_rate": 2.5187077933929548e-05, "loss": 0.2449, "step": 2760 }, { "epoch": 0.15139900344959756, "grad_norm": 0.24887976050376892, "learning_rate": 2.523270669830261e-05, "loss": 0.2768, "step": 2765 }, { "epoch": 0.15167278103268905, "grad_norm": 0.24144023656845093, "learning_rate": 2.527833546267567e-05, "loss": 0.2563, "step": 2770 }, { "epoch": 0.15194655861578055, "grad_norm": 0.2354549765586853, "learning_rate": 2.5323964227048735e-05, "loss": 0.277, "step": 2775 }, { "epoch": 0.15222033619887204, "grad_norm": 0.3217938244342804, "learning_rate": 2.5369592991421792e-05, "loss": 0.2742, "step": 2780 }, { "epoch": 0.15249411378196354, "grad_norm": 0.23357290029525757, "learning_rate": 2.5415221755794855e-05, "loss": 0.2665, "step": 2785 }, { "epoch": 0.15276789136505503, "grad_norm": 0.32533061504364014, "learning_rate": 2.5460850520167916e-05, "loss": 0.2635, "step": 2790 }, { "epoch": 0.15304166894814653, "grad_norm": 0.3766584098339081, "learning_rate": 2.5506479284540972e-05, "loss": 0.257, "step": 2795 }, { "epoch": 0.15331544653123802, "grad_norm": 0.34189507365226746, "learning_rate": 2.5552108048914036e-05, "loss": 0.2669, "step": 2800 }, { "epoch": 0.15358922411432951, "grad_norm": 0.2822693884372711, "learning_rate": 2.55977368132871e-05, "loss": 0.261, "step": 2805 }, { "epoch": 0.153863001697421, "grad_norm": 0.4318119287490845, "learning_rate": 2.5643365577660156e-05, "loss": 0.2712, "step": 2810 }, { "epoch": 0.1541367792805125, "grad_norm": 0.2858974039554596, "learning_rate": 2.5688994342033216e-05, "loss": 0.2573, "step": 2815 }, { "epoch": 0.154410556863604, "grad_norm": 0.3028234541416168, "learning_rate": 2.573462310640628e-05, "loss": 0.2654, "step": 2820 }, { "epoch": 0.1546843344466955, "grad_norm": 0.2585378587245941, "learning_rate": 2.5780251870779344e-05, "loss": 0.2629, "step": 2825 }, { "epoch": 0.154958112029787, "grad_norm": 0.2634419798851013, "learning_rate": 2.58258806351524e-05, "loss": 0.2554, "step": 2830 }, { "epoch": 0.1552318896128785, "grad_norm": 0.29922470450401306, "learning_rate": 2.587150939952546e-05, "loss": 0.2606, "step": 2835 }, { "epoch": 0.15550566719597, "grad_norm": 0.261592298746109, "learning_rate": 2.5917138163898524e-05, "loss": 0.2671, "step": 2840 }, { "epoch": 0.1557794447790615, "grad_norm": 0.24941419064998627, "learning_rate": 2.596276692827158e-05, "loss": 0.2631, "step": 2845 }, { "epoch": 0.156053222362153, "grad_norm": 0.2460423707962036, "learning_rate": 2.6008395692644645e-05, "loss": 0.2618, "step": 2850 }, { "epoch": 0.1563269999452445, "grad_norm": 0.28557097911834717, "learning_rate": 2.6054024457017705e-05, "loss": 0.261, "step": 2855 }, { "epoch": 0.15660077752833598, "grad_norm": 0.28795745968818665, "learning_rate": 2.6099653221390768e-05, "loss": 0.2627, "step": 2860 }, { "epoch": 0.15687455511142748, "grad_norm": 0.2492648959159851, "learning_rate": 2.6145281985763825e-05, "loss": 0.2567, "step": 2865 }, { "epoch": 0.15714833269451897, "grad_norm": 0.22869956493377686, "learning_rate": 2.619091075013689e-05, "loss": 0.2706, "step": 2870 }, { "epoch": 0.15742211027761047, "grad_norm": 0.21835961937904358, "learning_rate": 2.623653951450995e-05, "loss": 0.2566, "step": 2875 }, { "epoch": 0.15769588786070196, "grad_norm": 0.27343329787254333, "learning_rate": 2.6282168278883006e-05, "loss": 0.2661, "step": 2880 }, { "epoch": 0.15796966544379346, "grad_norm": 0.24799755215644836, "learning_rate": 2.632779704325607e-05, "loss": 0.2758, "step": 2885 }, { "epoch": 0.15824344302688495, "grad_norm": 0.25292515754699707, "learning_rate": 2.6373425807629133e-05, "loss": 0.2621, "step": 2890 }, { "epoch": 0.15851722060997644, "grad_norm": 0.2999371290206909, "learning_rate": 2.641905457200219e-05, "loss": 0.2628, "step": 2895 }, { "epoch": 0.15879099819306794, "grad_norm": 0.2945338189601898, "learning_rate": 2.646468333637525e-05, "loss": 0.251, "step": 2900 }, { "epoch": 0.15906477577615946, "grad_norm": 0.2964285612106323, "learning_rate": 2.6510312100748313e-05, "loss": 0.2554, "step": 2905 }, { "epoch": 0.15933855335925096, "grad_norm": 0.27417856454849243, "learning_rate": 2.6555940865121377e-05, "loss": 0.2711, "step": 2910 }, { "epoch": 0.15961233094234245, "grad_norm": 0.34931328892707825, "learning_rate": 2.6601569629494434e-05, "loss": 0.2641, "step": 2915 }, { "epoch": 0.15988610852543395, "grad_norm": 0.2716037631034851, "learning_rate": 2.6647198393867494e-05, "loss": 0.2676, "step": 2920 }, { "epoch": 0.16015988610852544, "grad_norm": 0.3171830475330353, "learning_rate": 2.6692827158240557e-05, "loss": 0.2594, "step": 2925 }, { "epoch": 0.16043366369161693, "grad_norm": 0.2822520434856415, "learning_rate": 2.6738455922613614e-05, "loss": 0.2578, "step": 2930 }, { "epoch": 0.16070744127470843, "grad_norm": 0.259412944316864, "learning_rate": 2.6784084686986678e-05, "loss": 0.2546, "step": 2935 }, { "epoch": 0.16098121885779992, "grad_norm": 0.2527029812335968, "learning_rate": 2.6829713451359738e-05, "loss": 0.2529, "step": 2940 }, { "epoch": 0.16125499644089142, "grad_norm": 0.3461344540119171, "learning_rate": 2.68753422157328e-05, "loss": 0.2608, "step": 2945 }, { "epoch": 0.1615287740239829, "grad_norm": 0.277374267578125, "learning_rate": 2.692097098010586e-05, "loss": 0.2594, "step": 2950 }, { "epoch": 0.1618025516070744, "grad_norm": 0.23331065475940704, "learning_rate": 2.6966599744478922e-05, "loss": 0.2642, "step": 2955 }, { "epoch": 0.1620763291901659, "grad_norm": 0.31104084849357605, "learning_rate": 2.7012228508851982e-05, "loss": 0.2638, "step": 2960 }, { "epoch": 0.1623501067732574, "grad_norm": 0.2617088258266449, "learning_rate": 2.705785727322504e-05, "loss": 0.2605, "step": 2965 }, { "epoch": 0.1626238843563489, "grad_norm": 0.3534022569656372, "learning_rate": 2.7103486037598103e-05, "loss": 0.2643, "step": 2970 }, { "epoch": 0.16289766193944039, "grad_norm": 0.3709748387336731, "learning_rate": 2.7149114801971166e-05, "loss": 0.266, "step": 2975 }, { "epoch": 0.1631714395225319, "grad_norm": 0.28915300965309143, "learning_rate": 2.7194743566344226e-05, "loss": 0.2628, "step": 2980 }, { "epoch": 0.1634452171056234, "grad_norm": 0.30431273579597473, "learning_rate": 2.7240372330717283e-05, "loss": 0.2639, "step": 2985 }, { "epoch": 0.1637189946887149, "grad_norm": 0.35011181235313416, "learning_rate": 2.7286001095090347e-05, "loss": 0.2652, "step": 2990 }, { "epoch": 0.1639927722718064, "grad_norm": 0.3021288812160492, "learning_rate": 2.733162985946341e-05, "loss": 0.2556, "step": 2995 }, { "epoch": 0.1642665498548979, "grad_norm": 0.29405343532562256, "learning_rate": 2.7377258623836467e-05, "loss": 0.2637, "step": 3000 }, { "epoch": 0.16454032743798938, "grad_norm": 0.27567777037620544, "learning_rate": 2.7422887388209527e-05, "loss": 0.2508, "step": 3005 }, { "epoch": 0.16481410502108088, "grad_norm": 0.24749627709388733, "learning_rate": 2.746851615258259e-05, "loss": 0.2538, "step": 3010 }, { "epoch": 0.16508788260417237, "grad_norm": 0.2818320393562317, "learning_rate": 2.7514144916955648e-05, "loss": 0.268, "step": 3015 }, { "epoch": 0.16536166018726386, "grad_norm": 0.2580936849117279, "learning_rate": 2.755977368132871e-05, "loss": 0.26, "step": 3020 }, { "epoch": 0.16563543777035536, "grad_norm": 0.37154701352119446, "learning_rate": 2.760540244570177e-05, "loss": 0.2491, "step": 3025 }, { "epoch": 0.16590921535344685, "grad_norm": 0.38267025351524353, "learning_rate": 2.7651031210074835e-05, "loss": 0.2665, "step": 3030 }, { "epoch": 0.16618299293653835, "grad_norm": 0.34263238310813904, "learning_rate": 2.7696659974447892e-05, "loss": 0.2629, "step": 3035 }, { "epoch": 0.16645677051962984, "grad_norm": 0.25294655561447144, "learning_rate": 2.7742288738820955e-05, "loss": 0.2719, "step": 3040 }, { "epoch": 0.16673054810272134, "grad_norm": 0.24672187864780426, "learning_rate": 2.7787917503194015e-05, "loss": 0.2555, "step": 3045 }, { "epoch": 0.16700432568581283, "grad_norm": 0.31234443187713623, "learning_rate": 2.7833546267567072e-05, "loss": 0.2606, "step": 3050 }, { "epoch": 0.16727810326890435, "grad_norm": 0.26723524928092957, "learning_rate": 2.7879175031940136e-05, "loss": 0.2652, "step": 3055 }, { "epoch": 0.16755188085199585, "grad_norm": 0.220321387052536, "learning_rate": 2.79248037963132e-05, "loss": 0.2736, "step": 3060 }, { "epoch": 0.16782565843508734, "grad_norm": 0.24147644639015198, "learning_rate": 2.797043256068626e-05, "loss": 0.2683, "step": 3065 }, { "epoch": 0.16809943601817884, "grad_norm": 0.2470806986093521, "learning_rate": 2.8016061325059316e-05, "loss": 0.2587, "step": 3070 }, { "epoch": 0.16837321360127033, "grad_norm": 0.2409418374300003, "learning_rate": 2.806169008943238e-05, "loss": 0.2646, "step": 3075 }, { "epoch": 0.16864699118436183, "grad_norm": 0.2518804371356964, "learning_rate": 2.8107318853805444e-05, "loss": 0.2604, "step": 3080 }, { "epoch": 0.16892076876745332, "grad_norm": 0.26256120204925537, "learning_rate": 2.81529476181785e-05, "loss": 0.2629, "step": 3085 }, { "epoch": 0.16919454635054482, "grad_norm": 0.21556854248046875, "learning_rate": 2.819857638255156e-05, "loss": 0.2599, "step": 3090 }, { "epoch": 0.1694683239336363, "grad_norm": 0.2503984570503235, "learning_rate": 2.8244205146924624e-05, "loss": 0.2625, "step": 3095 }, { "epoch": 0.1697421015167278, "grad_norm": 0.24283699691295624, "learning_rate": 2.828983391129768e-05, "loss": 0.2581, "step": 3100 }, { "epoch": 0.1700158790998193, "grad_norm": 0.32657545804977417, "learning_rate": 2.8335462675670744e-05, "loss": 0.2555, "step": 3105 }, { "epoch": 0.1702896566829108, "grad_norm": 0.22700586915016174, "learning_rate": 2.8381091440043805e-05, "loss": 0.254, "step": 3110 }, { "epoch": 0.1705634342660023, "grad_norm": 0.34239792823791504, "learning_rate": 2.8426720204416868e-05, "loss": 0.2571, "step": 3115 }, { "epoch": 0.17083721184909378, "grad_norm": 0.24965687096118927, "learning_rate": 2.8472348968789925e-05, "loss": 0.2591, "step": 3120 }, { "epoch": 0.1711109894321853, "grad_norm": 0.2571142017841339, "learning_rate": 2.851797773316299e-05, "loss": 0.2584, "step": 3125 }, { "epoch": 0.1713847670152768, "grad_norm": 0.2332308143377304, "learning_rate": 2.856360649753605e-05, "loss": 0.2614, "step": 3130 }, { "epoch": 0.1716585445983683, "grad_norm": 0.2593749761581421, "learning_rate": 2.8609235261909106e-05, "loss": 0.2466, "step": 3135 }, { "epoch": 0.1719323221814598, "grad_norm": 0.25923216342926025, "learning_rate": 2.865486402628217e-05, "loss": 0.2549, "step": 3140 }, { "epoch": 0.17220609976455128, "grad_norm": 0.2811953127384186, "learning_rate": 2.8700492790655233e-05, "loss": 0.2558, "step": 3145 }, { "epoch": 0.17247987734764278, "grad_norm": 0.2441931813955307, "learning_rate": 2.8746121555028293e-05, "loss": 0.2569, "step": 3150 }, { "epoch": 0.17275365493073427, "grad_norm": 0.24263973534107208, "learning_rate": 2.879175031940135e-05, "loss": 0.2538, "step": 3155 }, { "epoch": 0.17302743251382577, "grad_norm": 0.25325536727905273, "learning_rate": 2.8837379083774413e-05, "loss": 0.2538, "step": 3160 }, { "epoch": 0.17330121009691726, "grad_norm": 0.2501485347747803, "learning_rate": 2.8883007848147477e-05, "loss": 0.2596, "step": 3165 }, { "epoch": 0.17357498768000876, "grad_norm": 0.27266645431518555, "learning_rate": 2.8928636612520534e-05, "loss": 0.2556, "step": 3170 }, { "epoch": 0.17384876526310025, "grad_norm": 0.30725783109664917, "learning_rate": 2.8974265376893594e-05, "loss": 0.2561, "step": 3175 }, { "epoch": 0.17412254284619175, "grad_norm": 0.229283407330513, "learning_rate": 2.9019894141266657e-05, "loss": 0.2531, "step": 3180 }, { "epoch": 0.17439632042928324, "grad_norm": 0.2809518873691559, "learning_rate": 2.906552290563972e-05, "loss": 0.2611, "step": 3185 }, { "epoch": 0.17467009801237474, "grad_norm": 0.28637418150901794, "learning_rate": 2.9111151670012778e-05, "loss": 0.2637, "step": 3190 }, { "epoch": 0.17494387559546623, "grad_norm": 0.27210739254951477, "learning_rate": 2.9156780434385838e-05, "loss": 0.2701, "step": 3195 }, { "epoch": 0.17521765317855775, "grad_norm": 0.2787868082523346, "learning_rate": 2.92024091987589e-05, "loss": 0.2565, "step": 3200 }, { "epoch": 0.17549143076164925, "grad_norm": 0.2921457886695862, "learning_rate": 2.924803796313196e-05, "loss": 0.251, "step": 3205 }, { "epoch": 0.17576520834474074, "grad_norm": 0.2872716188430786, "learning_rate": 2.9293666727505022e-05, "loss": 0.2658, "step": 3210 }, { "epoch": 0.17603898592783224, "grad_norm": 0.3501172363758087, "learning_rate": 2.9339295491878082e-05, "loss": 0.2612, "step": 3215 }, { "epoch": 0.17631276351092373, "grad_norm": 0.2774127125740051, "learning_rate": 2.938492425625114e-05, "loss": 0.2586, "step": 3220 }, { "epoch": 0.17658654109401523, "grad_norm": 0.2402140349149704, "learning_rate": 2.9430553020624202e-05, "loss": 0.2507, "step": 3225 }, { "epoch": 0.17686031867710672, "grad_norm": 0.2317354679107666, "learning_rate": 2.9476181784997266e-05, "loss": 0.2502, "step": 3230 }, { "epoch": 0.17713409626019821, "grad_norm": 0.24810566008090973, "learning_rate": 2.9521810549370326e-05, "loss": 0.2566, "step": 3235 }, { "epoch": 0.1774078738432897, "grad_norm": 0.2546860873699188, "learning_rate": 2.9567439313743383e-05, "loss": 0.2546, "step": 3240 }, { "epoch": 0.1776816514263812, "grad_norm": 0.29634934663772583, "learning_rate": 2.9613068078116447e-05, "loss": 0.2603, "step": 3245 }, { "epoch": 0.1779554290094727, "grad_norm": 0.24118155241012573, "learning_rate": 2.9658696842489507e-05, "loss": 0.2538, "step": 3250 }, { "epoch": 0.1782292065925642, "grad_norm": 0.28624850511550903, "learning_rate": 2.9704325606862567e-05, "loss": 0.2751, "step": 3255 }, { "epoch": 0.1785029841756557, "grad_norm": 0.2645981013774872, "learning_rate": 2.9749954371235627e-05, "loss": 0.2523, "step": 3260 }, { "epoch": 0.17877676175874718, "grad_norm": 0.22716784477233887, "learning_rate": 2.979558313560869e-05, "loss": 0.2651, "step": 3265 }, { "epoch": 0.17905053934183868, "grad_norm": 0.29666030406951904, "learning_rate": 2.984121189998175e-05, "loss": 0.2641, "step": 3270 }, { "epoch": 0.1793243169249302, "grad_norm": 0.23635101318359375, "learning_rate": 2.9886840664354808e-05, "loss": 0.2685, "step": 3275 }, { "epoch": 0.1795980945080217, "grad_norm": 0.21964983642101288, "learning_rate": 2.993246942872787e-05, "loss": 0.2549, "step": 3280 }, { "epoch": 0.1798718720911132, "grad_norm": 0.24936366081237793, "learning_rate": 2.9978098193100935e-05, "loss": 0.2637, "step": 3285 }, { "epoch": 0.18014564967420468, "grad_norm": 0.27707353234291077, "learning_rate": 3.002372695747399e-05, "loss": 0.2613, "step": 3290 }, { "epoch": 0.18041942725729618, "grad_norm": 0.2851099967956543, "learning_rate": 3.0069355721847052e-05, "loss": 0.2601, "step": 3295 }, { "epoch": 0.18069320484038767, "grad_norm": 0.28864917159080505, "learning_rate": 3.0114984486220115e-05, "loss": 0.2601, "step": 3300 }, { "epoch": 0.18096698242347917, "grad_norm": 0.20366884768009186, "learning_rate": 3.0160613250593172e-05, "loss": 0.25, "step": 3305 }, { "epoch": 0.18124076000657066, "grad_norm": 0.25791090726852417, "learning_rate": 3.0206242014966236e-05, "loss": 0.263, "step": 3310 }, { "epoch": 0.18151453758966216, "grad_norm": 0.314749538898468, "learning_rate": 3.0251870779339296e-05, "loss": 0.2532, "step": 3315 }, { "epoch": 0.18178831517275365, "grad_norm": 0.26012980937957764, "learning_rate": 3.029749954371236e-05, "loss": 0.2543, "step": 3320 }, { "epoch": 0.18206209275584515, "grad_norm": 0.26275089383125305, "learning_rate": 3.0343128308085416e-05, "loss": 0.2635, "step": 3325 }, { "epoch": 0.18233587033893664, "grad_norm": 0.33797040581703186, "learning_rate": 3.038875707245848e-05, "loss": 0.2465, "step": 3330 }, { "epoch": 0.18260964792202813, "grad_norm": 0.36574727296829224, "learning_rate": 3.043438583683154e-05, "loss": 0.2446, "step": 3335 }, { "epoch": 0.18288342550511963, "grad_norm": 0.33798569440841675, "learning_rate": 3.0480014601204597e-05, "loss": 0.2568, "step": 3340 }, { "epoch": 0.18315720308821112, "grad_norm": 0.25316688418388367, "learning_rate": 3.052564336557766e-05, "loss": 0.2646, "step": 3345 }, { "epoch": 0.18343098067130265, "grad_norm": 0.30070552229881287, "learning_rate": 3.0571272129950724e-05, "loss": 0.2496, "step": 3350 }, { "epoch": 0.18370475825439414, "grad_norm": 0.3341839015483856, "learning_rate": 3.061690089432379e-05, "loss": 0.2584, "step": 3355 }, { "epoch": 0.18397853583748563, "grad_norm": 0.245503231883049, "learning_rate": 3.0662529658696844e-05, "loss": 0.2495, "step": 3360 }, { "epoch": 0.18425231342057713, "grad_norm": 0.24233125150203705, "learning_rate": 3.070815842306991e-05, "loss": 0.2537, "step": 3365 }, { "epoch": 0.18452609100366862, "grad_norm": 0.23638667166233063, "learning_rate": 3.0753787187442965e-05, "loss": 0.2619, "step": 3370 }, { "epoch": 0.18479986858676012, "grad_norm": 0.23354344069957733, "learning_rate": 3.079941595181602e-05, "loss": 0.2464, "step": 3375 }, { "epoch": 0.1850736461698516, "grad_norm": 0.2209869623184204, "learning_rate": 3.0845044716189085e-05, "loss": 0.2593, "step": 3380 }, { "epoch": 0.1853474237529431, "grad_norm": 0.3414124846458435, "learning_rate": 3.089067348056215e-05, "loss": 0.2581, "step": 3385 }, { "epoch": 0.1856212013360346, "grad_norm": 0.3325765132904053, "learning_rate": 3.093630224493521e-05, "loss": 0.255, "step": 3390 }, { "epoch": 0.1858949789191261, "grad_norm": 0.30114346742630005, "learning_rate": 3.098193100930827e-05, "loss": 0.2586, "step": 3395 }, { "epoch": 0.1861687565022176, "grad_norm": 0.32905831933021545, "learning_rate": 3.102755977368133e-05, "loss": 0.2477, "step": 3400 }, { "epoch": 0.1864425340853091, "grad_norm": 0.23417909443378448, "learning_rate": 3.1073188538054396e-05, "loss": 0.254, "step": 3405 }, { "epoch": 0.18671631166840058, "grad_norm": 0.22932341694831848, "learning_rate": 3.111881730242745e-05, "loss": 0.2536, "step": 3410 }, { "epoch": 0.18699008925149208, "grad_norm": 0.3759411573410034, "learning_rate": 3.116444606680051e-05, "loss": 0.2563, "step": 3415 }, { "epoch": 0.1872638668345836, "grad_norm": 0.33542585372924805, "learning_rate": 3.1210074831173573e-05, "loss": 0.2486, "step": 3420 }, { "epoch": 0.1875376444176751, "grad_norm": 0.23737922310829163, "learning_rate": 3.125570359554663e-05, "loss": 0.2619, "step": 3425 }, { "epoch": 0.1878114220007666, "grad_norm": 0.24670377373695374, "learning_rate": 3.1301332359919694e-05, "loss": 0.247, "step": 3430 }, { "epoch": 0.18808519958385808, "grad_norm": 0.21480455994606018, "learning_rate": 3.134696112429276e-05, "loss": 0.248, "step": 3435 }, { "epoch": 0.18835897716694958, "grad_norm": 0.29027900099754333, "learning_rate": 3.139258988866582e-05, "loss": 0.2584, "step": 3440 }, { "epoch": 0.18863275475004107, "grad_norm": 0.33283019065856934, "learning_rate": 3.143821865303888e-05, "loss": 0.258, "step": 3445 }, { "epoch": 0.18890653233313257, "grad_norm": 0.27713847160339355, "learning_rate": 3.148384741741194e-05, "loss": 0.262, "step": 3450 }, { "epoch": 0.18918030991622406, "grad_norm": 0.2654966115951538, "learning_rate": 3.1529476181785e-05, "loss": 0.2584, "step": 3455 }, { "epoch": 0.18945408749931555, "grad_norm": 0.2944085896015167, "learning_rate": 3.1575104946158055e-05, "loss": 0.2637, "step": 3460 }, { "epoch": 0.18972786508240705, "grad_norm": 0.3440367877483368, "learning_rate": 3.162073371053112e-05, "loss": 0.2677, "step": 3465 }, { "epoch": 0.19000164266549854, "grad_norm": 0.34740376472473145, "learning_rate": 3.166636247490418e-05, "loss": 0.2612, "step": 3470 }, { "epoch": 0.19027542024859004, "grad_norm": 0.27377599477767944, "learning_rate": 3.1711991239277246e-05, "loss": 0.249, "step": 3475 }, { "epoch": 0.19054919783168153, "grad_norm": 0.4061751365661621, "learning_rate": 3.17576200036503e-05, "loss": 0.2629, "step": 3480 }, { "epoch": 0.19082297541477303, "grad_norm": 0.29018786549568176, "learning_rate": 3.1803248768023366e-05, "loss": 0.2562, "step": 3485 }, { "epoch": 0.19109675299786452, "grad_norm": 0.24915984272956848, "learning_rate": 3.184887753239642e-05, "loss": 0.2579, "step": 3490 }, { "epoch": 0.19137053058095604, "grad_norm": 0.29329413175582886, "learning_rate": 3.1894506296769486e-05, "loss": 0.2537, "step": 3495 }, { "epoch": 0.19164430816404754, "grad_norm": 0.3459126651287079, "learning_rate": 3.194013506114254e-05, "loss": 0.2614, "step": 3500 }, { "epoch": 0.19191808574713903, "grad_norm": 0.272569477558136, "learning_rate": 3.198576382551561e-05, "loss": 0.2516, "step": 3505 }, { "epoch": 0.19219186333023053, "grad_norm": 0.35650524497032166, "learning_rate": 3.2031392589888664e-05, "loss": 0.2444, "step": 3510 }, { "epoch": 0.19246564091332202, "grad_norm": 0.3239315152168274, "learning_rate": 3.207702135426173e-05, "loss": 0.2586, "step": 3515 }, { "epoch": 0.19273941849641352, "grad_norm": 0.2729489803314209, "learning_rate": 3.212265011863479e-05, "loss": 0.2486, "step": 3520 }, { "epoch": 0.193013196079505, "grad_norm": 0.2659699618816376, "learning_rate": 3.2168278883007854e-05, "loss": 0.2507, "step": 3525 }, { "epoch": 0.1932869736625965, "grad_norm": 0.33273977041244507, "learning_rate": 3.221390764738091e-05, "loss": 0.2613, "step": 3530 }, { "epoch": 0.193560751245688, "grad_norm": 0.24687622487545013, "learning_rate": 3.225953641175397e-05, "loss": 0.2522, "step": 3535 }, { "epoch": 0.1938345288287795, "grad_norm": 0.26545628905296326, "learning_rate": 3.230516517612703e-05, "loss": 0.2621, "step": 3540 }, { "epoch": 0.194108306411871, "grad_norm": 0.23730680346488953, "learning_rate": 3.235079394050009e-05, "loss": 0.257, "step": 3545 }, { "epoch": 0.19438208399496248, "grad_norm": 0.36633267998695374, "learning_rate": 3.239642270487315e-05, "loss": 0.2557, "step": 3550 }, { "epoch": 0.19465586157805398, "grad_norm": 0.25818902254104614, "learning_rate": 3.2442051469246215e-05, "loss": 0.2575, "step": 3555 }, { "epoch": 0.19492963916114547, "grad_norm": 0.24023178219795227, "learning_rate": 3.248768023361928e-05, "loss": 0.2421, "step": 3560 }, { "epoch": 0.19520341674423697, "grad_norm": 0.23573553562164307, "learning_rate": 3.2533308997992336e-05, "loss": 0.2498, "step": 3565 }, { "epoch": 0.1954771943273285, "grad_norm": 0.263369083404541, "learning_rate": 3.25789377623654e-05, "loss": 0.2508, "step": 3570 }, { "epoch": 0.19575097191041999, "grad_norm": 0.24902111291885376, "learning_rate": 3.2624566526738456e-05, "loss": 0.2466, "step": 3575 }, { "epoch": 0.19602474949351148, "grad_norm": 0.278157502412796, "learning_rate": 3.267019529111151e-05, "loss": 0.2487, "step": 3580 }, { "epoch": 0.19629852707660297, "grad_norm": 0.26406577229499817, "learning_rate": 3.2715824055484576e-05, "loss": 0.2602, "step": 3585 }, { "epoch": 0.19657230465969447, "grad_norm": 0.2517775595188141, "learning_rate": 3.276145281985764e-05, "loss": 0.2514, "step": 3590 }, { "epoch": 0.19684608224278596, "grad_norm": 0.2490648776292801, "learning_rate": 3.2807081584230704e-05, "loss": 0.2544, "step": 3595 }, { "epoch": 0.19711985982587746, "grad_norm": 0.19795477390289307, "learning_rate": 3.285271034860376e-05, "loss": 0.2524, "step": 3600 }, { "epoch": 0.19739363740896895, "grad_norm": 0.2874956429004669, "learning_rate": 3.2898339112976824e-05, "loss": 0.2552, "step": 3605 }, { "epoch": 0.19766741499206045, "grad_norm": 0.3574991226196289, "learning_rate": 3.294396787734989e-05, "loss": 0.2467, "step": 3610 }, { "epoch": 0.19794119257515194, "grad_norm": 0.2951659560203552, "learning_rate": 3.2989596641722944e-05, "loss": 0.2453, "step": 3615 }, { "epoch": 0.19821497015824344, "grad_norm": 0.3725830316543579, "learning_rate": 3.3035225406096e-05, "loss": 0.2529, "step": 3620 }, { "epoch": 0.19848874774133493, "grad_norm": 0.3503797948360443, "learning_rate": 3.3080854170469065e-05, "loss": 0.2448, "step": 3625 }, { "epoch": 0.19876252532442643, "grad_norm": 0.25447699427604675, "learning_rate": 3.312648293484212e-05, "loss": 0.2532, "step": 3630 }, { "epoch": 0.19903630290751792, "grad_norm": 0.23952428996562958, "learning_rate": 3.3172111699215185e-05, "loss": 0.2434, "step": 3635 }, { "epoch": 0.19931008049060944, "grad_norm": 0.2996550500392914, "learning_rate": 3.321774046358825e-05, "loss": 0.2514, "step": 3640 }, { "epoch": 0.19958385807370094, "grad_norm": 0.22214065492153168, "learning_rate": 3.326336922796131e-05, "loss": 0.2475, "step": 3645 }, { "epoch": 0.19985763565679243, "grad_norm": 0.2676449716091156, "learning_rate": 3.330899799233437e-05, "loss": 0.2564, "step": 3650 }, { "epoch": 0.20013141323988393, "grad_norm": 0.2549194097518921, "learning_rate": 3.335462675670743e-05, "loss": 0.252, "step": 3655 }, { "epoch": 0.20040519082297542, "grad_norm": 0.21957428753376007, "learning_rate": 3.340025552108049e-05, "loss": 0.2533, "step": 3660 }, { "epoch": 0.20067896840606692, "grad_norm": 0.3005646765232086, "learning_rate": 3.3445884285453546e-05, "loss": 0.2492, "step": 3665 }, { "epoch": 0.2009527459891584, "grad_norm": 0.20519892871379852, "learning_rate": 3.349151304982661e-05, "loss": 0.26, "step": 3670 }, { "epoch": 0.2012265235722499, "grad_norm": 0.3211405277252197, "learning_rate": 3.353714181419967e-05, "loss": 0.272, "step": 3675 }, { "epoch": 0.2015003011553414, "grad_norm": 0.2941063642501831, "learning_rate": 3.358277057857274e-05, "loss": 0.259, "step": 3680 }, { "epoch": 0.2017740787384329, "grad_norm": 0.28003761172294617, "learning_rate": 3.3628399342945794e-05, "loss": 0.2537, "step": 3685 }, { "epoch": 0.2020478563215244, "grad_norm": 0.2920829951763153, "learning_rate": 3.367402810731886e-05, "loss": 0.243, "step": 3690 }, { "epoch": 0.20232163390461588, "grad_norm": 0.3410775661468506, "learning_rate": 3.371965687169192e-05, "loss": 0.2507, "step": 3695 }, { "epoch": 0.20259541148770738, "grad_norm": 0.25295719504356384, "learning_rate": 3.376528563606498e-05, "loss": 0.2439, "step": 3700 }, { "epoch": 0.20286918907079887, "grad_norm": 0.27380478382110596, "learning_rate": 3.3810914400438034e-05, "loss": 0.2505, "step": 3705 }, { "epoch": 0.20314296665389037, "grad_norm": 0.25530150532722473, "learning_rate": 3.38565431648111e-05, "loss": 0.2621, "step": 3710 }, { "epoch": 0.2034167442369819, "grad_norm": 0.23673439025878906, "learning_rate": 3.3902171929184155e-05, "loss": 0.253, "step": 3715 }, { "epoch": 0.20369052182007338, "grad_norm": 0.3006655275821686, "learning_rate": 3.394780069355722e-05, "loss": 0.245, "step": 3720 }, { "epoch": 0.20396429940316488, "grad_norm": 0.26935243606567383, "learning_rate": 3.399342945793028e-05, "loss": 0.2581, "step": 3725 }, { "epoch": 0.20423807698625637, "grad_norm": 0.24153390526771545, "learning_rate": 3.4039058222303346e-05, "loss": 0.2397, "step": 3730 }, { "epoch": 0.20451185456934787, "grad_norm": 0.25148805975914, "learning_rate": 3.40846869866764e-05, "loss": 0.2524, "step": 3735 }, { "epoch": 0.20478563215243936, "grad_norm": 0.21015582978725433, "learning_rate": 3.4130315751049466e-05, "loss": 0.2496, "step": 3740 }, { "epoch": 0.20505940973553086, "grad_norm": 0.3104461431503296, "learning_rate": 3.417594451542252e-05, "loss": 0.2445, "step": 3745 }, { "epoch": 0.20533318731862235, "grad_norm": 0.242556631565094, "learning_rate": 3.422157327979558e-05, "loss": 0.2647, "step": 3750 }, { "epoch": 0.20560696490171385, "grad_norm": 0.28650227189064026, "learning_rate": 3.426720204416864e-05, "loss": 0.243, "step": 3755 }, { "epoch": 0.20588074248480534, "grad_norm": 0.2744476795196533, "learning_rate": 3.431283080854171e-05, "loss": 0.2649, "step": 3760 }, { "epoch": 0.20615452006789683, "grad_norm": 0.3906223177909851, "learning_rate": 3.435845957291477e-05, "loss": 0.2565, "step": 3765 }, { "epoch": 0.20642829765098833, "grad_norm": 0.3079637885093689, "learning_rate": 3.440408833728783e-05, "loss": 0.246, "step": 3770 }, { "epoch": 0.20670207523407982, "grad_norm": 0.30195048451423645, "learning_rate": 3.444971710166089e-05, "loss": 0.2503, "step": 3775 }, { "epoch": 0.20697585281717132, "grad_norm": 0.2438688427209854, "learning_rate": 3.4495345866033954e-05, "loss": 0.2512, "step": 3780 }, { "epoch": 0.2072496304002628, "grad_norm": 0.2501094341278076, "learning_rate": 3.454097463040701e-05, "loss": 0.2563, "step": 3785 }, { "epoch": 0.20752340798335434, "grad_norm": 0.2599429488182068, "learning_rate": 3.458660339478007e-05, "loss": 0.2493, "step": 3790 }, { "epoch": 0.20779718556644583, "grad_norm": 0.26450860500335693, "learning_rate": 3.463223215915313e-05, "loss": 0.2645, "step": 3795 }, { "epoch": 0.20807096314953732, "grad_norm": 0.2371610552072525, "learning_rate": 3.4677860923526195e-05, "loss": 0.2403, "step": 3800 }, { "epoch": 0.20834474073262882, "grad_norm": 0.27213361859321594, "learning_rate": 3.472348968789925e-05, "loss": 0.2571, "step": 3805 }, { "epoch": 0.2086185183157203, "grad_norm": 0.2695562243461609, "learning_rate": 3.4769118452272315e-05, "loss": 0.2575, "step": 3810 }, { "epoch": 0.2088922958988118, "grad_norm": 0.23483368754386902, "learning_rate": 3.481474721664538e-05, "loss": 0.2472, "step": 3815 }, { "epoch": 0.2091660734819033, "grad_norm": 0.2507854104042053, "learning_rate": 3.4860375981018436e-05, "loss": 0.2512, "step": 3820 }, { "epoch": 0.2094398510649948, "grad_norm": 0.23164600133895874, "learning_rate": 3.49060047453915e-05, "loss": 0.2653, "step": 3825 }, { "epoch": 0.2097136286480863, "grad_norm": 0.29900887608528137, "learning_rate": 3.4951633509764556e-05, "loss": 0.253, "step": 3830 }, { "epoch": 0.2099874062311778, "grad_norm": 0.2717505693435669, "learning_rate": 3.499726227413761e-05, "loss": 0.2529, "step": 3835 }, { "epoch": 0.21026118381426928, "grad_norm": 0.27576079964637756, "learning_rate": 3.5042891038510676e-05, "loss": 0.2624, "step": 3840 }, { "epoch": 0.21053496139736078, "grad_norm": 0.25125443935394287, "learning_rate": 3.508851980288374e-05, "loss": 0.2562, "step": 3845 }, { "epoch": 0.21080873898045227, "grad_norm": 0.23283769190311432, "learning_rate": 3.5134148567256804e-05, "loss": 0.253, "step": 3850 }, { "epoch": 0.21108251656354377, "grad_norm": 0.2452944666147232, "learning_rate": 3.517977733162986e-05, "loss": 0.2384, "step": 3855 }, { "epoch": 0.2113562941466353, "grad_norm": 0.2275332361459732, "learning_rate": 3.5225406096002924e-05, "loss": 0.2478, "step": 3860 }, { "epoch": 0.21163007172972678, "grad_norm": 0.22916211187839508, "learning_rate": 3.527103486037599e-05, "loss": 0.2436, "step": 3865 }, { "epoch": 0.21190384931281828, "grad_norm": 0.2343919426202774, "learning_rate": 3.5316663624749044e-05, "loss": 0.2427, "step": 3870 }, { "epoch": 0.21217762689590977, "grad_norm": 0.23250731825828552, "learning_rate": 3.53622923891221e-05, "loss": 0.2707, "step": 3875 }, { "epoch": 0.21245140447900127, "grad_norm": 0.3031955361366272, "learning_rate": 3.5407921153495165e-05, "loss": 0.2651, "step": 3880 }, { "epoch": 0.21272518206209276, "grad_norm": 0.2714652419090271, "learning_rate": 3.545354991786823e-05, "loss": 0.2521, "step": 3885 }, { "epoch": 0.21299895964518425, "grad_norm": 0.24424974620342255, "learning_rate": 3.5499178682241285e-05, "loss": 0.2589, "step": 3890 }, { "epoch": 0.21327273722827575, "grad_norm": 0.24349264800548553, "learning_rate": 3.554480744661435e-05, "loss": 0.2504, "step": 3895 }, { "epoch": 0.21354651481136724, "grad_norm": 0.2677138149738312, "learning_rate": 3.559043621098741e-05, "loss": 0.2478, "step": 3900 }, { "epoch": 0.21382029239445874, "grad_norm": 0.265488862991333, "learning_rate": 3.563606497536047e-05, "loss": 0.2503, "step": 3905 }, { "epoch": 0.21409406997755023, "grad_norm": 0.25588366389274597, "learning_rate": 3.568169373973353e-05, "loss": 0.2422, "step": 3910 }, { "epoch": 0.21436784756064173, "grad_norm": 0.2718571722507477, "learning_rate": 3.572732250410659e-05, "loss": 0.2577, "step": 3915 }, { "epoch": 0.21464162514373322, "grad_norm": 0.2792738080024719, "learning_rate": 3.5772951268479646e-05, "loss": 0.2506, "step": 3920 }, { "epoch": 0.21491540272682472, "grad_norm": 0.27696722745895386, "learning_rate": 3.581858003285271e-05, "loss": 0.2523, "step": 3925 }, { "epoch": 0.2151891803099162, "grad_norm": 0.23291754722595215, "learning_rate": 3.586420879722577e-05, "loss": 0.2507, "step": 3930 }, { "epoch": 0.21546295789300773, "grad_norm": 0.2571345567703247, "learning_rate": 3.590983756159884e-05, "loss": 0.2527, "step": 3935 }, { "epoch": 0.21573673547609923, "grad_norm": 0.24220170080661774, "learning_rate": 3.5955466325971894e-05, "loss": 0.2487, "step": 3940 }, { "epoch": 0.21601051305919072, "grad_norm": 0.28360262513160706, "learning_rate": 3.600109509034496e-05, "loss": 0.2498, "step": 3945 }, { "epoch": 0.21628429064228222, "grad_norm": 0.2725405693054199, "learning_rate": 3.604672385471802e-05, "loss": 0.2539, "step": 3950 }, { "epoch": 0.2165580682253737, "grad_norm": 0.23278482258319855, "learning_rate": 3.609235261909108e-05, "loss": 0.2559, "step": 3955 }, { "epoch": 0.2168318458084652, "grad_norm": 0.2895863950252533, "learning_rate": 3.6137981383464134e-05, "loss": 0.2539, "step": 3960 }, { "epoch": 0.2171056233915567, "grad_norm": 0.2898024320602417, "learning_rate": 3.61836101478372e-05, "loss": 0.2438, "step": 3965 }, { "epoch": 0.2173794009746482, "grad_norm": 0.2916575074195862, "learning_rate": 3.622923891221026e-05, "loss": 0.2486, "step": 3970 }, { "epoch": 0.2176531785577397, "grad_norm": 0.26973363757133484, "learning_rate": 3.627486767658332e-05, "loss": 0.2403, "step": 3975 }, { "epoch": 0.21792695614083119, "grad_norm": 0.2503397762775421, "learning_rate": 3.632049644095638e-05, "loss": 0.2454, "step": 3980 }, { "epoch": 0.21820073372392268, "grad_norm": 0.2584381401538849, "learning_rate": 3.6366125205329446e-05, "loss": 0.2523, "step": 3985 }, { "epoch": 0.21847451130701417, "grad_norm": 0.25577670335769653, "learning_rate": 3.64117539697025e-05, "loss": 0.2516, "step": 3990 }, { "epoch": 0.21874828889010567, "grad_norm": 0.2689623534679413, "learning_rate": 3.645738273407556e-05, "loss": 0.2539, "step": 3995 }, { "epoch": 0.21902206647319716, "grad_norm": 0.251787394285202, "learning_rate": 3.650301149844862e-05, "loss": 0.2532, "step": 4000 }, { "epoch": 0.21929584405628866, "grad_norm": 0.27519556879997253, "learning_rate": 3.6548640262821686e-05, "loss": 0.2555, "step": 4005 }, { "epoch": 0.21956962163938018, "grad_norm": 0.24165867269039154, "learning_rate": 3.659426902719474e-05, "loss": 0.2588, "step": 4010 }, { "epoch": 0.21984339922247167, "grad_norm": 0.3036324083805084, "learning_rate": 3.663989779156781e-05, "loss": 0.2569, "step": 4015 }, { "epoch": 0.22011717680556317, "grad_norm": 0.2407635599374771, "learning_rate": 3.668552655594087e-05, "loss": 0.2544, "step": 4020 }, { "epoch": 0.22039095438865466, "grad_norm": 0.28754857182502747, "learning_rate": 3.673115532031393e-05, "loss": 0.2524, "step": 4025 }, { "epoch": 0.22066473197174616, "grad_norm": 0.2555578052997589, "learning_rate": 3.677678408468699e-05, "loss": 0.2438, "step": 4030 }, { "epoch": 0.22093850955483765, "grad_norm": 0.24883520603179932, "learning_rate": 3.682241284906005e-05, "loss": 0.2517, "step": 4035 }, { "epoch": 0.22121228713792915, "grad_norm": 0.2691606283187866, "learning_rate": 3.6868041613433104e-05, "loss": 0.2474, "step": 4040 }, { "epoch": 0.22148606472102064, "grad_norm": 0.29815948009490967, "learning_rate": 3.691367037780617e-05, "loss": 0.2505, "step": 4045 }, { "epoch": 0.22175984230411214, "grad_norm": 0.2556496858596802, "learning_rate": 3.695929914217923e-05, "loss": 0.2558, "step": 4050 }, { "epoch": 0.22203361988720363, "grad_norm": 0.28370556235313416, "learning_rate": 3.7004927906552295e-05, "loss": 0.2434, "step": 4055 }, { "epoch": 0.22230739747029513, "grad_norm": 0.2623650133609772, "learning_rate": 3.705055667092535e-05, "loss": 0.2491, "step": 4060 }, { "epoch": 0.22258117505338662, "grad_norm": 0.2653619945049286, "learning_rate": 3.7096185435298415e-05, "loss": 0.246, "step": 4065 }, { "epoch": 0.22285495263647812, "grad_norm": 0.36086031794548035, "learning_rate": 3.714181419967148e-05, "loss": 0.2625, "step": 4070 }, { "epoch": 0.2231287302195696, "grad_norm": 0.24896885454654694, "learning_rate": 3.7187442964044536e-05, "loss": 0.2415, "step": 4075 }, { "epoch": 0.2234025078026611, "grad_norm": 0.2627232074737549, "learning_rate": 3.723307172841759e-05, "loss": 0.2619, "step": 4080 }, { "epoch": 0.22367628538575263, "grad_norm": 0.281186044216156, "learning_rate": 3.7278700492790656e-05, "loss": 0.2492, "step": 4085 }, { "epoch": 0.22395006296884412, "grad_norm": 0.31437551975250244, "learning_rate": 3.732432925716372e-05, "loss": 0.2543, "step": 4090 }, { "epoch": 0.22422384055193562, "grad_norm": 0.31571751832962036, "learning_rate": 3.7369958021536776e-05, "loss": 0.2536, "step": 4095 }, { "epoch": 0.2244976181350271, "grad_norm": 0.26452699303627014, "learning_rate": 3.741558678590984e-05, "loss": 0.2428, "step": 4100 }, { "epoch": 0.2247713957181186, "grad_norm": 0.2603134214878082, "learning_rate": 3.7461215550282904e-05, "loss": 0.2481, "step": 4105 }, { "epoch": 0.2250451733012101, "grad_norm": 0.2569795846939087, "learning_rate": 3.750684431465596e-05, "loss": 0.2451, "step": 4110 }, { "epoch": 0.2253189508843016, "grad_norm": 0.25072839856147766, "learning_rate": 3.7552473079029024e-05, "loss": 0.2583, "step": 4115 }, { "epoch": 0.2255927284673931, "grad_norm": 0.3084949254989624, "learning_rate": 3.759810184340208e-05, "loss": 0.245, "step": 4120 }, { "epoch": 0.22586650605048458, "grad_norm": 0.31286874413490295, "learning_rate": 3.764373060777514e-05, "loss": 0.2551, "step": 4125 }, { "epoch": 0.22614028363357608, "grad_norm": 0.3393649458885193, "learning_rate": 3.76893593721482e-05, "loss": 0.2549, "step": 4130 }, { "epoch": 0.22641406121666757, "grad_norm": 0.37033572793006897, "learning_rate": 3.7734988136521265e-05, "loss": 0.2492, "step": 4135 }, { "epoch": 0.22668783879975907, "grad_norm": 0.3923548460006714, "learning_rate": 3.778061690089433e-05, "loss": 0.2565, "step": 4140 }, { "epoch": 0.22696161638285056, "grad_norm": 0.34154197573661804, "learning_rate": 3.7826245665267385e-05, "loss": 0.2591, "step": 4145 }, { "epoch": 0.22723539396594206, "grad_norm": 0.25066590309143066, "learning_rate": 3.787187442964045e-05, "loss": 0.2514, "step": 4150 }, { "epoch": 0.22750917154903358, "grad_norm": 0.3424004018306732, "learning_rate": 3.791750319401351e-05, "loss": 0.2583, "step": 4155 }, { "epoch": 0.22778294913212507, "grad_norm": 0.216905415058136, "learning_rate": 3.796313195838657e-05, "loss": 0.2444, "step": 4160 }, { "epoch": 0.22805672671521657, "grad_norm": 0.25271832942962646, "learning_rate": 3.8008760722759626e-05, "loss": 0.2517, "step": 4165 }, { "epoch": 0.22833050429830806, "grad_norm": 0.21799437701702118, "learning_rate": 3.805438948713269e-05, "loss": 0.2509, "step": 4170 }, { "epoch": 0.22860428188139956, "grad_norm": 0.21871104836463928, "learning_rate": 3.810001825150575e-05, "loss": 0.2507, "step": 4175 }, { "epoch": 0.22887805946449105, "grad_norm": 0.22396697103977203, "learning_rate": 3.814564701587881e-05, "loss": 0.2454, "step": 4180 }, { "epoch": 0.22915183704758255, "grad_norm": 0.20412994921207428, "learning_rate": 3.819127578025187e-05, "loss": 0.2448, "step": 4185 }, { "epoch": 0.22942561463067404, "grad_norm": 0.23763421177864075, "learning_rate": 3.823690454462494e-05, "loss": 0.2485, "step": 4190 }, { "epoch": 0.22969939221376554, "grad_norm": 0.2564498782157898, "learning_rate": 3.8282533308997994e-05, "loss": 0.2623, "step": 4195 }, { "epoch": 0.22997316979685703, "grad_norm": 0.22358766198158264, "learning_rate": 3.832816207337106e-05, "loss": 0.2542, "step": 4200 }, { "epoch": 0.23024694737994852, "grad_norm": 0.243678018450737, "learning_rate": 3.8373790837744114e-05, "loss": 0.2623, "step": 4205 }, { "epoch": 0.23052072496304002, "grad_norm": 0.2764289975166321, "learning_rate": 3.841941960211718e-05, "loss": 0.2487, "step": 4210 }, { "epoch": 0.2307945025461315, "grad_norm": 0.31861135363578796, "learning_rate": 3.8465048366490234e-05, "loss": 0.2529, "step": 4215 }, { "epoch": 0.231068280129223, "grad_norm": 0.2055504471063614, "learning_rate": 3.85106771308633e-05, "loss": 0.2436, "step": 4220 }, { "epoch": 0.2313420577123145, "grad_norm": 0.24131885170936584, "learning_rate": 3.855630589523636e-05, "loss": 0.2576, "step": 4225 }, { "epoch": 0.23161583529540603, "grad_norm": 0.23356561362743378, "learning_rate": 3.860193465960942e-05, "loss": 0.2605, "step": 4230 }, { "epoch": 0.23188961287849752, "grad_norm": 0.2783644199371338, "learning_rate": 3.864756342398248e-05, "loss": 0.2436, "step": 4235 }, { "epoch": 0.23216339046158901, "grad_norm": 0.32564395666122437, "learning_rate": 3.8693192188355545e-05, "loss": 0.2456, "step": 4240 }, { "epoch": 0.2324371680446805, "grad_norm": 0.3205137848854065, "learning_rate": 3.87388209527286e-05, "loss": 0.2458, "step": 4245 }, { "epoch": 0.232710945627772, "grad_norm": 0.22009794414043427, "learning_rate": 3.878444971710166e-05, "loss": 0.2622, "step": 4250 }, { "epoch": 0.2329847232108635, "grad_norm": 0.34533926844596863, "learning_rate": 3.883007848147472e-05, "loss": 0.251, "step": 4255 }, { "epoch": 0.233258500793955, "grad_norm": 0.30947357416152954, "learning_rate": 3.8875707245847786e-05, "loss": 0.2496, "step": 4260 }, { "epoch": 0.2335322783770465, "grad_norm": 0.27745816111564636, "learning_rate": 3.892133601022084e-05, "loss": 0.2528, "step": 4265 }, { "epoch": 0.23380605596013798, "grad_norm": 0.27023571729660034, "learning_rate": 3.8966964774593907e-05, "loss": 0.2477, "step": 4270 }, { "epoch": 0.23407983354322948, "grad_norm": 0.3985164761543274, "learning_rate": 3.901259353896697e-05, "loss": 0.2565, "step": 4275 }, { "epoch": 0.23435361112632097, "grad_norm": 0.2487044632434845, "learning_rate": 3.905822230334003e-05, "loss": 0.2554, "step": 4280 }, { "epoch": 0.23462738870941247, "grad_norm": 0.24924829602241516, "learning_rate": 3.910385106771309e-05, "loss": 0.2575, "step": 4285 }, { "epoch": 0.23490116629250396, "grad_norm": 0.23408183455467224, "learning_rate": 3.914947983208615e-05, "loss": 0.2478, "step": 4290 }, { "epoch": 0.23517494387559545, "grad_norm": 0.26515111327171326, "learning_rate": 3.919510859645921e-05, "loss": 0.2444, "step": 4295 }, { "epoch": 0.23544872145868695, "grad_norm": 0.255464106798172, "learning_rate": 3.924073736083227e-05, "loss": 0.236, "step": 4300 }, { "epoch": 0.23572249904177847, "grad_norm": 0.24824002385139465, "learning_rate": 3.928636612520533e-05, "loss": 0.2457, "step": 4305 }, { "epoch": 0.23599627662486997, "grad_norm": 0.22883476316928864, "learning_rate": 3.9331994889578395e-05, "loss": 0.2448, "step": 4310 }, { "epoch": 0.23627005420796146, "grad_norm": 0.23084445297718048, "learning_rate": 3.937762365395145e-05, "loss": 0.2542, "step": 4315 }, { "epoch": 0.23654383179105296, "grad_norm": 0.24560359120368958, "learning_rate": 3.9423252418324515e-05, "loss": 0.2502, "step": 4320 }, { "epoch": 0.23681760937414445, "grad_norm": 0.2334827482700348, "learning_rate": 3.946888118269758e-05, "loss": 0.2454, "step": 4325 }, { "epoch": 0.23709138695723594, "grad_norm": 0.26481425762176514, "learning_rate": 3.9514509947070636e-05, "loss": 0.2657, "step": 4330 }, { "epoch": 0.23736516454032744, "grad_norm": 0.3035723865032196, "learning_rate": 3.956013871144369e-05, "loss": 0.2519, "step": 4335 }, { "epoch": 0.23763894212341893, "grad_norm": 0.2702432870864868, "learning_rate": 3.9605767475816756e-05, "loss": 0.2525, "step": 4340 }, { "epoch": 0.23791271970651043, "grad_norm": 0.26781755685806274, "learning_rate": 3.965139624018982e-05, "loss": 0.2577, "step": 4345 }, { "epoch": 0.23818649728960192, "grad_norm": 0.2492181360721588, "learning_rate": 3.9697025004562876e-05, "loss": 0.2459, "step": 4350 }, { "epoch": 0.23846027487269342, "grad_norm": 0.2526409327983856, "learning_rate": 3.974265376893594e-05, "loss": 0.2606, "step": 4355 }, { "epoch": 0.2387340524557849, "grad_norm": 0.3085223138332367, "learning_rate": 3.9788282533309003e-05, "loss": 0.2518, "step": 4360 }, { "epoch": 0.2390078300388764, "grad_norm": 0.2804242968559265, "learning_rate": 3.983391129768206e-05, "loss": 0.251, "step": 4365 }, { "epoch": 0.2392816076219679, "grad_norm": 0.2495495229959488, "learning_rate": 3.9879540062055124e-05, "loss": 0.2577, "step": 4370 }, { "epoch": 0.23955538520505942, "grad_norm": 0.26390478014945984, "learning_rate": 3.992516882642818e-05, "loss": 0.2514, "step": 4375 }, { "epoch": 0.23982916278815092, "grad_norm": 0.2628825604915619, "learning_rate": 3.9970797590801244e-05, "loss": 0.2524, "step": 4380 }, { "epoch": 0.2401029403712424, "grad_norm": 0.27005523443222046, "learning_rate": 4.00164263551743e-05, "loss": 0.2524, "step": 4385 }, { "epoch": 0.2403767179543339, "grad_norm": 0.24057400226593018, "learning_rate": 4.0062055119547365e-05, "loss": 0.2514, "step": 4390 }, { "epoch": 0.2406504955374254, "grad_norm": 0.1975509375333786, "learning_rate": 4.010768388392043e-05, "loss": 0.2543, "step": 4395 }, { "epoch": 0.2409242731205169, "grad_norm": 0.21429501473903656, "learning_rate": 4.0153312648293485e-05, "loss": 0.2409, "step": 4400 }, { "epoch": 0.2411980507036084, "grad_norm": 0.2592519223690033, "learning_rate": 4.019894141266655e-05, "loss": 0.2416, "step": 4405 }, { "epoch": 0.24147182828669989, "grad_norm": 0.3468511700630188, "learning_rate": 4.024457017703961e-05, "loss": 0.2546, "step": 4410 }, { "epoch": 0.24174560586979138, "grad_norm": 0.2455464005470276, "learning_rate": 4.029019894141267e-05, "loss": 0.2537, "step": 4415 }, { "epoch": 0.24201938345288287, "grad_norm": 0.26714280247688293, "learning_rate": 4.0335827705785726e-05, "loss": 0.2507, "step": 4420 }, { "epoch": 0.24229316103597437, "grad_norm": 0.27414068579673767, "learning_rate": 4.038145647015879e-05, "loss": 0.2485, "step": 4425 }, { "epoch": 0.24256693861906586, "grad_norm": 0.24360325932502747, "learning_rate": 4.042708523453185e-05, "loss": 0.2523, "step": 4430 }, { "epoch": 0.24284071620215736, "grad_norm": 0.2382192611694336, "learning_rate": 4.047271399890491e-05, "loss": 0.2393, "step": 4435 }, { "epoch": 0.24311449378524885, "grad_norm": 0.3047187328338623, "learning_rate": 4.051834276327797e-05, "loss": 0.2406, "step": 4440 }, { "epoch": 0.24338827136834035, "grad_norm": 0.2553282678127289, "learning_rate": 4.056397152765104e-05, "loss": 0.2508, "step": 4445 }, { "epoch": 0.24366204895143187, "grad_norm": 0.29552724957466125, "learning_rate": 4.0609600292024094e-05, "loss": 0.2527, "step": 4450 }, { "epoch": 0.24393582653452336, "grad_norm": 0.2704537510871887, "learning_rate": 4.065522905639716e-05, "loss": 0.2404, "step": 4455 }, { "epoch": 0.24420960411761486, "grad_norm": 0.25399988889694214, "learning_rate": 4.0700857820770214e-05, "loss": 0.2539, "step": 4460 }, { "epoch": 0.24448338170070635, "grad_norm": 0.26058298349380493, "learning_rate": 4.074648658514328e-05, "loss": 0.255, "step": 4465 }, { "epoch": 0.24475715928379785, "grad_norm": 0.20975327491760254, "learning_rate": 4.0792115349516334e-05, "loss": 0.254, "step": 4470 }, { "epoch": 0.24503093686688934, "grad_norm": 0.21892143785953522, "learning_rate": 4.08377441138894e-05, "loss": 0.243, "step": 4475 }, { "epoch": 0.24530471444998084, "grad_norm": 0.2702433466911316, "learning_rate": 4.088337287826246e-05, "loss": 0.2594, "step": 4480 }, { "epoch": 0.24557849203307233, "grad_norm": 0.288371205329895, "learning_rate": 4.092900164263552e-05, "loss": 0.252, "step": 4485 }, { "epoch": 0.24585226961616383, "grad_norm": 0.2604684829711914, "learning_rate": 4.097463040700858e-05, "loss": 0.2546, "step": 4490 }, { "epoch": 0.24612604719925532, "grad_norm": 0.21555176377296448, "learning_rate": 4.102025917138164e-05, "loss": 0.2422, "step": 4495 }, { "epoch": 0.24639982478234682, "grad_norm": 0.2954220473766327, "learning_rate": 4.10658879357547e-05, "loss": 0.2516, "step": 4500 }, { "epoch": 0.2466736023654383, "grad_norm": 0.3165931701660156, "learning_rate": 4.111151670012776e-05, "loss": 0.2411, "step": 4505 }, { "epoch": 0.2469473799485298, "grad_norm": 0.25518834590911865, "learning_rate": 4.115714546450082e-05, "loss": 0.2376, "step": 4510 }, { "epoch": 0.2472211575316213, "grad_norm": 0.27061235904693604, "learning_rate": 4.1202774228873886e-05, "loss": 0.2607, "step": 4515 }, { "epoch": 0.2474949351147128, "grad_norm": 0.22066497802734375, "learning_rate": 4.124840299324694e-05, "loss": 0.2517, "step": 4520 }, { "epoch": 0.24776871269780432, "grad_norm": 0.27854692935943604, "learning_rate": 4.1294031757620007e-05, "loss": 0.2488, "step": 4525 }, { "epoch": 0.2480424902808958, "grad_norm": 0.2644613981246948, "learning_rate": 4.133966052199307e-05, "loss": 0.2499, "step": 4530 }, { "epoch": 0.2483162678639873, "grad_norm": 0.2273409366607666, "learning_rate": 4.138528928636613e-05, "loss": 0.2437, "step": 4535 }, { "epoch": 0.2485900454470788, "grad_norm": 0.23294223845005035, "learning_rate": 4.1430918050739184e-05, "loss": 0.2412, "step": 4540 }, { "epoch": 0.2488638230301703, "grad_norm": 0.3166835904121399, "learning_rate": 4.147654681511225e-05, "loss": 0.2468, "step": 4545 }, { "epoch": 0.2491376006132618, "grad_norm": 0.29990553855895996, "learning_rate": 4.152217557948531e-05, "loss": 0.2606, "step": 4550 }, { "epoch": 0.24941137819635328, "grad_norm": 0.28057220578193665, "learning_rate": 4.156780434385837e-05, "loss": 0.2473, "step": 4555 }, { "epoch": 0.24968515577944478, "grad_norm": 0.2545385956764221, "learning_rate": 4.161343310823143e-05, "loss": 0.2488, "step": 4560 }, { "epoch": 0.24995893336253627, "grad_norm": 0.2576456367969513, "learning_rate": 4.1659061872604495e-05, "loss": 0.251, "step": 4565 }, { "epoch": 0.25023271094562777, "grad_norm": 0.2091209590435028, "learning_rate": 4.170469063697755e-05, "loss": 0.2548, "step": 4570 }, { "epoch": 0.25050648852871926, "grad_norm": 0.24470014870166779, "learning_rate": 4.1750319401350615e-05, "loss": 0.2541, "step": 4575 }, { "epoch": 0.25078026611181076, "grad_norm": 0.32664698362350464, "learning_rate": 4.179594816572367e-05, "loss": 0.2581, "step": 4580 }, { "epoch": 0.25105404369490225, "grad_norm": 0.2514382600784302, "learning_rate": 4.1841576930096736e-05, "loss": 0.2421, "step": 4585 }, { "epoch": 0.25132782127799375, "grad_norm": 0.27646756172180176, "learning_rate": 4.188720569446979e-05, "loss": 0.2563, "step": 4590 }, { "epoch": 0.25160159886108524, "grad_norm": 0.2526237964630127, "learning_rate": 4.1932834458842856e-05, "loss": 0.2602, "step": 4595 }, { "epoch": 0.25187537644417674, "grad_norm": 0.28141453862190247, "learning_rate": 4.197846322321592e-05, "loss": 0.2548, "step": 4600 }, { "epoch": 0.25214915402726823, "grad_norm": 0.25015655159950256, "learning_rate": 4.2024091987588976e-05, "loss": 0.257, "step": 4605 }, { "epoch": 0.2524229316103597, "grad_norm": 0.28108876943588257, "learning_rate": 4.206972075196204e-05, "loss": 0.2481, "step": 4610 }, { "epoch": 0.2526967091934512, "grad_norm": 0.2608867883682251, "learning_rate": 4.2115349516335103e-05, "loss": 0.2406, "step": 4615 }, { "epoch": 0.2529704867765427, "grad_norm": 0.24742445349693298, "learning_rate": 4.216097828070816e-05, "loss": 0.2473, "step": 4620 }, { "epoch": 0.2532442643596342, "grad_norm": 0.2687477767467499, "learning_rate": 4.220660704508122e-05, "loss": 0.2556, "step": 4625 }, { "epoch": 0.2535180419427257, "grad_norm": 0.2623775601387024, "learning_rate": 4.225223580945428e-05, "loss": 0.2482, "step": 4630 }, { "epoch": 0.25379181952581725, "grad_norm": 0.2663487493991852, "learning_rate": 4.2297864573827344e-05, "loss": 0.2437, "step": 4635 }, { "epoch": 0.25406559710890875, "grad_norm": 0.27905911207199097, "learning_rate": 4.23434933382004e-05, "loss": 0.2572, "step": 4640 }, { "epoch": 0.25433937469200024, "grad_norm": 0.22116625308990479, "learning_rate": 4.2389122102573465e-05, "loss": 0.2402, "step": 4645 }, { "epoch": 0.25461315227509174, "grad_norm": 0.2481691688299179, "learning_rate": 4.243475086694653e-05, "loss": 0.251, "step": 4650 }, { "epoch": 0.25488692985818323, "grad_norm": 0.24127717316150665, "learning_rate": 4.2480379631319585e-05, "loss": 0.2358, "step": 4655 }, { "epoch": 0.2551607074412747, "grad_norm": 0.2610842287540436, "learning_rate": 4.252600839569265e-05, "loss": 0.2581, "step": 4660 }, { "epoch": 0.2554344850243662, "grad_norm": 0.23170676827430725, "learning_rate": 4.2571637160065705e-05, "loss": 0.2401, "step": 4665 }, { "epoch": 0.2557082626074577, "grad_norm": 0.22332070767879486, "learning_rate": 4.261726592443877e-05, "loss": 0.2496, "step": 4670 }, { "epoch": 0.2559820401905492, "grad_norm": 0.2975783944129944, "learning_rate": 4.2662894688811826e-05, "loss": 0.2457, "step": 4675 }, { "epoch": 0.2562558177736407, "grad_norm": 0.26756229996681213, "learning_rate": 4.270852345318489e-05, "loss": 0.248, "step": 4680 }, { "epoch": 0.2565295953567322, "grad_norm": 0.2556333541870117, "learning_rate": 4.275415221755795e-05, "loss": 0.2474, "step": 4685 }, { "epoch": 0.2568033729398237, "grad_norm": 0.2974921464920044, "learning_rate": 4.279978098193101e-05, "loss": 0.2591, "step": 4690 }, { "epoch": 0.2570771505229152, "grad_norm": 0.2575792372226715, "learning_rate": 4.284540974630407e-05, "loss": 0.244, "step": 4695 }, { "epoch": 0.2573509281060067, "grad_norm": 0.28308144211769104, "learning_rate": 4.289103851067714e-05, "loss": 0.2464, "step": 4700 }, { "epoch": 0.2576247056890982, "grad_norm": 0.242915540933609, "learning_rate": 4.2936667275050194e-05, "loss": 0.2427, "step": 4705 }, { "epoch": 0.25789848327218967, "grad_norm": 0.22996677458286285, "learning_rate": 4.298229603942325e-05, "loss": 0.2478, "step": 4710 }, { "epoch": 0.25817226085528117, "grad_norm": 0.26830223202705383, "learning_rate": 4.3027924803796314e-05, "loss": 0.2413, "step": 4715 }, { "epoch": 0.25844603843837266, "grad_norm": 0.295215368270874, "learning_rate": 4.307355356816938e-05, "loss": 0.2345, "step": 4720 }, { "epoch": 0.25871981602146416, "grad_norm": 0.42018476128578186, "learning_rate": 4.3119182332542434e-05, "loss": 0.25, "step": 4725 }, { "epoch": 0.25899359360455565, "grad_norm": 0.27007439732551575, "learning_rate": 4.31648110969155e-05, "loss": 0.2444, "step": 4730 }, { "epoch": 0.25926737118764714, "grad_norm": 0.306768000125885, "learning_rate": 4.321043986128856e-05, "loss": 0.2439, "step": 4735 }, { "epoch": 0.25954114877073864, "grad_norm": 0.345150351524353, "learning_rate": 4.325606862566162e-05, "loss": 0.2625, "step": 4740 }, { "epoch": 0.25981492635383013, "grad_norm": 0.31858259439468384, "learning_rate": 4.330169739003468e-05, "loss": 0.2341, "step": 4745 }, { "epoch": 0.26008870393692163, "grad_norm": 0.2754937708377838, "learning_rate": 4.334732615440774e-05, "loss": 0.2484, "step": 4750 }, { "epoch": 0.2603624815200131, "grad_norm": 0.2932422459125519, "learning_rate": 4.33929549187808e-05, "loss": 0.2499, "step": 4755 }, { "epoch": 0.2606362591031046, "grad_norm": 0.2635485827922821, "learning_rate": 4.343858368315386e-05, "loss": 0.2513, "step": 4760 }, { "epoch": 0.2609100366861961, "grad_norm": 0.2588031589984894, "learning_rate": 4.348421244752692e-05, "loss": 0.2415, "step": 4765 }, { "epoch": 0.2611838142692876, "grad_norm": 0.25212639570236206, "learning_rate": 4.3529841211899986e-05, "loss": 0.253, "step": 4770 }, { "epoch": 0.2614575918523791, "grad_norm": 0.2693900167942047, "learning_rate": 4.357546997627304e-05, "loss": 0.2489, "step": 4775 }, { "epoch": 0.26173136943547065, "grad_norm": 0.22666992247104645, "learning_rate": 4.3621098740646106e-05, "loss": 0.2512, "step": 4780 }, { "epoch": 0.26200514701856215, "grad_norm": 0.24642427265644073, "learning_rate": 4.366672750501917e-05, "loss": 0.2571, "step": 4785 }, { "epoch": 0.26227892460165364, "grad_norm": 0.26301202178001404, "learning_rate": 4.371235626939223e-05, "loss": 0.2388, "step": 4790 }, { "epoch": 0.26255270218474513, "grad_norm": 0.25479286909103394, "learning_rate": 4.3757985033765284e-05, "loss": 0.2491, "step": 4795 }, { "epoch": 0.26282647976783663, "grad_norm": 0.27650463581085205, "learning_rate": 4.380361379813835e-05, "loss": 0.2576, "step": 4800 }, { "epoch": 0.2631002573509281, "grad_norm": 0.28295058012008667, "learning_rate": 4.384924256251141e-05, "loss": 0.2409, "step": 4805 }, { "epoch": 0.2633740349340196, "grad_norm": 0.32258567214012146, "learning_rate": 4.389487132688447e-05, "loss": 0.2425, "step": 4810 }, { "epoch": 0.2636478125171111, "grad_norm": 0.26618123054504395, "learning_rate": 4.394050009125753e-05, "loss": 0.2512, "step": 4815 }, { "epoch": 0.2639215901002026, "grad_norm": 0.21700382232666016, "learning_rate": 4.3986128855630595e-05, "loss": 0.2501, "step": 4820 }, { "epoch": 0.2641953676832941, "grad_norm": 0.2243664264678955, "learning_rate": 4.403175762000365e-05, "loss": 0.2415, "step": 4825 }, { "epoch": 0.2644691452663856, "grad_norm": 0.2300005406141281, "learning_rate": 4.4077386384376715e-05, "loss": 0.2365, "step": 4830 }, { "epoch": 0.2647429228494771, "grad_norm": 0.30665871500968933, "learning_rate": 4.412301514874977e-05, "loss": 0.2441, "step": 4835 }, { "epoch": 0.2650167004325686, "grad_norm": 0.26554012298583984, "learning_rate": 4.4168643913122835e-05, "loss": 0.2562, "step": 4840 }, { "epoch": 0.2652904780156601, "grad_norm": 0.29518750309944153, "learning_rate": 4.421427267749589e-05, "loss": 0.2435, "step": 4845 }, { "epoch": 0.2655642555987516, "grad_norm": 0.2593269944190979, "learning_rate": 4.4259901441868956e-05, "loss": 0.2495, "step": 4850 }, { "epoch": 0.26583803318184307, "grad_norm": 0.2857384979724884, "learning_rate": 4.430553020624202e-05, "loss": 0.2414, "step": 4855 }, { "epoch": 0.26611181076493456, "grad_norm": 0.23451922833919525, "learning_rate": 4.4351158970615076e-05, "loss": 0.2446, "step": 4860 }, { "epoch": 0.26638558834802606, "grad_norm": 0.2816329300403595, "learning_rate": 4.439678773498814e-05, "loss": 0.2492, "step": 4865 }, { "epoch": 0.26665936593111755, "grad_norm": 0.2573826313018799, "learning_rate": 4.44424164993612e-05, "loss": 0.2405, "step": 4870 }, { "epoch": 0.26693314351420905, "grad_norm": 0.2978557050228119, "learning_rate": 4.448804526373426e-05, "loss": 0.2379, "step": 4875 }, { "epoch": 0.26720692109730054, "grad_norm": 0.30081456899642944, "learning_rate": 4.453367402810732e-05, "loss": 0.2646, "step": 4880 }, { "epoch": 0.26748069868039204, "grad_norm": 0.30161619186401367, "learning_rate": 4.457930279248038e-05, "loss": 0.244, "step": 4885 }, { "epoch": 0.26775447626348353, "grad_norm": 0.26000988483428955, "learning_rate": 4.4624931556853444e-05, "loss": 0.2508, "step": 4890 }, { "epoch": 0.268028253846575, "grad_norm": 0.28259846568107605, "learning_rate": 4.46705603212265e-05, "loss": 0.2516, "step": 4895 }, { "epoch": 0.2683020314296665, "grad_norm": 0.271776407957077, "learning_rate": 4.4716189085599564e-05, "loss": 0.2417, "step": 4900 }, { "epoch": 0.268575809012758, "grad_norm": 0.2240038514137268, "learning_rate": 4.476181784997263e-05, "loss": 0.2317, "step": 4905 }, { "epoch": 0.2688495865958495, "grad_norm": 0.27858081459999084, "learning_rate": 4.480744661434569e-05, "loss": 0.244, "step": 4910 }, { "epoch": 0.269123364178941, "grad_norm": 0.2518106997013092, "learning_rate": 4.485307537871875e-05, "loss": 0.2422, "step": 4915 }, { "epoch": 0.2693971417620325, "grad_norm": 0.2550513446331024, "learning_rate": 4.4898704143091805e-05, "loss": 0.2563, "step": 4920 }, { "epoch": 0.269670919345124, "grad_norm": 0.2834402918815613, "learning_rate": 4.494433290746487e-05, "loss": 0.2424, "step": 4925 }, { "epoch": 0.26994469692821554, "grad_norm": 0.22261463105678558, "learning_rate": 4.4989961671837926e-05, "loss": 0.2398, "step": 4930 }, { "epoch": 0.27021847451130704, "grad_norm": 0.3155103623867035, "learning_rate": 4.503559043621099e-05, "loss": 0.2491, "step": 4935 }, { "epoch": 0.27049225209439853, "grad_norm": 0.27382588386535645, "learning_rate": 4.508121920058405e-05, "loss": 0.241, "step": 4940 }, { "epoch": 0.27076602967749, "grad_norm": 0.2868209779262543, "learning_rate": 4.512684796495711e-05, "loss": 0.2479, "step": 4945 }, { "epoch": 0.2710398072605815, "grad_norm": 0.26381435990333557, "learning_rate": 4.517247672933017e-05, "loss": 0.2476, "step": 4950 }, { "epoch": 0.271313584843673, "grad_norm": 0.2682936191558838, "learning_rate": 4.521810549370324e-05, "loss": 0.2427, "step": 4955 }, { "epoch": 0.2715873624267645, "grad_norm": 0.2749517560005188, "learning_rate": 4.5263734258076293e-05, "loss": 0.2562, "step": 4960 }, { "epoch": 0.271861140009856, "grad_norm": 0.3272988200187683, "learning_rate": 4.530936302244935e-05, "loss": 0.2531, "step": 4965 }, { "epoch": 0.2721349175929475, "grad_norm": 0.31717947125434875, "learning_rate": 4.5354991786822414e-05, "loss": 0.2551, "step": 4970 }, { "epoch": 0.272408695176039, "grad_norm": 0.2566382884979248, "learning_rate": 4.540062055119548e-05, "loss": 0.2398, "step": 4975 }, { "epoch": 0.2726824727591305, "grad_norm": 0.217291921377182, "learning_rate": 4.5446249315568534e-05, "loss": 0.2437, "step": 4980 }, { "epoch": 0.272956250342222, "grad_norm": 0.27652454376220703, "learning_rate": 4.54918780799416e-05, "loss": 0.2641, "step": 4985 }, { "epoch": 0.2732300279253135, "grad_norm": 0.2733290493488312, "learning_rate": 4.553750684431466e-05, "loss": 0.2559, "step": 4990 }, { "epoch": 0.273503805508405, "grad_norm": 0.25608813762664795, "learning_rate": 4.558313560868772e-05, "loss": 0.2491, "step": 4995 }, { "epoch": 0.27377758309149647, "grad_norm": 0.252150297164917, "learning_rate": 4.5628764373060775e-05, "loss": 0.2462, "step": 5000 }, { "epoch": 0.27405136067458796, "grad_norm": 0.2151588499546051, "learning_rate": 4.567439313743384e-05, "loss": 0.2519, "step": 5005 }, { "epoch": 0.27432513825767946, "grad_norm": 0.2799592614173889, "learning_rate": 4.57200219018069e-05, "loss": 0.2487, "step": 5010 }, { "epoch": 0.27459891584077095, "grad_norm": 0.2530966103076935, "learning_rate": 4.576565066617996e-05, "loss": 0.2442, "step": 5015 }, { "epoch": 0.27487269342386245, "grad_norm": 0.2450266033411026, "learning_rate": 4.581127943055302e-05, "loss": 0.2545, "step": 5020 }, { "epoch": 0.27514647100695394, "grad_norm": 0.22896835207939148, "learning_rate": 4.5856908194926086e-05, "loss": 0.2483, "step": 5025 }, { "epoch": 0.27542024859004544, "grad_norm": 0.27197572588920593, "learning_rate": 4.590253695929914e-05, "loss": 0.2502, "step": 5030 }, { "epoch": 0.27569402617313693, "grad_norm": 0.29672420024871826, "learning_rate": 4.5948165723672206e-05, "loss": 0.2409, "step": 5035 }, { "epoch": 0.2759678037562284, "grad_norm": 0.3170616328716278, "learning_rate": 4.599379448804526e-05, "loss": 0.2497, "step": 5040 }, { "epoch": 0.2762415813393199, "grad_norm": 0.27060043811798096, "learning_rate": 4.603942325241833e-05, "loss": 0.259, "step": 5045 }, { "epoch": 0.2765153589224114, "grad_norm": 0.24112217128276825, "learning_rate": 4.6085052016791384e-05, "loss": 0.2524, "step": 5050 }, { "epoch": 0.2767891365055029, "grad_norm": 0.22170273959636688, "learning_rate": 4.613068078116445e-05, "loss": 0.2465, "step": 5055 }, { "epoch": 0.2770629140885944, "grad_norm": 0.29079389572143555, "learning_rate": 4.617630954553751e-05, "loss": 0.2419, "step": 5060 }, { "epoch": 0.2773366916716859, "grad_norm": 0.2551727592945099, "learning_rate": 4.622193830991057e-05, "loss": 0.2572, "step": 5065 }, { "epoch": 0.2776104692547774, "grad_norm": 0.24457399547100067, "learning_rate": 4.626756707428363e-05, "loss": 0.2421, "step": 5070 }, { "epoch": 0.27788424683786894, "grad_norm": 0.24592556059360504, "learning_rate": 4.6313195838656695e-05, "loss": 0.2428, "step": 5075 }, { "epoch": 0.27815802442096044, "grad_norm": 0.2890145182609558, "learning_rate": 4.635882460302975e-05, "loss": 0.2524, "step": 5080 }, { "epoch": 0.27843180200405193, "grad_norm": 0.25459936261177063, "learning_rate": 4.640445336740281e-05, "loss": 0.2416, "step": 5085 }, { "epoch": 0.2787055795871434, "grad_norm": 0.2912484407424927, "learning_rate": 4.645008213177587e-05, "loss": 0.2486, "step": 5090 }, { "epoch": 0.2789793571702349, "grad_norm": 0.20449577271938324, "learning_rate": 4.6495710896148935e-05, "loss": 0.2478, "step": 5095 }, { "epoch": 0.2792531347533264, "grad_norm": 0.223332941532135, "learning_rate": 4.654133966052199e-05, "loss": 0.238, "step": 5100 }, { "epoch": 0.2795269123364179, "grad_norm": 0.26221707463264465, "learning_rate": 4.6586968424895056e-05, "loss": 0.2513, "step": 5105 }, { "epoch": 0.2798006899195094, "grad_norm": 0.3115769624710083, "learning_rate": 4.663259718926812e-05, "loss": 0.2465, "step": 5110 }, { "epoch": 0.2800744675026009, "grad_norm": 0.29245448112487793, "learning_rate": 4.667822595364118e-05, "loss": 0.2394, "step": 5115 }, { "epoch": 0.2803482450856924, "grad_norm": 0.3398092985153198, "learning_rate": 4.672385471801424e-05, "loss": 0.2503, "step": 5120 }, { "epoch": 0.2806220226687839, "grad_norm": 0.2668996751308441, "learning_rate": 4.6769483482387297e-05, "loss": 0.2381, "step": 5125 }, { "epoch": 0.2808958002518754, "grad_norm": 0.24005448818206787, "learning_rate": 4.681511224676036e-05, "loss": 0.249, "step": 5130 }, { "epoch": 0.2811695778349669, "grad_norm": 0.234859898686409, "learning_rate": 4.686074101113342e-05, "loss": 0.2451, "step": 5135 }, { "epoch": 0.28144335541805837, "grad_norm": 0.22534124553203583, "learning_rate": 4.690636977550648e-05, "loss": 0.2367, "step": 5140 }, { "epoch": 0.28171713300114987, "grad_norm": 0.2104426622390747, "learning_rate": 4.6951998539879544e-05, "loss": 0.2407, "step": 5145 }, { "epoch": 0.28199091058424136, "grad_norm": 0.31946513056755066, "learning_rate": 4.69976273042526e-05, "loss": 0.2378, "step": 5150 }, { "epoch": 0.28226468816733286, "grad_norm": 0.24618956446647644, "learning_rate": 4.7043256068625664e-05, "loss": 0.2407, "step": 5155 }, { "epoch": 0.28253846575042435, "grad_norm": 0.27385932207107544, "learning_rate": 4.708888483299873e-05, "loss": 0.2392, "step": 5160 }, { "epoch": 0.28281224333351584, "grad_norm": 0.27104654908180237, "learning_rate": 4.7134513597371785e-05, "loss": 0.2298, "step": 5165 }, { "epoch": 0.28308602091660734, "grad_norm": 0.23738500475883484, "learning_rate": 4.718014236174484e-05, "loss": 0.2363, "step": 5170 }, { "epoch": 0.28335979849969883, "grad_norm": 0.2626991271972656, "learning_rate": 4.7225771126117905e-05, "loss": 0.2463, "step": 5175 }, { "epoch": 0.28363357608279033, "grad_norm": 0.25929954648017883, "learning_rate": 4.727139989049097e-05, "loss": 0.2405, "step": 5180 }, { "epoch": 0.2839073536658818, "grad_norm": 0.23842227458953857, "learning_rate": 4.7317028654864026e-05, "loss": 0.253, "step": 5185 }, { "epoch": 0.2841811312489733, "grad_norm": 0.25073328614234924, "learning_rate": 4.736265741923709e-05, "loss": 0.2604, "step": 5190 }, { "epoch": 0.2844549088320648, "grad_norm": 0.2672322392463684, "learning_rate": 4.740828618361015e-05, "loss": 0.2501, "step": 5195 }, { "epoch": 0.2847286864151563, "grad_norm": 0.26447659730911255, "learning_rate": 4.7453914947983216e-05, "loss": 0.2475, "step": 5200 }, { "epoch": 0.2850024639982478, "grad_norm": 0.2842998802661896, "learning_rate": 4.749954371235627e-05, "loss": 0.2488, "step": 5205 }, { "epoch": 0.2852762415813393, "grad_norm": 0.258220911026001, "learning_rate": 4.754517247672933e-05, "loss": 0.2472, "step": 5210 }, { "epoch": 0.2855500191644308, "grad_norm": 0.2666436731815338, "learning_rate": 4.7590801241102393e-05, "loss": 0.2497, "step": 5215 }, { "epoch": 0.2858237967475223, "grad_norm": 0.2629897892475128, "learning_rate": 4.763643000547545e-05, "loss": 0.2404, "step": 5220 }, { "epoch": 0.28609757433061384, "grad_norm": 0.27008315920829773, "learning_rate": 4.7682058769848514e-05, "loss": 0.2455, "step": 5225 }, { "epoch": 0.28637135191370533, "grad_norm": 0.3144492208957672, "learning_rate": 4.772768753422158e-05, "loss": 0.2504, "step": 5230 }, { "epoch": 0.2866451294967968, "grad_norm": 0.316577285528183, "learning_rate": 4.7773316298594634e-05, "loss": 0.2616, "step": 5235 }, { "epoch": 0.2869189070798883, "grad_norm": 0.2862798869609833, "learning_rate": 4.78189450629677e-05, "loss": 0.2458, "step": 5240 }, { "epoch": 0.2871926846629798, "grad_norm": 0.24132895469665527, "learning_rate": 4.786457382734076e-05, "loss": 0.2563, "step": 5245 }, { "epoch": 0.2874664622460713, "grad_norm": 0.21923503279685974, "learning_rate": 4.791020259171382e-05, "loss": 0.2476, "step": 5250 }, { "epoch": 0.2877402398291628, "grad_norm": 0.2847788333892822, "learning_rate": 4.7955831356086875e-05, "loss": 0.2458, "step": 5255 }, { "epoch": 0.2880140174122543, "grad_norm": 0.22953465580940247, "learning_rate": 4.800146012045994e-05, "loss": 0.2419, "step": 5260 }, { "epoch": 0.2882877949953458, "grad_norm": 0.23712998628616333, "learning_rate": 4.8047088884833e-05, "loss": 0.2392, "step": 5265 }, { "epoch": 0.2885615725784373, "grad_norm": 0.2516734302043915, "learning_rate": 4.809271764920606e-05, "loss": 0.2472, "step": 5270 }, { "epoch": 0.2888353501615288, "grad_norm": 0.2926303446292877, "learning_rate": 4.813834641357912e-05, "loss": 0.2477, "step": 5275 }, { "epoch": 0.2891091277446203, "grad_norm": 0.2702811062335968, "learning_rate": 4.8183975177952186e-05, "loss": 0.245, "step": 5280 }, { "epoch": 0.28938290532771177, "grad_norm": 0.25918546319007874, "learning_rate": 4.822960394232525e-05, "loss": 0.2478, "step": 5285 }, { "epoch": 0.28965668291080326, "grad_norm": 0.2658770978450775, "learning_rate": 4.8275232706698306e-05, "loss": 0.2531, "step": 5290 }, { "epoch": 0.28993046049389476, "grad_norm": 0.18325720727443695, "learning_rate": 4.832086147107136e-05, "loss": 0.238, "step": 5295 }, { "epoch": 0.29020423807698625, "grad_norm": 0.2090563178062439, "learning_rate": 4.836649023544443e-05, "loss": 0.2384, "step": 5300 }, { "epoch": 0.29047801566007775, "grad_norm": 0.1833166778087616, "learning_rate": 4.8412118999817484e-05, "loss": 0.2383, "step": 5305 }, { "epoch": 0.29075179324316924, "grad_norm": 0.19381508231163025, "learning_rate": 4.845774776419055e-05, "loss": 0.2315, "step": 5310 }, { "epoch": 0.29102557082626074, "grad_norm": 0.28383132815361023, "learning_rate": 4.850337652856361e-05, "loss": 0.2472, "step": 5315 }, { "epoch": 0.29129934840935223, "grad_norm": 0.24578556418418884, "learning_rate": 4.8549005292936674e-05, "loss": 0.2346, "step": 5320 }, { "epoch": 0.2915731259924437, "grad_norm": 0.28073784708976746, "learning_rate": 4.859463405730973e-05, "loss": 0.243, "step": 5325 }, { "epoch": 0.2918469035755352, "grad_norm": 0.2827487587928772, "learning_rate": 4.8640262821682795e-05, "loss": 0.2429, "step": 5330 }, { "epoch": 0.2921206811586267, "grad_norm": 0.23348042368888855, "learning_rate": 4.868589158605585e-05, "loss": 0.2431, "step": 5335 }, { "epoch": 0.2923944587417182, "grad_norm": 0.2850850820541382, "learning_rate": 4.873152035042891e-05, "loss": 0.2519, "step": 5340 }, { "epoch": 0.2926682363248097, "grad_norm": 0.2848722040653229, "learning_rate": 4.877714911480197e-05, "loss": 0.2534, "step": 5345 }, { "epoch": 0.2929420139079012, "grad_norm": 0.2548541724681854, "learning_rate": 4.8822777879175035e-05, "loss": 0.2289, "step": 5350 }, { "epoch": 0.2932157914909927, "grad_norm": 0.2502039074897766, "learning_rate": 4.886840664354809e-05, "loss": 0.249, "step": 5355 }, { "epoch": 0.2934895690740842, "grad_norm": 0.22863313555717468, "learning_rate": 4.8914035407921156e-05, "loss": 0.2351, "step": 5360 }, { "epoch": 0.2937633466571757, "grad_norm": 0.21204836666584015, "learning_rate": 4.895966417229422e-05, "loss": 0.2448, "step": 5365 }, { "epoch": 0.29403712424026723, "grad_norm": 0.26706981658935547, "learning_rate": 4.900529293666728e-05, "loss": 0.2405, "step": 5370 }, { "epoch": 0.29431090182335873, "grad_norm": 0.2339479774236679, "learning_rate": 4.905092170104034e-05, "loss": 0.2432, "step": 5375 }, { "epoch": 0.2945846794064502, "grad_norm": 0.2512146234512329, "learning_rate": 4.9096550465413396e-05, "loss": 0.2454, "step": 5380 }, { "epoch": 0.2948584569895417, "grad_norm": 0.2613063156604767, "learning_rate": 4.914217922978646e-05, "loss": 0.2334, "step": 5385 }, { "epoch": 0.2951322345726332, "grad_norm": 0.22538071870803833, "learning_rate": 4.918780799415952e-05, "loss": 0.2561, "step": 5390 }, { "epoch": 0.2954060121557247, "grad_norm": 0.2649984359741211, "learning_rate": 4.923343675853258e-05, "loss": 0.2501, "step": 5395 }, { "epoch": 0.2956797897388162, "grad_norm": 0.31491783261299133, "learning_rate": 4.9279065522905644e-05, "loss": 0.2541, "step": 5400 }, { "epoch": 0.2959535673219077, "grad_norm": 0.3023461699485779, "learning_rate": 4.932469428727871e-05, "loss": 0.2607, "step": 5405 }, { "epoch": 0.2962273449049992, "grad_norm": 0.23847714066505432, "learning_rate": 4.9370323051651764e-05, "loss": 0.2523, "step": 5410 }, { "epoch": 0.2965011224880907, "grad_norm": 0.22707439959049225, "learning_rate": 4.941595181602483e-05, "loss": 0.2479, "step": 5415 }, { "epoch": 0.2967749000711822, "grad_norm": 0.2816912531852722, "learning_rate": 4.9461580580397885e-05, "loss": 0.252, "step": 5420 }, { "epoch": 0.2970486776542737, "grad_norm": 0.27270999550819397, "learning_rate": 4.950720934477094e-05, "loss": 0.2411, "step": 5425 }, { "epoch": 0.29732245523736517, "grad_norm": 0.27671414613723755, "learning_rate": 4.9552838109144005e-05, "loss": 0.2473, "step": 5430 }, { "epoch": 0.29759623282045666, "grad_norm": 0.2447029948234558, "learning_rate": 4.959846687351707e-05, "loss": 0.2472, "step": 5435 }, { "epoch": 0.29787001040354816, "grad_norm": 0.2846169173717499, "learning_rate": 4.9644095637890125e-05, "loss": 0.2428, "step": 5440 }, { "epoch": 0.29814378798663965, "grad_norm": 0.3723735511302948, "learning_rate": 4.968972440226319e-05, "loss": 0.2463, "step": 5445 }, { "epoch": 0.29841756556973115, "grad_norm": 0.319545716047287, "learning_rate": 4.973535316663625e-05, "loss": 0.2497, "step": 5450 }, { "epoch": 0.29869134315282264, "grad_norm": 0.21567049622535706, "learning_rate": 4.9780981931009316e-05, "loss": 0.2365, "step": 5455 }, { "epoch": 0.29896512073591414, "grad_norm": 0.24364648759365082, "learning_rate": 4.982661069538237e-05, "loss": 0.2432, "step": 5460 }, { "epoch": 0.29923889831900563, "grad_norm": 0.270641565322876, "learning_rate": 4.987223945975543e-05, "loss": 0.2533, "step": 5465 }, { "epoch": 0.2995126759020971, "grad_norm": 0.3744702637195587, "learning_rate": 4.991786822412849e-05, "loss": 0.254, "step": 5470 }, { "epoch": 0.2997864534851886, "grad_norm": 0.2895120680332184, "learning_rate": 4.996349698850155e-05, "loss": 0.2488, "step": 5475 }, { "epoch": 0.3000602310682801, "grad_norm": 0.2393152117729187, "learning_rate": 4.9998986006895155e-05, "loss": 0.258, "step": 5480 }, { "epoch": 0.3003340086513716, "grad_norm": 0.2620828449726105, "learning_rate": 4.999391604137092e-05, "loss": 0.2358, "step": 5485 }, { "epoch": 0.3006077862344631, "grad_norm": 0.27625572681427, "learning_rate": 4.998884607584668e-05, "loss": 0.2525, "step": 5490 }, { "epoch": 0.3008815638175546, "grad_norm": 0.2815154790878296, "learning_rate": 4.998377611032245e-05, "loss": 0.2455, "step": 5495 }, { "epoch": 0.3011553414006461, "grad_norm": 0.1990695744752884, "learning_rate": 4.9978706144798215e-05, "loss": 0.2456, "step": 5500 }, { "epoch": 0.3014291189837376, "grad_norm": 0.24772176146507263, "learning_rate": 4.9973636179273985e-05, "loss": 0.2445, "step": 5505 }, { "epoch": 0.3017028965668291, "grad_norm": 0.32433265447616577, "learning_rate": 4.996856621374975e-05, "loss": 0.2474, "step": 5510 }, { "epoch": 0.30197667414992063, "grad_norm": 0.29233238101005554, "learning_rate": 4.996349624822552e-05, "loss": 0.2434, "step": 5515 }, { "epoch": 0.3022504517330121, "grad_norm": 0.24020643532276154, "learning_rate": 4.995842628270128e-05, "loss": 0.2406, "step": 5520 }, { "epoch": 0.3025242293161036, "grad_norm": 0.27110520005226135, "learning_rate": 4.9953356317177045e-05, "loss": 0.2527, "step": 5525 }, { "epoch": 0.3027980068991951, "grad_norm": 0.23757602274417877, "learning_rate": 4.9948286351652815e-05, "loss": 0.2581, "step": 5530 }, { "epoch": 0.3030717844822866, "grad_norm": 0.29453209042549133, "learning_rate": 4.994321638612858e-05, "loss": 0.2531, "step": 5535 }, { "epoch": 0.3033455620653781, "grad_norm": 0.21023854613304138, "learning_rate": 4.993814642060434e-05, "loss": 0.2344, "step": 5540 }, { "epoch": 0.3036193396484696, "grad_norm": 0.2395646870136261, "learning_rate": 4.9933076455080105e-05, "loss": 0.2462, "step": 5545 }, { "epoch": 0.3038931172315611, "grad_norm": 0.2556919753551483, "learning_rate": 4.9928006489555875e-05, "loss": 0.257, "step": 5550 }, { "epoch": 0.3041668948146526, "grad_norm": 0.2795666456222534, "learning_rate": 4.992293652403164e-05, "loss": 0.2514, "step": 5555 }, { "epoch": 0.3044406723977441, "grad_norm": 0.2544376254081726, "learning_rate": 4.99178665585074e-05, "loss": 0.2412, "step": 5560 }, { "epoch": 0.3047144499808356, "grad_norm": 0.21799229085445404, "learning_rate": 4.991279659298317e-05, "loss": 0.2451, "step": 5565 }, { "epoch": 0.3049882275639271, "grad_norm": 0.29981735348701477, "learning_rate": 4.9907726627458935e-05, "loss": 0.2401, "step": 5570 }, { "epoch": 0.30526200514701857, "grad_norm": 0.31966787576675415, "learning_rate": 4.99026566619347e-05, "loss": 0.2519, "step": 5575 }, { "epoch": 0.30553578273011006, "grad_norm": 0.2361517995595932, "learning_rate": 4.989758669641046e-05, "loss": 0.2644, "step": 5580 }, { "epoch": 0.30580956031320156, "grad_norm": 0.23177438974380493, "learning_rate": 4.989251673088623e-05, "loss": 0.2498, "step": 5585 }, { "epoch": 0.30608333789629305, "grad_norm": 0.24299980700016022, "learning_rate": 4.9887446765362e-05, "loss": 0.2508, "step": 5590 }, { "epoch": 0.30635711547938455, "grad_norm": 0.19759580492973328, "learning_rate": 4.9882376799837765e-05, "loss": 0.2307, "step": 5595 }, { "epoch": 0.30663089306247604, "grad_norm": 0.2812010943889618, "learning_rate": 4.987730683431353e-05, "loss": 0.2474, "step": 5600 }, { "epoch": 0.30690467064556753, "grad_norm": 0.2713581919670105, "learning_rate": 4.98722368687893e-05, "loss": 0.253, "step": 5605 }, { "epoch": 0.30717844822865903, "grad_norm": 0.2344558984041214, "learning_rate": 4.986716690326506e-05, "loss": 0.2507, "step": 5610 }, { "epoch": 0.3074522258117505, "grad_norm": 0.23628415167331696, "learning_rate": 4.9862096937740825e-05, "loss": 0.2406, "step": 5615 }, { "epoch": 0.307726003394842, "grad_norm": 0.3392857313156128, "learning_rate": 4.985702697221659e-05, "loss": 0.2378, "step": 5620 }, { "epoch": 0.3079997809779335, "grad_norm": 0.3337964117527008, "learning_rate": 4.985195700669236e-05, "loss": 0.2495, "step": 5625 }, { "epoch": 0.308273558561025, "grad_norm": 0.2650550305843353, "learning_rate": 4.984688704116812e-05, "loss": 0.2567, "step": 5630 }, { "epoch": 0.3085473361441165, "grad_norm": 0.3057557940483093, "learning_rate": 4.9841817075643885e-05, "loss": 0.2372, "step": 5635 }, { "epoch": 0.308821113727208, "grad_norm": 0.23509551584720612, "learning_rate": 4.9836747110119655e-05, "loss": 0.2374, "step": 5640 }, { "epoch": 0.3090948913102995, "grad_norm": 0.23383644223213196, "learning_rate": 4.983167714459542e-05, "loss": 0.2463, "step": 5645 }, { "epoch": 0.309368668893391, "grad_norm": 0.20760297775268555, "learning_rate": 4.982660717907118e-05, "loss": 0.2384, "step": 5650 }, { "epoch": 0.3096424464764825, "grad_norm": 0.2856541872024536, "learning_rate": 4.9821537213546945e-05, "loss": 0.2578, "step": 5655 }, { "epoch": 0.309916224059574, "grad_norm": 0.31789952516555786, "learning_rate": 4.9816467248022715e-05, "loss": 0.2474, "step": 5660 }, { "epoch": 0.3101900016426655, "grad_norm": 0.22524891793727875, "learning_rate": 4.981139728249848e-05, "loss": 0.2386, "step": 5665 }, { "epoch": 0.310463779225757, "grad_norm": 0.20161427557468414, "learning_rate": 4.980632731697425e-05, "loss": 0.2513, "step": 5670 }, { "epoch": 0.3107375568088485, "grad_norm": 0.23035278916358948, "learning_rate": 4.980125735145001e-05, "loss": 0.2348, "step": 5675 }, { "epoch": 0.31101133439194, "grad_norm": 0.20993374288082123, "learning_rate": 4.979618738592578e-05, "loss": 0.2434, "step": 5680 }, { "epoch": 0.3112851119750315, "grad_norm": 0.2736424207687378, "learning_rate": 4.9791117420401545e-05, "loss": 0.252, "step": 5685 }, { "epoch": 0.311558889558123, "grad_norm": 0.21735195815563202, "learning_rate": 4.978604745487731e-05, "loss": 0.2487, "step": 5690 }, { "epoch": 0.3118326671412145, "grad_norm": 0.2648100256919861, "learning_rate": 4.978097748935308e-05, "loss": 0.2448, "step": 5695 }, { "epoch": 0.312106444724306, "grad_norm": 0.32676780223846436, "learning_rate": 4.977590752382884e-05, "loss": 0.2429, "step": 5700 }, { "epoch": 0.3123802223073975, "grad_norm": 0.29528170824050903, "learning_rate": 4.9770837558304605e-05, "loss": 0.2456, "step": 5705 }, { "epoch": 0.312653999890489, "grad_norm": 0.2942656874656677, "learning_rate": 4.976576759278037e-05, "loss": 0.253, "step": 5710 }, { "epoch": 0.31292777747358047, "grad_norm": 0.20298036932945251, "learning_rate": 4.976069762725614e-05, "loss": 0.2458, "step": 5715 }, { "epoch": 0.31320155505667197, "grad_norm": 0.24217742681503296, "learning_rate": 4.97556276617319e-05, "loss": 0.2395, "step": 5720 }, { "epoch": 0.31347533263976346, "grad_norm": 0.21719874441623688, "learning_rate": 4.9750557696207665e-05, "loss": 0.2513, "step": 5725 }, { "epoch": 0.31374911022285495, "grad_norm": 0.2116779088973999, "learning_rate": 4.9745487730683435e-05, "loss": 0.2484, "step": 5730 }, { "epoch": 0.31402288780594645, "grad_norm": 0.2460099160671234, "learning_rate": 4.97404177651592e-05, "loss": 0.2313, "step": 5735 }, { "epoch": 0.31429666538903794, "grad_norm": 0.26019811630249023, "learning_rate": 4.973534779963496e-05, "loss": 0.2446, "step": 5740 }, { "epoch": 0.31457044297212944, "grad_norm": 0.23004445433616638, "learning_rate": 4.9730277834110725e-05, "loss": 0.25, "step": 5745 }, { "epoch": 0.31484422055522093, "grad_norm": 0.1849994659423828, "learning_rate": 4.9725207868586495e-05, "loss": 0.2373, "step": 5750 }, { "epoch": 0.3151179981383124, "grad_norm": 0.23696142435073853, "learning_rate": 4.9720137903062266e-05, "loss": 0.2446, "step": 5755 }, { "epoch": 0.3153917757214039, "grad_norm": 0.2658064365386963, "learning_rate": 4.971506793753803e-05, "loss": 0.2492, "step": 5760 }, { "epoch": 0.3156655533044954, "grad_norm": 0.23371991515159607, "learning_rate": 4.970999797201379e-05, "loss": 0.2363, "step": 5765 }, { "epoch": 0.3159393308875869, "grad_norm": 0.22265586256980896, "learning_rate": 4.970492800648956e-05, "loss": 0.2484, "step": 5770 }, { "epoch": 0.3162131084706784, "grad_norm": 0.23321068286895752, "learning_rate": 4.9699858040965326e-05, "loss": 0.2492, "step": 5775 }, { "epoch": 0.3164868860537699, "grad_norm": 0.23206250369548798, "learning_rate": 4.969478807544109e-05, "loss": 0.2391, "step": 5780 }, { "epoch": 0.3167606636368614, "grad_norm": 0.21715691685676575, "learning_rate": 4.968971810991685e-05, "loss": 0.2452, "step": 5785 }, { "epoch": 0.3170344412199529, "grad_norm": 0.2663290500640869, "learning_rate": 4.968464814439262e-05, "loss": 0.2341, "step": 5790 }, { "epoch": 0.3173082188030444, "grad_norm": 0.19692809879779816, "learning_rate": 4.9679578178868386e-05, "loss": 0.2376, "step": 5795 }, { "epoch": 0.3175819963861359, "grad_norm": 0.30996912717819214, "learning_rate": 4.967450821334415e-05, "loss": 0.2395, "step": 5800 }, { "epoch": 0.3178557739692274, "grad_norm": 0.3717040419578552, "learning_rate": 4.966943824781992e-05, "loss": 0.2506, "step": 5805 }, { "epoch": 0.3181295515523189, "grad_norm": 0.29255661368370056, "learning_rate": 4.966436828229568e-05, "loss": 0.2454, "step": 5810 }, { "epoch": 0.3184033291354104, "grad_norm": 0.2896672487258911, "learning_rate": 4.9659298316771446e-05, "loss": 0.2513, "step": 5815 }, { "epoch": 0.3186771067185019, "grad_norm": 0.30333882570266724, "learning_rate": 4.965422835124721e-05, "loss": 0.2317, "step": 5820 }, { "epoch": 0.3189508843015934, "grad_norm": 0.28422558307647705, "learning_rate": 4.964915838572298e-05, "loss": 0.2363, "step": 5825 }, { "epoch": 0.3192246618846849, "grad_norm": 0.28206607699394226, "learning_rate": 4.964408842019875e-05, "loss": 0.2375, "step": 5830 }, { "epoch": 0.3194984394677764, "grad_norm": 0.22028231620788574, "learning_rate": 4.963901845467451e-05, "loss": 0.2449, "step": 5835 }, { "epoch": 0.3197722170508679, "grad_norm": 0.23220428824424744, "learning_rate": 4.9633948489150276e-05, "loss": 0.2539, "step": 5840 }, { "epoch": 0.3200459946339594, "grad_norm": 0.28951892256736755, "learning_rate": 4.9628878523626046e-05, "loss": 0.2392, "step": 5845 }, { "epoch": 0.3203197722170509, "grad_norm": 0.2876027822494507, "learning_rate": 4.962380855810181e-05, "loss": 0.2482, "step": 5850 }, { "epoch": 0.3205935498001424, "grad_norm": 0.24740540981292725, "learning_rate": 4.961873859257757e-05, "loss": 0.2367, "step": 5855 }, { "epoch": 0.32086732738323387, "grad_norm": 0.22374556958675385, "learning_rate": 4.961366862705334e-05, "loss": 0.2387, "step": 5860 }, { "epoch": 0.32114110496632536, "grad_norm": 0.18054188787937164, "learning_rate": 4.9608598661529106e-05, "loss": 0.2457, "step": 5865 }, { "epoch": 0.32141488254941686, "grad_norm": 0.21867145597934723, "learning_rate": 4.960352869600487e-05, "loss": 0.2447, "step": 5870 }, { "epoch": 0.32168866013250835, "grad_norm": 0.2388889491558075, "learning_rate": 4.959845873048063e-05, "loss": 0.2461, "step": 5875 }, { "epoch": 0.32196243771559985, "grad_norm": 0.24578484892845154, "learning_rate": 4.95933887649564e-05, "loss": 0.264, "step": 5880 }, { "epoch": 0.32223621529869134, "grad_norm": 0.22913140058517456, "learning_rate": 4.9588318799432166e-05, "loss": 0.2399, "step": 5885 }, { "epoch": 0.32250999288178284, "grad_norm": 0.2342860847711563, "learning_rate": 4.958324883390793e-05, "loss": 0.2461, "step": 5890 }, { "epoch": 0.32278377046487433, "grad_norm": 0.21667057275772095, "learning_rate": 4.957817886838369e-05, "loss": 0.2481, "step": 5895 }, { "epoch": 0.3230575480479658, "grad_norm": 0.20320045948028564, "learning_rate": 4.957310890285946e-05, "loss": 0.2475, "step": 5900 }, { "epoch": 0.3233313256310573, "grad_norm": 0.20535248517990112, "learning_rate": 4.9568038937335226e-05, "loss": 0.2582, "step": 5905 }, { "epoch": 0.3236051032141488, "grad_norm": 0.2201147824525833, "learning_rate": 4.956296897181099e-05, "loss": 0.2461, "step": 5910 }, { "epoch": 0.3238788807972403, "grad_norm": 0.3023688495159149, "learning_rate": 4.955789900628676e-05, "loss": 0.2538, "step": 5915 }, { "epoch": 0.3241526583803318, "grad_norm": 0.19388824701309204, "learning_rate": 4.955282904076253e-05, "loss": 0.2382, "step": 5920 }, { "epoch": 0.3244264359634233, "grad_norm": 0.2163136899471283, "learning_rate": 4.954775907523829e-05, "loss": 0.2401, "step": 5925 }, { "epoch": 0.3247002135465148, "grad_norm": 0.24357065558433533, "learning_rate": 4.9542689109714056e-05, "loss": 0.2481, "step": 5930 }, { "epoch": 0.3249739911296063, "grad_norm": 0.22865638136863708, "learning_rate": 4.9537619144189826e-05, "loss": 0.237, "step": 5935 }, { "epoch": 0.3252477687126978, "grad_norm": 0.278434157371521, "learning_rate": 4.953254917866559e-05, "loss": 0.2442, "step": 5940 }, { "epoch": 0.3255215462957893, "grad_norm": 0.2480463981628418, "learning_rate": 4.952747921314135e-05, "loss": 0.2334, "step": 5945 }, { "epoch": 0.32579532387888077, "grad_norm": 0.2204529196023941, "learning_rate": 4.9522409247617116e-05, "loss": 0.2298, "step": 5950 }, { "epoch": 0.32606910146197227, "grad_norm": 0.2505747079849243, "learning_rate": 4.9517339282092886e-05, "loss": 0.2407, "step": 5955 }, { "epoch": 0.3263428790450638, "grad_norm": 0.23114660382270813, "learning_rate": 4.951226931656865e-05, "loss": 0.2391, "step": 5960 }, { "epoch": 0.3266166566281553, "grad_norm": 0.22351433336734772, "learning_rate": 4.950719935104441e-05, "loss": 0.2438, "step": 5965 }, { "epoch": 0.3268904342112468, "grad_norm": 0.2530883550643921, "learning_rate": 4.950212938552018e-05, "loss": 0.2451, "step": 5970 }, { "epoch": 0.3271642117943383, "grad_norm": 0.24677154421806335, "learning_rate": 4.9497059419995946e-05, "loss": 0.2387, "step": 5975 }, { "epoch": 0.3274379893774298, "grad_norm": 0.236911803483963, "learning_rate": 4.949198945447171e-05, "loss": 0.2411, "step": 5980 }, { "epoch": 0.3277117669605213, "grad_norm": 0.2103826403617859, "learning_rate": 4.948691948894747e-05, "loss": 0.2366, "step": 5985 }, { "epoch": 0.3279855445436128, "grad_norm": 0.20353081822395325, "learning_rate": 4.948184952342324e-05, "loss": 0.2471, "step": 5990 }, { "epoch": 0.3282593221267043, "grad_norm": 0.17828136682510376, "learning_rate": 4.947677955789901e-05, "loss": 0.2526, "step": 5995 }, { "epoch": 0.3285330997097958, "grad_norm": 0.20753076672554016, "learning_rate": 4.9471709592374776e-05, "loss": 0.2404, "step": 6000 }, { "epoch": 0.32880687729288727, "grad_norm": 0.20204727351665497, "learning_rate": 4.946663962685054e-05, "loss": 0.2354, "step": 6005 }, { "epoch": 0.32908065487597876, "grad_norm": 0.24349598586559296, "learning_rate": 4.946156966132631e-05, "loss": 0.2462, "step": 6010 }, { "epoch": 0.32935443245907026, "grad_norm": 0.23891036212444305, "learning_rate": 4.945649969580207e-05, "loss": 0.2421, "step": 6015 }, { "epoch": 0.32962821004216175, "grad_norm": 0.30095139145851135, "learning_rate": 4.9451429730277836e-05, "loss": 0.2513, "step": 6020 }, { "epoch": 0.32990198762525325, "grad_norm": 0.39220407605171204, "learning_rate": 4.94463597647536e-05, "loss": 0.2348, "step": 6025 }, { "epoch": 0.33017576520834474, "grad_norm": 0.309626966714859, "learning_rate": 4.944128979922937e-05, "loss": 0.244, "step": 6030 }, { "epoch": 0.33044954279143623, "grad_norm": 0.2832094728946686, "learning_rate": 4.943621983370513e-05, "loss": 0.238, "step": 6035 }, { "epoch": 0.33072332037452773, "grad_norm": 0.2543979287147522, "learning_rate": 4.9431149868180896e-05, "loss": 0.2396, "step": 6040 }, { "epoch": 0.3309970979576192, "grad_norm": 0.2177257537841797, "learning_rate": 4.9426079902656666e-05, "loss": 0.2551, "step": 6045 }, { "epoch": 0.3312708755407107, "grad_norm": 0.24621644616127014, "learning_rate": 4.942100993713243e-05, "loss": 0.2448, "step": 6050 }, { "epoch": 0.3315446531238022, "grad_norm": 0.23321138322353363, "learning_rate": 4.941593997160819e-05, "loss": 0.243, "step": 6055 }, { "epoch": 0.3318184307068937, "grad_norm": 0.19488611817359924, "learning_rate": 4.9410870006083956e-05, "loss": 0.2505, "step": 6060 }, { "epoch": 0.3320922082899852, "grad_norm": 0.22964276373386383, "learning_rate": 4.9405800040559726e-05, "loss": 0.2515, "step": 6065 }, { "epoch": 0.3323659858730767, "grad_norm": 0.20608554780483246, "learning_rate": 4.940073007503549e-05, "loss": 0.2414, "step": 6070 }, { "epoch": 0.3326397634561682, "grad_norm": 0.2835620939731598, "learning_rate": 4.939566010951126e-05, "loss": 0.2473, "step": 6075 }, { "epoch": 0.3329135410392597, "grad_norm": 0.21702034771442413, "learning_rate": 4.939059014398702e-05, "loss": 0.2427, "step": 6080 }, { "epoch": 0.3331873186223512, "grad_norm": 0.22561115026474, "learning_rate": 4.938552017846279e-05, "loss": 0.2478, "step": 6085 }, { "epoch": 0.3334610962054427, "grad_norm": 0.1976087987422943, "learning_rate": 4.9380450212938556e-05, "loss": 0.2314, "step": 6090 }, { "epoch": 0.33373487378853417, "grad_norm": 0.18592478334903717, "learning_rate": 4.937538024741432e-05, "loss": 0.2429, "step": 6095 }, { "epoch": 0.33400865137162566, "grad_norm": 0.2801607549190521, "learning_rate": 4.937031028189009e-05, "loss": 0.2388, "step": 6100 }, { "epoch": 0.3342824289547172, "grad_norm": 0.2859346270561218, "learning_rate": 4.936524031636585e-05, "loss": 0.2441, "step": 6105 }, { "epoch": 0.3345562065378087, "grad_norm": 0.25780630111694336, "learning_rate": 4.9360170350841616e-05, "loss": 0.2497, "step": 6110 }, { "epoch": 0.3348299841209002, "grad_norm": 0.23990947008132935, "learning_rate": 4.935510038531738e-05, "loss": 0.2408, "step": 6115 }, { "epoch": 0.3351037617039917, "grad_norm": 0.22334308922290802, "learning_rate": 4.935003041979315e-05, "loss": 0.2455, "step": 6120 }, { "epoch": 0.3353775392870832, "grad_norm": 0.22412587702274323, "learning_rate": 4.934496045426891e-05, "loss": 0.2482, "step": 6125 }, { "epoch": 0.3356513168701747, "grad_norm": 0.2084353119134903, "learning_rate": 4.9339890488744676e-05, "loss": 0.236, "step": 6130 }, { "epoch": 0.3359250944532662, "grad_norm": 0.2707473337650299, "learning_rate": 4.9334820523220446e-05, "loss": 0.2428, "step": 6135 }, { "epoch": 0.3361988720363577, "grad_norm": 0.23619946837425232, "learning_rate": 4.932975055769621e-05, "loss": 0.2512, "step": 6140 }, { "epoch": 0.33647264961944917, "grad_norm": 0.25237828493118286, "learning_rate": 4.932468059217197e-05, "loss": 0.256, "step": 6145 }, { "epoch": 0.33674642720254067, "grad_norm": 0.21645744144916534, "learning_rate": 4.9319610626647736e-05, "loss": 0.253, "step": 6150 }, { "epoch": 0.33702020478563216, "grad_norm": 0.24059739708900452, "learning_rate": 4.9314540661123506e-05, "loss": 0.2326, "step": 6155 }, { "epoch": 0.33729398236872365, "grad_norm": 0.22545504570007324, "learning_rate": 4.9309470695599276e-05, "loss": 0.2287, "step": 6160 }, { "epoch": 0.33756775995181515, "grad_norm": 0.23115742206573486, "learning_rate": 4.930440073007504e-05, "loss": 0.245, "step": 6165 }, { "epoch": 0.33784153753490664, "grad_norm": 0.23269076645374298, "learning_rate": 4.92993307645508e-05, "loss": 0.2511, "step": 6170 }, { "epoch": 0.33811531511799814, "grad_norm": 0.2110775262117386, "learning_rate": 4.929426079902657e-05, "loss": 0.2384, "step": 6175 }, { "epoch": 0.33838909270108963, "grad_norm": 0.21784332394599915, "learning_rate": 4.9289190833502336e-05, "loss": 0.2449, "step": 6180 }, { "epoch": 0.33866287028418113, "grad_norm": 0.2128448486328125, "learning_rate": 4.92841208679781e-05, "loss": 0.2455, "step": 6185 }, { "epoch": 0.3389366478672726, "grad_norm": 0.20647674798965454, "learning_rate": 4.927905090245386e-05, "loss": 0.2498, "step": 6190 }, { "epoch": 0.3392104254503641, "grad_norm": 0.22578662633895874, "learning_rate": 4.927398093692963e-05, "loss": 0.2432, "step": 6195 }, { "epoch": 0.3394842030334556, "grad_norm": 0.25849127769470215, "learning_rate": 4.9268910971405396e-05, "loss": 0.243, "step": 6200 }, { "epoch": 0.3397579806165471, "grad_norm": 0.23122543096542358, "learning_rate": 4.926384100588116e-05, "loss": 0.2469, "step": 6205 }, { "epoch": 0.3400317581996386, "grad_norm": 0.20798666775226593, "learning_rate": 4.925877104035693e-05, "loss": 0.2408, "step": 6210 }, { "epoch": 0.3403055357827301, "grad_norm": 0.1747974306344986, "learning_rate": 4.925370107483269e-05, "loss": 0.2394, "step": 6215 }, { "epoch": 0.3405793133658216, "grad_norm": 0.19611075520515442, "learning_rate": 4.9248631109308456e-05, "loss": 0.2363, "step": 6220 }, { "epoch": 0.3408530909489131, "grad_norm": 0.23067650198936462, "learning_rate": 4.924356114378422e-05, "loss": 0.2309, "step": 6225 }, { "epoch": 0.3411268685320046, "grad_norm": 0.23441766202449799, "learning_rate": 4.923849117825999e-05, "loss": 0.2494, "step": 6230 }, { "epoch": 0.3414006461150961, "grad_norm": 0.24504713714122772, "learning_rate": 4.923342121273575e-05, "loss": 0.2546, "step": 6235 }, { "epoch": 0.34167442369818757, "grad_norm": 0.21367794275283813, "learning_rate": 4.922835124721152e-05, "loss": 0.2446, "step": 6240 }, { "epoch": 0.34194820128127906, "grad_norm": 0.28252366185188293, "learning_rate": 4.9223281281687286e-05, "loss": 0.2451, "step": 6245 }, { "epoch": 0.3422219788643706, "grad_norm": 0.21642130613327026, "learning_rate": 4.9218211316163056e-05, "loss": 0.2382, "step": 6250 }, { "epoch": 0.3424957564474621, "grad_norm": 0.2747000753879547, "learning_rate": 4.921314135063882e-05, "loss": 0.2468, "step": 6255 }, { "epoch": 0.3427695340305536, "grad_norm": 0.23318545520305634, "learning_rate": 4.920807138511458e-05, "loss": 0.237, "step": 6260 }, { "epoch": 0.3430433116136451, "grad_norm": 0.19746212661266327, "learning_rate": 4.920300141959035e-05, "loss": 0.2323, "step": 6265 }, { "epoch": 0.3433170891967366, "grad_norm": 0.23279587924480438, "learning_rate": 4.9197931454066116e-05, "loss": 0.2454, "step": 6270 }, { "epoch": 0.3435908667798281, "grad_norm": 0.21070928871631622, "learning_rate": 4.919286148854188e-05, "loss": 0.2408, "step": 6275 }, { "epoch": 0.3438646443629196, "grad_norm": 0.22864322364330292, "learning_rate": 4.918779152301764e-05, "loss": 0.2494, "step": 6280 }, { "epoch": 0.3441384219460111, "grad_norm": 0.23985274136066437, "learning_rate": 4.918272155749341e-05, "loss": 0.2464, "step": 6285 }, { "epoch": 0.34441219952910257, "grad_norm": 0.25061339139938354, "learning_rate": 4.9177651591969176e-05, "loss": 0.2422, "step": 6290 }, { "epoch": 0.34468597711219406, "grad_norm": 0.214975506067276, "learning_rate": 4.917258162644494e-05, "loss": 0.2351, "step": 6295 }, { "epoch": 0.34495975469528556, "grad_norm": 0.23481178283691406, "learning_rate": 4.916751166092071e-05, "loss": 0.2447, "step": 6300 }, { "epoch": 0.34523353227837705, "grad_norm": 0.20848192274570465, "learning_rate": 4.916244169539647e-05, "loss": 0.2445, "step": 6305 }, { "epoch": 0.34550730986146855, "grad_norm": 0.2370147854089737, "learning_rate": 4.9157371729872236e-05, "loss": 0.2437, "step": 6310 }, { "epoch": 0.34578108744456004, "grad_norm": 0.25820091366767883, "learning_rate": 4.9152301764348e-05, "loss": 0.238, "step": 6315 }, { "epoch": 0.34605486502765154, "grad_norm": 0.31922727823257446, "learning_rate": 4.914723179882377e-05, "loss": 0.2404, "step": 6320 }, { "epoch": 0.34632864261074303, "grad_norm": 0.2214561402797699, "learning_rate": 4.914216183329954e-05, "loss": 0.2406, "step": 6325 }, { "epoch": 0.3466024201938345, "grad_norm": 0.23242363333702087, "learning_rate": 4.91370918677753e-05, "loss": 0.2358, "step": 6330 }, { "epoch": 0.346876197776926, "grad_norm": 0.21332430839538574, "learning_rate": 4.9132021902251066e-05, "loss": 0.2293, "step": 6335 }, { "epoch": 0.3471499753600175, "grad_norm": 0.19273245334625244, "learning_rate": 4.9126951936726836e-05, "loss": 0.2397, "step": 6340 }, { "epoch": 0.347423752943109, "grad_norm": 0.26568129658699036, "learning_rate": 4.91218819712026e-05, "loss": 0.2457, "step": 6345 }, { "epoch": 0.3476975305262005, "grad_norm": 0.19794748723506927, "learning_rate": 4.911681200567836e-05, "loss": 0.2363, "step": 6350 }, { "epoch": 0.347971308109292, "grad_norm": 0.2169848084449768, "learning_rate": 4.9111742040154126e-05, "loss": 0.2439, "step": 6355 }, { "epoch": 0.3482450856923835, "grad_norm": 0.20985732972621918, "learning_rate": 4.9106672074629896e-05, "loss": 0.2503, "step": 6360 }, { "epoch": 0.348518863275475, "grad_norm": 0.2148258537054062, "learning_rate": 4.910160210910566e-05, "loss": 0.2348, "step": 6365 }, { "epoch": 0.3487926408585665, "grad_norm": 0.2205447107553482, "learning_rate": 4.909653214358142e-05, "loss": 0.241, "step": 6370 }, { "epoch": 0.349066418441658, "grad_norm": 0.20441864430904388, "learning_rate": 4.909146217805719e-05, "loss": 0.2391, "step": 6375 }, { "epoch": 0.34934019602474947, "grad_norm": 0.21393993496894836, "learning_rate": 4.9086392212532956e-05, "loss": 0.2368, "step": 6380 }, { "epoch": 0.34961397360784097, "grad_norm": 0.21191446483135223, "learning_rate": 4.908132224700872e-05, "loss": 0.2467, "step": 6385 }, { "epoch": 0.34988775119093246, "grad_norm": 0.19290730357170105, "learning_rate": 4.907625228148448e-05, "loss": 0.2448, "step": 6390 }, { "epoch": 0.35016152877402396, "grad_norm": 0.209895521402359, "learning_rate": 4.907118231596025e-05, "loss": 0.246, "step": 6395 }, { "epoch": 0.3504353063571155, "grad_norm": 0.2133348137140274, "learning_rate": 4.906611235043602e-05, "loss": 0.2295, "step": 6400 }, { "epoch": 0.350709083940207, "grad_norm": 0.20145472884178162, "learning_rate": 4.9061042384911787e-05, "loss": 0.2337, "step": 6405 }, { "epoch": 0.3509828615232985, "grad_norm": 0.24488143622875214, "learning_rate": 4.905597241938755e-05, "loss": 0.2354, "step": 6410 }, { "epoch": 0.35125663910639, "grad_norm": 0.23549064993858337, "learning_rate": 4.905090245386332e-05, "loss": 0.2549, "step": 6415 }, { "epoch": 0.3515304166894815, "grad_norm": 0.2074953317642212, "learning_rate": 4.904583248833908e-05, "loss": 0.2393, "step": 6420 }, { "epoch": 0.351804194272573, "grad_norm": 0.2038867324590683, "learning_rate": 4.9040762522814847e-05, "loss": 0.2438, "step": 6425 }, { "epoch": 0.3520779718556645, "grad_norm": 0.19858179986476898, "learning_rate": 4.903569255729062e-05, "loss": 0.2492, "step": 6430 }, { "epoch": 0.35235174943875597, "grad_norm": 0.21952596306800842, "learning_rate": 4.903062259176638e-05, "loss": 0.2438, "step": 6435 }, { "epoch": 0.35262552702184746, "grad_norm": 0.20298460125923157, "learning_rate": 4.902555262624214e-05, "loss": 0.235, "step": 6440 }, { "epoch": 0.35289930460493896, "grad_norm": 0.2050754725933075, "learning_rate": 4.9020482660717907e-05, "loss": 0.236, "step": 6445 }, { "epoch": 0.35317308218803045, "grad_norm": 0.19484303891658783, "learning_rate": 4.901541269519368e-05, "loss": 0.2421, "step": 6450 }, { "epoch": 0.35344685977112195, "grad_norm": 0.2350703328847885, "learning_rate": 4.901034272966944e-05, "loss": 0.2416, "step": 6455 }, { "epoch": 0.35372063735421344, "grad_norm": 0.21665070950984955, "learning_rate": 4.90052727641452e-05, "loss": 0.2381, "step": 6460 }, { "epoch": 0.35399441493730494, "grad_norm": 0.2259826809167862, "learning_rate": 4.900020279862097e-05, "loss": 0.2345, "step": 6465 }, { "epoch": 0.35426819252039643, "grad_norm": 0.21810922026634216, "learning_rate": 4.8995132833096737e-05, "loss": 0.2368, "step": 6470 }, { "epoch": 0.3545419701034879, "grad_norm": 0.21778148412704468, "learning_rate": 4.89900628675725e-05, "loss": 0.2375, "step": 6475 }, { "epoch": 0.3548157476865794, "grad_norm": 0.21995438635349274, "learning_rate": 4.898499290204827e-05, "loss": 0.2433, "step": 6480 }, { "epoch": 0.3550895252696709, "grad_norm": 0.19220302999019623, "learning_rate": 4.897992293652403e-05, "loss": 0.2468, "step": 6485 }, { "epoch": 0.3553633028527624, "grad_norm": 0.2027302235364914, "learning_rate": 4.89748529709998e-05, "loss": 0.2438, "step": 6490 }, { "epoch": 0.3556370804358539, "grad_norm": 0.2221519947052002, "learning_rate": 4.896978300547557e-05, "loss": 0.241, "step": 6495 }, { "epoch": 0.3559108580189454, "grad_norm": 0.254651814699173, "learning_rate": 4.896471303995133e-05, "loss": 0.2461, "step": 6500 }, { "epoch": 0.3561846356020369, "grad_norm": 0.3382396697998047, "learning_rate": 4.89596430744271e-05, "loss": 0.2364, "step": 6505 }, { "epoch": 0.3564584131851284, "grad_norm": 0.22538349032402039, "learning_rate": 4.895457310890286e-05, "loss": 0.2441, "step": 6510 }, { "epoch": 0.3567321907682199, "grad_norm": 0.19722211360931396, "learning_rate": 4.894950314337863e-05, "loss": 0.2406, "step": 6515 }, { "epoch": 0.3570059683513114, "grad_norm": 0.21315714716911316, "learning_rate": 4.894443317785439e-05, "loss": 0.2289, "step": 6520 }, { "epoch": 0.35727974593440287, "grad_norm": 0.207956001162529, "learning_rate": 4.893936321233016e-05, "loss": 0.2474, "step": 6525 }, { "epoch": 0.35755352351749436, "grad_norm": 0.2673150300979614, "learning_rate": 4.893429324680592e-05, "loss": 0.2396, "step": 6530 }, { "epoch": 0.35782730110058586, "grad_norm": 0.27801594138145447, "learning_rate": 4.892922328128169e-05, "loss": 0.2498, "step": 6535 }, { "epoch": 0.35810107868367735, "grad_norm": 0.21169023215770721, "learning_rate": 4.892415331575746e-05, "loss": 0.2426, "step": 6540 }, { "epoch": 0.3583748562667689, "grad_norm": 0.23237881064414978, "learning_rate": 4.891908335023322e-05, "loss": 0.2445, "step": 6545 }, { "epoch": 0.3586486338498604, "grad_norm": 0.22494612634181976, "learning_rate": 4.891401338470898e-05, "loss": 0.245, "step": 6550 }, { "epoch": 0.3589224114329519, "grad_norm": 0.24192798137664795, "learning_rate": 4.890894341918475e-05, "loss": 0.2378, "step": 6555 }, { "epoch": 0.3591961890160434, "grad_norm": 0.19197691977024078, "learning_rate": 4.890387345366052e-05, "loss": 0.2304, "step": 6560 }, { "epoch": 0.3594699665991349, "grad_norm": 0.263588011264801, "learning_rate": 4.889880348813629e-05, "loss": 0.2391, "step": 6565 }, { "epoch": 0.3597437441822264, "grad_norm": 0.23370809853076935, "learning_rate": 4.889373352261205e-05, "loss": 0.2491, "step": 6570 }, { "epoch": 0.36001752176531787, "grad_norm": 0.21201086044311523, "learning_rate": 4.8888663557087813e-05, "loss": 0.2407, "step": 6575 }, { "epoch": 0.36029129934840937, "grad_norm": 0.23277217149734497, "learning_rate": 4.8883593591563584e-05, "loss": 0.2393, "step": 6580 }, { "epoch": 0.36056507693150086, "grad_norm": 0.23582841455936432, "learning_rate": 4.887852362603935e-05, "loss": 0.2404, "step": 6585 }, { "epoch": 0.36083885451459236, "grad_norm": 0.199085533618927, "learning_rate": 4.887345366051511e-05, "loss": 0.2435, "step": 6590 }, { "epoch": 0.36111263209768385, "grad_norm": 0.2621775269508362, "learning_rate": 4.886838369499088e-05, "loss": 0.2477, "step": 6595 }, { "epoch": 0.36138640968077534, "grad_norm": 0.24628423154354095, "learning_rate": 4.8863313729466644e-05, "loss": 0.2374, "step": 6600 }, { "epoch": 0.36166018726386684, "grad_norm": 0.2025853842496872, "learning_rate": 4.885824376394241e-05, "loss": 0.2508, "step": 6605 }, { "epoch": 0.36193396484695833, "grad_norm": 0.2071661800146103, "learning_rate": 4.885317379841817e-05, "loss": 0.2437, "step": 6610 }, { "epoch": 0.36220774243004983, "grad_norm": 0.2617977559566498, "learning_rate": 4.884810383289394e-05, "loss": 0.233, "step": 6615 }, { "epoch": 0.3624815200131413, "grad_norm": 0.21747232973575592, "learning_rate": 4.8843033867369704e-05, "loss": 0.2409, "step": 6620 }, { "epoch": 0.3627552975962328, "grad_norm": 0.2469691038131714, "learning_rate": 4.883796390184547e-05, "loss": 0.2449, "step": 6625 }, { "epoch": 0.3630290751793243, "grad_norm": 0.20733945071697235, "learning_rate": 4.883289393632123e-05, "loss": 0.2379, "step": 6630 }, { "epoch": 0.3633028527624158, "grad_norm": 0.26110392808914185, "learning_rate": 4.8827823970797e-05, "loss": 0.241, "step": 6635 }, { "epoch": 0.3635766303455073, "grad_norm": 0.21552664041519165, "learning_rate": 4.8822754005272764e-05, "loss": 0.2409, "step": 6640 }, { "epoch": 0.3638504079285988, "grad_norm": 0.2680889964103699, "learning_rate": 4.8817684039748534e-05, "loss": 0.2424, "step": 6645 }, { "epoch": 0.3641241855116903, "grad_norm": 0.23261107504367828, "learning_rate": 4.88126140742243e-05, "loss": 0.239, "step": 6650 }, { "epoch": 0.3643979630947818, "grad_norm": 0.20474016666412354, "learning_rate": 4.880754410870007e-05, "loss": 0.2472, "step": 6655 }, { "epoch": 0.3646717406778733, "grad_norm": 0.22893483936786652, "learning_rate": 4.880247414317583e-05, "loss": 0.2307, "step": 6660 }, { "epoch": 0.3649455182609648, "grad_norm": 0.24156929552555084, "learning_rate": 4.8797404177651594e-05, "loss": 0.2443, "step": 6665 }, { "epoch": 0.36521929584405627, "grad_norm": 0.21703657507896423, "learning_rate": 4.8792334212127364e-05, "loss": 0.2345, "step": 6670 }, { "epoch": 0.36549307342714776, "grad_norm": 0.22877715528011322, "learning_rate": 4.878726424660313e-05, "loss": 0.2325, "step": 6675 }, { "epoch": 0.36576685101023926, "grad_norm": 0.21375860273838043, "learning_rate": 4.878219428107889e-05, "loss": 0.2437, "step": 6680 }, { "epoch": 0.36604062859333075, "grad_norm": 0.2566604018211365, "learning_rate": 4.8777124315554654e-05, "loss": 0.2454, "step": 6685 }, { "epoch": 0.36631440617642225, "grad_norm": 0.24054011702537537, "learning_rate": 4.8772054350030424e-05, "loss": 0.2448, "step": 6690 }, { "epoch": 0.3665881837595138, "grad_norm": 0.18681542575359344, "learning_rate": 4.876698438450619e-05, "loss": 0.239, "step": 6695 }, { "epoch": 0.3668619613426053, "grad_norm": 0.23810048401355743, "learning_rate": 4.876191441898195e-05, "loss": 0.2444, "step": 6700 }, { "epoch": 0.3671357389256968, "grad_norm": 0.229444220662117, "learning_rate": 4.875684445345772e-05, "loss": 0.2272, "step": 6705 }, { "epoch": 0.3674095165087883, "grad_norm": 0.21229642629623413, "learning_rate": 4.8751774487933484e-05, "loss": 0.2358, "step": 6710 }, { "epoch": 0.3676832940918798, "grad_norm": 0.26557859778404236, "learning_rate": 4.874670452240925e-05, "loss": 0.2368, "step": 6715 }, { "epoch": 0.36795707167497127, "grad_norm": 0.21020598709583282, "learning_rate": 4.874163455688501e-05, "loss": 0.2462, "step": 6720 }, { "epoch": 0.36823084925806276, "grad_norm": 0.19941285252571106, "learning_rate": 4.873656459136079e-05, "loss": 0.2454, "step": 6725 }, { "epoch": 0.36850462684115426, "grad_norm": 0.22274784743785858, "learning_rate": 4.873149462583655e-05, "loss": 0.2366, "step": 6730 }, { "epoch": 0.36877840442424575, "grad_norm": 0.24986155331134796, "learning_rate": 4.8726424660312314e-05, "loss": 0.24, "step": 6735 }, { "epoch": 0.36905218200733725, "grad_norm": 0.24440713226795197, "learning_rate": 4.872135469478808e-05, "loss": 0.2343, "step": 6740 }, { "epoch": 0.36932595959042874, "grad_norm": 0.19842147827148438, "learning_rate": 4.871628472926385e-05, "loss": 0.247, "step": 6745 }, { "epoch": 0.36959973717352024, "grad_norm": 0.21012452244758606, "learning_rate": 4.871121476373961e-05, "loss": 0.2407, "step": 6750 }, { "epoch": 0.36987351475661173, "grad_norm": 0.22407174110412598, "learning_rate": 4.8706144798215374e-05, "loss": 0.2389, "step": 6755 }, { "epoch": 0.3701472923397032, "grad_norm": 0.20504425466060638, "learning_rate": 4.870107483269114e-05, "loss": 0.2407, "step": 6760 }, { "epoch": 0.3704210699227947, "grad_norm": 0.20641811192035675, "learning_rate": 4.869600486716691e-05, "loss": 0.2488, "step": 6765 }, { "epoch": 0.3706948475058862, "grad_norm": 0.22762957215309143, "learning_rate": 4.869093490164267e-05, "loss": 0.2411, "step": 6770 }, { "epoch": 0.3709686250889777, "grad_norm": 0.24137729406356812, "learning_rate": 4.8685864936118434e-05, "loss": 0.2424, "step": 6775 }, { "epoch": 0.3712424026720692, "grad_norm": 0.2361244559288025, "learning_rate": 4.8680794970594204e-05, "loss": 0.2331, "step": 6780 }, { "epoch": 0.3715161802551607, "grad_norm": 0.2697584331035614, "learning_rate": 4.867572500506997e-05, "loss": 0.228, "step": 6785 }, { "epoch": 0.3717899578382522, "grad_norm": 0.40600526332855225, "learning_rate": 4.867065503954573e-05, "loss": 0.2546, "step": 6790 }, { "epoch": 0.3720637354213437, "grad_norm": 0.2690499424934387, "learning_rate": 4.8665585074021494e-05, "loss": 0.2375, "step": 6795 }, { "epoch": 0.3723375130044352, "grad_norm": 0.24513450264930725, "learning_rate": 4.8660515108497264e-05, "loss": 0.2402, "step": 6800 }, { "epoch": 0.3726112905875267, "grad_norm": 0.20269936323165894, "learning_rate": 4.865544514297303e-05, "loss": 0.2455, "step": 6805 }, { "epoch": 0.3728850681706182, "grad_norm": 0.2389901578426361, "learning_rate": 4.86503751774488e-05, "loss": 0.2527, "step": 6810 }, { "epoch": 0.37315884575370967, "grad_norm": 0.24831224977970123, "learning_rate": 4.864530521192456e-05, "loss": 0.2374, "step": 6815 }, { "epoch": 0.37343262333680116, "grad_norm": 0.22702382504940033, "learning_rate": 4.864023524640033e-05, "loss": 0.2374, "step": 6820 }, { "epoch": 0.37370640091989266, "grad_norm": 0.22844305634498596, "learning_rate": 4.8635165280876094e-05, "loss": 0.2373, "step": 6825 }, { "epoch": 0.37398017850298415, "grad_norm": 0.23084856569766998, "learning_rate": 4.863009531535186e-05, "loss": 0.2375, "step": 6830 }, { "epoch": 0.37425395608607565, "grad_norm": 0.19697146117687225, "learning_rate": 4.862502534982763e-05, "loss": 0.2386, "step": 6835 }, { "epoch": 0.3745277336691672, "grad_norm": 0.18003864586353302, "learning_rate": 4.861995538430339e-05, "loss": 0.2391, "step": 6840 }, { "epoch": 0.3748015112522587, "grad_norm": 0.19891749322414398, "learning_rate": 4.8614885418779154e-05, "loss": 0.2322, "step": 6845 }, { "epoch": 0.3750752888353502, "grad_norm": 0.18628166615962982, "learning_rate": 4.860981545325492e-05, "loss": 0.2459, "step": 6850 }, { "epoch": 0.3753490664184417, "grad_norm": 0.19187454879283905, "learning_rate": 4.860474548773069e-05, "loss": 0.2436, "step": 6855 }, { "epoch": 0.3756228440015332, "grad_norm": 0.21416079998016357, "learning_rate": 4.859967552220645e-05, "loss": 0.2382, "step": 6860 }, { "epoch": 0.37589662158462467, "grad_norm": 0.18484070897102356, "learning_rate": 4.8594605556682214e-05, "loss": 0.2397, "step": 6865 }, { "epoch": 0.37617039916771616, "grad_norm": 0.2041803002357483, "learning_rate": 4.8589535591157984e-05, "loss": 0.2359, "step": 6870 }, { "epoch": 0.37644417675080766, "grad_norm": 0.23269936442375183, "learning_rate": 4.858446562563375e-05, "loss": 0.2463, "step": 6875 }, { "epoch": 0.37671795433389915, "grad_norm": 0.20481622219085693, "learning_rate": 4.857939566010951e-05, "loss": 0.2483, "step": 6880 }, { "epoch": 0.37699173191699065, "grad_norm": 0.22338494658470154, "learning_rate": 4.8574325694585274e-05, "loss": 0.2536, "step": 6885 }, { "epoch": 0.37726550950008214, "grad_norm": 0.22156468033790588, "learning_rate": 4.8569255729061044e-05, "loss": 0.2305, "step": 6890 }, { "epoch": 0.37753928708317364, "grad_norm": 0.23802827298641205, "learning_rate": 4.8564185763536814e-05, "loss": 0.2414, "step": 6895 }, { "epoch": 0.37781306466626513, "grad_norm": 0.2313682585954666, "learning_rate": 4.855911579801258e-05, "loss": 0.2388, "step": 6900 }, { "epoch": 0.3780868422493566, "grad_norm": 0.2239050716161728, "learning_rate": 4.855404583248834e-05, "loss": 0.2343, "step": 6905 }, { "epoch": 0.3783606198324481, "grad_norm": 0.20293161273002625, "learning_rate": 4.854897586696411e-05, "loss": 0.2354, "step": 6910 }, { "epoch": 0.3786343974155396, "grad_norm": 0.2100515365600586, "learning_rate": 4.8543905901439874e-05, "loss": 0.2372, "step": 6915 }, { "epoch": 0.3789081749986311, "grad_norm": 0.2266106754541397, "learning_rate": 4.853883593591564e-05, "loss": 0.2516, "step": 6920 }, { "epoch": 0.3791819525817226, "grad_norm": 0.23228196799755096, "learning_rate": 4.85337659703914e-05, "loss": 0.234, "step": 6925 }, { "epoch": 0.3794557301648141, "grad_norm": 0.27246516942977905, "learning_rate": 4.852869600486717e-05, "loss": 0.2342, "step": 6930 }, { "epoch": 0.3797295077479056, "grad_norm": 0.24544250965118408, "learning_rate": 4.8523626039342934e-05, "loss": 0.2456, "step": 6935 }, { "epoch": 0.3800032853309971, "grad_norm": 0.18507027626037598, "learning_rate": 4.85185560738187e-05, "loss": 0.2405, "step": 6940 }, { "epoch": 0.3802770629140886, "grad_norm": 0.23116517066955566, "learning_rate": 4.851348610829447e-05, "loss": 0.2366, "step": 6945 }, { "epoch": 0.3805508404971801, "grad_norm": 0.19577768445014954, "learning_rate": 4.850841614277023e-05, "loss": 0.2361, "step": 6950 }, { "epoch": 0.38082461808027157, "grad_norm": 0.2117483764886856, "learning_rate": 4.8503346177245994e-05, "loss": 0.232, "step": 6955 }, { "epoch": 0.38109839566336307, "grad_norm": 0.21185089647769928, "learning_rate": 4.849827621172176e-05, "loss": 0.2397, "step": 6960 }, { "epoch": 0.38137217324645456, "grad_norm": 0.2574279308319092, "learning_rate": 4.849320624619753e-05, "loss": 0.2364, "step": 6965 }, { "epoch": 0.38164595082954605, "grad_norm": 0.19231168925762177, "learning_rate": 4.84881362806733e-05, "loss": 0.232, "step": 6970 }, { "epoch": 0.38191972841263755, "grad_norm": 0.2557407319545746, "learning_rate": 4.848306631514906e-05, "loss": 0.2415, "step": 6975 }, { "epoch": 0.38219350599572904, "grad_norm": 0.22438015043735504, "learning_rate": 4.8477996349624824e-05, "loss": 0.2397, "step": 6980 }, { "epoch": 0.3824672835788206, "grad_norm": 0.25729161500930786, "learning_rate": 4.8472926384100594e-05, "loss": 0.2409, "step": 6985 }, { "epoch": 0.3827410611619121, "grad_norm": 0.1971912831068039, "learning_rate": 4.846785641857636e-05, "loss": 0.2279, "step": 6990 }, { "epoch": 0.3830148387450036, "grad_norm": 0.17515775561332703, "learning_rate": 4.846278645305212e-05, "loss": 0.2331, "step": 6995 }, { "epoch": 0.3832886163280951, "grad_norm": 0.27564218640327454, "learning_rate": 4.845771648752789e-05, "loss": 0.2487, "step": 7000 }, { "epoch": 0.38356239391118657, "grad_norm": 0.23889340460300446, "learning_rate": 4.8452646522003654e-05, "loss": 0.231, "step": 7005 }, { "epoch": 0.38383617149427807, "grad_norm": 0.22671227157115936, "learning_rate": 4.844757655647942e-05, "loss": 0.2387, "step": 7010 }, { "epoch": 0.38410994907736956, "grad_norm": 0.201130211353302, "learning_rate": 4.844250659095518e-05, "loss": 0.2452, "step": 7015 }, { "epoch": 0.38438372666046106, "grad_norm": 0.18271362781524658, "learning_rate": 4.843743662543095e-05, "loss": 0.2357, "step": 7020 }, { "epoch": 0.38465750424355255, "grad_norm": 0.18929579854011536, "learning_rate": 4.8432366659906714e-05, "loss": 0.2419, "step": 7025 }, { "epoch": 0.38493128182664405, "grad_norm": 0.2316053807735443, "learning_rate": 4.842729669438248e-05, "loss": 0.2413, "step": 7030 }, { "epoch": 0.38520505940973554, "grad_norm": 0.266983300447464, "learning_rate": 4.842222672885825e-05, "loss": 0.2329, "step": 7035 }, { "epoch": 0.38547883699282703, "grad_norm": 0.2267126888036728, "learning_rate": 4.841715676333401e-05, "loss": 0.2327, "step": 7040 }, { "epoch": 0.38575261457591853, "grad_norm": 0.2228465974330902, "learning_rate": 4.8412086797809774e-05, "loss": 0.2327, "step": 7045 }, { "epoch": 0.38602639215901, "grad_norm": 0.2474225014448166, "learning_rate": 4.8407016832285544e-05, "loss": 0.2381, "step": 7050 }, { "epoch": 0.3863001697421015, "grad_norm": 0.22089321911334991, "learning_rate": 4.840194686676131e-05, "loss": 0.243, "step": 7055 }, { "epoch": 0.386573947325193, "grad_norm": 0.21079117059707642, "learning_rate": 4.839687690123708e-05, "loss": 0.2508, "step": 7060 }, { "epoch": 0.3868477249082845, "grad_norm": 0.2661382257938385, "learning_rate": 4.839180693571284e-05, "loss": 0.2433, "step": 7065 }, { "epoch": 0.387121502491376, "grad_norm": 0.22033262252807617, "learning_rate": 4.8386736970188604e-05, "loss": 0.2346, "step": 7070 }, { "epoch": 0.3873952800744675, "grad_norm": 0.2016552835702896, "learning_rate": 4.8381667004664374e-05, "loss": 0.2318, "step": 7075 }, { "epoch": 0.387669057657559, "grad_norm": 0.21893905103206635, "learning_rate": 4.837659703914014e-05, "loss": 0.2362, "step": 7080 }, { "epoch": 0.3879428352406505, "grad_norm": 0.20521926879882812, "learning_rate": 4.83715270736159e-05, "loss": 0.2351, "step": 7085 }, { "epoch": 0.388216612823742, "grad_norm": 0.2122030109167099, "learning_rate": 4.8366457108091664e-05, "loss": 0.2323, "step": 7090 }, { "epoch": 0.3884903904068335, "grad_norm": 0.19214285910129547, "learning_rate": 4.8361387142567434e-05, "loss": 0.232, "step": 7095 }, { "epoch": 0.38876416798992497, "grad_norm": 0.2274629920721054, "learning_rate": 4.83563171770432e-05, "loss": 0.246, "step": 7100 }, { "epoch": 0.38903794557301646, "grad_norm": 0.21885190904140472, "learning_rate": 4.835124721151896e-05, "loss": 0.2424, "step": 7105 }, { "epoch": 0.38931172315610796, "grad_norm": 0.22801463305950165, "learning_rate": 4.834617724599473e-05, "loss": 0.2417, "step": 7110 }, { "epoch": 0.38958550073919945, "grad_norm": 0.2535187304019928, "learning_rate": 4.8341107280470494e-05, "loss": 0.2375, "step": 7115 }, { "epoch": 0.38985927832229095, "grad_norm": 0.21095189452171326, "learning_rate": 4.833603731494626e-05, "loss": 0.2351, "step": 7120 }, { "epoch": 0.39013305590538244, "grad_norm": 0.2538881003856659, "learning_rate": 4.833096734942202e-05, "loss": 0.2341, "step": 7125 }, { "epoch": 0.39040683348847394, "grad_norm": 0.21418596804141998, "learning_rate": 4.832589738389779e-05, "loss": 0.2344, "step": 7130 }, { "epoch": 0.3906806110715655, "grad_norm": 0.2524416744709015, "learning_rate": 4.832082741837356e-05, "loss": 0.2389, "step": 7135 }, { "epoch": 0.390954388654657, "grad_norm": 0.2130250185728073, "learning_rate": 4.8315757452849324e-05, "loss": 0.2296, "step": 7140 }, { "epoch": 0.3912281662377485, "grad_norm": 0.21809159219264984, "learning_rate": 4.831068748732509e-05, "loss": 0.2279, "step": 7145 }, { "epoch": 0.39150194382083997, "grad_norm": 0.1838170439004898, "learning_rate": 4.830561752180086e-05, "loss": 0.2489, "step": 7150 }, { "epoch": 0.39177572140393147, "grad_norm": 0.19365428388118744, "learning_rate": 4.830054755627662e-05, "loss": 0.245, "step": 7155 }, { "epoch": 0.39204949898702296, "grad_norm": 0.19262009859085083, "learning_rate": 4.8295477590752384e-05, "loss": 0.2295, "step": 7160 }, { "epoch": 0.39232327657011445, "grad_norm": 0.2419893890619278, "learning_rate": 4.8290407625228154e-05, "loss": 0.2583, "step": 7165 }, { "epoch": 0.39259705415320595, "grad_norm": 0.20620478689670563, "learning_rate": 4.828533765970392e-05, "loss": 0.2354, "step": 7170 }, { "epoch": 0.39287083173629744, "grad_norm": 0.18720567226409912, "learning_rate": 4.828026769417968e-05, "loss": 0.244, "step": 7175 }, { "epoch": 0.39314460931938894, "grad_norm": 0.18551984429359436, "learning_rate": 4.8275197728655444e-05, "loss": 0.2383, "step": 7180 }, { "epoch": 0.39341838690248043, "grad_norm": 0.21192960441112518, "learning_rate": 4.8270127763131214e-05, "loss": 0.2413, "step": 7185 }, { "epoch": 0.3936921644855719, "grad_norm": 0.21494995057582855, "learning_rate": 4.826505779760698e-05, "loss": 0.2397, "step": 7190 }, { "epoch": 0.3939659420686634, "grad_norm": 0.2122163623571396, "learning_rate": 4.825998783208274e-05, "loss": 0.2348, "step": 7195 }, { "epoch": 0.3942397196517549, "grad_norm": 0.22321540117263794, "learning_rate": 4.825491786655851e-05, "loss": 0.2383, "step": 7200 }, { "epoch": 0.3945134972348464, "grad_norm": 0.22028124332427979, "learning_rate": 4.8249847901034274e-05, "loss": 0.2378, "step": 7205 }, { "epoch": 0.3947872748179379, "grad_norm": 0.26204368472099304, "learning_rate": 4.824477793551004e-05, "loss": 0.2371, "step": 7210 }, { "epoch": 0.3950610524010294, "grad_norm": 0.21825745701789856, "learning_rate": 4.823970796998581e-05, "loss": 0.2449, "step": 7215 }, { "epoch": 0.3953348299841209, "grad_norm": 0.22259195148944855, "learning_rate": 4.823463800446157e-05, "loss": 0.2355, "step": 7220 }, { "epoch": 0.3956086075672124, "grad_norm": 0.24360716342926025, "learning_rate": 4.822956803893734e-05, "loss": 0.2431, "step": 7225 }, { "epoch": 0.3958823851503039, "grad_norm": 0.2651551365852356, "learning_rate": 4.8224498073413105e-05, "loss": 0.2425, "step": 7230 }, { "epoch": 0.3961561627333954, "grad_norm": 0.23692038655281067, "learning_rate": 4.821942810788887e-05, "loss": 0.2339, "step": 7235 }, { "epoch": 0.3964299403164869, "grad_norm": 0.20855428278446198, "learning_rate": 4.821435814236464e-05, "loss": 0.2314, "step": 7240 }, { "epoch": 0.39670371789957837, "grad_norm": 0.22763286530971527, "learning_rate": 4.82092881768404e-05, "loss": 0.2395, "step": 7245 }, { "epoch": 0.39697749548266986, "grad_norm": 0.19968301057815552, "learning_rate": 4.8204218211316165e-05, "loss": 0.2492, "step": 7250 }, { "epoch": 0.39725127306576136, "grad_norm": 0.23700645565986633, "learning_rate": 4.819914824579193e-05, "loss": 0.245, "step": 7255 }, { "epoch": 0.39752505064885285, "grad_norm": 0.2140597403049469, "learning_rate": 4.81940782802677e-05, "loss": 0.2424, "step": 7260 }, { "epoch": 0.39779882823194435, "grad_norm": 0.23126520216464996, "learning_rate": 4.818900831474346e-05, "loss": 0.2337, "step": 7265 }, { "epoch": 0.39807260581503584, "grad_norm": 0.20694352686405182, "learning_rate": 4.8183938349219224e-05, "loss": 0.2333, "step": 7270 }, { "epoch": 0.39834638339812733, "grad_norm": 0.2165246307849884, "learning_rate": 4.8178868383694995e-05, "loss": 0.242, "step": 7275 }, { "epoch": 0.3986201609812189, "grad_norm": 0.21030448377132416, "learning_rate": 4.817379841817076e-05, "loss": 0.2495, "step": 7280 }, { "epoch": 0.3988939385643104, "grad_norm": 0.22656859457492828, "learning_rate": 4.816872845264652e-05, "loss": 0.238, "step": 7285 }, { "epoch": 0.3991677161474019, "grad_norm": 0.23838238418102264, "learning_rate": 4.8163658487122284e-05, "loss": 0.2296, "step": 7290 }, { "epoch": 0.39944149373049337, "grad_norm": 0.27578452229499817, "learning_rate": 4.815858852159806e-05, "loss": 0.2366, "step": 7295 }, { "epoch": 0.39971527131358486, "grad_norm": 0.23187126219272614, "learning_rate": 4.8153518556073825e-05, "loss": 0.2371, "step": 7300 }, { "epoch": 0.39998904889667636, "grad_norm": 0.26795637607574463, "learning_rate": 4.814844859054959e-05, "loss": 0.2359, "step": 7305 }, { "epoch": 0.40026282647976785, "grad_norm": 0.22944222390651703, "learning_rate": 4.814337862502535e-05, "loss": 0.2316, "step": 7310 }, { "epoch": 0.40053660406285935, "grad_norm": 0.30847057700157166, "learning_rate": 4.813830865950112e-05, "loss": 0.2457, "step": 7315 }, { "epoch": 0.40081038164595084, "grad_norm": 0.2367524653673172, "learning_rate": 4.8133238693976885e-05, "loss": 0.2355, "step": 7320 }, { "epoch": 0.40108415922904234, "grad_norm": 0.2000991851091385, "learning_rate": 4.812816872845265e-05, "loss": 0.2284, "step": 7325 }, { "epoch": 0.40135793681213383, "grad_norm": 0.17928044497966766, "learning_rate": 4.812309876292842e-05, "loss": 0.2389, "step": 7330 }, { "epoch": 0.4016317143952253, "grad_norm": 0.19791245460510254, "learning_rate": 4.811802879740418e-05, "loss": 0.2327, "step": 7335 }, { "epoch": 0.4019054919783168, "grad_norm": 0.2148992419242859, "learning_rate": 4.8112958831879945e-05, "loss": 0.2302, "step": 7340 }, { "epoch": 0.4021792695614083, "grad_norm": 0.21100029349327087, "learning_rate": 4.810788886635571e-05, "loss": 0.2309, "step": 7345 }, { "epoch": 0.4024530471444998, "grad_norm": 0.21853667497634888, "learning_rate": 4.810281890083148e-05, "loss": 0.2329, "step": 7350 }, { "epoch": 0.4027268247275913, "grad_norm": 0.2311994880437851, "learning_rate": 4.809774893530724e-05, "loss": 0.2288, "step": 7355 }, { "epoch": 0.4030006023106828, "grad_norm": 0.3224336802959442, "learning_rate": 4.8092678969783005e-05, "loss": 0.2339, "step": 7360 }, { "epoch": 0.4032743798937743, "grad_norm": 0.20612719655036926, "learning_rate": 4.808760900425877e-05, "loss": 0.2372, "step": 7365 }, { "epoch": 0.4035481574768658, "grad_norm": 0.17268885672092438, "learning_rate": 4.808253903873454e-05, "loss": 0.2304, "step": 7370 }, { "epoch": 0.4038219350599573, "grad_norm": 0.21666404604911804, "learning_rate": 4.807746907321031e-05, "loss": 0.2385, "step": 7375 }, { "epoch": 0.4040957126430488, "grad_norm": 0.2065901756286621, "learning_rate": 4.807239910768607e-05, "loss": 0.2319, "step": 7380 }, { "epoch": 0.40436949022614027, "grad_norm": 0.19941817224025726, "learning_rate": 4.8067329142161835e-05, "loss": 0.2368, "step": 7385 }, { "epoch": 0.40464326780923177, "grad_norm": 0.20717470347881317, "learning_rate": 4.8062259176637605e-05, "loss": 0.2448, "step": 7390 }, { "epoch": 0.40491704539232326, "grad_norm": 0.21491053700447083, "learning_rate": 4.805718921111337e-05, "loss": 0.2292, "step": 7395 }, { "epoch": 0.40519082297541476, "grad_norm": 0.19712042808532715, "learning_rate": 4.805211924558913e-05, "loss": 0.2417, "step": 7400 }, { "epoch": 0.40546460055850625, "grad_norm": 0.24184733629226685, "learning_rate": 4.80470492800649e-05, "loss": 0.2483, "step": 7405 }, { "epoch": 0.40573837814159774, "grad_norm": 0.24047577381134033, "learning_rate": 4.8041979314540665e-05, "loss": 0.2354, "step": 7410 }, { "epoch": 0.40601215572468924, "grad_norm": 0.19971033930778503, "learning_rate": 4.803690934901643e-05, "loss": 0.2291, "step": 7415 }, { "epoch": 0.40628593330778073, "grad_norm": 0.22493046522140503, "learning_rate": 4.803183938349219e-05, "loss": 0.2304, "step": 7420 }, { "epoch": 0.40655971089087223, "grad_norm": 0.23694325983524323, "learning_rate": 4.802676941796796e-05, "loss": 0.2287, "step": 7425 }, { "epoch": 0.4068334884739638, "grad_norm": 0.25304338335990906, "learning_rate": 4.8021699452443725e-05, "loss": 0.2377, "step": 7430 }, { "epoch": 0.4071072660570553, "grad_norm": 0.2674511969089508, "learning_rate": 4.801662948691949e-05, "loss": 0.2375, "step": 7435 }, { "epoch": 0.40738104364014677, "grad_norm": 0.2355969250202179, "learning_rate": 4.801155952139526e-05, "loss": 0.2448, "step": 7440 }, { "epoch": 0.40765482122323826, "grad_norm": 0.23574486374855042, "learning_rate": 4.800648955587102e-05, "loss": 0.2413, "step": 7445 }, { "epoch": 0.40792859880632976, "grad_norm": 0.23215952515602112, "learning_rate": 4.8001419590346785e-05, "loss": 0.2389, "step": 7450 }, { "epoch": 0.40820237638942125, "grad_norm": 0.34480270743370056, "learning_rate": 4.799634962482255e-05, "loss": 0.2404, "step": 7455 }, { "epoch": 0.40847615397251275, "grad_norm": 0.2876080274581909, "learning_rate": 4.7991279659298325e-05, "loss": 0.2357, "step": 7460 }, { "epoch": 0.40874993155560424, "grad_norm": 0.21183454990386963, "learning_rate": 4.798620969377409e-05, "loss": 0.2309, "step": 7465 }, { "epoch": 0.40902370913869573, "grad_norm": 0.18029393255710602, "learning_rate": 4.798113972824985e-05, "loss": 0.234, "step": 7470 }, { "epoch": 0.40929748672178723, "grad_norm": 0.2541693449020386, "learning_rate": 4.7976069762725615e-05, "loss": 0.2405, "step": 7475 }, { "epoch": 0.4095712643048787, "grad_norm": 0.2322515994310379, "learning_rate": 4.7970999797201385e-05, "loss": 0.233, "step": 7480 }, { "epoch": 0.4098450418879702, "grad_norm": 0.2573477625846863, "learning_rate": 4.796592983167715e-05, "loss": 0.2398, "step": 7485 }, { "epoch": 0.4101188194710617, "grad_norm": 0.2393936812877655, "learning_rate": 4.796085986615291e-05, "loss": 0.2561, "step": 7490 }, { "epoch": 0.4103925970541532, "grad_norm": 0.21215663850307465, "learning_rate": 4.7955789900628675e-05, "loss": 0.2366, "step": 7495 }, { "epoch": 0.4106663746372447, "grad_norm": 0.20214605331420898, "learning_rate": 4.7950719935104445e-05, "loss": 0.2424, "step": 7500 }, { "epoch": 0.4109401522203362, "grad_norm": 0.25630083680152893, "learning_rate": 4.794564996958021e-05, "loss": 0.2334, "step": 7505 }, { "epoch": 0.4112139298034277, "grad_norm": 0.2387090027332306, "learning_rate": 4.794058000405597e-05, "loss": 0.2315, "step": 7510 }, { "epoch": 0.4114877073865192, "grad_norm": 0.20356285572052002, "learning_rate": 4.793551003853174e-05, "loss": 0.2392, "step": 7515 }, { "epoch": 0.4117614849696107, "grad_norm": 0.21914935111999512, "learning_rate": 4.7930440073007505e-05, "loss": 0.2394, "step": 7520 }, { "epoch": 0.4120352625527022, "grad_norm": 0.185110941529274, "learning_rate": 4.792537010748327e-05, "loss": 0.2364, "step": 7525 }, { "epoch": 0.41230904013579367, "grad_norm": 0.2373766005039215, "learning_rate": 4.792030014195903e-05, "loss": 0.2385, "step": 7530 }, { "epoch": 0.41258281771888516, "grad_norm": 0.24296720325946808, "learning_rate": 4.79152301764348e-05, "loss": 0.2308, "step": 7535 }, { "epoch": 0.41285659530197666, "grad_norm": 0.20312301814556122, "learning_rate": 4.791016021091057e-05, "loss": 0.2311, "step": 7540 }, { "epoch": 0.41313037288506815, "grad_norm": 0.24634182453155518, "learning_rate": 4.7905090245386335e-05, "loss": 0.2376, "step": 7545 }, { "epoch": 0.41340415046815965, "grad_norm": 0.19945910573005676, "learning_rate": 4.79000202798621e-05, "loss": 0.2264, "step": 7550 }, { "epoch": 0.41367792805125114, "grad_norm": 0.20827369391918182, "learning_rate": 4.789495031433787e-05, "loss": 0.2367, "step": 7555 }, { "epoch": 0.41395170563434264, "grad_norm": 0.20451436936855316, "learning_rate": 4.788988034881363e-05, "loss": 0.2297, "step": 7560 }, { "epoch": 0.41422548321743413, "grad_norm": 0.1938130408525467, "learning_rate": 4.7884810383289395e-05, "loss": 0.2391, "step": 7565 }, { "epoch": 0.4144992608005256, "grad_norm": 0.19432510435581207, "learning_rate": 4.7879740417765165e-05, "loss": 0.2295, "step": 7570 }, { "epoch": 0.4147730383836172, "grad_norm": 0.17472627758979797, "learning_rate": 4.787467045224093e-05, "loss": 0.2424, "step": 7575 }, { "epoch": 0.41504681596670867, "grad_norm": 0.2632206082344055, "learning_rate": 4.786960048671669e-05, "loss": 0.2314, "step": 7580 }, { "epoch": 0.41532059354980017, "grad_norm": 0.21015706658363342, "learning_rate": 4.7864530521192455e-05, "loss": 0.2463, "step": 7585 }, { "epoch": 0.41559437113289166, "grad_norm": 0.2350633293390274, "learning_rate": 4.7859460555668225e-05, "loss": 0.2316, "step": 7590 }, { "epoch": 0.41586814871598315, "grad_norm": 0.22889874875545502, "learning_rate": 4.785439059014399e-05, "loss": 0.229, "step": 7595 }, { "epoch": 0.41614192629907465, "grad_norm": 0.20007270574569702, "learning_rate": 4.784932062461975e-05, "loss": 0.2341, "step": 7600 }, { "epoch": 0.41641570388216614, "grad_norm": 0.18941427767276764, "learning_rate": 4.784425065909552e-05, "loss": 0.231, "step": 7605 }, { "epoch": 0.41668948146525764, "grad_norm": 0.22342535853385925, "learning_rate": 4.7839180693571285e-05, "loss": 0.2298, "step": 7610 }, { "epoch": 0.41696325904834913, "grad_norm": 0.18713268637657166, "learning_rate": 4.783411072804705e-05, "loss": 0.243, "step": 7615 }, { "epoch": 0.4172370366314406, "grad_norm": 0.2279476821422577, "learning_rate": 4.782904076252282e-05, "loss": 0.2409, "step": 7620 }, { "epoch": 0.4175108142145321, "grad_norm": 0.2086780071258545, "learning_rate": 4.782397079699858e-05, "loss": 0.2348, "step": 7625 }, { "epoch": 0.4177845917976236, "grad_norm": 0.23898085951805115, "learning_rate": 4.781890083147435e-05, "loss": 0.2375, "step": 7630 }, { "epoch": 0.4180583693807151, "grad_norm": 0.20116741955280304, "learning_rate": 4.7813830865950115e-05, "loss": 0.2408, "step": 7635 }, { "epoch": 0.4183321469638066, "grad_norm": 0.19341684877872467, "learning_rate": 4.780876090042588e-05, "loss": 0.2281, "step": 7640 }, { "epoch": 0.4186059245468981, "grad_norm": 0.20986412465572357, "learning_rate": 4.780369093490165e-05, "loss": 0.2339, "step": 7645 }, { "epoch": 0.4188797021299896, "grad_norm": 0.1998644769191742, "learning_rate": 4.779862096937741e-05, "loss": 0.241, "step": 7650 }, { "epoch": 0.4191534797130811, "grad_norm": 0.2047320455312729, "learning_rate": 4.7793551003853175e-05, "loss": 0.2382, "step": 7655 }, { "epoch": 0.4194272572961726, "grad_norm": 0.3028927147388458, "learning_rate": 4.778848103832894e-05, "loss": 0.237, "step": 7660 }, { "epoch": 0.4197010348792641, "grad_norm": 0.2821822762489319, "learning_rate": 4.778341107280471e-05, "loss": 0.2438, "step": 7665 }, { "epoch": 0.4199748124623556, "grad_norm": 0.2092042863368988, "learning_rate": 4.777834110728047e-05, "loss": 0.2353, "step": 7670 }, { "epoch": 0.42024859004544707, "grad_norm": 0.21579086780548096, "learning_rate": 4.7773271141756235e-05, "loss": 0.2294, "step": 7675 }, { "epoch": 0.42052236762853856, "grad_norm": 0.2147272676229477, "learning_rate": 4.7768201176232005e-05, "loss": 0.238, "step": 7680 }, { "epoch": 0.42079614521163006, "grad_norm": 0.17574599385261536, "learning_rate": 4.776313121070777e-05, "loss": 0.2258, "step": 7685 }, { "epoch": 0.42106992279472155, "grad_norm": 0.2064691185951233, "learning_rate": 4.775806124518353e-05, "loss": 0.235, "step": 7690 }, { "epoch": 0.42134370037781305, "grad_norm": 0.20732121169567108, "learning_rate": 4.7752991279659295e-05, "loss": 0.2423, "step": 7695 }, { "epoch": 0.42161747796090454, "grad_norm": 0.31325045228004456, "learning_rate": 4.7747921314135065e-05, "loss": 0.231, "step": 7700 }, { "epoch": 0.42189125554399604, "grad_norm": 0.2743137776851654, "learning_rate": 4.7742851348610835e-05, "loss": 0.236, "step": 7705 }, { "epoch": 0.42216503312708753, "grad_norm": 0.24069632589817047, "learning_rate": 4.77377813830866e-05, "loss": 0.236, "step": 7710 }, { "epoch": 0.422438810710179, "grad_norm": 0.24216532707214355, "learning_rate": 4.773271141756236e-05, "loss": 0.2371, "step": 7715 }, { "epoch": 0.4227125882932706, "grad_norm": 0.2225797325372696, "learning_rate": 4.772764145203813e-05, "loss": 0.2419, "step": 7720 }, { "epoch": 0.42298636587636207, "grad_norm": 0.17908625304698944, "learning_rate": 4.7722571486513895e-05, "loss": 0.2301, "step": 7725 }, { "epoch": 0.42326014345945356, "grad_norm": 0.21514132618904114, "learning_rate": 4.771750152098966e-05, "loss": 0.2297, "step": 7730 }, { "epoch": 0.42353392104254506, "grad_norm": 0.2172630876302719, "learning_rate": 4.771243155546543e-05, "loss": 0.2308, "step": 7735 }, { "epoch": 0.42380769862563655, "grad_norm": 0.22180898487567902, "learning_rate": 4.770736158994119e-05, "loss": 0.2429, "step": 7740 }, { "epoch": 0.42408147620872805, "grad_norm": 0.2397008240222931, "learning_rate": 4.7702291624416955e-05, "loss": 0.2272, "step": 7745 }, { "epoch": 0.42435525379181954, "grad_norm": 0.22008641064167023, "learning_rate": 4.769722165889272e-05, "loss": 0.2368, "step": 7750 }, { "epoch": 0.42462903137491104, "grad_norm": 0.21002937853336334, "learning_rate": 4.769215169336849e-05, "loss": 0.2344, "step": 7755 }, { "epoch": 0.42490280895800253, "grad_norm": 0.21215228736400604, "learning_rate": 4.768708172784425e-05, "loss": 0.2371, "step": 7760 }, { "epoch": 0.425176586541094, "grad_norm": 0.18841832876205444, "learning_rate": 4.7682011762320015e-05, "loss": 0.2268, "step": 7765 }, { "epoch": 0.4254503641241855, "grad_norm": 0.23883281648159027, "learning_rate": 4.7676941796795785e-05, "loss": 0.2418, "step": 7770 }, { "epoch": 0.425724141707277, "grad_norm": 0.21742981672286987, "learning_rate": 4.767187183127155e-05, "loss": 0.2278, "step": 7775 }, { "epoch": 0.4259979192903685, "grad_norm": 0.19600191712379456, "learning_rate": 4.766680186574731e-05, "loss": 0.2418, "step": 7780 }, { "epoch": 0.42627169687346, "grad_norm": 0.20478825271129608, "learning_rate": 4.766173190022308e-05, "loss": 0.2325, "step": 7785 }, { "epoch": 0.4265454744565515, "grad_norm": 0.20682843029499054, "learning_rate": 4.7656661934698845e-05, "loss": 0.2347, "step": 7790 }, { "epoch": 0.426819252039643, "grad_norm": 0.224244624376297, "learning_rate": 4.7651591969174615e-05, "loss": 0.232, "step": 7795 }, { "epoch": 0.4270930296227345, "grad_norm": 0.2162884622812271, "learning_rate": 4.764652200365038e-05, "loss": 0.2307, "step": 7800 }, { "epoch": 0.427366807205826, "grad_norm": 0.219514861702919, "learning_rate": 4.764145203812614e-05, "loss": 0.2336, "step": 7805 }, { "epoch": 0.4276405847889175, "grad_norm": 0.20449484884738922, "learning_rate": 4.763638207260191e-05, "loss": 0.2395, "step": 7810 }, { "epoch": 0.42791436237200897, "grad_norm": 0.19567322731018066, "learning_rate": 4.7631312107077675e-05, "loss": 0.2365, "step": 7815 }, { "epoch": 0.42818813995510047, "grad_norm": 0.19170346856117249, "learning_rate": 4.762624214155344e-05, "loss": 0.2403, "step": 7820 }, { "epoch": 0.42846191753819196, "grad_norm": 0.19994696974754333, "learning_rate": 4.76211721760292e-05, "loss": 0.2292, "step": 7825 }, { "epoch": 0.42873569512128346, "grad_norm": 0.2221546322107315, "learning_rate": 4.761610221050497e-05, "loss": 0.234, "step": 7830 }, { "epoch": 0.42900947270437495, "grad_norm": 0.20184409618377686, "learning_rate": 4.7611032244980735e-05, "loss": 0.2378, "step": 7835 }, { "epoch": 0.42928325028746644, "grad_norm": 0.21352973580360413, "learning_rate": 4.76059622794565e-05, "loss": 0.2336, "step": 7840 }, { "epoch": 0.42955702787055794, "grad_norm": 0.178548663854599, "learning_rate": 4.760089231393227e-05, "loss": 0.2368, "step": 7845 }, { "epoch": 0.42983080545364943, "grad_norm": 0.18446065485477448, "learning_rate": 4.759582234840803e-05, "loss": 0.2291, "step": 7850 }, { "epoch": 0.43010458303674093, "grad_norm": 0.24087071418762207, "learning_rate": 4.7590752382883795e-05, "loss": 0.2433, "step": 7855 }, { "epoch": 0.4303783606198324, "grad_norm": 0.1903569996356964, "learning_rate": 4.758568241735956e-05, "loss": 0.2256, "step": 7860 }, { "epoch": 0.4306521382029239, "grad_norm": 0.23955409228801727, "learning_rate": 4.7580612451835336e-05, "loss": 0.2339, "step": 7865 }, { "epoch": 0.43092591578601547, "grad_norm": 0.23421607911586761, "learning_rate": 4.75755424863111e-05, "loss": 0.2328, "step": 7870 }, { "epoch": 0.43119969336910696, "grad_norm": 0.21348702907562256, "learning_rate": 4.757047252078686e-05, "loss": 0.2375, "step": 7875 }, { "epoch": 0.43147347095219846, "grad_norm": 0.1861080676317215, "learning_rate": 4.7565402555262625e-05, "loss": 0.2366, "step": 7880 }, { "epoch": 0.43174724853528995, "grad_norm": 0.20399777591228485, "learning_rate": 4.7560332589738396e-05, "loss": 0.2336, "step": 7885 }, { "epoch": 0.43202102611838145, "grad_norm": 0.21407529711723328, "learning_rate": 4.755526262421416e-05, "loss": 0.2292, "step": 7890 }, { "epoch": 0.43229480370147294, "grad_norm": 0.20959536731243134, "learning_rate": 4.755019265868992e-05, "loss": 0.2431, "step": 7895 }, { "epoch": 0.43256858128456444, "grad_norm": 0.2189949005842209, "learning_rate": 4.754512269316569e-05, "loss": 0.2361, "step": 7900 }, { "epoch": 0.43284235886765593, "grad_norm": 0.261859655380249, "learning_rate": 4.7540052727641456e-05, "loss": 0.2346, "step": 7905 }, { "epoch": 0.4331161364507474, "grad_norm": 0.2195693552494049, "learning_rate": 4.753498276211722e-05, "loss": 0.2303, "step": 7910 }, { "epoch": 0.4333899140338389, "grad_norm": 0.19116340577602386, "learning_rate": 4.752991279659298e-05, "loss": 0.2343, "step": 7915 }, { "epoch": 0.4336636916169304, "grad_norm": 0.20644670724868774, "learning_rate": 4.752484283106875e-05, "loss": 0.2316, "step": 7920 }, { "epoch": 0.4339374692000219, "grad_norm": 0.18498405814170837, "learning_rate": 4.7519772865544516e-05, "loss": 0.233, "step": 7925 }, { "epoch": 0.4342112467831134, "grad_norm": 0.24561965465545654, "learning_rate": 4.751470290002028e-05, "loss": 0.231, "step": 7930 }, { "epoch": 0.4344850243662049, "grad_norm": 0.23354755342006683, "learning_rate": 4.750963293449605e-05, "loss": 0.232, "step": 7935 }, { "epoch": 0.4347588019492964, "grad_norm": 0.21524964272975922, "learning_rate": 4.750456296897181e-05, "loss": 0.2348, "step": 7940 }, { "epoch": 0.4350325795323879, "grad_norm": 0.2339736670255661, "learning_rate": 4.749949300344758e-05, "loss": 0.2313, "step": 7945 }, { "epoch": 0.4353063571154794, "grad_norm": 0.22457633912563324, "learning_rate": 4.7494423037923346e-05, "loss": 0.2296, "step": 7950 }, { "epoch": 0.4355801346985709, "grad_norm": 0.24868682026863098, "learning_rate": 4.748935307239911e-05, "loss": 0.2302, "step": 7955 }, { "epoch": 0.43585391228166237, "grad_norm": 0.2789388597011566, "learning_rate": 4.748428310687488e-05, "loss": 0.2303, "step": 7960 }, { "epoch": 0.43612768986475386, "grad_norm": 0.22514203190803528, "learning_rate": 4.747921314135064e-05, "loss": 0.2439, "step": 7965 }, { "epoch": 0.43640146744784536, "grad_norm": 0.2544602155685425, "learning_rate": 4.7474143175826406e-05, "loss": 0.2433, "step": 7970 }, { "epoch": 0.43667524503093685, "grad_norm": 0.2102338820695877, "learning_rate": 4.7469073210302176e-05, "loss": 0.2256, "step": 7975 }, { "epoch": 0.43694902261402835, "grad_norm": 0.20788757503032684, "learning_rate": 4.746400324477794e-05, "loss": 0.2273, "step": 7980 }, { "epoch": 0.43722280019711984, "grad_norm": 0.18543992936611176, "learning_rate": 4.74589332792537e-05, "loss": 0.2287, "step": 7985 }, { "epoch": 0.43749657778021134, "grad_norm": 0.21647928655147552, "learning_rate": 4.7453863313729466e-05, "loss": 0.2397, "step": 7990 }, { "epoch": 0.43777035536330283, "grad_norm": 0.2567577064037323, "learning_rate": 4.7448793348205236e-05, "loss": 0.2375, "step": 7995 }, { "epoch": 0.4380441329463943, "grad_norm": 0.23159906268119812, "learning_rate": 4.7443723382681e-05, "loss": 0.2382, "step": 8000 }, { "epoch": 0.4383179105294858, "grad_norm": 0.2431924045085907, "learning_rate": 4.743865341715676e-05, "loss": 0.2267, "step": 8005 }, { "epoch": 0.4385916881125773, "grad_norm": 0.24315936863422394, "learning_rate": 4.743358345163253e-05, "loss": 0.2464, "step": 8010 }, { "epoch": 0.43886546569566887, "grad_norm": 0.21110770106315613, "learning_rate": 4.7428513486108296e-05, "loss": 0.2414, "step": 8015 }, { "epoch": 0.43913924327876036, "grad_norm": 0.19221365451812744, "learning_rate": 4.742344352058406e-05, "loss": 0.2392, "step": 8020 }, { "epoch": 0.43941302086185186, "grad_norm": 0.23613759875297546, "learning_rate": 4.741837355505982e-05, "loss": 0.2345, "step": 8025 }, { "epoch": 0.43968679844494335, "grad_norm": 0.22659076750278473, "learning_rate": 4.74133035895356e-05, "loss": 0.2373, "step": 8030 }, { "epoch": 0.43996057602803484, "grad_norm": 0.21385371685028076, "learning_rate": 4.740823362401136e-05, "loss": 0.2398, "step": 8035 }, { "epoch": 0.44023435361112634, "grad_norm": 0.20829372107982635, "learning_rate": 4.7403163658487126e-05, "loss": 0.2351, "step": 8040 }, { "epoch": 0.44050813119421783, "grad_norm": 0.21441946923732758, "learning_rate": 4.739809369296289e-05, "loss": 0.2402, "step": 8045 }, { "epoch": 0.44078190877730933, "grad_norm": 0.25714147090911865, "learning_rate": 4.739302372743866e-05, "loss": 0.2445, "step": 8050 }, { "epoch": 0.4410556863604008, "grad_norm": 0.28145989775657654, "learning_rate": 4.738795376191442e-05, "loss": 0.2429, "step": 8055 }, { "epoch": 0.4413294639434923, "grad_norm": 0.2711997926235199, "learning_rate": 4.7382883796390186e-05, "loss": 0.238, "step": 8060 }, { "epoch": 0.4416032415265838, "grad_norm": 0.3119252920150757, "learning_rate": 4.7377813830865956e-05, "loss": 0.2319, "step": 8065 }, { "epoch": 0.4418770191096753, "grad_norm": 0.2443036437034607, "learning_rate": 4.737274386534172e-05, "loss": 0.2434, "step": 8070 }, { "epoch": 0.4421507966927668, "grad_norm": 0.23383751511573792, "learning_rate": 4.736767389981748e-05, "loss": 0.2361, "step": 8075 }, { "epoch": 0.4424245742758583, "grad_norm": 0.22940407693386078, "learning_rate": 4.7362603934293246e-05, "loss": 0.2387, "step": 8080 }, { "epoch": 0.4426983518589498, "grad_norm": 0.2300073206424713, "learning_rate": 4.7357533968769016e-05, "loss": 0.2328, "step": 8085 }, { "epoch": 0.4429721294420413, "grad_norm": 0.22850240767002106, "learning_rate": 4.735246400324478e-05, "loss": 0.23, "step": 8090 }, { "epoch": 0.4432459070251328, "grad_norm": 0.19602012634277344, "learning_rate": 4.734739403772054e-05, "loss": 0.2454, "step": 8095 }, { "epoch": 0.4435196846082243, "grad_norm": 0.1801815629005432, "learning_rate": 4.7342324072196306e-05, "loss": 0.2282, "step": 8100 }, { "epoch": 0.44379346219131577, "grad_norm": 0.20661352574825287, "learning_rate": 4.7337254106672076e-05, "loss": 0.2337, "step": 8105 }, { "epoch": 0.44406723977440726, "grad_norm": 0.21507081389427185, "learning_rate": 4.7332184141147846e-05, "loss": 0.2443, "step": 8110 }, { "epoch": 0.44434101735749876, "grad_norm": 0.22565799951553345, "learning_rate": 4.732711417562361e-05, "loss": 0.2302, "step": 8115 }, { "epoch": 0.44461479494059025, "grad_norm": 0.22274799644947052, "learning_rate": 4.732204421009937e-05, "loss": 0.2332, "step": 8120 }, { "epoch": 0.44488857252368175, "grad_norm": 0.19319657981395721, "learning_rate": 4.731697424457514e-05, "loss": 0.2446, "step": 8125 }, { "epoch": 0.44516235010677324, "grad_norm": 0.23285815119743347, "learning_rate": 4.7311904279050906e-05, "loss": 0.2406, "step": 8130 }, { "epoch": 0.44543612768986474, "grad_norm": 0.1758141666650772, "learning_rate": 4.730683431352667e-05, "loss": 0.2301, "step": 8135 }, { "epoch": 0.44570990527295623, "grad_norm": 0.3203333914279938, "learning_rate": 4.730176434800244e-05, "loss": 0.2329, "step": 8140 }, { "epoch": 0.4459836828560477, "grad_norm": 0.271967351436615, "learning_rate": 4.72966943824782e-05, "loss": 0.2425, "step": 8145 }, { "epoch": 0.4462574604391392, "grad_norm": 0.22059667110443115, "learning_rate": 4.7291624416953966e-05, "loss": 0.2367, "step": 8150 }, { "epoch": 0.4465312380222307, "grad_norm": 0.20156382024288177, "learning_rate": 4.728655445142973e-05, "loss": 0.2383, "step": 8155 }, { "epoch": 0.4468050156053222, "grad_norm": 0.22130049765110016, "learning_rate": 4.72814844859055e-05, "loss": 0.2346, "step": 8160 }, { "epoch": 0.44707879318841376, "grad_norm": 0.23642848432064056, "learning_rate": 4.727641452038126e-05, "loss": 0.2364, "step": 8165 }, { "epoch": 0.44735257077150525, "grad_norm": 0.2442694455385208, "learning_rate": 4.7271344554857026e-05, "loss": 0.2356, "step": 8170 }, { "epoch": 0.44762634835459675, "grad_norm": 0.1882135272026062, "learning_rate": 4.7266274589332796e-05, "loss": 0.2369, "step": 8175 }, { "epoch": 0.44790012593768824, "grad_norm": 0.21201826632022858, "learning_rate": 4.726120462380856e-05, "loss": 0.24, "step": 8180 }, { "epoch": 0.44817390352077974, "grad_norm": 0.22175775468349457, "learning_rate": 4.725613465828432e-05, "loss": 0.2357, "step": 8185 }, { "epoch": 0.44844768110387123, "grad_norm": 0.18450932204723358, "learning_rate": 4.725106469276009e-05, "loss": 0.2296, "step": 8190 }, { "epoch": 0.4487214586869627, "grad_norm": 0.2081582099199295, "learning_rate": 4.724599472723586e-05, "loss": 0.2385, "step": 8195 }, { "epoch": 0.4489952362700542, "grad_norm": 0.1902361363172531, "learning_rate": 4.7240924761711626e-05, "loss": 0.2321, "step": 8200 }, { "epoch": 0.4492690138531457, "grad_norm": 0.19428086280822754, "learning_rate": 4.723585479618739e-05, "loss": 0.2287, "step": 8205 }, { "epoch": 0.4495427914362372, "grad_norm": 0.1982533186674118, "learning_rate": 4.723078483066315e-05, "loss": 0.2335, "step": 8210 }, { "epoch": 0.4498165690193287, "grad_norm": 0.2553078532218933, "learning_rate": 4.722571486513892e-05, "loss": 0.2451, "step": 8215 }, { "epoch": 0.4500903466024202, "grad_norm": 0.19283972680568695, "learning_rate": 4.7220644899614686e-05, "loss": 0.2358, "step": 8220 }, { "epoch": 0.4503641241855117, "grad_norm": 0.23829281330108643, "learning_rate": 4.721557493409045e-05, "loss": 0.239, "step": 8225 }, { "epoch": 0.4506379017686032, "grad_norm": 0.23714353144168854, "learning_rate": 4.721050496856621e-05, "loss": 0.2393, "step": 8230 }, { "epoch": 0.4509116793516947, "grad_norm": 0.195048987865448, "learning_rate": 4.720543500304198e-05, "loss": 0.2313, "step": 8235 }, { "epoch": 0.4511854569347862, "grad_norm": 0.1993405520915985, "learning_rate": 4.7200365037517746e-05, "loss": 0.2369, "step": 8240 }, { "epoch": 0.45145923451787767, "grad_norm": 0.2150842547416687, "learning_rate": 4.719529507199351e-05, "loss": 0.2315, "step": 8245 }, { "epoch": 0.45173301210096917, "grad_norm": 0.21596144139766693, "learning_rate": 4.719022510646928e-05, "loss": 0.2381, "step": 8250 }, { "epoch": 0.45200678968406066, "grad_norm": 0.1910632997751236, "learning_rate": 4.718515514094504e-05, "loss": 0.2308, "step": 8255 }, { "epoch": 0.45228056726715216, "grad_norm": 0.19542883336544037, "learning_rate": 4.7180085175420806e-05, "loss": 0.2386, "step": 8260 }, { "epoch": 0.45255434485024365, "grad_norm": 0.18576093018054962, "learning_rate": 4.717501520989657e-05, "loss": 0.2301, "step": 8265 }, { "epoch": 0.45282812243333515, "grad_norm": 0.19061677157878876, "learning_rate": 4.7169945244372346e-05, "loss": 0.2469, "step": 8270 }, { "epoch": 0.45310190001642664, "grad_norm": 0.24555020034313202, "learning_rate": 4.716487527884811e-05, "loss": 0.2209, "step": 8275 }, { "epoch": 0.45337567759951813, "grad_norm": 0.19625914096832275, "learning_rate": 4.715980531332387e-05, "loss": 0.2361, "step": 8280 }, { "epoch": 0.45364945518260963, "grad_norm": 0.1872810274362564, "learning_rate": 4.7154735347799636e-05, "loss": 0.2323, "step": 8285 }, { "epoch": 0.4539232327657011, "grad_norm": 0.1676620990037918, "learning_rate": 4.7149665382275406e-05, "loss": 0.2316, "step": 8290 }, { "epoch": 0.4541970103487926, "grad_norm": 0.2326296716928482, "learning_rate": 4.714459541675117e-05, "loss": 0.239, "step": 8295 }, { "epoch": 0.4544707879318841, "grad_norm": 0.18146954476833344, "learning_rate": 4.713952545122693e-05, "loss": 0.2366, "step": 8300 }, { "epoch": 0.4547445655149756, "grad_norm": 0.2442280352115631, "learning_rate": 4.71344554857027e-05, "loss": 0.2449, "step": 8305 }, { "epoch": 0.45501834309806716, "grad_norm": 0.19200150668621063, "learning_rate": 4.7129385520178466e-05, "loss": 0.2361, "step": 8310 }, { "epoch": 0.45529212068115865, "grad_norm": 0.18395687639713287, "learning_rate": 4.712431555465423e-05, "loss": 0.2238, "step": 8315 }, { "epoch": 0.45556589826425015, "grad_norm": 0.20532378554344177, "learning_rate": 4.711924558912999e-05, "loss": 0.2312, "step": 8320 }, { "epoch": 0.45583967584734164, "grad_norm": 0.23721793293952942, "learning_rate": 4.711417562360576e-05, "loss": 0.2329, "step": 8325 }, { "epoch": 0.45611345343043314, "grad_norm": 0.17957369983196259, "learning_rate": 4.7109105658081526e-05, "loss": 0.2417, "step": 8330 }, { "epoch": 0.45638723101352463, "grad_norm": 0.19997847080230713, "learning_rate": 4.710403569255729e-05, "loss": 0.2269, "step": 8335 }, { "epoch": 0.4566610085966161, "grad_norm": 0.2039460688829422, "learning_rate": 4.709896572703306e-05, "loss": 0.2415, "step": 8340 }, { "epoch": 0.4569347861797076, "grad_norm": 0.18740171194076538, "learning_rate": 4.709389576150882e-05, "loss": 0.2402, "step": 8345 }, { "epoch": 0.4572085637627991, "grad_norm": 0.22063666582107544, "learning_rate": 4.7088825795984586e-05, "loss": 0.2361, "step": 8350 }, { "epoch": 0.4574823413458906, "grad_norm": 0.23589803278446198, "learning_rate": 4.7083755830460356e-05, "loss": 0.229, "step": 8355 }, { "epoch": 0.4577561189289821, "grad_norm": 0.2504451870918274, "learning_rate": 4.707868586493612e-05, "loss": 0.2277, "step": 8360 }, { "epoch": 0.4580298965120736, "grad_norm": 0.20984117686748505, "learning_rate": 4.707361589941189e-05, "loss": 0.2305, "step": 8365 }, { "epoch": 0.4583036740951651, "grad_norm": 0.18124860525131226, "learning_rate": 4.706854593388765e-05, "loss": 0.2301, "step": 8370 }, { "epoch": 0.4585774516782566, "grad_norm": 0.19493010640144348, "learning_rate": 4.7063475968363416e-05, "loss": 0.2471, "step": 8375 }, { "epoch": 0.4588512292613481, "grad_norm": 0.1888474076986313, "learning_rate": 4.7058406002839186e-05, "loss": 0.235, "step": 8380 }, { "epoch": 0.4591250068444396, "grad_norm": 0.25954365730285645, "learning_rate": 4.705333603731495e-05, "loss": 0.2366, "step": 8385 }, { "epoch": 0.45939878442753107, "grad_norm": 0.22186096012592316, "learning_rate": 4.704826607179071e-05, "loss": 0.2414, "step": 8390 }, { "epoch": 0.45967256201062257, "grad_norm": 0.22932171821594238, "learning_rate": 4.7043196106266476e-05, "loss": 0.2333, "step": 8395 }, { "epoch": 0.45994633959371406, "grad_norm": 0.18925543129444122, "learning_rate": 4.7038126140742246e-05, "loss": 0.2344, "step": 8400 }, { "epoch": 0.46022011717680555, "grad_norm": 0.24283230304718018, "learning_rate": 4.703305617521801e-05, "loss": 0.2358, "step": 8405 }, { "epoch": 0.46049389475989705, "grad_norm": 0.21266615390777588, "learning_rate": 4.702798620969377e-05, "loss": 0.2321, "step": 8410 }, { "epoch": 0.46076767234298854, "grad_norm": 0.20003315806388855, "learning_rate": 4.702291624416954e-05, "loss": 0.2411, "step": 8415 }, { "epoch": 0.46104144992608004, "grad_norm": 0.2029002159833908, "learning_rate": 4.7017846278645306e-05, "loss": 0.224, "step": 8420 }, { "epoch": 0.46131522750917153, "grad_norm": 0.2187013328075409, "learning_rate": 4.701277631312107e-05, "loss": 0.2321, "step": 8425 }, { "epoch": 0.461589005092263, "grad_norm": 0.24750728905200958, "learning_rate": 4.700770634759683e-05, "loss": 0.2416, "step": 8430 }, { "epoch": 0.4618627826753545, "grad_norm": 0.24185645580291748, "learning_rate": 4.700263638207261e-05, "loss": 0.2429, "step": 8435 }, { "epoch": 0.462136560258446, "grad_norm": 0.19449949264526367, "learning_rate": 4.699756641654837e-05, "loss": 0.2371, "step": 8440 }, { "epoch": 0.4624103378415375, "grad_norm": 0.1828162670135498, "learning_rate": 4.6992496451024136e-05, "loss": 0.234, "step": 8445 }, { "epoch": 0.462684115424629, "grad_norm": 0.1969452202320099, "learning_rate": 4.69874264854999e-05, "loss": 0.2237, "step": 8450 }, { "epoch": 0.4629578930077205, "grad_norm": 0.21580973267555237, "learning_rate": 4.698235651997567e-05, "loss": 0.2369, "step": 8455 }, { "epoch": 0.46323167059081205, "grad_norm": 0.18370535969734192, "learning_rate": 4.697728655445143e-05, "loss": 0.2257, "step": 8460 }, { "epoch": 0.46350544817390354, "grad_norm": 0.2284282147884369, "learning_rate": 4.6972216588927196e-05, "loss": 0.2322, "step": 8465 }, { "epoch": 0.46377922575699504, "grad_norm": 0.23011787235736847, "learning_rate": 4.6967146623402966e-05, "loss": 0.2353, "step": 8470 }, { "epoch": 0.46405300334008653, "grad_norm": 0.1990990787744522, "learning_rate": 4.696207665787873e-05, "loss": 0.2321, "step": 8475 }, { "epoch": 0.46432678092317803, "grad_norm": 0.19580650329589844, "learning_rate": 4.695700669235449e-05, "loss": 0.2329, "step": 8480 }, { "epoch": 0.4646005585062695, "grad_norm": 0.22822874784469604, "learning_rate": 4.6951936726830256e-05, "loss": 0.2399, "step": 8485 }, { "epoch": 0.464874336089361, "grad_norm": 0.21784009039402008, "learning_rate": 4.6946866761306026e-05, "loss": 0.232, "step": 8490 }, { "epoch": 0.4651481136724525, "grad_norm": 0.17547707259655, "learning_rate": 4.694179679578179e-05, "loss": 0.2261, "step": 8495 }, { "epoch": 0.465421891255544, "grad_norm": 0.20452754199504852, "learning_rate": 4.693672683025755e-05, "loss": 0.2341, "step": 8500 }, { "epoch": 0.4656956688386355, "grad_norm": 0.2105681151151657, "learning_rate": 4.693165686473332e-05, "loss": 0.235, "step": 8505 }, { "epoch": 0.465969446421727, "grad_norm": 0.2106797993183136, "learning_rate": 4.6926586899209086e-05, "loss": 0.2329, "step": 8510 }, { "epoch": 0.4662432240048185, "grad_norm": 0.1869816929101944, "learning_rate": 4.6921516933684857e-05, "loss": 0.2266, "step": 8515 }, { "epoch": 0.46651700158791, "grad_norm": 0.18796108663082123, "learning_rate": 4.691644696816062e-05, "loss": 0.2238, "step": 8520 }, { "epoch": 0.4667907791710015, "grad_norm": 0.21213603019714355, "learning_rate": 4.691137700263638e-05, "loss": 0.2397, "step": 8525 }, { "epoch": 0.467064556754093, "grad_norm": 0.2117937058210373, "learning_rate": 4.690630703711215e-05, "loss": 0.243, "step": 8530 }, { "epoch": 0.46733833433718447, "grad_norm": 0.1819540411233902, "learning_rate": 4.6901237071587917e-05, "loss": 0.2314, "step": 8535 }, { "epoch": 0.46761211192027596, "grad_norm": 0.18099351227283478, "learning_rate": 4.689616710606368e-05, "loss": 0.2383, "step": 8540 }, { "epoch": 0.46788588950336746, "grad_norm": 0.213851198554039, "learning_rate": 4.689109714053945e-05, "loss": 0.2268, "step": 8545 }, { "epoch": 0.46815966708645895, "grad_norm": 0.19870156049728394, "learning_rate": 4.688602717501521e-05, "loss": 0.2234, "step": 8550 }, { "epoch": 0.46843344466955045, "grad_norm": 0.2317986637353897, "learning_rate": 4.6880957209490977e-05, "loss": 0.2369, "step": 8555 }, { "epoch": 0.46870722225264194, "grad_norm": 0.24177075922489166, "learning_rate": 4.687588724396674e-05, "loss": 0.2391, "step": 8560 }, { "epoch": 0.46898099983573344, "grad_norm": 0.2469109743833542, "learning_rate": 4.687081727844251e-05, "loss": 0.2305, "step": 8565 }, { "epoch": 0.46925477741882493, "grad_norm": 0.23671934008598328, "learning_rate": 4.686574731291827e-05, "loss": 0.2374, "step": 8570 }, { "epoch": 0.4695285550019164, "grad_norm": 0.20853504538536072, "learning_rate": 4.6860677347394037e-05, "loss": 0.2219, "step": 8575 }, { "epoch": 0.4698023325850079, "grad_norm": 0.24800114333629608, "learning_rate": 4.685560738186981e-05, "loss": 0.2341, "step": 8580 }, { "epoch": 0.4700761101680994, "grad_norm": 0.1839602291584015, "learning_rate": 4.685053741634557e-05, "loss": 0.2213, "step": 8585 }, { "epoch": 0.4703498877511909, "grad_norm": 0.18691548705101013, "learning_rate": 4.684546745082133e-05, "loss": 0.232, "step": 8590 }, { "epoch": 0.4706236653342824, "grad_norm": 0.2005351185798645, "learning_rate": 4.6840397485297097e-05, "loss": 0.2401, "step": 8595 }, { "epoch": 0.4708974429173739, "grad_norm": 0.22944244742393494, "learning_rate": 4.6835327519772873e-05, "loss": 0.2367, "step": 8600 }, { "epoch": 0.47117122050046545, "grad_norm": 0.19850121438503265, "learning_rate": 4.683025755424864e-05, "loss": 0.2375, "step": 8605 }, { "epoch": 0.47144499808355694, "grad_norm": 0.17880204319953918, "learning_rate": 4.68251875887244e-05, "loss": 0.2313, "step": 8610 }, { "epoch": 0.47171877566664844, "grad_norm": 0.21004916727542877, "learning_rate": 4.682011762320016e-05, "loss": 0.2373, "step": 8615 }, { "epoch": 0.47199255324973993, "grad_norm": 0.18917101621627808, "learning_rate": 4.6815047657675933e-05, "loss": 0.2332, "step": 8620 }, { "epoch": 0.4722663308328314, "grad_norm": 0.16761744022369385, "learning_rate": 4.68099776921517e-05, "loss": 0.2386, "step": 8625 }, { "epoch": 0.4725401084159229, "grad_norm": 0.19118492305278778, "learning_rate": 4.680490772662746e-05, "loss": 0.2287, "step": 8630 }, { "epoch": 0.4728138859990144, "grad_norm": 0.19484083354473114, "learning_rate": 4.679983776110323e-05, "loss": 0.2378, "step": 8635 }, { "epoch": 0.4730876635821059, "grad_norm": 0.15917959809303284, "learning_rate": 4.6794767795578993e-05, "loss": 0.2362, "step": 8640 }, { "epoch": 0.4733614411651974, "grad_norm": 0.20010608434677124, "learning_rate": 4.678969783005476e-05, "loss": 0.2333, "step": 8645 }, { "epoch": 0.4736352187482889, "grad_norm": 0.18660834431648254, "learning_rate": 4.678462786453052e-05, "loss": 0.2309, "step": 8650 }, { "epoch": 0.4739089963313804, "grad_norm": 0.21706229448318481, "learning_rate": 4.677955789900629e-05, "loss": 0.2248, "step": 8655 }, { "epoch": 0.4741827739144719, "grad_norm": 0.2323511391878128, "learning_rate": 4.677448793348205e-05, "loss": 0.2352, "step": 8660 }, { "epoch": 0.4744565514975634, "grad_norm": 0.21034294366836548, "learning_rate": 4.676941796795782e-05, "loss": 0.2222, "step": 8665 }, { "epoch": 0.4747303290806549, "grad_norm": 0.19388028979301453, "learning_rate": 4.676434800243359e-05, "loss": 0.2333, "step": 8670 }, { "epoch": 0.4750041066637464, "grad_norm": 0.20948095619678497, "learning_rate": 4.675927803690935e-05, "loss": 0.2331, "step": 8675 }, { "epoch": 0.47527788424683787, "grad_norm": 0.2300228774547577, "learning_rate": 4.675420807138512e-05, "loss": 0.2486, "step": 8680 }, { "epoch": 0.47555166182992936, "grad_norm": 0.21179786324501038, "learning_rate": 4.6749138105860883e-05, "loss": 0.2326, "step": 8685 }, { "epoch": 0.47582543941302086, "grad_norm": 0.19477294385433197, "learning_rate": 4.674406814033665e-05, "loss": 0.2355, "step": 8690 }, { "epoch": 0.47609921699611235, "grad_norm": 0.1866220384836197, "learning_rate": 4.673899817481242e-05, "loss": 0.2302, "step": 8695 }, { "epoch": 0.47637299457920385, "grad_norm": 0.22767773270606995, "learning_rate": 4.673392820928818e-05, "loss": 0.2357, "step": 8700 }, { "epoch": 0.47664677216229534, "grad_norm": 0.22299718856811523, "learning_rate": 4.6728858243763943e-05, "loss": 0.2239, "step": 8705 }, { "epoch": 0.47692054974538683, "grad_norm": 0.1975536048412323, "learning_rate": 4.6723788278239714e-05, "loss": 0.2355, "step": 8710 }, { "epoch": 0.47719432732847833, "grad_norm": 0.23845502734184265, "learning_rate": 4.671871831271548e-05, "loss": 0.2358, "step": 8715 }, { "epoch": 0.4774681049115698, "grad_norm": 0.2528804540634155, "learning_rate": 4.671364834719124e-05, "loss": 0.2268, "step": 8720 }, { "epoch": 0.4777418824946613, "grad_norm": 0.2055647373199463, "learning_rate": 4.6708578381667003e-05, "loss": 0.2348, "step": 8725 }, { "epoch": 0.4780156600777528, "grad_norm": 0.2250152975320816, "learning_rate": 4.6703508416142774e-05, "loss": 0.2353, "step": 8730 }, { "epoch": 0.4782894376608443, "grad_norm": 0.21682341396808624, "learning_rate": 4.669843845061854e-05, "loss": 0.2309, "step": 8735 }, { "epoch": 0.4785632152439358, "grad_norm": 0.19990594685077667, "learning_rate": 4.66933684850943e-05, "loss": 0.2496, "step": 8740 }, { "epoch": 0.4788369928270273, "grad_norm": 0.15296624600887299, "learning_rate": 4.668829851957007e-05, "loss": 0.224, "step": 8745 }, { "epoch": 0.47911077041011885, "grad_norm": 0.20556168258190155, "learning_rate": 4.6683228554045834e-05, "loss": 0.2344, "step": 8750 }, { "epoch": 0.47938454799321034, "grad_norm": 0.1905013471841812, "learning_rate": 4.66781585885216e-05, "loss": 0.2425, "step": 8755 }, { "epoch": 0.47965832557630184, "grad_norm": 0.21172787249088287, "learning_rate": 4.667308862299737e-05, "loss": 0.238, "step": 8760 }, { "epoch": 0.47993210315939333, "grad_norm": 0.19338271021842957, "learning_rate": 4.666801865747314e-05, "loss": 0.2368, "step": 8765 }, { "epoch": 0.4802058807424848, "grad_norm": 0.20205965638160706, "learning_rate": 4.66629486919489e-05, "loss": 0.2291, "step": 8770 }, { "epoch": 0.4804796583255763, "grad_norm": 0.2000514715909958, "learning_rate": 4.6657878726424664e-05, "loss": 0.2267, "step": 8775 }, { "epoch": 0.4807534359086678, "grad_norm": 0.19909793138504028, "learning_rate": 4.665280876090043e-05, "loss": 0.2352, "step": 8780 }, { "epoch": 0.4810272134917593, "grad_norm": 0.1887601613998413, "learning_rate": 4.66477387953762e-05, "loss": 0.2431, "step": 8785 }, { "epoch": 0.4813009910748508, "grad_norm": 0.1940741389989853, "learning_rate": 4.664266882985196e-05, "loss": 0.2345, "step": 8790 }, { "epoch": 0.4815747686579423, "grad_norm": 0.27744945883750916, "learning_rate": 4.6637598864327724e-05, "loss": 0.2401, "step": 8795 }, { "epoch": 0.4818485462410338, "grad_norm": 0.2409457117319107, "learning_rate": 4.6632528898803494e-05, "loss": 0.2269, "step": 8800 }, { "epoch": 0.4821223238241253, "grad_norm": 0.16902244091033936, "learning_rate": 4.662745893327926e-05, "loss": 0.2346, "step": 8805 }, { "epoch": 0.4823961014072168, "grad_norm": 0.18854603171348572, "learning_rate": 4.662238896775502e-05, "loss": 0.2363, "step": 8810 }, { "epoch": 0.4826698789903083, "grad_norm": 0.19366905093193054, "learning_rate": 4.6617319002230784e-05, "loss": 0.2277, "step": 8815 }, { "epoch": 0.48294365657339977, "grad_norm": 0.1651798039674759, "learning_rate": 4.6612249036706554e-05, "loss": 0.2227, "step": 8820 }, { "epoch": 0.48321743415649127, "grad_norm": 0.17524319887161255, "learning_rate": 4.660717907118232e-05, "loss": 0.2329, "step": 8825 }, { "epoch": 0.48349121173958276, "grad_norm": 0.21761132776737213, "learning_rate": 4.660210910565808e-05, "loss": 0.2394, "step": 8830 }, { "epoch": 0.48376498932267425, "grad_norm": 0.17345501482486725, "learning_rate": 4.6597039140133844e-05, "loss": 0.2241, "step": 8835 }, { "epoch": 0.48403876690576575, "grad_norm": 0.20785005390644073, "learning_rate": 4.659196917460962e-05, "loss": 0.2223, "step": 8840 }, { "epoch": 0.48431254448885724, "grad_norm": 0.2100970298051834, "learning_rate": 4.6586899209085384e-05, "loss": 0.2259, "step": 8845 }, { "epoch": 0.48458632207194874, "grad_norm": 0.20255045592784882, "learning_rate": 4.658182924356115e-05, "loss": 0.2324, "step": 8850 }, { "epoch": 0.48486009965504023, "grad_norm": 0.19040547311306, "learning_rate": 4.657675927803691e-05, "loss": 0.2405, "step": 8855 }, { "epoch": 0.4851338772381317, "grad_norm": 0.19020770490169525, "learning_rate": 4.657168931251268e-05, "loss": 0.2208, "step": 8860 }, { "epoch": 0.4854076548212232, "grad_norm": 0.22786115109920502, "learning_rate": 4.6566619346988444e-05, "loss": 0.2386, "step": 8865 }, { "epoch": 0.4856814324043147, "grad_norm": 0.22680410742759705, "learning_rate": 4.656154938146421e-05, "loss": 0.2271, "step": 8870 }, { "epoch": 0.4859552099874062, "grad_norm": 0.19465482234954834, "learning_rate": 4.655647941593998e-05, "loss": 0.2264, "step": 8875 }, { "epoch": 0.4862289875704977, "grad_norm": 0.21814927458763123, "learning_rate": 4.655140945041574e-05, "loss": 0.2228, "step": 8880 }, { "epoch": 0.4865027651535892, "grad_norm": 0.17910777032375336, "learning_rate": 4.6546339484891504e-05, "loss": 0.2196, "step": 8885 }, { "epoch": 0.4867765427366807, "grad_norm": 0.2244405597448349, "learning_rate": 4.654126951936727e-05, "loss": 0.2402, "step": 8890 }, { "epoch": 0.4870503203197722, "grad_norm": 0.19977505505084991, "learning_rate": 4.653619955384304e-05, "loss": 0.2381, "step": 8895 }, { "epoch": 0.48732409790286374, "grad_norm": 0.20727384090423584, "learning_rate": 4.65311295883188e-05, "loss": 0.2321, "step": 8900 }, { "epoch": 0.48759787548595523, "grad_norm": 0.3120695948600769, "learning_rate": 4.6526059622794564e-05, "loss": 0.2296, "step": 8905 }, { "epoch": 0.48787165306904673, "grad_norm": 0.2831647992134094, "learning_rate": 4.6520989657270334e-05, "loss": 0.2479, "step": 8910 }, { "epoch": 0.4881454306521382, "grad_norm": 0.22754213213920593, "learning_rate": 4.65159196917461e-05, "loss": 0.2376, "step": 8915 }, { "epoch": 0.4884192082352297, "grad_norm": 0.22954025864601135, "learning_rate": 4.651084972622186e-05, "loss": 0.2191, "step": 8920 }, { "epoch": 0.4886929858183212, "grad_norm": 0.2138175517320633, "learning_rate": 4.650577976069763e-05, "loss": 0.2291, "step": 8925 }, { "epoch": 0.4889667634014127, "grad_norm": 0.17522908747196198, "learning_rate": 4.65007097951734e-05, "loss": 0.2283, "step": 8930 }, { "epoch": 0.4892405409845042, "grad_norm": 0.1880052238702774, "learning_rate": 4.6495639829649164e-05, "loss": 0.2271, "step": 8935 }, { "epoch": 0.4895143185675957, "grad_norm": 0.2326999008655548, "learning_rate": 4.649056986412493e-05, "loss": 0.2323, "step": 8940 }, { "epoch": 0.4897880961506872, "grad_norm": 0.1963959038257599, "learning_rate": 4.648549989860069e-05, "loss": 0.233, "step": 8945 }, { "epoch": 0.4900618737337787, "grad_norm": 0.18293268978595734, "learning_rate": 4.648042993307646e-05, "loss": 0.2283, "step": 8950 }, { "epoch": 0.4903356513168702, "grad_norm": 0.19140174984931946, "learning_rate": 4.6475359967552224e-05, "loss": 0.2289, "step": 8955 }, { "epoch": 0.4906094288999617, "grad_norm": 0.19318532943725586, "learning_rate": 4.647029000202799e-05, "loss": 0.2359, "step": 8960 }, { "epoch": 0.49088320648305317, "grad_norm": 0.21952198445796967, "learning_rate": 4.646522003650375e-05, "loss": 0.2383, "step": 8965 }, { "epoch": 0.49115698406614466, "grad_norm": 0.20746274292469025, "learning_rate": 4.646015007097952e-05, "loss": 0.2288, "step": 8970 }, { "epoch": 0.49143076164923616, "grad_norm": 0.25092899799346924, "learning_rate": 4.6455080105455284e-05, "loss": 0.232, "step": 8975 }, { "epoch": 0.49170453923232765, "grad_norm": 0.18662120401859283, "learning_rate": 4.645001013993105e-05, "loss": 0.2302, "step": 8980 }, { "epoch": 0.49197831681541915, "grad_norm": 0.1777348518371582, "learning_rate": 4.644494017440682e-05, "loss": 0.2453, "step": 8985 }, { "epoch": 0.49225209439851064, "grad_norm": 0.17343221604824066, "learning_rate": 4.643987020888258e-05, "loss": 0.2292, "step": 8990 }, { "epoch": 0.49252587198160214, "grad_norm": 0.201108917593956, "learning_rate": 4.6434800243358344e-05, "loss": 0.2289, "step": 8995 }, { "epoch": 0.49279964956469363, "grad_norm": 0.22907808423042297, "learning_rate": 4.642973027783411e-05, "loss": 0.2434, "step": 9000 }, { "epoch": 0.4930734271477851, "grad_norm": 0.18034328520298004, "learning_rate": 4.6424660312309884e-05, "loss": 0.2356, "step": 9005 }, { "epoch": 0.4933472047308766, "grad_norm": 0.1938774138689041, "learning_rate": 4.641959034678565e-05, "loss": 0.2311, "step": 9010 }, { "epoch": 0.4936209823139681, "grad_norm": 0.20899416506290436, "learning_rate": 4.641452038126141e-05, "loss": 0.2282, "step": 9015 }, { "epoch": 0.4938947598970596, "grad_norm": 0.17086200416088104, "learning_rate": 4.6409450415737174e-05, "loss": 0.2246, "step": 9020 }, { "epoch": 0.4941685374801511, "grad_norm": 0.1936352401971817, "learning_rate": 4.6404380450212944e-05, "loss": 0.2334, "step": 9025 }, { "epoch": 0.4944423150632426, "grad_norm": 0.22463706135749817, "learning_rate": 4.639931048468871e-05, "loss": 0.2316, "step": 9030 }, { "epoch": 0.4947160926463341, "grad_norm": 0.19273465871810913, "learning_rate": 4.639424051916447e-05, "loss": 0.2338, "step": 9035 }, { "epoch": 0.4949898702294256, "grad_norm": 0.18459317088127136, "learning_rate": 4.638917055364024e-05, "loss": 0.2284, "step": 9040 }, { "epoch": 0.49526364781251714, "grad_norm": 0.1845342367887497, "learning_rate": 4.6384100588116004e-05, "loss": 0.2246, "step": 9045 }, { "epoch": 0.49553742539560863, "grad_norm": 0.21081672608852386, "learning_rate": 4.637903062259177e-05, "loss": 0.2425, "step": 9050 }, { "epoch": 0.4958112029787001, "grad_norm": 0.18030881881713867, "learning_rate": 4.637396065706753e-05, "loss": 0.2289, "step": 9055 }, { "epoch": 0.4960849805617916, "grad_norm": 0.18999364972114563, "learning_rate": 4.63688906915433e-05, "loss": 0.2296, "step": 9060 }, { "epoch": 0.4963587581448831, "grad_norm": 0.16803091764450073, "learning_rate": 4.6363820726019064e-05, "loss": 0.2284, "step": 9065 }, { "epoch": 0.4966325357279746, "grad_norm": 0.2049739509820938, "learning_rate": 4.635875076049483e-05, "loss": 0.238, "step": 9070 }, { "epoch": 0.4969063133110661, "grad_norm": 0.20674407482147217, "learning_rate": 4.63536807949706e-05, "loss": 0.2259, "step": 9075 }, { "epoch": 0.4971800908941576, "grad_norm": 0.21996623277664185, "learning_rate": 4.634861082944636e-05, "loss": 0.2373, "step": 9080 }, { "epoch": 0.4974538684772491, "grad_norm": 0.23557983338832855, "learning_rate": 4.634354086392213e-05, "loss": 0.2437, "step": 9085 }, { "epoch": 0.4977276460603406, "grad_norm": 0.22787566483020782, "learning_rate": 4.6338470898397894e-05, "loss": 0.2276, "step": 9090 }, { "epoch": 0.4980014236434321, "grad_norm": 0.22377833724021912, "learning_rate": 4.633340093287366e-05, "loss": 0.2381, "step": 9095 }, { "epoch": 0.4982752012265236, "grad_norm": 0.22132577002048492, "learning_rate": 4.632833096734943e-05, "loss": 0.2277, "step": 9100 }, { "epoch": 0.4985489788096151, "grad_norm": 0.21548019349575043, "learning_rate": 4.632326100182519e-05, "loss": 0.2294, "step": 9105 }, { "epoch": 0.49882275639270657, "grad_norm": 0.28353098034858704, "learning_rate": 4.6318191036300954e-05, "loss": 0.2293, "step": 9110 }, { "epoch": 0.49909653397579806, "grad_norm": 0.23246389627456665, "learning_rate": 4.6313121070776724e-05, "loss": 0.2355, "step": 9115 }, { "epoch": 0.49937031155888956, "grad_norm": 0.21083128452301025, "learning_rate": 4.630805110525249e-05, "loss": 0.2459, "step": 9120 }, { "epoch": 0.49964408914198105, "grad_norm": 0.17948879301548004, "learning_rate": 4.630298113972825e-05, "loss": 0.2315, "step": 9125 }, { "epoch": 0.49991786672507255, "grad_norm": 0.19414186477661133, "learning_rate": 4.6297911174204014e-05, "loss": 0.2307, "step": 9130 }, { "epoch": 0.5001916443081641, "grad_norm": 0.21579481661319733, "learning_rate": 4.6292841208679784e-05, "loss": 0.241, "step": 9135 }, { "epoch": 0.5004654218912555, "grad_norm": 0.18013550341129303, "learning_rate": 4.628777124315555e-05, "loss": 0.2269, "step": 9140 }, { "epoch": 0.5007391994743471, "grad_norm": 0.20939482748508453, "learning_rate": 4.628270127763131e-05, "loss": 0.2308, "step": 9145 }, { "epoch": 0.5010129770574385, "grad_norm": 0.22245927155017853, "learning_rate": 4.627763131210708e-05, "loss": 0.2249, "step": 9150 }, { "epoch": 0.5012867546405301, "grad_norm": 0.1550302803516388, "learning_rate": 4.6272561346582844e-05, "loss": 0.2189, "step": 9155 }, { "epoch": 0.5015605322236215, "grad_norm": 0.1864713579416275, "learning_rate": 4.626749138105861e-05, "loss": 0.2247, "step": 9160 }, { "epoch": 0.5018343098067131, "grad_norm": 0.24957424402236938, "learning_rate": 4.626242141553437e-05, "loss": 0.2224, "step": 9165 }, { "epoch": 0.5021080873898045, "grad_norm": 0.2314077615737915, "learning_rate": 4.625735145001015e-05, "loss": 0.2219, "step": 9170 }, { "epoch": 0.502381864972896, "grad_norm": 0.20803697407245636, "learning_rate": 4.625228148448591e-05, "loss": 0.2357, "step": 9175 }, { "epoch": 0.5026556425559875, "grad_norm": 0.1864837408065796, "learning_rate": 4.6247211518961674e-05, "loss": 0.235, "step": 9180 }, { "epoch": 0.502929420139079, "grad_norm": 0.22972138226032257, "learning_rate": 4.624214155343744e-05, "loss": 0.233, "step": 9185 }, { "epoch": 0.5032031977221705, "grad_norm": 0.21640874445438385, "learning_rate": 4.623707158791321e-05, "loss": 0.2261, "step": 9190 }, { "epoch": 0.503476975305262, "grad_norm": 0.2021062970161438, "learning_rate": 4.623200162238897e-05, "loss": 0.2376, "step": 9195 }, { "epoch": 0.5037507528883535, "grad_norm": 0.18911723792552948, "learning_rate": 4.6226931656864734e-05, "loss": 0.2269, "step": 9200 }, { "epoch": 0.504024530471445, "grad_norm": 0.19874200224876404, "learning_rate": 4.6221861691340504e-05, "loss": 0.2267, "step": 9205 }, { "epoch": 0.5042983080545365, "grad_norm": 0.2131076604127884, "learning_rate": 4.621679172581627e-05, "loss": 0.2274, "step": 9210 }, { "epoch": 0.504572085637628, "grad_norm": 0.1812731772661209, "learning_rate": 4.621172176029203e-05, "loss": 0.2271, "step": 9215 }, { "epoch": 0.5048458632207194, "grad_norm": 0.24122348427772522, "learning_rate": 4.6206651794767794e-05, "loss": 0.2358, "step": 9220 }, { "epoch": 0.505119640803811, "grad_norm": 0.2743595242500305, "learning_rate": 4.6201581829243564e-05, "loss": 0.2301, "step": 9225 }, { "epoch": 0.5053934183869024, "grad_norm": 0.2014274150133133, "learning_rate": 4.619651186371933e-05, "loss": 0.2199, "step": 9230 }, { "epoch": 0.505667195969994, "grad_norm": 0.19368208944797516, "learning_rate": 4.619144189819509e-05, "loss": 0.2304, "step": 9235 }, { "epoch": 0.5059409735530854, "grad_norm": 0.19728490710258484, "learning_rate": 4.618637193267086e-05, "loss": 0.2314, "step": 9240 }, { "epoch": 0.506214751136177, "grad_norm": 0.23986570537090302, "learning_rate": 4.6181301967146624e-05, "loss": 0.2323, "step": 9245 }, { "epoch": 0.5064885287192684, "grad_norm": 0.1916555017232895, "learning_rate": 4.6176232001622394e-05, "loss": 0.2331, "step": 9250 }, { "epoch": 0.50676230630236, "grad_norm": 0.20122802257537842, "learning_rate": 4.617116203609816e-05, "loss": 0.2283, "step": 9255 }, { "epoch": 0.5070360838854514, "grad_norm": 0.1962350457906723, "learning_rate": 4.616609207057392e-05, "loss": 0.226, "step": 9260 }, { "epoch": 0.507309861468543, "grad_norm": 0.17710037529468536, "learning_rate": 4.616102210504969e-05, "loss": 0.2278, "step": 9265 }, { "epoch": 0.5075836390516345, "grad_norm": 0.2604007124900818, "learning_rate": 4.6155952139525454e-05, "loss": 0.2283, "step": 9270 }, { "epoch": 0.5078574166347259, "grad_norm": 0.2254844754934311, "learning_rate": 4.615088217400122e-05, "loss": 0.2225, "step": 9275 }, { "epoch": 0.5081311942178175, "grad_norm": 0.21923983097076416, "learning_rate": 4.614581220847699e-05, "loss": 0.2242, "step": 9280 }, { "epoch": 0.5084049718009089, "grad_norm": 0.267498254776001, "learning_rate": 4.614074224295275e-05, "loss": 0.2223, "step": 9285 }, { "epoch": 0.5086787493840005, "grad_norm": 0.1843288540840149, "learning_rate": 4.6135672277428514e-05, "loss": 0.2305, "step": 9290 }, { "epoch": 0.5089525269670919, "grad_norm": 0.1956331431865692, "learning_rate": 4.613060231190428e-05, "loss": 0.2233, "step": 9295 }, { "epoch": 0.5092263045501835, "grad_norm": 0.249457448720932, "learning_rate": 4.612553234638005e-05, "loss": 0.2253, "step": 9300 }, { "epoch": 0.5095000821332749, "grad_norm": 0.19375409185886383, "learning_rate": 4.612046238085581e-05, "loss": 0.2338, "step": 9305 }, { "epoch": 0.5097738597163665, "grad_norm": 0.20932242274284363, "learning_rate": 4.6115392415331574e-05, "loss": 0.2359, "step": 9310 }, { "epoch": 0.5100476372994579, "grad_norm": 0.17723460495471954, "learning_rate": 4.6110322449807344e-05, "loss": 0.2311, "step": 9315 }, { "epoch": 0.5103214148825495, "grad_norm": 0.17786629498004913, "learning_rate": 4.610525248428311e-05, "loss": 0.2301, "step": 9320 }, { "epoch": 0.5105951924656409, "grad_norm": 0.17334416508674622, "learning_rate": 4.610018251875887e-05, "loss": 0.2249, "step": 9325 }, { "epoch": 0.5108689700487324, "grad_norm": 0.17319029569625854, "learning_rate": 4.609511255323464e-05, "loss": 0.23, "step": 9330 }, { "epoch": 0.5111427476318239, "grad_norm": 0.17838436365127563, "learning_rate": 4.609004258771041e-05, "loss": 0.2281, "step": 9335 }, { "epoch": 0.5114165252149154, "grad_norm": 0.15008151531219482, "learning_rate": 4.6084972622186175e-05, "loss": 0.225, "step": 9340 }, { "epoch": 0.5116903027980069, "grad_norm": 0.17941397428512573, "learning_rate": 4.607990265666194e-05, "loss": 0.2366, "step": 9345 }, { "epoch": 0.5119640803810984, "grad_norm": 0.2163480818271637, "learning_rate": 4.60748326911377e-05, "loss": 0.2306, "step": 9350 }, { "epoch": 0.5122378579641899, "grad_norm": 0.19718529284000397, "learning_rate": 4.606976272561347e-05, "loss": 0.2298, "step": 9355 }, { "epoch": 0.5125116355472814, "grad_norm": 0.178236186504364, "learning_rate": 4.6064692760089235e-05, "loss": 0.2284, "step": 9360 }, { "epoch": 0.5127854131303728, "grad_norm": 0.18903811275959015, "learning_rate": 4.6059622794565e-05, "loss": 0.2349, "step": 9365 }, { "epoch": 0.5130591907134644, "grad_norm": 0.16627761721611023, "learning_rate": 4.605455282904077e-05, "loss": 0.2357, "step": 9370 }, { "epoch": 0.5133329682965558, "grad_norm": 0.29447492957115173, "learning_rate": 4.604948286351653e-05, "loss": 0.236, "step": 9375 }, { "epoch": 0.5136067458796474, "grad_norm": 0.274273544549942, "learning_rate": 4.6044412897992295e-05, "loss": 0.2348, "step": 9380 }, { "epoch": 0.5138805234627388, "grad_norm": 0.23329302668571472, "learning_rate": 4.603934293246806e-05, "loss": 0.232, "step": 9385 }, { "epoch": 0.5141543010458304, "grad_norm": 0.19062647223472595, "learning_rate": 4.603427296694383e-05, "loss": 0.2324, "step": 9390 }, { "epoch": 0.5144280786289218, "grad_norm": 0.19518189132213593, "learning_rate": 4.602920300141959e-05, "loss": 0.2346, "step": 9395 }, { "epoch": 0.5147018562120134, "grad_norm": 0.202829509973526, "learning_rate": 4.6024133035895355e-05, "loss": 0.2326, "step": 9400 }, { "epoch": 0.5149756337951048, "grad_norm": 0.19046735763549805, "learning_rate": 4.601906307037112e-05, "loss": 0.2263, "step": 9405 }, { "epoch": 0.5152494113781964, "grad_norm": 0.20092953741550446, "learning_rate": 4.6013993104846895e-05, "loss": 0.2318, "step": 9410 }, { "epoch": 0.5155231889612879, "grad_norm": 0.2065623253583908, "learning_rate": 4.600892313932266e-05, "loss": 0.2329, "step": 9415 }, { "epoch": 0.5157969665443793, "grad_norm": 0.18108467757701874, "learning_rate": 4.600385317379842e-05, "loss": 0.2291, "step": 9420 }, { "epoch": 0.5160707441274709, "grad_norm": 0.1787896603345871, "learning_rate": 4.5998783208274185e-05, "loss": 0.2235, "step": 9425 }, { "epoch": 0.5163445217105623, "grad_norm": 0.19466786086559296, "learning_rate": 4.5993713242749955e-05, "loss": 0.2341, "step": 9430 }, { "epoch": 0.5166182992936539, "grad_norm": 0.20728148519992828, "learning_rate": 4.598864327722572e-05, "loss": 0.2463, "step": 9435 }, { "epoch": 0.5168920768767453, "grad_norm": 0.2026294767856598, "learning_rate": 4.598357331170148e-05, "loss": 0.2196, "step": 9440 }, { "epoch": 0.5171658544598369, "grad_norm": 0.21563047170639038, "learning_rate": 4.597850334617725e-05, "loss": 0.2331, "step": 9445 }, { "epoch": 0.5174396320429283, "grad_norm": 0.2262890636920929, "learning_rate": 4.5973433380653015e-05, "loss": 0.2243, "step": 9450 }, { "epoch": 0.5177134096260199, "grad_norm": 0.21172232925891876, "learning_rate": 4.596836341512878e-05, "loss": 0.22, "step": 9455 }, { "epoch": 0.5179871872091113, "grad_norm": 0.18703441321849823, "learning_rate": 4.596329344960454e-05, "loss": 0.2248, "step": 9460 }, { "epoch": 0.5182609647922028, "grad_norm": 0.21682767570018768, "learning_rate": 4.595822348408031e-05, "loss": 0.2214, "step": 9465 }, { "epoch": 0.5185347423752943, "grad_norm": 0.19861970841884613, "learning_rate": 4.5953153518556075e-05, "loss": 0.2312, "step": 9470 }, { "epoch": 0.5188085199583858, "grad_norm": 0.21663999557495117, "learning_rate": 4.594808355303184e-05, "loss": 0.2424, "step": 9475 }, { "epoch": 0.5190822975414773, "grad_norm": 0.2600632607936859, "learning_rate": 4.594301358750761e-05, "loss": 0.2374, "step": 9480 }, { "epoch": 0.5193560751245688, "grad_norm": 0.2135033905506134, "learning_rate": 4.593794362198337e-05, "loss": 0.23, "step": 9485 }, { "epoch": 0.5196298527076603, "grad_norm": 0.1964971423149109, "learning_rate": 4.5932873656459135e-05, "loss": 0.2345, "step": 9490 }, { "epoch": 0.5199036302907518, "grad_norm": 0.17097006738185883, "learning_rate": 4.5927803690934905e-05, "loss": 0.2241, "step": 9495 }, { "epoch": 0.5201774078738433, "grad_norm": 0.18684616684913635, "learning_rate": 4.5922733725410675e-05, "loss": 0.2297, "step": 9500 }, { "epoch": 0.5204511854569348, "grad_norm": 0.16825103759765625, "learning_rate": 4.591766375988644e-05, "loss": 0.2369, "step": 9505 }, { "epoch": 0.5207249630400262, "grad_norm": 0.2221442312002182, "learning_rate": 4.59125937943622e-05, "loss": 0.2294, "step": 9510 }, { "epoch": 0.5209987406231178, "grad_norm": 0.18489907681941986, "learning_rate": 4.5907523828837965e-05, "loss": 0.2327, "step": 9515 }, { "epoch": 0.5212725182062092, "grad_norm": 0.20358653366565704, "learning_rate": 4.5902453863313735e-05, "loss": 0.2274, "step": 9520 }, { "epoch": 0.5215462957893008, "grad_norm": 0.2028941959142685, "learning_rate": 4.58973838977895e-05, "loss": 0.2343, "step": 9525 }, { "epoch": 0.5218200733723922, "grad_norm": 0.17307457327842712, "learning_rate": 4.589231393226526e-05, "loss": 0.2253, "step": 9530 }, { "epoch": 0.5220938509554838, "grad_norm": 0.19192498922348022, "learning_rate": 4.5887243966741025e-05, "loss": 0.2244, "step": 9535 }, { "epoch": 0.5223676285385752, "grad_norm": 0.1593315601348877, "learning_rate": 4.5882174001216795e-05, "loss": 0.2302, "step": 9540 }, { "epoch": 0.5226414061216668, "grad_norm": 0.25648611783981323, "learning_rate": 4.587710403569256e-05, "loss": 0.2329, "step": 9545 }, { "epoch": 0.5229151837047582, "grad_norm": 0.1906779259443283, "learning_rate": 4.587203407016832e-05, "loss": 0.2193, "step": 9550 }, { "epoch": 0.5231889612878498, "grad_norm": 0.20343196392059326, "learning_rate": 4.586696410464409e-05, "loss": 0.2297, "step": 9555 }, { "epoch": 0.5234627388709413, "grad_norm": 0.2067863643169403, "learning_rate": 4.5861894139119855e-05, "loss": 0.235, "step": 9560 }, { "epoch": 0.5237365164540327, "grad_norm": 0.18613635003566742, "learning_rate": 4.585682417359562e-05, "loss": 0.2171, "step": 9565 }, { "epoch": 0.5240102940371243, "grad_norm": 0.18287092447280884, "learning_rate": 4.585175420807138e-05, "loss": 0.236, "step": 9570 }, { "epoch": 0.5242840716202157, "grad_norm": 0.22411835193634033, "learning_rate": 4.584668424254716e-05, "loss": 0.2385, "step": 9575 }, { "epoch": 0.5245578492033073, "grad_norm": 0.162009596824646, "learning_rate": 4.584161427702292e-05, "loss": 0.2383, "step": 9580 }, { "epoch": 0.5248316267863987, "grad_norm": 0.1860199123620987, "learning_rate": 4.5836544311498685e-05, "loss": 0.2432, "step": 9585 }, { "epoch": 0.5251054043694903, "grad_norm": 0.19504518806934357, "learning_rate": 4.583147434597445e-05, "loss": 0.2333, "step": 9590 }, { "epoch": 0.5253791819525817, "grad_norm": 0.1895214021205902, "learning_rate": 4.582640438045022e-05, "loss": 0.2368, "step": 9595 }, { "epoch": 0.5256529595356733, "grad_norm": 0.22956277430057526, "learning_rate": 4.582133441492598e-05, "loss": 0.2308, "step": 9600 }, { "epoch": 0.5259267371187647, "grad_norm": 0.1856480836868286, "learning_rate": 4.5816264449401745e-05, "loss": 0.2396, "step": 9605 }, { "epoch": 0.5262005147018562, "grad_norm": 0.1907370537519455, "learning_rate": 4.5811194483877515e-05, "loss": 0.2285, "step": 9610 }, { "epoch": 0.5264742922849477, "grad_norm": 0.22448506951332092, "learning_rate": 4.580612451835328e-05, "loss": 0.226, "step": 9615 }, { "epoch": 0.5267480698680392, "grad_norm": 0.21286046504974365, "learning_rate": 4.580105455282904e-05, "loss": 0.2298, "step": 9620 }, { "epoch": 0.5270218474511307, "grad_norm": 0.1931898593902588, "learning_rate": 4.5795984587304805e-05, "loss": 0.2369, "step": 9625 }, { "epoch": 0.5272956250342222, "grad_norm": 0.170674130320549, "learning_rate": 4.5790914621780575e-05, "loss": 0.2294, "step": 9630 }, { "epoch": 0.5275694026173137, "grad_norm": 0.2039734274148941, "learning_rate": 4.578584465625634e-05, "loss": 0.2361, "step": 9635 }, { "epoch": 0.5278431802004052, "grad_norm": 0.20076465606689453, "learning_rate": 4.57807746907321e-05, "loss": 0.2346, "step": 9640 }, { "epoch": 0.5281169577834967, "grad_norm": 0.18371650576591492, "learning_rate": 4.577570472520787e-05, "loss": 0.2357, "step": 9645 }, { "epoch": 0.5283907353665882, "grad_norm": 0.22335699200630188, "learning_rate": 4.5770634759683635e-05, "loss": 0.2368, "step": 9650 }, { "epoch": 0.5286645129496796, "grad_norm": 0.20312844216823578, "learning_rate": 4.5765564794159405e-05, "loss": 0.2312, "step": 9655 }, { "epoch": 0.5289382905327712, "grad_norm": 0.22026321291923523, "learning_rate": 4.576049482863517e-05, "loss": 0.2296, "step": 9660 }, { "epoch": 0.5292120681158626, "grad_norm": 0.2011476308107376, "learning_rate": 4.575542486311093e-05, "loss": 0.231, "step": 9665 }, { "epoch": 0.5294858456989542, "grad_norm": 0.2444762885570526, "learning_rate": 4.57503548975867e-05, "loss": 0.2333, "step": 9670 }, { "epoch": 0.5297596232820456, "grad_norm": 0.2241465300321579, "learning_rate": 4.5745284932062465e-05, "loss": 0.2328, "step": 9675 }, { "epoch": 0.5300334008651372, "grad_norm": 0.2155475914478302, "learning_rate": 4.574021496653823e-05, "loss": 0.2339, "step": 9680 }, { "epoch": 0.5303071784482286, "grad_norm": 0.18477730453014374, "learning_rate": 4.5735145001014e-05, "loss": 0.2219, "step": 9685 }, { "epoch": 0.5305809560313202, "grad_norm": 0.20591960847377777, "learning_rate": 4.573007503548976e-05, "loss": 0.2274, "step": 9690 }, { "epoch": 0.5308547336144116, "grad_norm": 0.22905372083187103, "learning_rate": 4.5725005069965525e-05, "loss": 0.2272, "step": 9695 }, { "epoch": 0.5311285111975032, "grad_norm": 0.206644669175148, "learning_rate": 4.571993510444129e-05, "loss": 0.2439, "step": 9700 }, { "epoch": 0.5314022887805947, "grad_norm": 0.19342611730098724, "learning_rate": 4.571486513891706e-05, "loss": 0.2395, "step": 9705 }, { "epoch": 0.5316760663636861, "grad_norm": 0.19742406904697418, "learning_rate": 4.570979517339282e-05, "loss": 0.2298, "step": 9710 }, { "epoch": 0.5319498439467777, "grad_norm": 0.18697096407413483, "learning_rate": 4.5704725207868585e-05, "loss": 0.2369, "step": 9715 }, { "epoch": 0.5322236215298691, "grad_norm": 0.18024471402168274, "learning_rate": 4.5699655242344355e-05, "loss": 0.2321, "step": 9720 }, { "epoch": 0.5324973991129607, "grad_norm": 0.27363210916519165, "learning_rate": 4.569458527682012e-05, "loss": 0.2229, "step": 9725 }, { "epoch": 0.5327711766960521, "grad_norm": 0.15696777403354645, "learning_rate": 4.568951531129588e-05, "loss": 0.2335, "step": 9730 }, { "epoch": 0.5330449542791437, "grad_norm": 0.26674070954322815, "learning_rate": 4.568444534577165e-05, "loss": 0.23, "step": 9735 }, { "epoch": 0.5333187318622351, "grad_norm": 0.2041003704071045, "learning_rate": 4.567937538024742e-05, "loss": 0.2312, "step": 9740 }, { "epoch": 0.5335925094453267, "grad_norm": 0.19300836324691772, "learning_rate": 4.5674305414723185e-05, "loss": 0.2334, "step": 9745 }, { "epoch": 0.5338662870284181, "grad_norm": 0.19260132312774658, "learning_rate": 4.566923544919895e-05, "loss": 0.2299, "step": 9750 }, { "epoch": 0.5341400646115096, "grad_norm": 0.16214153170585632, "learning_rate": 4.566416548367471e-05, "loss": 0.2343, "step": 9755 }, { "epoch": 0.5344138421946011, "grad_norm": 0.20341068506240845, "learning_rate": 4.565909551815048e-05, "loss": 0.2324, "step": 9760 }, { "epoch": 0.5346876197776926, "grad_norm": 0.18558748066425323, "learning_rate": 4.5654025552626245e-05, "loss": 0.2263, "step": 9765 }, { "epoch": 0.5349613973607841, "grad_norm": 0.19186808168888092, "learning_rate": 4.564895558710201e-05, "loss": 0.2339, "step": 9770 }, { "epoch": 0.5352351749438756, "grad_norm": 0.2577619254589081, "learning_rate": 4.564388562157778e-05, "loss": 0.2254, "step": 9775 }, { "epoch": 0.5355089525269671, "grad_norm": 0.18825596570968628, "learning_rate": 4.563881565605354e-05, "loss": 0.2424, "step": 9780 }, { "epoch": 0.5357827301100586, "grad_norm": 0.17852799594402313, "learning_rate": 4.5633745690529305e-05, "loss": 0.2362, "step": 9785 }, { "epoch": 0.53605650769315, "grad_norm": 0.18725034594535828, "learning_rate": 4.562867572500507e-05, "loss": 0.2258, "step": 9790 }, { "epoch": 0.5363302852762416, "grad_norm": 0.2341981679201126, "learning_rate": 4.562360575948084e-05, "loss": 0.233, "step": 9795 }, { "epoch": 0.536604062859333, "grad_norm": 0.18345318734645844, "learning_rate": 4.56185357939566e-05, "loss": 0.234, "step": 9800 }, { "epoch": 0.5368778404424246, "grad_norm": 0.2152281403541565, "learning_rate": 4.5613465828432365e-05, "loss": 0.2274, "step": 9805 }, { "epoch": 0.537151618025516, "grad_norm": 0.16906319558620453, "learning_rate": 4.5608395862908135e-05, "loss": 0.2396, "step": 9810 }, { "epoch": 0.5374253956086076, "grad_norm": 0.20785443484783173, "learning_rate": 4.56033258973839e-05, "loss": 0.23, "step": 9815 }, { "epoch": 0.537699173191699, "grad_norm": 0.18740229308605194, "learning_rate": 4.559825593185967e-05, "loss": 0.2308, "step": 9820 }, { "epoch": 0.5379729507747906, "grad_norm": 0.2248396873474121, "learning_rate": 4.559318596633543e-05, "loss": 0.2333, "step": 9825 }, { "epoch": 0.538246728357882, "grad_norm": 0.19731657207012177, "learning_rate": 4.5588116000811195e-05, "loss": 0.2355, "step": 9830 }, { "epoch": 0.5385205059409736, "grad_norm": 0.19875837862491608, "learning_rate": 4.5583046035286965e-05, "loss": 0.2381, "step": 9835 }, { "epoch": 0.538794283524065, "grad_norm": 0.2177029252052307, "learning_rate": 4.557797606976273e-05, "loss": 0.2229, "step": 9840 }, { "epoch": 0.5390680611071565, "grad_norm": 0.17590254545211792, "learning_rate": 4.557290610423849e-05, "loss": 0.2292, "step": 9845 }, { "epoch": 0.539341838690248, "grad_norm": 0.18052154779434204, "learning_rate": 4.556783613871426e-05, "loss": 0.2198, "step": 9850 }, { "epoch": 0.5396156162733395, "grad_norm": 0.27897000312805176, "learning_rate": 4.5562766173190025e-05, "loss": 0.227, "step": 9855 }, { "epoch": 0.5398893938564311, "grad_norm": 0.2047768384218216, "learning_rate": 4.555769620766579e-05, "loss": 0.2356, "step": 9860 }, { "epoch": 0.5401631714395225, "grad_norm": 0.205315500497818, "learning_rate": 4.555262624214155e-05, "loss": 0.2377, "step": 9865 }, { "epoch": 0.5404369490226141, "grad_norm": 0.2871553301811218, "learning_rate": 4.554755627661732e-05, "loss": 0.2454, "step": 9870 }, { "epoch": 0.5407107266057055, "grad_norm": 0.2153303623199463, "learning_rate": 4.5542486311093085e-05, "loss": 0.2217, "step": 9875 }, { "epoch": 0.5409845041887971, "grad_norm": 0.20650897920131683, "learning_rate": 4.553741634556885e-05, "loss": 0.2319, "step": 9880 }, { "epoch": 0.5412582817718885, "grad_norm": 0.18298819661140442, "learning_rate": 4.553234638004462e-05, "loss": 0.2309, "step": 9885 }, { "epoch": 0.54153205935498, "grad_norm": 0.24492575228214264, "learning_rate": 4.552727641452038e-05, "loss": 0.2296, "step": 9890 }, { "epoch": 0.5418058369380715, "grad_norm": 0.22284196317195892, "learning_rate": 4.5522206448996145e-05, "loss": 0.2343, "step": 9895 }, { "epoch": 0.542079614521163, "grad_norm": 0.2527672052383423, "learning_rate": 4.5517136483471915e-05, "loss": 0.2319, "step": 9900 }, { "epoch": 0.5423533921042545, "grad_norm": 0.19754630327224731, "learning_rate": 4.5512066517947685e-05, "loss": 0.2371, "step": 9905 }, { "epoch": 0.542627169687346, "grad_norm": 0.18797925114631653, "learning_rate": 4.550699655242345e-05, "loss": 0.227, "step": 9910 }, { "epoch": 0.5429009472704375, "grad_norm": 0.20109735429286957, "learning_rate": 4.550192658689921e-05, "loss": 0.2395, "step": 9915 }, { "epoch": 0.543174724853529, "grad_norm": 0.21327227354049683, "learning_rate": 4.5496856621374975e-05, "loss": 0.2254, "step": 9920 }, { "epoch": 0.5434485024366205, "grad_norm": 0.19030626118183136, "learning_rate": 4.5491786655850745e-05, "loss": 0.2284, "step": 9925 }, { "epoch": 0.543722280019712, "grad_norm": 0.1959933191537857, "learning_rate": 4.548671669032651e-05, "loss": 0.2286, "step": 9930 }, { "epoch": 0.5439960576028035, "grad_norm": 0.19309046864509583, "learning_rate": 4.548164672480227e-05, "loss": 0.2351, "step": 9935 }, { "epoch": 0.544269835185895, "grad_norm": 0.17482608556747437, "learning_rate": 4.547657675927804e-05, "loss": 0.2446, "step": 9940 }, { "epoch": 0.5445436127689864, "grad_norm": 0.17677804827690125, "learning_rate": 4.5471506793753805e-05, "loss": 0.2314, "step": 9945 }, { "epoch": 0.544817390352078, "grad_norm": 0.19431227445602417, "learning_rate": 4.546643682822957e-05, "loss": 0.2358, "step": 9950 }, { "epoch": 0.5450911679351694, "grad_norm": 0.16539452970027924, "learning_rate": 4.546136686270533e-05, "loss": 0.2336, "step": 9955 }, { "epoch": 0.545364945518261, "grad_norm": 0.1791263371706009, "learning_rate": 4.54562968971811e-05, "loss": 0.2297, "step": 9960 }, { "epoch": 0.5456387231013524, "grad_norm": 0.19640663266181946, "learning_rate": 4.5451226931656865e-05, "loss": 0.225, "step": 9965 }, { "epoch": 0.545912500684444, "grad_norm": 0.1758813112974167, "learning_rate": 4.544615696613263e-05, "loss": 0.2324, "step": 9970 }, { "epoch": 0.5461862782675354, "grad_norm": 0.17937980592250824, "learning_rate": 4.54410870006084e-05, "loss": 0.2254, "step": 9975 }, { "epoch": 0.546460055850627, "grad_norm": 0.18707658350467682, "learning_rate": 4.543601703508417e-05, "loss": 0.2405, "step": 9980 }, { "epoch": 0.5467338334337184, "grad_norm": 0.1590159684419632, "learning_rate": 4.543094706955993e-05, "loss": 0.2283, "step": 9985 }, { "epoch": 0.54700761101681, "grad_norm": 0.14923925697803497, "learning_rate": 4.5425877104035696e-05, "loss": 0.2315, "step": 9990 }, { "epoch": 0.5472813885999014, "grad_norm": 0.19022443890571594, "learning_rate": 4.542080713851146e-05, "loss": 0.2284, "step": 9995 }, { "epoch": 0.5475551661829929, "grad_norm": 0.17103132605552673, "learning_rate": 4.541573717298723e-05, "loss": 0.233, "step": 10000 }, { "epoch": 0.5478289437660845, "grad_norm": 0.1837172508239746, "learning_rate": 4.541066720746299e-05, "loss": 0.2245, "step": 10005 }, { "epoch": 0.5481027213491759, "grad_norm": 0.1892271637916565, "learning_rate": 4.5405597241938756e-05, "loss": 0.2306, "step": 10010 }, { "epoch": 0.5483764989322675, "grad_norm": 0.19455961883068085, "learning_rate": 4.5400527276414526e-05, "loss": 0.2323, "step": 10015 }, { "epoch": 0.5486502765153589, "grad_norm": 0.2107716053724289, "learning_rate": 4.539545731089029e-05, "loss": 0.2442, "step": 10020 }, { "epoch": 0.5489240540984505, "grad_norm": 0.1672796756029129, "learning_rate": 4.539038734536605e-05, "loss": 0.2389, "step": 10025 }, { "epoch": 0.5491978316815419, "grad_norm": 0.2139965295791626, "learning_rate": 4.5385317379841815e-05, "loss": 0.2229, "step": 10030 }, { "epoch": 0.5494716092646335, "grad_norm": 0.20346131920814514, "learning_rate": 4.5380247414317586e-05, "loss": 0.2365, "step": 10035 }, { "epoch": 0.5497453868477249, "grad_norm": 0.25116243958473206, "learning_rate": 4.537517744879335e-05, "loss": 0.2345, "step": 10040 }, { "epoch": 0.5500191644308164, "grad_norm": 0.19420142471790314, "learning_rate": 4.537010748326911e-05, "loss": 0.2245, "step": 10045 }, { "epoch": 0.5502929420139079, "grad_norm": 0.1838400661945343, "learning_rate": 4.536503751774488e-05, "loss": 0.2286, "step": 10050 }, { "epoch": 0.5505667195969994, "grad_norm": 0.19564558565616608, "learning_rate": 4.5359967552220646e-05, "loss": 0.2351, "step": 10055 }, { "epoch": 0.5508404971800909, "grad_norm": 0.172449991106987, "learning_rate": 4.535489758669641e-05, "loss": 0.2315, "step": 10060 }, { "epoch": 0.5511142747631824, "grad_norm": 0.19794787466526031, "learning_rate": 4.534982762117218e-05, "loss": 0.2388, "step": 10065 }, { "epoch": 0.5513880523462739, "grad_norm": 0.1726180464029312, "learning_rate": 4.534475765564795e-05, "loss": 0.2269, "step": 10070 }, { "epoch": 0.5516618299293654, "grad_norm": 0.19469371438026428, "learning_rate": 4.533968769012371e-05, "loss": 0.231, "step": 10075 }, { "epoch": 0.5519356075124568, "grad_norm": 0.1918666660785675, "learning_rate": 4.5334617724599476e-05, "loss": 0.2252, "step": 10080 }, { "epoch": 0.5522093850955484, "grad_norm": 0.2632257640361786, "learning_rate": 4.532954775907524e-05, "loss": 0.2215, "step": 10085 }, { "epoch": 0.5524831626786398, "grad_norm": 0.1608302891254425, "learning_rate": 4.532447779355101e-05, "loss": 0.2315, "step": 10090 }, { "epoch": 0.5527569402617314, "grad_norm": 0.20610854029655457, "learning_rate": 4.531940782802677e-05, "loss": 0.2323, "step": 10095 }, { "epoch": 0.5530307178448228, "grad_norm": 0.1593005359172821, "learning_rate": 4.5314337862502536e-05, "loss": 0.2276, "step": 10100 }, { "epoch": 0.5533044954279144, "grad_norm": 0.21307715773582458, "learning_rate": 4.5309267896978306e-05, "loss": 0.2313, "step": 10105 }, { "epoch": 0.5535782730110058, "grad_norm": 0.17618845403194427, "learning_rate": 4.530419793145407e-05, "loss": 0.2318, "step": 10110 }, { "epoch": 0.5538520505940974, "grad_norm": 0.18358586728572845, "learning_rate": 4.529912796592983e-05, "loss": 0.2241, "step": 10115 }, { "epoch": 0.5541258281771888, "grad_norm": 0.18426862359046936, "learning_rate": 4.5294058000405596e-05, "loss": 0.2399, "step": 10120 }, { "epoch": 0.5543996057602804, "grad_norm": 0.22083494067192078, "learning_rate": 4.5288988034881366e-05, "loss": 0.2294, "step": 10125 }, { "epoch": 0.5546733833433718, "grad_norm": 0.19534321129322052, "learning_rate": 4.528391806935713e-05, "loss": 0.2244, "step": 10130 }, { "epoch": 0.5549471609264633, "grad_norm": 0.20165061950683594, "learning_rate": 4.527884810383289e-05, "loss": 0.2263, "step": 10135 }, { "epoch": 0.5552209385095548, "grad_norm": 0.17788872122764587, "learning_rate": 4.5273778138308656e-05, "loss": 0.2305, "step": 10140 }, { "epoch": 0.5554947160926463, "grad_norm": 0.17780344188213348, "learning_rate": 4.526870817278443e-05, "loss": 0.2312, "step": 10145 }, { "epoch": 0.5557684936757379, "grad_norm": 0.16994960606098175, "learning_rate": 4.5263638207260196e-05, "loss": 0.2276, "step": 10150 }, { "epoch": 0.5560422712588293, "grad_norm": 0.17144180834293365, "learning_rate": 4.525856824173596e-05, "loss": 0.2259, "step": 10155 }, { "epoch": 0.5563160488419209, "grad_norm": 0.18713495135307312, "learning_rate": 4.525349827621172e-05, "loss": 0.2216, "step": 10160 }, { "epoch": 0.5565898264250123, "grad_norm": 0.1947137713432312, "learning_rate": 4.524842831068749e-05, "loss": 0.2455, "step": 10165 }, { "epoch": 0.5568636040081039, "grad_norm": 0.16640113294124603, "learning_rate": 4.5243358345163256e-05, "loss": 0.225, "step": 10170 }, { "epoch": 0.5571373815911953, "grad_norm": 0.19436217844486237, "learning_rate": 4.523828837963902e-05, "loss": 0.2244, "step": 10175 }, { "epoch": 0.5574111591742869, "grad_norm": 0.21353991329669952, "learning_rate": 4.523321841411479e-05, "loss": 0.2324, "step": 10180 }, { "epoch": 0.5576849367573783, "grad_norm": 0.19330233335494995, "learning_rate": 4.522814844859055e-05, "loss": 0.2323, "step": 10185 }, { "epoch": 0.5579587143404698, "grad_norm": 0.21747291088104248, "learning_rate": 4.5223078483066316e-05, "loss": 0.2322, "step": 10190 }, { "epoch": 0.5582324919235613, "grad_norm": 0.21911334991455078, "learning_rate": 4.521800851754208e-05, "loss": 0.2299, "step": 10195 }, { "epoch": 0.5585062695066528, "grad_norm": 0.2145731896162033, "learning_rate": 4.521293855201785e-05, "loss": 0.2283, "step": 10200 }, { "epoch": 0.5587800470897443, "grad_norm": 0.16863392293453217, "learning_rate": 4.520786858649361e-05, "loss": 0.2243, "step": 10205 }, { "epoch": 0.5590538246728358, "grad_norm": 0.27203768491744995, "learning_rate": 4.5202798620969376e-05, "loss": 0.2385, "step": 10210 }, { "epoch": 0.5593276022559273, "grad_norm": 0.19205109775066376, "learning_rate": 4.5197728655445146e-05, "loss": 0.2323, "step": 10215 }, { "epoch": 0.5596013798390188, "grad_norm": 0.2170315384864807, "learning_rate": 4.519265868992091e-05, "loss": 0.2397, "step": 10220 }, { "epoch": 0.5598751574221102, "grad_norm": 0.19155322015285492, "learning_rate": 4.518758872439668e-05, "loss": 0.2308, "step": 10225 }, { "epoch": 0.5601489350052018, "grad_norm": 0.20403547585010529, "learning_rate": 4.518251875887244e-05, "loss": 0.2325, "step": 10230 }, { "epoch": 0.5604227125882932, "grad_norm": 0.20610113441944122, "learning_rate": 4.517744879334821e-05, "loss": 0.232, "step": 10235 }, { "epoch": 0.5606964901713848, "grad_norm": 0.20202821493148804, "learning_rate": 4.5172378827823976e-05, "loss": 0.2247, "step": 10240 }, { "epoch": 0.5609702677544762, "grad_norm": 0.17200767993927002, "learning_rate": 4.516730886229974e-05, "loss": 0.2341, "step": 10245 }, { "epoch": 0.5612440453375678, "grad_norm": 0.19963662326335907, "learning_rate": 4.51622388967755e-05, "loss": 0.217, "step": 10250 }, { "epoch": 0.5615178229206592, "grad_norm": 0.18714234232902527, "learning_rate": 4.515716893125127e-05, "loss": 0.2296, "step": 10255 }, { "epoch": 0.5617916005037508, "grad_norm": 0.1926690638065338, "learning_rate": 4.5152098965727036e-05, "loss": 0.2335, "step": 10260 }, { "epoch": 0.5620653780868422, "grad_norm": 0.18100199103355408, "learning_rate": 4.51470290002028e-05, "loss": 0.2327, "step": 10265 }, { "epoch": 0.5623391556699338, "grad_norm": 0.18577836453914642, "learning_rate": 4.514195903467856e-05, "loss": 0.2394, "step": 10270 }, { "epoch": 0.5626129332530252, "grad_norm": 0.20483338832855225, "learning_rate": 4.513688906915433e-05, "loss": 0.2363, "step": 10275 }, { "epoch": 0.5628867108361167, "grad_norm": 0.16135315597057343, "learning_rate": 4.5131819103630096e-05, "loss": 0.2293, "step": 10280 }, { "epoch": 0.5631604884192082, "grad_norm": 0.18408535420894623, "learning_rate": 4.512674913810586e-05, "loss": 0.2232, "step": 10285 }, { "epoch": 0.5634342660022997, "grad_norm": 0.19041064381599426, "learning_rate": 4.512167917258163e-05, "loss": 0.2428, "step": 10290 }, { "epoch": 0.5637080435853913, "grad_norm": 0.1750362366437912, "learning_rate": 4.511660920705739e-05, "loss": 0.2327, "step": 10295 }, { "epoch": 0.5639818211684827, "grad_norm": 0.17659784853458405, "learning_rate": 4.5111539241533156e-05, "loss": 0.2387, "step": 10300 }, { "epoch": 0.5642555987515743, "grad_norm": 0.18090498447418213, "learning_rate": 4.5106469276008926e-05, "loss": 0.2353, "step": 10305 }, { "epoch": 0.5645293763346657, "grad_norm": 0.24036146700382233, "learning_rate": 4.5101399310484696e-05, "loss": 0.2318, "step": 10310 }, { "epoch": 0.5648031539177573, "grad_norm": 0.18700793385505676, "learning_rate": 4.509632934496046e-05, "loss": 0.2338, "step": 10315 }, { "epoch": 0.5650769315008487, "grad_norm": 0.18535378575325012, "learning_rate": 4.509125937943622e-05, "loss": 0.2322, "step": 10320 }, { "epoch": 0.5653507090839403, "grad_norm": 0.19410809874534607, "learning_rate": 4.5086189413911986e-05, "loss": 0.2321, "step": 10325 }, { "epoch": 0.5656244866670317, "grad_norm": 0.19061578810214996, "learning_rate": 4.5081119448387756e-05, "loss": 0.2359, "step": 10330 }, { "epoch": 0.5658982642501232, "grad_norm": 0.2119346410036087, "learning_rate": 4.507604948286352e-05, "loss": 0.2281, "step": 10335 }, { "epoch": 0.5661720418332147, "grad_norm": 0.17522868514060974, "learning_rate": 4.507097951733928e-05, "loss": 0.2375, "step": 10340 }, { "epoch": 0.5664458194163062, "grad_norm": 0.1772579848766327, "learning_rate": 4.506590955181505e-05, "loss": 0.2303, "step": 10345 }, { "epoch": 0.5667195969993977, "grad_norm": 0.14460468292236328, "learning_rate": 4.5060839586290816e-05, "loss": 0.2271, "step": 10350 }, { "epoch": 0.5669933745824892, "grad_norm": 0.17525862157344818, "learning_rate": 4.505576962076658e-05, "loss": 0.2248, "step": 10355 }, { "epoch": 0.5672671521655807, "grad_norm": 0.17471204698085785, "learning_rate": 4.505069965524234e-05, "loss": 0.2404, "step": 10360 }, { "epoch": 0.5675409297486722, "grad_norm": 0.15856097638607025, "learning_rate": 4.504562968971811e-05, "loss": 0.2347, "step": 10365 }, { "epoch": 0.5678147073317636, "grad_norm": 0.1967550367116928, "learning_rate": 4.5040559724193876e-05, "loss": 0.2198, "step": 10370 }, { "epoch": 0.5680884849148552, "grad_norm": 0.18242046236991882, "learning_rate": 4.503548975866964e-05, "loss": 0.2333, "step": 10375 }, { "epoch": 0.5683622624979466, "grad_norm": 0.18917393684387207, "learning_rate": 4.503041979314541e-05, "loss": 0.2168, "step": 10380 }, { "epoch": 0.5686360400810382, "grad_norm": 0.1818486601114273, "learning_rate": 4.502534982762117e-05, "loss": 0.2323, "step": 10385 }, { "epoch": 0.5689098176641296, "grad_norm": 0.17940452694892883, "learning_rate": 4.502027986209694e-05, "loss": 0.2259, "step": 10390 }, { "epoch": 0.5691835952472212, "grad_norm": 0.25655919313430786, "learning_rate": 4.5015209896572706e-05, "loss": 0.2317, "step": 10395 }, { "epoch": 0.5694573728303126, "grad_norm": 0.18075695633888245, "learning_rate": 4.501013993104847e-05, "loss": 0.2339, "step": 10400 }, { "epoch": 0.5697311504134042, "grad_norm": 0.22748388350009918, "learning_rate": 4.500506996552424e-05, "loss": 0.2185, "step": 10405 }, { "epoch": 0.5700049279964956, "grad_norm": 0.20984961092472076, "learning_rate": 4.5e-05, "loss": 0.2331, "step": 10410 }, { "epoch": 0.5702787055795872, "grad_norm": 0.20470203459262848, "learning_rate": 4.4994930034475766e-05, "loss": 0.2337, "step": 10415 }, { "epoch": 0.5705524831626786, "grad_norm": 0.19205805659294128, "learning_rate": 4.4989860068951536e-05, "loss": 0.2335, "step": 10420 }, { "epoch": 0.5708262607457701, "grad_norm": 0.21229764819145203, "learning_rate": 4.49847901034273e-05, "loss": 0.2343, "step": 10425 }, { "epoch": 0.5711000383288616, "grad_norm": 0.15418332815170288, "learning_rate": 4.497972013790306e-05, "loss": 0.2209, "step": 10430 }, { "epoch": 0.5713738159119531, "grad_norm": 0.208919957280159, "learning_rate": 4.4974650172378826e-05, "loss": 0.2247, "step": 10435 }, { "epoch": 0.5716475934950446, "grad_norm": 0.2093200981616974, "learning_rate": 4.4969580206854596e-05, "loss": 0.229, "step": 10440 }, { "epoch": 0.5719213710781361, "grad_norm": 0.1708718091249466, "learning_rate": 4.496451024133036e-05, "loss": 0.2302, "step": 10445 }, { "epoch": 0.5721951486612277, "grad_norm": 0.1947043538093567, "learning_rate": 4.495944027580612e-05, "loss": 0.2283, "step": 10450 }, { "epoch": 0.5724689262443191, "grad_norm": 0.22254595160484314, "learning_rate": 4.495437031028189e-05, "loss": 0.2295, "step": 10455 }, { "epoch": 0.5727427038274107, "grad_norm": 0.19786673784255981, "learning_rate": 4.4949300344757656e-05, "loss": 0.2235, "step": 10460 }, { "epoch": 0.5730164814105021, "grad_norm": 0.21355201303958893, "learning_rate": 4.494423037923342e-05, "loss": 0.2309, "step": 10465 }, { "epoch": 0.5732902589935936, "grad_norm": 0.21227599680423737, "learning_rate": 4.493916041370919e-05, "loss": 0.2241, "step": 10470 }, { "epoch": 0.5735640365766851, "grad_norm": 0.19992993772029877, "learning_rate": 4.493409044818496e-05, "loss": 0.2263, "step": 10475 }, { "epoch": 0.5738378141597766, "grad_norm": 0.17900751531124115, "learning_rate": 4.492902048266072e-05, "loss": 0.2306, "step": 10480 }, { "epoch": 0.5741115917428681, "grad_norm": 0.18372401595115662, "learning_rate": 4.4923950517136486e-05, "loss": 0.2328, "step": 10485 }, { "epoch": 0.5743853693259596, "grad_norm": 0.2037709802389145, "learning_rate": 4.491888055161225e-05, "loss": 0.2244, "step": 10490 }, { "epoch": 0.5746591469090511, "grad_norm": 0.2181611806154251, "learning_rate": 4.491381058608802e-05, "loss": 0.2331, "step": 10495 }, { "epoch": 0.5749329244921426, "grad_norm": 0.20689339935779572, "learning_rate": 4.490874062056378e-05, "loss": 0.2323, "step": 10500 }, { "epoch": 0.575206702075234, "grad_norm": 0.17578962445259094, "learning_rate": 4.4903670655039546e-05, "loss": 0.23, "step": 10505 }, { "epoch": 0.5754804796583256, "grad_norm": 0.16643524169921875, "learning_rate": 4.4898600689515316e-05, "loss": 0.2283, "step": 10510 }, { "epoch": 0.575754257241417, "grad_norm": 0.2207583338022232, "learning_rate": 4.489353072399108e-05, "loss": 0.23, "step": 10515 }, { "epoch": 0.5760280348245086, "grad_norm": 0.2082028090953827, "learning_rate": 4.488846075846684e-05, "loss": 0.2464, "step": 10520 }, { "epoch": 0.5763018124076, "grad_norm": 0.25237616896629333, "learning_rate": 4.4883390792942606e-05, "loss": 0.2195, "step": 10525 }, { "epoch": 0.5765755899906916, "grad_norm": 0.14823020994663239, "learning_rate": 4.4878320827418376e-05, "loss": 0.2279, "step": 10530 }, { "epoch": 0.576849367573783, "grad_norm": 0.18786273896694183, "learning_rate": 4.487325086189414e-05, "loss": 0.2308, "step": 10535 }, { "epoch": 0.5771231451568746, "grad_norm": 0.18445166945457458, "learning_rate": 4.48681808963699e-05, "loss": 0.2255, "step": 10540 }, { "epoch": 0.577396922739966, "grad_norm": 0.17645341157913208, "learning_rate": 4.486311093084567e-05, "loss": 0.2158, "step": 10545 }, { "epoch": 0.5776707003230576, "grad_norm": 0.20915880799293518, "learning_rate": 4.485804096532144e-05, "loss": 0.2241, "step": 10550 }, { "epoch": 0.577944477906149, "grad_norm": 0.1873706579208374, "learning_rate": 4.4852970999797206e-05, "loss": 0.2258, "step": 10555 }, { "epoch": 0.5782182554892406, "grad_norm": 0.17092952132225037, "learning_rate": 4.484790103427297e-05, "loss": 0.2226, "step": 10560 }, { "epoch": 0.578492033072332, "grad_norm": 0.19597697257995605, "learning_rate": 4.484283106874873e-05, "loss": 0.2224, "step": 10565 }, { "epoch": 0.5787658106554235, "grad_norm": 0.20534932613372803, "learning_rate": 4.48377611032245e-05, "loss": 0.228, "step": 10570 }, { "epoch": 0.579039588238515, "grad_norm": 0.1947941929101944, "learning_rate": 4.4832691137700266e-05, "loss": 0.2208, "step": 10575 }, { "epoch": 0.5793133658216065, "grad_norm": 0.17987772822380066, "learning_rate": 4.482762117217603e-05, "loss": 0.2409, "step": 10580 }, { "epoch": 0.579587143404698, "grad_norm": 0.17398078739643097, "learning_rate": 4.48225512066518e-05, "loss": 0.2249, "step": 10585 }, { "epoch": 0.5798609209877895, "grad_norm": 0.15801481902599335, "learning_rate": 4.481748124112756e-05, "loss": 0.2303, "step": 10590 }, { "epoch": 0.5801346985708811, "grad_norm": 0.18696659803390503, "learning_rate": 4.4812411275603326e-05, "loss": 0.2215, "step": 10595 }, { "epoch": 0.5804084761539725, "grad_norm": 0.1720026433467865, "learning_rate": 4.480734131007909e-05, "loss": 0.2283, "step": 10600 }, { "epoch": 0.5806822537370641, "grad_norm": 0.2052363008260727, "learning_rate": 4.480227134455486e-05, "loss": 0.2305, "step": 10605 }, { "epoch": 0.5809560313201555, "grad_norm": 0.18169201910495758, "learning_rate": 4.479720137903062e-05, "loss": 0.2237, "step": 10610 }, { "epoch": 0.581229808903247, "grad_norm": 0.1861555427312851, "learning_rate": 4.4792131413506386e-05, "loss": 0.2214, "step": 10615 }, { "epoch": 0.5815035864863385, "grad_norm": 0.19959844648838043, "learning_rate": 4.4787061447982156e-05, "loss": 0.2239, "step": 10620 }, { "epoch": 0.58177736406943, "grad_norm": 0.17905429005622864, "learning_rate": 4.478199148245792e-05, "loss": 0.2304, "step": 10625 }, { "epoch": 0.5820511416525215, "grad_norm": 0.19514340162277222, "learning_rate": 4.477692151693369e-05, "loss": 0.2302, "step": 10630 }, { "epoch": 0.582324919235613, "grad_norm": 0.20386648178100586, "learning_rate": 4.477185155140945e-05, "loss": 0.2303, "step": 10635 }, { "epoch": 0.5825986968187045, "grad_norm": 0.22275781631469727, "learning_rate": 4.476678158588522e-05, "loss": 0.2275, "step": 10640 }, { "epoch": 0.582872474401796, "grad_norm": 0.1639631986618042, "learning_rate": 4.4761711620360987e-05, "loss": 0.2385, "step": 10645 }, { "epoch": 0.5831462519848875, "grad_norm": 0.18062661588191986, "learning_rate": 4.475664165483675e-05, "loss": 0.2356, "step": 10650 }, { "epoch": 0.583420029567979, "grad_norm": 0.18101105093955994, "learning_rate": 4.475157168931251e-05, "loss": 0.2275, "step": 10655 }, { "epoch": 0.5836938071510704, "grad_norm": 0.1833914965391159, "learning_rate": 4.474650172378828e-05, "loss": 0.2278, "step": 10660 }, { "epoch": 0.583967584734162, "grad_norm": 0.18897180259227753, "learning_rate": 4.4741431758264047e-05, "loss": 0.2307, "step": 10665 }, { "epoch": 0.5842413623172534, "grad_norm": 0.18531671166419983, "learning_rate": 4.473636179273981e-05, "loss": 0.2195, "step": 10670 }, { "epoch": 0.584515139900345, "grad_norm": 0.18228572607040405, "learning_rate": 4.473129182721558e-05, "loss": 0.2292, "step": 10675 }, { "epoch": 0.5847889174834364, "grad_norm": 0.19637799263000488, "learning_rate": 4.472622186169134e-05, "loss": 0.2235, "step": 10680 }, { "epoch": 0.585062695066528, "grad_norm": 0.19298458099365234, "learning_rate": 4.4721151896167107e-05, "loss": 0.2212, "step": 10685 }, { "epoch": 0.5853364726496194, "grad_norm": 0.23678109049797058, "learning_rate": 4.471608193064287e-05, "loss": 0.2333, "step": 10690 }, { "epoch": 0.585610250232711, "grad_norm": 0.1803489625453949, "learning_rate": 4.471101196511864e-05, "loss": 0.2283, "step": 10695 }, { "epoch": 0.5858840278158024, "grad_norm": 0.17063957452774048, "learning_rate": 4.47059419995944e-05, "loss": 0.229, "step": 10700 }, { "epoch": 0.586157805398894, "grad_norm": 0.17945300042629242, "learning_rate": 4.4700872034070167e-05, "loss": 0.2398, "step": 10705 }, { "epoch": 0.5864315829819854, "grad_norm": 0.20574800670146942, "learning_rate": 4.469580206854594e-05, "loss": 0.2393, "step": 10710 }, { "epoch": 0.5867053605650769, "grad_norm": 0.1704779863357544, "learning_rate": 4.469073210302171e-05, "loss": 0.2286, "step": 10715 }, { "epoch": 0.5869791381481684, "grad_norm": 0.20839384198188782, "learning_rate": 4.468566213749747e-05, "loss": 0.2418, "step": 10720 }, { "epoch": 0.5872529157312599, "grad_norm": 0.20463083684444427, "learning_rate": 4.468059217197323e-05, "loss": 0.2212, "step": 10725 }, { "epoch": 0.5875266933143514, "grad_norm": 0.16819055378437042, "learning_rate": 4.4675522206449e-05, "loss": 0.2277, "step": 10730 }, { "epoch": 0.5878004708974429, "grad_norm": 0.2108403444290161, "learning_rate": 4.467045224092477e-05, "loss": 0.2271, "step": 10735 }, { "epoch": 0.5880742484805345, "grad_norm": 0.23599062860012054, "learning_rate": 4.466538227540053e-05, "loss": 0.2202, "step": 10740 }, { "epoch": 0.5883480260636259, "grad_norm": 0.21158602833747864, "learning_rate": 4.466031230987629e-05, "loss": 0.2347, "step": 10745 }, { "epoch": 0.5886218036467175, "grad_norm": 0.24313104152679443, "learning_rate": 4.4655242344352063e-05, "loss": 0.2321, "step": 10750 }, { "epoch": 0.5888955812298089, "grad_norm": 0.2386503964662552, "learning_rate": 4.465017237882783e-05, "loss": 0.2345, "step": 10755 }, { "epoch": 0.5891693588129004, "grad_norm": 0.22050467133522034, "learning_rate": 4.464510241330359e-05, "loss": 0.2187, "step": 10760 }, { "epoch": 0.5894431363959919, "grad_norm": 0.1889670193195343, "learning_rate": 4.464003244777935e-05, "loss": 0.2265, "step": 10765 }, { "epoch": 0.5897169139790834, "grad_norm": 0.19190603494644165, "learning_rate": 4.4634962482255123e-05, "loss": 0.2433, "step": 10770 }, { "epoch": 0.5899906915621749, "grad_norm": 0.16201002895832062, "learning_rate": 4.462989251673089e-05, "loss": 0.2227, "step": 10775 }, { "epoch": 0.5902644691452664, "grad_norm": 0.15613716840744019, "learning_rate": 4.462482255120665e-05, "loss": 0.2169, "step": 10780 }, { "epoch": 0.5905382467283579, "grad_norm": 0.20734718441963196, "learning_rate": 4.461975258568242e-05, "loss": 0.2394, "step": 10785 }, { "epoch": 0.5908120243114494, "grad_norm": 0.16546845436096191, "learning_rate": 4.4614682620158183e-05, "loss": 0.2214, "step": 10790 }, { "epoch": 0.5910858018945409, "grad_norm": 0.23935173451900482, "learning_rate": 4.4609612654633953e-05, "loss": 0.224, "step": 10795 }, { "epoch": 0.5913595794776324, "grad_norm": 0.17006920278072357, "learning_rate": 4.460454268910972e-05, "loss": 0.2255, "step": 10800 }, { "epoch": 0.5916333570607238, "grad_norm": 0.23253290355205536, "learning_rate": 4.459947272358549e-05, "loss": 0.2299, "step": 10805 }, { "epoch": 0.5919071346438154, "grad_norm": 0.19398896396160126, "learning_rate": 4.459440275806125e-05, "loss": 0.2296, "step": 10810 }, { "epoch": 0.5921809122269068, "grad_norm": 0.21319356560707092, "learning_rate": 4.4589332792537013e-05, "loss": 0.2343, "step": 10815 }, { "epoch": 0.5924546898099984, "grad_norm": 0.2060227394104004, "learning_rate": 4.458426282701278e-05, "loss": 0.2306, "step": 10820 }, { "epoch": 0.5927284673930898, "grad_norm": 0.2053355574607849, "learning_rate": 4.457919286148855e-05, "loss": 0.2226, "step": 10825 }, { "epoch": 0.5930022449761814, "grad_norm": 0.16989091038703918, "learning_rate": 4.457412289596431e-05, "loss": 0.2268, "step": 10830 }, { "epoch": 0.5932760225592728, "grad_norm": 0.17404751479625702, "learning_rate": 4.4569052930440073e-05, "loss": 0.2242, "step": 10835 }, { "epoch": 0.5935498001423644, "grad_norm": 0.40520650148391724, "learning_rate": 4.4563982964915844e-05, "loss": 0.235, "step": 10840 }, { "epoch": 0.5938235777254558, "grad_norm": 0.2259800136089325, "learning_rate": 4.455891299939161e-05, "loss": 0.2241, "step": 10845 }, { "epoch": 0.5940973553085473, "grad_norm": 0.17484420537948608, "learning_rate": 4.455384303386737e-05, "loss": 0.2266, "step": 10850 }, { "epoch": 0.5943711328916388, "grad_norm": 0.15474268794059753, "learning_rate": 4.4548773068343133e-05, "loss": 0.2217, "step": 10855 }, { "epoch": 0.5946449104747303, "grad_norm": 0.1850224882364273, "learning_rate": 4.4543703102818904e-05, "loss": 0.2358, "step": 10860 }, { "epoch": 0.5949186880578218, "grad_norm": 0.18705818057060242, "learning_rate": 4.453863313729467e-05, "loss": 0.2181, "step": 10865 }, { "epoch": 0.5951924656409133, "grad_norm": 0.18318186700344086, "learning_rate": 4.453356317177043e-05, "loss": 0.2332, "step": 10870 }, { "epoch": 0.5954662432240048, "grad_norm": 0.19888214766979218, "learning_rate": 4.45284932062462e-05, "loss": 0.2344, "step": 10875 }, { "epoch": 0.5957400208070963, "grad_norm": 0.1862458437681198, "learning_rate": 4.452342324072197e-05, "loss": 0.2276, "step": 10880 }, { "epoch": 0.5960137983901879, "grad_norm": 0.1760464906692505, "learning_rate": 4.4518353275197734e-05, "loss": 0.2328, "step": 10885 }, { "epoch": 0.5962875759732793, "grad_norm": 0.19275395572185516, "learning_rate": 4.45132833096735e-05, "loss": 0.2289, "step": 10890 }, { "epoch": 0.5965613535563709, "grad_norm": 0.18262870609760284, "learning_rate": 4.450821334414926e-05, "loss": 0.2311, "step": 10895 }, { "epoch": 0.5968351311394623, "grad_norm": 0.1693640500307083, "learning_rate": 4.450314337862503e-05, "loss": 0.2303, "step": 10900 }, { "epoch": 0.5971089087225538, "grad_norm": 0.15003281831741333, "learning_rate": 4.4498073413100794e-05, "loss": 0.22, "step": 10905 }, { "epoch": 0.5973826863056453, "grad_norm": 0.17905841767787933, "learning_rate": 4.449300344757656e-05, "loss": 0.2265, "step": 10910 }, { "epoch": 0.5976564638887368, "grad_norm": 0.16943149268627167, "learning_rate": 4.448793348205233e-05, "loss": 0.2284, "step": 10915 }, { "epoch": 0.5979302414718283, "grad_norm": 0.19391389191150665, "learning_rate": 4.448286351652809e-05, "loss": 0.2319, "step": 10920 }, { "epoch": 0.5982040190549198, "grad_norm": 0.18831098079681396, "learning_rate": 4.4477793551003854e-05, "loss": 0.2207, "step": 10925 }, { "epoch": 0.5984777966380113, "grad_norm": 0.22828011214733124, "learning_rate": 4.447272358547962e-05, "loss": 0.2408, "step": 10930 }, { "epoch": 0.5987515742211028, "grad_norm": 0.16315749287605286, "learning_rate": 4.446765361995539e-05, "loss": 0.2194, "step": 10935 }, { "epoch": 0.5990253518041943, "grad_norm": 0.1674269139766693, "learning_rate": 4.446258365443115e-05, "loss": 0.23, "step": 10940 }, { "epoch": 0.5992991293872858, "grad_norm": 0.1904485523700714, "learning_rate": 4.4457513688906914e-05, "loss": 0.2387, "step": 10945 }, { "epoch": 0.5995729069703772, "grad_norm": 0.16663230955600739, "learning_rate": 4.4452443723382684e-05, "loss": 0.2294, "step": 10950 }, { "epoch": 0.5998466845534688, "grad_norm": 0.1847054809331894, "learning_rate": 4.444737375785845e-05, "loss": 0.2235, "step": 10955 }, { "epoch": 0.6001204621365602, "grad_norm": 0.1744331568479538, "learning_rate": 4.444230379233422e-05, "loss": 0.2333, "step": 10960 }, { "epoch": 0.6003942397196518, "grad_norm": 0.19795748591423035, "learning_rate": 4.443723382680998e-05, "loss": 0.2267, "step": 10965 }, { "epoch": 0.6006680173027432, "grad_norm": 0.20636355876922607, "learning_rate": 4.443216386128575e-05, "loss": 0.2316, "step": 10970 }, { "epoch": 0.6009417948858348, "grad_norm": 0.23019343614578247, "learning_rate": 4.4427093895761514e-05, "loss": 0.2302, "step": 10975 }, { "epoch": 0.6012155724689262, "grad_norm": 0.18947750329971313, "learning_rate": 4.442202393023728e-05, "loss": 0.2205, "step": 10980 }, { "epoch": 0.6014893500520178, "grad_norm": 0.1686854362487793, "learning_rate": 4.441695396471304e-05, "loss": 0.2264, "step": 10985 }, { "epoch": 0.6017631276351092, "grad_norm": 0.16091153025627136, "learning_rate": 4.441188399918881e-05, "loss": 0.221, "step": 10990 }, { "epoch": 0.6020369052182007, "grad_norm": 0.17078106105327606, "learning_rate": 4.4406814033664574e-05, "loss": 0.23, "step": 10995 }, { "epoch": 0.6023106828012922, "grad_norm": 0.1620083600282669, "learning_rate": 4.440174406814034e-05, "loss": 0.2421, "step": 11000 }, { "epoch": 0.6025844603843837, "grad_norm": 0.24116557836532593, "learning_rate": 4.43966741026161e-05, "loss": 0.2232, "step": 11005 }, { "epoch": 0.6028582379674752, "grad_norm": 0.20892108976840973, "learning_rate": 4.439160413709187e-05, "loss": 0.2328, "step": 11010 }, { "epoch": 0.6031320155505667, "grad_norm": 0.1865408718585968, "learning_rate": 4.4386534171567634e-05, "loss": 0.222, "step": 11015 }, { "epoch": 0.6034057931336582, "grad_norm": 0.20315372943878174, "learning_rate": 4.43814642060434e-05, "loss": 0.2222, "step": 11020 }, { "epoch": 0.6036795707167497, "grad_norm": 0.1757231205701828, "learning_rate": 4.437639424051917e-05, "loss": 0.225, "step": 11025 }, { "epoch": 0.6039533482998413, "grad_norm": 0.1982816904783249, "learning_rate": 4.437132427499493e-05, "loss": 0.2311, "step": 11030 }, { "epoch": 0.6042271258829327, "grad_norm": 0.16632063686847687, "learning_rate": 4.4366254309470694e-05, "loss": 0.2236, "step": 11035 }, { "epoch": 0.6045009034660243, "grad_norm": 0.1746959090232849, "learning_rate": 4.4361184343946464e-05, "loss": 0.2424, "step": 11040 }, { "epoch": 0.6047746810491157, "grad_norm": 0.23477110266685486, "learning_rate": 4.4356114378422234e-05, "loss": 0.22, "step": 11045 }, { "epoch": 0.6050484586322072, "grad_norm": 0.1951999068260193, "learning_rate": 4.4351044412898e-05, "loss": 0.2334, "step": 11050 }, { "epoch": 0.6053222362152987, "grad_norm": 0.16417589783668518, "learning_rate": 4.434597444737376e-05, "loss": 0.2213, "step": 11055 }, { "epoch": 0.6055960137983902, "grad_norm": 0.2035595178604126, "learning_rate": 4.4340904481849524e-05, "loss": 0.2177, "step": 11060 }, { "epoch": 0.6058697913814817, "grad_norm": 0.20763985812664032, "learning_rate": 4.4335834516325294e-05, "loss": 0.2257, "step": 11065 }, { "epoch": 0.6061435689645732, "grad_norm": 0.20706333220005035, "learning_rate": 4.433076455080106e-05, "loss": 0.2227, "step": 11070 }, { "epoch": 0.6064173465476647, "grad_norm": 0.2740388810634613, "learning_rate": 4.432569458527682e-05, "loss": 0.2177, "step": 11075 }, { "epoch": 0.6066911241307562, "grad_norm": 0.18829859793186188, "learning_rate": 4.432062461975259e-05, "loss": 0.2432, "step": 11080 }, { "epoch": 0.6069649017138476, "grad_norm": 0.21092484891414642, "learning_rate": 4.4315554654228354e-05, "loss": 0.2309, "step": 11085 }, { "epoch": 0.6072386792969392, "grad_norm": 0.17860695719718933, "learning_rate": 4.431048468870412e-05, "loss": 0.2366, "step": 11090 }, { "epoch": 0.6075124568800306, "grad_norm": 0.17009010910987854, "learning_rate": 4.430541472317988e-05, "loss": 0.22, "step": 11095 }, { "epoch": 0.6077862344631222, "grad_norm": 0.17510661482810974, "learning_rate": 4.430034475765565e-05, "loss": 0.2333, "step": 11100 }, { "epoch": 0.6080600120462136, "grad_norm": 0.16978785395622253, "learning_rate": 4.4295274792131414e-05, "loss": 0.2242, "step": 11105 }, { "epoch": 0.6083337896293052, "grad_norm": 0.2556231915950775, "learning_rate": 4.429020482660718e-05, "loss": 0.2291, "step": 11110 }, { "epoch": 0.6086075672123966, "grad_norm": 0.17179779708385468, "learning_rate": 4.428513486108295e-05, "loss": 0.2304, "step": 11115 }, { "epoch": 0.6088813447954882, "grad_norm": 0.18611468374729156, "learning_rate": 4.428006489555872e-05, "loss": 0.2228, "step": 11120 }, { "epoch": 0.6091551223785796, "grad_norm": 0.18673215806484222, "learning_rate": 4.427499493003448e-05, "loss": 0.2316, "step": 11125 }, { "epoch": 0.6094288999616712, "grad_norm": 0.18410874903202057, "learning_rate": 4.4269924964510244e-05, "loss": 0.2264, "step": 11130 }, { "epoch": 0.6097026775447626, "grad_norm": 0.1774316281080246, "learning_rate": 4.426485499898601e-05, "loss": 0.2291, "step": 11135 }, { "epoch": 0.6099764551278541, "grad_norm": 0.17415474355220795, "learning_rate": 4.425978503346178e-05, "loss": 0.2281, "step": 11140 }, { "epoch": 0.6102502327109456, "grad_norm": 0.17913931608200073, "learning_rate": 4.425471506793754e-05, "loss": 0.2304, "step": 11145 }, { "epoch": 0.6105240102940371, "grad_norm": 0.14086611568927765, "learning_rate": 4.4249645102413304e-05, "loss": 0.2256, "step": 11150 }, { "epoch": 0.6107977878771286, "grad_norm": 0.14695386588573456, "learning_rate": 4.4244575136889074e-05, "loss": 0.2293, "step": 11155 }, { "epoch": 0.6110715654602201, "grad_norm": 0.18421140313148499, "learning_rate": 4.423950517136484e-05, "loss": 0.2311, "step": 11160 }, { "epoch": 0.6113453430433116, "grad_norm": 0.17024587094783783, "learning_rate": 4.42344352058406e-05, "loss": 0.2225, "step": 11165 }, { "epoch": 0.6116191206264031, "grad_norm": 0.17959728837013245, "learning_rate": 4.4229365240316364e-05, "loss": 0.2403, "step": 11170 }, { "epoch": 0.6118928982094946, "grad_norm": 0.16802141070365906, "learning_rate": 4.4224295274792134e-05, "loss": 0.2204, "step": 11175 }, { "epoch": 0.6121666757925861, "grad_norm": 0.19945424795150757, "learning_rate": 4.42192253092679e-05, "loss": 0.2267, "step": 11180 }, { "epoch": 0.6124404533756777, "grad_norm": 0.23985280096530914, "learning_rate": 4.421415534374366e-05, "loss": 0.229, "step": 11185 }, { "epoch": 0.6127142309587691, "grad_norm": 0.1651318073272705, "learning_rate": 4.420908537821943e-05, "loss": 0.2283, "step": 11190 }, { "epoch": 0.6129880085418606, "grad_norm": 0.1845356822013855, "learning_rate": 4.4204015412695194e-05, "loss": 0.2228, "step": 11195 }, { "epoch": 0.6132617861249521, "grad_norm": 0.15166756510734558, "learning_rate": 4.4198945447170964e-05, "loss": 0.222, "step": 11200 }, { "epoch": 0.6135355637080436, "grad_norm": 0.18983426690101624, "learning_rate": 4.419387548164673e-05, "loss": 0.2357, "step": 11205 }, { "epoch": 0.6138093412911351, "grad_norm": 0.189105823636055, "learning_rate": 4.41888055161225e-05, "loss": 0.2297, "step": 11210 }, { "epoch": 0.6140831188742266, "grad_norm": 0.16885283589363098, "learning_rate": 4.418373555059826e-05, "loss": 0.2252, "step": 11215 }, { "epoch": 0.6143568964573181, "grad_norm": 0.18673381209373474, "learning_rate": 4.4178665585074024e-05, "loss": 0.2282, "step": 11220 }, { "epoch": 0.6146306740404096, "grad_norm": 0.21095769107341766, "learning_rate": 4.417359561954979e-05, "loss": 0.2399, "step": 11225 }, { "epoch": 0.614904451623501, "grad_norm": 0.1849888116121292, "learning_rate": 4.416852565402556e-05, "loss": 0.2334, "step": 11230 }, { "epoch": 0.6151782292065926, "grad_norm": 0.1771889477968216, "learning_rate": 4.416345568850132e-05, "loss": 0.2246, "step": 11235 }, { "epoch": 0.615452006789684, "grad_norm": 0.16280493140220642, "learning_rate": 4.4158385722977084e-05, "loss": 0.2379, "step": 11240 }, { "epoch": 0.6157257843727756, "grad_norm": 0.22377315163612366, "learning_rate": 4.4153315757452854e-05, "loss": 0.2257, "step": 11245 }, { "epoch": 0.615999561955867, "grad_norm": 0.23529651761054993, "learning_rate": 4.414824579192862e-05, "loss": 0.2334, "step": 11250 }, { "epoch": 0.6162733395389586, "grad_norm": 0.2475888431072235, "learning_rate": 4.414317582640438e-05, "loss": 0.2253, "step": 11255 }, { "epoch": 0.61654711712205, "grad_norm": 0.18444767594337463, "learning_rate": 4.4138105860880144e-05, "loss": 0.2331, "step": 11260 }, { "epoch": 0.6168208947051416, "grad_norm": 0.14566262066364288, "learning_rate": 4.4133035895355914e-05, "loss": 0.2289, "step": 11265 }, { "epoch": 0.617094672288233, "grad_norm": 0.16681510210037231, "learning_rate": 4.412796592983168e-05, "loss": 0.2266, "step": 11270 }, { "epoch": 0.6173684498713246, "grad_norm": 0.16788463294506073, "learning_rate": 4.412289596430744e-05, "loss": 0.2347, "step": 11275 }, { "epoch": 0.617642227454416, "grad_norm": 0.16157720983028412, "learning_rate": 4.411782599878321e-05, "loss": 0.2284, "step": 11280 }, { "epoch": 0.6179160050375075, "grad_norm": 0.20811238884925842, "learning_rate": 4.411275603325898e-05, "loss": 0.2222, "step": 11285 }, { "epoch": 0.618189782620599, "grad_norm": 0.17412792146205902, "learning_rate": 4.4107686067734744e-05, "loss": 0.2201, "step": 11290 }, { "epoch": 0.6184635602036905, "grad_norm": 0.1900346577167511, "learning_rate": 4.410261610221051e-05, "loss": 0.2292, "step": 11295 }, { "epoch": 0.618737337786782, "grad_norm": 0.16678358614444733, "learning_rate": 4.409754613668627e-05, "loss": 0.2236, "step": 11300 }, { "epoch": 0.6190111153698735, "grad_norm": 0.13943369686603546, "learning_rate": 4.409247617116204e-05, "loss": 0.2228, "step": 11305 }, { "epoch": 0.619284892952965, "grad_norm": 0.1596153825521469, "learning_rate": 4.4087406205637804e-05, "loss": 0.2279, "step": 11310 }, { "epoch": 0.6195586705360565, "grad_norm": 0.1971106082201004, "learning_rate": 4.408233624011357e-05, "loss": 0.2276, "step": 11315 }, { "epoch": 0.619832448119148, "grad_norm": 0.18272429704666138, "learning_rate": 4.407726627458934e-05, "loss": 0.2248, "step": 11320 }, { "epoch": 0.6201062257022395, "grad_norm": 0.19166390597820282, "learning_rate": 4.40721963090651e-05, "loss": 0.2291, "step": 11325 }, { "epoch": 0.620380003285331, "grad_norm": 0.18538281321525574, "learning_rate": 4.4067126343540864e-05, "loss": 0.228, "step": 11330 }, { "epoch": 0.6206537808684225, "grad_norm": 0.16689538955688477, "learning_rate": 4.406205637801663e-05, "loss": 0.2218, "step": 11335 }, { "epoch": 0.620927558451514, "grad_norm": 0.1626131385564804, "learning_rate": 4.40569864124924e-05, "loss": 0.2302, "step": 11340 }, { "epoch": 0.6212013360346055, "grad_norm": 0.1719040423631668, "learning_rate": 4.405191644696816e-05, "loss": 0.2321, "step": 11345 }, { "epoch": 0.621475113617697, "grad_norm": 0.18001499772071838, "learning_rate": 4.4046846481443924e-05, "loss": 0.2292, "step": 11350 }, { "epoch": 0.6217488912007885, "grad_norm": 0.16863512992858887, "learning_rate": 4.4041776515919694e-05, "loss": 0.2194, "step": 11355 }, { "epoch": 0.62202266878388, "grad_norm": 0.1541566103696823, "learning_rate": 4.403670655039546e-05, "loss": 0.2201, "step": 11360 }, { "epoch": 0.6222964463669715, "grad_norm": 0.20564398169517517, "learning_rate": 4.403163658487123e-05, "loss": 0.2299, "step": 11365 }, { "epoch": 0.622570223950063, "grad_norm": 0.19964493811130524, "learning_rate": 4.402656661934699e-05, "loss": 0.2287, "step": 11370 }, { "epoch": 0.6228440015331544, "grad_norm": 0.1967330127954483, "learning_rate": 4.402149665382276e-05, "loss": 0.2244, "step": 11375 }, { "epoch": 0.623117779116246, "grad_norm": 0.1850447654724121, "learning_rate": 4.4016426688298524e-05, "loss": 0.2266, "step": 11380 }, { "epoch": 0.6233915566993374, "grad_norm": 0.2064153254032135, "learning_rate": 4.401135672277429e-05, "loss": 0.2271, "step": 11385 }, { "epoch": 0.623665334282429, "grad_norm": 0.1590408980846405, "learning_rate": 4.400628675725005e-05, "loss": 0.2306, "step": 11390 }, { "epoch": 0.6239391118655204, "grad_norm": 0.1790197640657425, "learning_rate": 4.400121679172582e-05, "loss": 0.2258, "step": 11395 }, { "epoch": 0.624212889448612, "grad_norm": 0.17149287462234497, "learning_rate": 4.3996146826201584e-05, "loss": 0.232, "step": 11400 }, { "epoch": 0.6244866670317034, "grad_norm": 0.1908823847770691, "learning_rate": 4.399107686067735e-05, "loss": 0.2202, "step": 11405 }, { "epoch": 0.624760444614795, "grad_norm": 0.18090775609016418, "learning_rate": 4.398600689515312e-05, "loss": 0.2323, "step": 11410 }, { "epoch": 0.6250342221978864, "grad_norm": 0.16512621939182281, "learning_rate": 4.398093692962888e-05, "loss": 0.2287, "step": 11415 }, { "epoch": 0.625307999780978, "grad_norm": 0.16199259459972382, "learning_rate": 4.3975866964104644e-05, "loss": 0.2261, "step": 11420 }, { "epoch": 0.6255817773640694, "grad_norm": 0.1492205709218979, "learning_rate": 4.397079699858041e-05, "loss": 0.2302, "step": 11425 }, { "epoch": 0.6258555549471609, "grad_norm": 0.18372613191604614, "learning_rate": 4.396572703305618e-05, "loss": 0.2304, "step": 11430 }, { "epoch": 0.6261293325302524, "grad_norm": 0.16085530817508698, "learning_rate": 4.396065706753194e-05, "loss": 0.235, "step": 11435 }, { "epoch": 0.6264031101133439, "grad_norm": 0.15520110726356506, "learning_rate": 4.3955587102007704e-05, "loss": 0.2227, "step": 11440 }, { "epoch": 0.6266768876964354, "grad_norm": 0.15244093537330627, "learning_rate": 4.3950517136483474e-05, "loss": 0.2276, "step": 11445 }, { "epoch": 0.6269506652795269, "grad_norm": 0.15629783272743225, "learning_rate": 4.3945447170959245e-05, "loss": 0.2317, "step": 11450 }, { "epoch": 0.6272244428626184, "grad_norm": 0.20034711062908173, "learning_rate": 4.394037720543501e-05, "loss": 0.2343, "step": 11455 }, { "epoch": 0.6274982204457099, "grad_norm": 0.2437441200017929, "learning_rate": 4.393530723991077e-05, "loss": 0.2274, "step": 11460 }, { "epoch": 0.6277719980288013, "grad_norm": 0.18710201978683472, "learning_rate": 4.3930237274386534e-05, "loss": 0.2277, "step": 11465 }, { "epoch": 0.6280457756118929, "grad_norm": 0.16772663593292236, "learning_rate": 4.3925167308862305e-05, "loss": 0.2332, "step": 11470 }, { "epoch": 0.6283195531949844, "grad_norm": 0.19824327528476715, "learning_rate": 4.392009734333807e-05, "loss": 0.2317, "step": 11475 }, { "epoch": 0.6285933307780759, "grad_norm": 0.1948237419128418, "learning_rate": 4.391502737781383e-05, "loss": 0.2182, "step": 11480 }, { "epoch": 0.6288671083611674, "grad_norm": 0.20758309960365295, "learning_rate": 4.39099574122896e-05, "loss": 0.224, "step": 11485 }, { "epoch": 0.6291408859442589, "grad_norm": 0.1687682420015335, "learning_rate": 4.3904887446765365e-05, "loss": 0.2279, "step": 11490 }, { "epoch": 0.6294146635273504, "grad_norm": 0.24501845240592957, "learning_rate": 4.389981748124113e-05, "loss": 0.2297, "step": 11495 }, { "epoch": 0.6296884411104419, "grad_norm": 0.1596069037914276, "learning_rate": 4.389474751571689e-05, "loss": 0.2257, "step": 11500 }, { "epoch": 0.6299622186935334, "grad_norm": 0.20703069865703583, "learning_rate": 4.388967755019266e-05, "loss": 0.2224, "step": 11505 }, { "epoch": 0.6302359962766249, "grad_norm": 0.1935301423072815, "learning_rate": 4.3884607584668425e-05, "loss": 0.2245, "step": 11510 }, { "epoch": 0.6305097738597164, "grad_norm": 0.16902555525302887, "learning_rate": 4.387953761914419e-05, "loss": 0.2309, "step": 11515 }, { "epoch": 0.6307835514428078, "grad_norm": 0.2110823094844818, "learning_rate": 4.387446765361996e-05, "loss": 0.2388, "step": 11520 }, { "epoch": 0.6310573290258994, "grad_norm": 0.21479582786560059, "learning_rate": 4.386939768809573e-05, "loss": 0.2226, "step": 11525 }, { "epoch": 0.6313311066089908, "grad_norm": 0.19489900767803192, "learning_rate": 4.386432772257149e-05, "loss": 0.2356, "step": 11530 }, { "epoch": 0.6316048841920824, "grad_norm": 0.18026353418827057, "learning_rate": 4.3859257757047255e-05, "loss": 0.2177, "step": 11535 }, { "epoch": 0.6318786617751738, "grad_norm": 0.2288321852684021, "learning_rate": 4.3854187791523025e-05, "loss": 0.2402, "step": 11540 }, { "epoch": 0.6321524393582654, "grad_norm": 0.16370359063148499, "learning_rate": 4.384911782599879e-05, "loss": 0.2247, "step": 11545 }, { "epoch": 0.6324262169413568, "grad_norm": 0.15363337099552155, "learning_rate": 4.384404786047455e-05, "loss": 0.2194, "step": 11550 }, { "epoch": 0.6326999945244484, "grad_norm": 0.19980251789093018, "learning_rate": 4.3838977894950315e-05, "loss": 0.2285, "step": 11555 }, { "epoch": 0.6329737721075398, "grad_norm": 0.2383044958114624, "learning_rate": 4.3833907929426085e-05, "loss": 0.2176, "step": 11560 }, { "epoch": 0.6332475496906314, "grad_norm": 0.16856135427951813, "learning_rate": 4.382883796390185e-05, "loss": 0.2267, "step": 11565 }, { "epoch": 0.6335213272737228, "grad_norm": 0.1636246293783188, "learning_rate": 4.382376799837761e-05, "loss": 0.2232, "step": 11570 }, { "epoch": 0.6337951048568143, "grad_norm": 0.16344133019447327, "learning_rate": 4.381869803285338e-05, "loss": 0.229, "step": 11575 }, { "epoch": 0.6340688824399058, "grad_norm": 0.18762022256851196, "learning_rate": 4.3813628067329145e-05, "loss": 0.2236, "step": 11580 }, { "epoch": 0.6343426600229973, "grad_norm": 0.19077342748641968, "learning_rate": 4.380855810180491e-05, "loss": 0.2218, "step": 11585 }, { "epoch": 0.6346164376060888, "grad_norm": 0.1973077356815338, "learning_rate": 4.380348813628067e-05, "loss": 0.228, "step": 11590 }, { "epoch": 0.6348902151891803, "grad_norm": 0.17768308520317078, "learning_rate": 4.379841817075644e-05, "loss": 0.2333, "step": 11595 }, { "epoch": 0.6351639927722718, "grad_norm": 0.15902727842330933, "learning_rate": 4.3793348205232205e-05, "loss": 0.2247, "step": 11600 }, { "epoch": 0.6354377703553633, "grad_norm": 0.19111302495002747, "learning_rate": 4.378827823970797e-05, "loss": 0.2284, "step": 11605 }, { "epoch": 0.6357115479384547, "grad_norm": 0.1773681491613388, "learning_rate": 4.378320827418374e-05, "loss": 0.2315, "step": 11610 }, { "epoch": 0.6359853255215463, "grad_norm": 0.18087181448936462, "learning_rate": 4.377813830865951e-05, "loss": 0.2181, "step": 11615 }, { "epoch": 0.6362591031046378, "grad_norm": 0.19910405576229095, "learning_rate": 4.377306834313527e-05, "loss": 0.2243, "step": 11620 }, { "epoch": 0.6365328806877293, "grad_norm": 0.2051791548728943, "learning_rate": 4.3767998377611035e-05, "loss": 0.2296, "step": 11625 }, { "epoch": 0.6368066582708208, "grad_norm": 0.17946775257587433, "learning_rate": 4.37629284120868e-05, "loss": 0.2292, "step": 11630 }, { "epoch": 0.6370804358539123, "grad_norm": 0.152216374874115, "learning_rate": 4.375785844656257e-05, "loss": 0.2263, "step": 11635 }, { "epoch": 0.6373542134370038, "grad_norm": 0.18107940256595612, "learning_rate": 4.375278848103833e-05, "loss": 0.2312, "step": 11640 }, { "epoch": 0.6376279910200953, "grad_norm": 0.18769718706607819, "learning_rate": 4.3747718515514095e-05, "loss": 0.23, "step": 11645 }, { "epoch": 0.6379017686031868, "grad_norm": 0.19940517842769623, "learning_rate": 4.3742648549989865e-05, "loss": 0.223, "step": 11650 }, { "epoch": 0.6381755461862783, "grad_norm": 0.17358872294425964, "learning_rate": 4.373757858446563e-05, "loss": 0.2189, "step": 11655 }, { "epoch": 0.6384493237693698, "grad_norm": 0.16135264933109283, "learning_rate": 4.373250861894139e-05, "loss": 0.2319, "step": 11660 }, { "epoch": 0.6387231013524612, "grad_norm": 0.18470701575279236, "learning_rate": 4.3727438653417155e-05, "loss": 0.224, "step": 11665 }, { "epoch": 0.6389968789355528, "grad_norm": 0.1790611743927002, "learning_rate": 4.3722368687892925e-05, "loss": 0.2226, "step": 11670 }, { "epoch": 0.6392706565186442, "grad_norm": 0.18842384219169617, "learning_rate": 4.371729872236869e-05, "loss": 0.2248, "step": 11675 }, { "epoch": 0.6395444341017358, "grad_norm": 0.16724228858947754, "learning_rate": 4.371222875684445e-05, "loss": 0.2191, "step": 11680 }, { "epoch": 0.6398182116848272, "grad_norm": 0.166738361120224, "learning_rate": 4.370715879132022e-05, "loss": 0.227, "step": 11685 }, { "epoch": 0.6400919892679188, "grad_norm": 0.1684912145137787, "learning_rate": 4.370208882579599e-05, "loss": 0.2204, "step": 11690 }, { "epoch": 0.6403657668510102, "grad_norm": 0.15864789485931396, "learning_rate": 4.3697018860271755e-05, "loss": 0.2217, "step": 11695 }, { "epoch": 0.6406395444341018, "grad_norm": 0.16828450560569763, "learning_rate": 4.369194889474752e-05, "loss": 0.2241, "step": 11700 }, { "epoch": 0.6409133220171932, "grad_norm": 0.26432880759239197, "learning_rate": 4.368687892922329e-05, "loss": 0.2364, "step": 11705 }, { "epoch": 0.6411870996002847, "grad_norm": 0.20076651871204376, "learning_rate": 4.368180896369905e-05, "loss": 0.2213, "step": 11710 }, { "epoch": 0.6414608771833762, "grad_norm": 0.21437907218933105, "learning_rate": 4.3676738998174815e-05, "loss": 0.2304, "step": 11715 }, { "epoch": 0.6417346547664677, "grad_norm": 0.16237862408161163, "learning_rate": 4.367166903265058e-05, "loss": 0.2302, "step": 11720 }, { "epoch": 0.6420084323495592, "grad_norm": 0.16179965436458588, "learning_rate": 4.366659906712635e-05, "loss": 0.2267, "step": 11725 }, { "epoch": 0.6422822099326507, "grad_norm": 0.16221430897712708, "learning_rate": 4.366152910160211e-05, "loss": 0.2131, "step": 11730 }, { "epoch": 0.6425559875157422, "grad_norm": 0.15574003756046295, "learning_rate": 4.3656459136077875e-05, "loss": 0.2261, "step": 11735 }, { "epoch": 0.6428297650988337, "grad_norm": 0.1677011251449585, "learning_rate": 4.365138917055364e-05, "loss": 0.2303, "step": 11740 }, { "epoch": 0.6431035426819252, "grad_norm": 0.15928791463375092, "learning_rate": 4.364631920502941e-05, "loss": 0.2274, "step": 11745 }, { "epoch": 0.6433773202650167, "grad_norm": 0.18291813135147095, "learning_rate": 4.364124923950517e-05, "loss": 0.2323, "step": 11750 }, { "epoch": 0.6436510978481081, "grad_norm": 0.1691235601902008, "learning_rate": 4.3636179273980935e-05, "loss": 0.224, "step": 11755 }, { "epoch": 0.6439248754311997, "grad_norm": 0.1726246476173401, "learning_rate": 4.3631109308456705e-05, "loss": 0.223, "step": 11760 }, { "epoch": 0.6441986530142912, "grad_norm": 0.16832992434501648, "learning_rate": 4.362603934293247e-05, "loss": 0.2253, "step": 11765 }, { "epoch": 0.6444724305973827, "grad_norm": 0.16686640679836273, "learning_rate": 4.362096937740824e-05, "loss": 0.2251, "step": 11770 }, { "epoch": 0.6447462081804742, "grad_norm": 0.20336879789829254, "learning_rate": 4.3615899411884e-05, "loss": 0.23, "step": 11775 }, { "epoch": 0.6450199857635657, "grad_norm": 0.2064741998910904, "learning_rate": 4.361082944635977e-05, "loss": 0.2204, "step": 11780 }, { "epoch": 0.6452937633466572, "grad_norm": 0.2026134729385376, "learning_rate": 4.3605759480835535e-05, "loss": 0.2351, "step": 11785 }, { "epoch": 0.6455675409297487, "grad_norm": 0.17706957459449768, "learning_rate": 4.36006895153113e-05, "loss": 0.2219, "step": 11790 }, { "epoch": 0.6458413185128402, "grad_norm": 0.1491750329732895, "learning_rate": 4.359561954978706e-05, "loss": 0.2242, "step": 11795 }, { "epoch": 0.6461150960959317, "grad_norm": 0.1635763943195343, "learning_rate": 4.359054958426283e-05, "loss": 0.2328, "step": 11800 }, { "epoch": 0.6463888736790232, "grad_norm": 0.19635459780693054, "learning_rate": 4.3585479618738595e-05, "loss": 0.2215, "step": 11805 }, { "epoch": 0.6466626512621146, "grad_norm": 0.18298840522766113, "learning_rate": 4.358040965321436e-05, "loss": 0.2318, "step": 11810 }, { "epoch": 0.6469364288452062, "grad_norm": 0.19074909389019012, "learning_rate": 4.357533968769013e-05, "loss": 0.2273, "step": 11815 }, { "epoch": 0.6472102064282976, "grad_norm": 0.19161969423294067, "learning_rate": 4.357026972216589e-05, "loss": 0.2183, "step": 11820 }, { "epoch": 0.6474839840113892, "grad_norm": 0.17303353548049927, "learning_rate": 4.3565199756641655e-05, "loss": 0.2352, "step": 11825 }, { "epoch": 0.6477577615944806, "grad_norm": 0.16892243921756744, "learning_rate": 4.356012979111742e-05, "loss": 0.2249, "step": 11830 }, { "epoch": 0.6480315391775722, "grad_norm": 0.1755814552307129, "learning_rate": 4.355505982559319e-05, "loss": 0.2328, "step": 11835 }, { "epoch": 0.6483053167606636, "grad_norm": 0.1642669439315796, "learning_rate": 4.354998986006895e-05, "loss": 0.2253, "step": 11840 }, { "epoch": 0.6485790943437552, "grad_norm": 0.1874639093875885, "learning_rate": 4.3544919894544715e-05, "loss": 0.2257, "step": 11845 }, { "epoch": 0.6488528719268466, "grad_norm": 0.16982285678386688, "learning_rate": 4.3539849929020485e-05, "loss": 0.2323, "step": 11850 }, { "epoch": 0.6491266495099381, "grad_norm": 0.16937948763370514, "learning_rate": 4.3534779963496255e-05, "loss": 0.2273, "step": 11855 }, { "epoch": 0.6494004270930296, "grad_norm": 0.18147583305835724, "learning_rate": 4.352970999797202e-05, "loss": 0.2153, "step": 11860 }, { "epoch": 0.6496742046761211, "grad_norm": 0.20412525534629822, "learning_rate": 4.352464003244778e-05, "loss": 0.2412, "step": 11865 }, { "epoch": 0.6499479822592126, "grad_norm": 0.21449369192123413, "learning_rate": 4.3519570066923545e-05, "loss": 0.2281, "step": 11870 }, { "epoch": 0.6502217598423041, "grad_norm": 0.18963874876499176, "learning_rate": 4.3514500101399315e-05, "loss": 0.2331, "step": 11875 }, { "epoch": 0.6504955374253956, "grad_norm": 0.16781696677207947, "learning_rate": 4.350943013587508e-05, "loss": 0.2298, "step": 11880 }, { "epoch": 0.6507693150084871, "grad_norm": 0.1884540319442749, "learning_rate": 4.350436017035084e-05, "loss": 0.2135, "step": 11885 }, { "epoch": 0.6510430925915786, "grad_norm": 0.22105377912521362, "learning_rate": 4.349929020482661e-05, "loss": 0.2264, "step": 11890 }, { "epoch": 0.6513168701746701, "grad_norm": 0.20459914207458496, "learning_rate": 4.3494220239302375e-05, "loss": 0.2241, "step": 11895 }, { "epoch": 0.6515906477577615, "grad_norm": 0.174761101603508, "learning_rate": 4.348915027377814e-05, "loss": 0.2327, "step": 11900 }, { "epoch": 0.6518644253408531, "grad_norm": 0.14644666016101837, "learning_rate": 4.34840803082539e-05, "loss": 0.2248, "step": 11905 }, { "epoch": 0.6521382029239445, "grad_norm": 0.1747933328151703, "learning_rate": 4.347901034272967e-05, "loss": 0.2307, "step": 11910 }, { "epoch": 0.6524119805070361, "grad_norm": 0.19446305930614471, "learning_rate": 4.3473940377205435e-05, "loss": 0.2235, "step": 11915 }, { "epoch": 0.6526857580901276, "grad_norm": 0.1890510469675064, "learning_rate": 4.34688704116812e-05, "loss": 0.2195, "step": 11920 }, { "epoch": 0.6529595356732191, "grad_norm": 0.20925574004650116, "learning_rate": 4.346380044615697e-05, "loss": 0.218, "step": 11925 }, { "epoch": 0.6532333132563106, "grad_norm": 0.15153393149375916, "learning_rate": 4.345873048063273e-05, "loss": 0.228, "step": 11930 }, { "epoch": 0.6535070908394021, "grad_norm": 0.1565830260515213, "learning_rate": 4.34536605151085e-05, "loss": 0.2234, "step": 11935 }, { "epoch": 0.6537808684224936, "grad_norm": 0.1544036567211151, "learning_rate": 4.3448590549584265e-05, "loss": 0.2221, "step": 11940 }, { "epoch": 0.654054646005585, "grad_norm": 0.17358800768852234, "learning_rate": 4.3443520584060035e-05, "loss": 0.2335, "step": 11945 }, { "epoch": 0.6543284235886766, "grad_norm": 0.18482081592082977, "learning_rate": 4.34384506185358e-05, "loss": 0.227, "step": 11950 }, { "epoch": 0.654602201171768, "grad_norm": 0.17168167233467102, "learning_rate": 4.343338065301156e-05, "loss": 0.221, "step": 11955 }, { "epoch": 0.6548759787548596, "grad_norm": 0.1871645450592041, "learning_rate": 4.3428310687487325e-05, "loss": 0.2254, "step": 11960 }, { "epoch": 0.655149756337951, "grad_norm": 0.1722816675901413, "learning_rate": 4.3423240721963095e-05, "loss": 0.2235, "step": 11965 }, { "epoch": 0.6554235339210426, "grad_norm": 0.20067188143730164, "learning_rate": 4.341817075643886e-05, "loss": 0.2397, "step": 11970 }, { "epoch": 0.655697311504134, "grad_norm": 0.15719640254974365, "learning_rate": 4.341310079091462e-05, "loss": 0.2372, "step": 11975 }, { "epoch": 0.6559710890872256, "grad_norm": 0.16481702029705048, "learning_rate": 4.340803082539039e-05, "loss": 0.2159, "step": 11980 }, { "epoch": 0.656244866670317, "grad_norm": 0.1736931949853897, "learning_rate": 4.3402960859866155e-05, "loss": 0.231, "step": 11985 }, { "epoch": 0.6565186442534086, "grad_norm": 0.16944096982479095, "learning_rate": 4.339789089434192e-05, "loss": 0.2201, "step": 11990 }, { "epoch": 0.6567924218365, "grad_norm": 0.18041780591011047, "learning_rate": 4.339282092881768e-05, "loss": 0.214, "step": 11995 }, { "epoch": 0.6570661994195915, "grad_norm": 0.15593835711479187, "learning_rate": 4.338775096329345e-05, "loss": 0.2267, "step": 12000 }, { "epoch": 0.657339977002683, "grad_norm": 0.2534622550010681, "learning_rate": 4.3382680997769215e-05, "loss": 0.2326, "step": 12005 }, { "epoch": 0.6576137545857745, "grad_norm": 0.19980734586715698, "learning_rate": 4.337761103224498e-05, "loss": 0.2329, "step": 12010 }, { "epoch": 0.657887532168866, "grad_norm": 0.19901308417320251, "learning_rate": 4.337254106672075e-05, "loss": 0.2352, "step": 12015 }, { "epoch": 0.6581613097519575, "grad_norm": 0.2234528362751007, "learning_rate": 4.336747110119652e-05, "loss": 0.2211, "step": 12020 }, { "epoch": 0.658435087335049, "grad_norm": 0.19069258868694305, "learning_rate": 4.336240113567228e-05, "loss": 0.2245, "step": 12025 }, { "epoch": 0.6587088649181405, "grad_norm": 0.28013232350349426, "learning_rate": 4.3357331170148045e-05, "loss": 0.2327, "step": 12030 }, { "epoch": 0.658982642501232, "grad_norm": 0.20289695262908936, "learning_rate": 4.335226120462381e-05, "loss": 0.2287, "step": 12035 }, { "epoch": 0.6592564200843235, "grad_norm": 0.192132830619812, "learning_rate": 4.334719123909958e-05, "loss": 0.2259, "step": 12040 }, { "epoch": 0.6595301976674149, "grad_norm": 0.19337663054466248, "learning_rate": 4.334212127357534e-05, "loss": 0.2288, "step": 12045 }, { "epoch": 0.6598039752505065, "grad_norm": 0.1908634454011917, "learning_rate": 4.3337051308051105e-05, "loss": 0.2275, "step": 12050 }, { "epoch": 0.6600777528335979, "grad_norm": 0.20960406959056854, "learning_rate": 4.3331981342526875e-05, "loss": 0.2307, "step": 12055 }, { "epoch": 0.6603515304166895, "grad_norm": 0.17018090188503265, "learning_rate": 4.332691137700264e-05, "loss": 0.229, "step": 12060 }, { "epoch": 0.660625307999781, "grad_norm": 0.171340674161911, "learning_rate": 4.33218414114784e-05, "loss": 0.2179, "step": 12065 }, { "epoch": 0.6608990855828725, "grad_norm": 0.15242035686969757, "learning_rate": 4.3316771445954165e-05, "loss": 0.2164, "step": 12070 }, { "epoch": 0.661172863165964, "grad_norm": 0.19557800889015198, "learning_rate": 4.3311701480429935e-05, "loss": 0.2322, "step": 12075 }, { "epoch": 0.6614466407490555, "grad_norm": 0.16947250068187714, "learning_rate": 4.33066315149057e-05, "loss": 0.2243, "step": 12080 }, { "epoch": 0.661720418332147, "grad_norm": 0.1912458837032318, "learning_rate": 4.330156154938146e-05, "loss": 0.2284, "step": 12085 }, { "epoch": 0.6619941959152384, "grad_norm": 0.18254104256629944, "learning_rate": 4.329649158385723e-05, "loss": 0.2363, "step": 12090 }, { "epoch": 0.66226797349833, "grad_norm": 0.18119469285011292, "learning_rate": 4.3291421618333e-05, "loss": 0.2273, "step": 12095 }, { "epoch": 0.6625417510814214, "grad_norm": 0.18326769769191742, "learning_rate": 4.3286351652808766e-05, "loss": 0.2244, "step": 12100 }, { "epoch": 0.662815528664513, "grad_norm": 0.1928960233926773, "learning_rate": 4.328128168728453e-05, "loss": 0.2202, "step": 12105 }, { "epoch": 0.6630893062476044, "grad_norm": 0.22728955745697021, "learning_rate": 4.32762117217603e-05, "loss": 0.2242, "step": 12110 }, { "epoch": 0.663363083830696, "grad_norm": 0.17118361592292786, "learning_rate": 4.327114175623606e-05, "loss": 0.224, "step": 12115 }, { "epoch": 0.6636368614137874, "grad_norm": 0.17616678774356842, "learning_rate": 4.3266071790711826e-05, "loss": 0.2259, "step": 12120 }, { "epoch": 0.663910638996879, "grad_norm": 0.1791708916425705, "learning_rate": 4.326100182518759e-05, "loss": 0.2244, "step": 12125 }, { "epoch": 0.6641844165799704, "grad_norm": 0.1825903356075287, "learning_rate": 4.325593185966336e-05, "loss": 0.2251, "step": 12130 }, { "epoch": 0.664458194163062, "grad_norm": 0.17295949161052704, "learning_rate": 4.325086189413912e-05, "loss": 0.2248, "step": 12135 }, { "epoch": 0.6647319717461534, "grad_norm": 0.19359156489372253, "learning_rate": 4.3245791928614886e-05, "loss": 0.2342, "step": 12140 }, { "epoch": 0.6650057493292449, "grad_norm": 0.17100758850574493, "learning_rate": 4.3240721963090656e-05, "loss": 0.2316, "step": 12145 }, { "epoch": 0.6652795269123364, "grad_norm": 0.14756496250629425, "learning_rate": 4.323565199756642e-05, "loss": 0.2323, "step": 12150 }, { "epoch": 0.6655533044954279, "grad_norm": 0.16236431896686554, "learning_rate": 4.323058203204218e-05, "loss": 0.2254, "step": 12155 }, { "epoch": 0.6658270820785194, "grad_norm": 0.1611800342798233, "learning_rate": 4.3225512066517946e-05, "loss": 0.2194, "step": 12160 }, { "epoch": 0.6661008596616109, "grad_norm": 0.16859810054302216, "learning_rate": 4.3220442100993716e-05, "loss": 0.2303, "step": 12165 }, { "epoch": 0.6663746372447024, "grad_norm": 0.17550028860569, "learning_rate": 4.321537213546948e-05, "loss": 0.2255, "step": 12170 }, { "epoch": 0.6666484148277939, "grad_norm": 0.14662456512451172, "learning_rate": 4.321030216994524e-05, "loss": 0.2258, "step": 12175 }, { "epoch": 0.6669221924108854, "grad_norm": 0.18011003732681274, "learning_rate": 4.320523220442101e-05, "loss": 0.2306, "step": 12180 }, { "epoch": 0.6671959699939769, "grad_norm": 0.1879175454378128, "learning_rate": 4.320016223889678e-05, "loss": 0.2248, "step": 12185 }, { "epoch": 0.6674697475770683, "grad_norm": 0.16281947493553162, "learning_rate": 4.3195092273372546e-05, "loss": 0.223, "step": 12190 }, { "epoch": 0.6677435251601599, "grad_norm": 0.18047142028808594, "learning_rate": 4.319002230784831e-05, "loss": 0.223, "step": 12195 }, { "epoch": 0.6680173027432513, "grad_norm": 0.17951801419258118, "learning_rate": 4.318495234232407e-05, "loss": 0.2219, "step": 12200 }, { "epoch": 0.6682910803263429, "grad_norm": 0.18515032529830933, "learning_rate": 4.317988237679984e-05, "loss": 0.2282, "step": 12205 }, { "epoch": 0.6685648579094344, "grad_norm": 0.15378780663013458, "learning_rate": 4.3174812411275606e-05, "loss": 0.2182, "step": 12210 }, { "epoch": 0.6688386354925259, "grad_norm": 0.18337933719158173, "learning_rate": 4.316974244575137e-05, "loss": 0.2268, "step": 12215 }, { "epoch": 0.6691124130756174, "grad_norm": 0.1816677302122116, "learning_rate": 4.316467248022714e-05, "loss": 0.2191, "step": 12220 }, { "epoch": 0.6693861906587089, "grad_norm": 0.17381606996059418, "learning_rate": 4.31596025147029e-05, "loss": 0.2299, "step": 12225 }, { "epoch": 0.6696599682418004, "grad_norm": 0.19856461882591248, "learning_rate": 4.3154532549178666e-05, "loss": 0.2282, "step": 12230 }, { "epoch": 0.6699337458248918, "grad_norm": 0.1756526380777359, "learning_rate": 4.314946258365443e-05, "loss": 0.2228, "step": 12235 }, { "epoch": 0.6702075234079834, "grad_norm": 0.18165463209152222, "learning_rate": 4.31443926181302e-05, "loss": 0.2273, "step": 12240 }, { "epoch": 0.6704813009910748, "grad_norm": 0.16618619859218597, "learning_rate": 4.313932265260596e-05, "loss": 0.2209, "step": 12245 }, { "epoch": 0.6707550785741664, "grad_norm": 0.15523597598075867, "learning_rate": 4.3134252687081726e-05, "loss": 0.2203, "step": 12250 }, { "epoch": 0.6710288561572578, "grad_norm": 0.19662237167358398, "learning_rate": 4.3129182721557496e-05, "loss": 0.2295, "step": 12255 }, { "epoch": 0.6713026337403494, "grad_norm": 0.1556985229253769, "learning_rate": 4.3124112756033266e-05, "loss": 0.2208, "step": 12260 }, { "epoch": 0.6715764113234408, "grad_norm": 0.18416517972946167, "learning_rate": 4.311904279050903e-05, "loss": 0.2184, "step": 12265 }, { "epoch": 0.6718501889065324, "grad_norm": 0.1989271640777588, "learning_rate": 4.311397282498479e-05, "loss": 0.2218, "step": 12270 }, { "epoch": 0.6721239664896238, "grad_norm": 0.17201881110668182, "learning_rate": 4.310890285946056e-05, "loss": 0.2137, "step": 12275 }, { "epoch": 0.6723977440727154, "grad_norm": 0.2015795111656189, "learning_rate": 4.3103832893936326e-05, "loss": 0.223, "step": 12280 }, { "epoch": 0.6726715216558068, "grad_norm": 0.18608804047107697, "learning_rate": 4.309876292841209e-05, "loss": 0.2297, "step": 12285 }, { "epoch": 0.6729452992388983, "grad_norm": 0.20075088739395142, "learning_rate": 4.309369296288785e-05, "loss": 0.2264, "step": 12290 }, { "epoch": 0.6732190768219898, "grad_norm": 0.2024172991514206, "learning_rate": 4.308862299736362e-05, "loss": 0.2273, "step": 12295 }, { "epoch": 0.6734928544050813, "grad_norm": 0.197797954082489, "learning_rate": 4.3083553031839386e-05, "loss": 0.2404, "step": 12300 }, { "epoch": 0.6737666319881728, "grad_norm": 0.20034091174602509, "learning_rate": 4.307848306631515e-05, "loss": 0.2201, "step": 12305 }, { "epoch": 0.6740404095712643, "grad_norm": 0.20210592448711395, "learning_rate": 4.307341310079092e-05, "loss": 0.2183, "step": 12310 }, { "epoch": 0.6743141871543558, "grad_norm": 0.17629100382328033, "learning_rate": 4.306834313526668e-05, "loss": 0.2244, "step": 12315 }, { "epoch": 0.6745879647374473, "grad_norm": 0.16507498919963837, "learning_rate": 4.3063273169742446e-05, "loss": 0.2313, "step": 12320 }, { "epoch": 0.6748617423205387, "grad_norm": 0.1647995561361313, "learning_rate": 4.305820320421821e-05, "loss": 0.2246, "step": 12325 }, { "epoch": 0.6751355199036303, "grad_norm": 0.15515263378620148, "learning_rate": 4.305313323869398e-05, "loss": 0.2185, "step": 12330 }, { "epoch": 0.6754092974867217, "grad_norm": 0.2926589846611023, "learning_rate": 4.304806327316974e-05, "loss": 0.239, "step": 12335 }, { "epoch": 0.6756830750698133, "grad_norm": 0.16866402328014374, "learning_rate": 4.304299330764551e-05, "loss": 0.23, "step": 12340 }, { "epoch": 0.6759568526529047, "grad_norm": 0.20105956494808197, "learning_rate": 4.3037923342121276e-05, "loss": 0.2241, "step": 12345 }, { "epoch": 0.6762306302359963, "grad_norm": 0.2052726000547409, "learning_rate": 4.3032853376597046e-05, "loss": 0.2359, "step": 12350 }, { "epoch": 0.6765044078190878, "grad_norm": 0.15938910841941833, "learning_rate": 4.302778341107281e-05, "loss": 0.2238, "step": 12355 }, { "epoch": 0.6767781854021793, "grad_norm": 0.19520965218544006, "learning_rate": 4.302271344554857e-05, "loss": 0.2419, "step": 12360 }, { "epoch": 0.6770519629852708, "grad_norm": 0.169305220246315, "learning_rate": 4.3017643480024336e-05, "loss": 0.2285, "step": 12365 }, { "epoch": 0.6773257405683623, "grad_norm": 0.21450446546077728, "learning_rate": 4.3012573514500106e-05, "loss": 0.2251, "step": 12370 }, { "epoch": 0.6775995181514538, "grad_norm": 0.16330185532569885, "learning_rate": 4.300750354897587e-05, "loss": 0.2201, "step": 12375 }, { "epoch": 0.6778732957345452, "grad_norm": 0.21019718050956726, "learning_rate": 4.300243358345163e-05, "loss": 0.234, "step": 12380 }, { "epoch": 0.6781470733176368, "grad_norm": 0.2101137489080429, "learning_rate": 4.29973636179274e-05, "loss": 0.2363, "step": 12385 }, { "epoch": 0.6784208509007282, "grad_norm": 0.2235863357782364, "learning_rate": 4.2992293652403166e-05, "loss": 0.2379, "step": 12390 }, { "epoch": 0.6786946284838198, "grad_norm": 0.16542020440101624, "learning_rate": 4.298722368687893e-05, "loss": 0.2253, "step": 12395 }, { "epoch": 0.6789684060669112, "grad_norm": 0.1714320182800293, "learning_rate": 4.298215372135469e-05, "loss": 0.2287, "step": 12400 }, { "epoch": 0.6792421836500028, "grad_norm": 0.18008540570735931, "learning_rate": 4.297708375583046e-05, "loss": 0.225, "step": 12405 }, { "epoch": 0.6795159612330942, "grad_norm": 0.17770913243293762, "learning_rate": 4.2972013790306226e-05, "loss": 0.2248, "step": 12410 }, { "epoch": 0.6797897388161858, "grad_norm": 0.18915238976478577, "learning_rate": 4.296694382478199e-05, "loss": 0.2251, "step": 12415 }, { "epoch": 0.6800635163992772, "grad_norm": 0.18410061299800873, "learning_rate": 4.296187385925776e-05, "loss": 0.2249, "step": 12420 }, { "epoch": 0.6803372939823688, "grad_norm": 0.19816243648529053, "learning_rate": 4.295680389373353e-05, "loss": 0.2154, "step": 12425 }, { "epoch": 0.6806110715654602, "grad_norm": 0.18483145534992218, "learning_rate": 4.295173392820929e-05, "loss": 0.2232, "step": 12430 }, { "epoch": 0.6808848491485517, "grad_norm": 0.1929839551448822, "learning_rate": 4.2946663962685056e-05, "loss": 0.2244, "step": 12435 }, { "epoch": 0.6811586267316432, "grad_norm": 0.16510075330734253, "learning_rate": 4.2941593997160826e-05, "loss": 0.2192, "step": 12440 }, { "epoch": 0.6814324043147347, "grad_norm": 0.17073878645896912, "learning_rate": 4.293652403163659e-05, "loss": 0.2358, "step": 12445 }, { "epoch": 0.6817061818978262, "grad_norm": 0.15835833549499512, "learning_rate": 4.293145406611235e-05, "loss": 0.2197, "step": 12450 }, { "epoch": 0.6819799594809177, "grad_norm": 0.1493745595216751, "learning_rate": 4.2926384100588116e-05, "loss": 0.2163, "step": 12455 }, { "epoch": 0.6822537370640092, "grad_norm": 0.1785757839679718, "learning_rate": 4.2921314135063886e-05, "loss": 0.2317, "step": 12460 }, { "epoch": 0.6825275146471007, "grad_norm": 0.19271136820316315, "learning_rate": 4.291624416953965e-05, "loss": 0.2257, "step": 12465 }, { "epoch": 0.6828012922301921, "grad_norm": 0.2151539921760559, "learning_rate": 4.291117420401541e-05, "loss": 0.2306, "step": 12470 }, { "epoch": 0.6830750698132837, "grad_norm": 0.199159175157547, "learning_rate": 4.2906104238491176e-05, "loss": 0.2255, "step": 12475 }, { "epoch": 0.6833488473963751, "grad_norm": 0.190541073679924, "learning_rate": 4.2901034272966946e-05, "loss": 0.2178, "step": 12480 }, { "epoch": 0.6836226249794667, "grad_norm": 0.24529917538166046, "learning_rate": 4.289596430744271e-05, "loss": 0.2287, "step": 12485 }, { "epoch": 0.6838964025625581, "grad_norm": 0.18082834780216217, "learning_rate": 4.289089434191847e-05, "loss": 0.2285, "step": 12490 }, { "epoch": 0.6841701801456497, "grad_norm": 0.25416281819343567, "learning_rate": 4.288582437639424e-05, "loss": 0.2319, "step": 12495 }, { "epoch": 0.6844439577287412, "grad_norm": 0.19489875435829163, "learning_rate": 4.2880754410870006e-05, "loss": 0.228, "step": 12500 }, { "epoch": 0.6847177353118327, "grad_norm": 0.15016601979732513, "learning_rate": 4.2875684445345776e-05, "loss": 0.2206, "step": 12505 }, { "epoch": 0.6849915128949242, "grad_norm": 0.1579144150018692, "learning_rate": 4.287061447982154e-05, "loss": 0.224, "step": 12510 }, { "epoch": 0.6852652904780157, "grad_norm": 0.17392271757125854, "learning_rate": 4.286554451429731e-05, "loss": 0.2188, "step": 12515 }, { "epoch": 0.6855390680611072, "grad_norm": 0.20774126052856445, "learning_rate": 4.286047454877307e-05, "loss": 0.2319, "step": 12520 }, { "epoch": 0.6858128456441986, "grad_norm": 0.2153085321187973, "learning_rate": 4.2855404583248836e-05, "loss": 0.2279, "step": 12525 }, { "epoch": 0.6860866232272902, "grad_norm": 0.18381629884243011, "learning_rate": 4.28503346177246e-05, "loss": 0.229, "step": 12530 }, { "epoch": 0.6863604008103816, "grad_norm": 0.17989197373390198, "learning_rate": 4.284526465220037e-05, "loss": 0.2276, "step": 12535 }, { "epoch": 0.6866341783934732, "grad_norm": 0.19616638123989105, "learning_rate": 4.284019468667613e-05, "loss": 0.2192, "step": 12540 }, { "epoch": 0.6869079559765646, "grad_norm": 0.15812818706035614, "learning_rate": 4.2835124721151896e-05, "loss": 0.2322, "step": 12545 }, { "epoch": 0.6871817335596562, "grad_norm": 0.19767077267169952, "learning_rate": 4.2830054755627666e-05, "loss": 0.2287, "step": 12550 }, { "epoch": 0.6874555111427476, "grad_norm": 0.2028033584356308, "learning_rate": 4.282498479010343e-05, "loss": 0.2287, "step": 12555 }, { "epoch": 0.6877292887258392, "grad_norm": 0.18847258388996124, "learning_rate": 4.281991482457919e-05, "loss": 0.2284, "step": 12560 }, { "epoch": 0.6880030663089306, "grad_norm": 0.2021034210920334, "learning_rate": 4.2814844859054956e-05, "loss": 0.2227, "step": 12565 }, { "epoch": 0.6882768438920221, "grad_norm": 0.19945302605628967, "learning_rate": 4.2809774893530726e-05, "loss": 0.2236, "step": 12570 }, { "epoch": 0.6885506214751136, "grad_norm": 0.19621357321739197, "learning_rate": 4.280470492800649e-05, "loss": 0.2253, "step": 12575 }, { "epoch": 0.6888243990582051, "grad_norm": 0.17604701220989227, "learning_rate": 4.279963496248225e-05, "loss": 0.2322, "step": 12580 }, { "epoch": 0.6890981766412966, "grad_norm": 0.1676141917705536, "learning_rate": 4.279456499695802e-05, "loss": 0.2266, "step": 12585 }, { "epoch": 0.6893719542243881, "grad_norm": 0.19096896052360535, "learning_rate": 4.278949503143379e-05, "loss": 0.2227, "step": 12590 }, { "epoch": 0.6896457318074796, "grad_norm": 0.16201381385326385, "learning_rate": 4.2784425065909556e-05, "loss": 0.225, "step": 12595 }, { "epoch": 0.6899195093905711, "grad_norm": 0.17872951924800873, "learning_rate": 4.277935510038532e-05, "loss": 0.234, "step": 12600 }, { "epoch": 0.6901932869736626, "grad_norm": 0.16616560518741608, "learning_rate": 4.277428513486108e-05, "loss": 0.2258, "step": 12605 }, { "epoch": 0.6904670645567541, "grad_norm": 0.16350509226322174, "learning_rate": 4.276921516933685e-05, "loss": 0.2275, "step": 12610 }, { "epoch": 0.6907408421398455, "grad_norm": 0.17183054983615875, "learning_rate": 4.2764145203812616e-05, "loss": 0.2184, "step": 12615 }, { "epoch": 0.6910146197229371, "grad_norm": 0.15291020274162292, "learning_rate": 4.275907523828838e-05, "loss": 0.224, "step": 12620 }, { "epoch": 0.6912883973060285, "grad_norm": 0.18508099019527435, "learning_rate": 4.275400527276415e-05, "loss": 0.2306, "step": 12625 }, { "epoch": 0.6915621748891201, "grad_norm": 0.16866375505924225, "learning_rate": 4.274893530723991e-05, "loss": 0.2271, "step": 12630 }, { "epoch": 0.6918359524722115, "grad_norm": 0.21077555418014526, "learning_rate": 4.2743865341715676e-05, "loss": 0.2269, "step": 12635 }, { "epoch": 0.6921097300553031, "grad_norm": 0.17912641167640686, "learning_rate": 4.273879537619144e-05, "loss": 0.2273, "step": 12640 }, { "epoch": 0.6923835076383945, "grad_norm": 0.18329551815986633, "learning_rate": 4.273372541066721e-05, "loss": 0.2243, "step": 12645 }, { "epoch": 0.6926572852214861, "grad_norm": 0.19355164468288422, "learning_rate": 4.272865544514297e-05, "loss": 0.2348, "step": 12650 }, { "epoch": 0.6929310628045776, "grad_norm": 0.16390357911586761, "learning_rate": 4.2723585479618736e-05, "loss": 0.2205, "step": 12655 }, { "epoch": 0.693204840387669, "grad_norm": 0.1517106294631958, "learning_rate": 4.2718515514094506e-05, "loss": 0.2266, "step": 12660 }, { "epoch": 0.6934786179707606, "grad_norm": 0.17660607397556305, "learning_rate": 4.2713445548570276e-05, "loss": 0.2154, "step": 12665 }, { "epoch": 0.693752395553852, "grad_norm": 0.19633954763412476, "learning_rate": 4.270837558304604e-05, "loss": 0.2233, "step": 12670 }, { "epoch": 0.6940261731369436, "grad_norm": 0.20135240256786346, "learning_rate": 4.27033056175218e-05, "loss": 0.2334, "step": 12675 }, { "epoch": 0.694299950720035, "grad_norm": 0.2014864683151245, "learning_rate": 4.269823565199757e-05, "loss": 0.2268, "step": 12680 }, { "epoch": 0.6945737283031266, "grad_norm": 0.18771310150623322, "learning_rate": 4.2693165686473336e-05, "loss": 0.2237, "step": 12685 }, { "epoch": 0.694847505886218, "grad_norm": 0.19107162952423096, "learning_rate": 4.26880957209491e-05, "loss": 0.2287, "step": 12690 }, { "epoch": 0.6951212834693096, "grad_norm": 0.14466947317123413, "learning_rate": 4.268302575542486e-05, "loss": 0.2097, "step": 12695 }, { "epoch": 0.695395061052401, "grad_norm": 0.17786869406700134, "learning_rate": 4.267795578990063e-05, "loss": 0.2171, "step": 12700 }, { "epoch": 0.6956688386354926, "grad_norm": 0.15600410103797913, "learning_rate": 4.2672885824376396e-05, "loss": 0.2259, "step": 12705 }, { "epoch": 0.695942616218584, "grad_norm": 0.15149199962615967, "learning_rate": 4.266781585885216e-05, "loss": 0.2227, "step": 12710 }, { "epoch": 0.6962163938016755, "grad_norm": 0.17922581732273102, "learning_rate": 4.266274589332793e-05, "loss": 0.229, "step": 12715 }, { "epoch": 0.696490171384767, "grad_norm": 0.2168828845024109, "learning_rate": 4.265767592780369e-05, "loss": 0.2219, "step": 12720 }, { "epoch": 0.6967639489678585, "grad_norm": 0.16657564043998718, "learning_rate": 4.2652605962279456e-05, "loss": 0.2326, "step": 12725 }, { "epoch": 0.69703772655095, "grad_norm": 0.2087090164422989, "learning_rate": 4.264753599675522e-05, "loss": 0.2173, "step": 12730 }, { "epoch": 0.6973115041340415, "grad_norm": 0.1801263988018036, "learning_rate": 4.264246603123099e-05, "loss": 0.2274, "step": 12735 }, { "epoch": 0.697585281717133, "grad_norm": 0.1489858627319336, "learning_rate": 4.263739606570675e-05, "loss": 0.2165, "step": 12740 }, { "epoch": 0.6978590593002245, "grad_norm": 0.14118316769599915, "learning_rate": 4.2632326100182516e-05, "loss": 0.2208, "step": 12745 }, { "epoch": 0.698132836883316, "grad_norm": 0.17119264602661133, "learning_rate": 4.2627256134658287e-05, "loss": 0.2225, "step": 12750 }, { "epoch": 0.6984066144664075, "grad_norm": 0.1882389783859253, "learning_rate": 4.2622186169134057e-05, "loss": 0.2258, "step": 12755 }, { "epoch": 0.6986803920494989, "grad_norm": 0.22786474227905273, "learning_rate": 4.261711620360982e-05, "loss": 0.2244, "step": 12760 }, { "epoch": 0.6989541696325905, "grad_norm": 0.17605306208133698, "learning_rate": 4.261204623808558e-05, "loss": 0.2307, "step": 12765 }, { "epoch": 0.6992279472156819, "grad_norm": 0.17853814363479614, "learning_rate": 4.2606976272561347e-05, "loss": 0.2263, "step": 12770 }, { "epoch": 0.6995017247987735, "grad_norm": 0.16011542081832886, "learning_rate": 4.2601906307037117e-05, "loss": 0.2246, "step": 12775 }, { "epoch": 0.6997755023818649, "grad_norm": 0.1808764785528183, "learning_rate": 4.259683634151288e-05, "loss": 0.2192, "step": 12780 }, { "epoch": 0.7000492799649565, "grad_norm": 0.19560478627681732, "learning_rate": 4.259176637598864e-05, "loss": 0.2238, "step": 12785 }, { "epoch": 0.7003230575480479, "grad_norm": 0.175944522023201, "learning_rate": 4.258669641046441e-05, "loss": 0.2228, "step": 12790 }, { "epoch": 0.7005968351311395, "grad_norm": 0.20979949831962585, "learning_rate": 4.2581626444940177e-05, "loss": 0.2246, "step": 12795 }, { "epoch": 0.700870612714231, "grad_norm": 0.1901860535144806, "learning_rate": 4.257655647941594e-05, "loss": 0.232, "step": 12800 }, { "epoch": 0.7011443902973225, "grad_norm": 0.14886797964572906, "learning_rate": 4.25714865138917e-05, "loss": 0.2263, "step": 12805 }, { "epoch": 0.701418167880414, "grad_norm": 0.16224221885204315, "learning_rate": 4.256641654836747e-05, "loss": 0.2285, "step": 12810 }, { "epoch": 0.7016919454635054, "grad_norm": 0.2395179122686386, "learning_rate": 4.2561346582843237e-05, "loss": 0.2232, "step": 12815 }, { "epoch": 0.701965723046597, "grad_norm": 0.22140711545944214, "learning_rate": 4.2556276617319e-05, "loss": 0.2278, "step": 12820 }, { "epoch": 0.7022395006296884, "grad_norm": 0.18299472332000732, "learning_rate": 4.255120665179477e-05, "loss": 0.2256, "step": 12825 }, { "epoch": 0.70251327821278, "grad_norm": 0.16556088626384735, "learning_rate": 4.254613668627054e-05, "loss": 0.2259, "step": 12830 }, { "epoch": 0.7027870557958714, "grad_norm": 0.16841530799865723, "learning_rate": 4.25410667207463e-05, "loss": 0.2251, "step": 12835 }, { "epoch": 0.703060833378963, "grad_norm": 0.1809326410293579, "learning_rate": 4.253599675522207e-05, "loss": 0.2265, "step": 12840 }, { "epoch": 0.7033346109620544, "grad_norm": 0.15984675288200378, "learning_rate": 4.253092678969784e-05, "loss": 0.239, "step": 12845 }, { "epoch": 0.703608388545146, "grad_norm": 0.17176201939582825, "learning_rate": 4.25258568241736e-05, "loss": 0.2129, "step": 12850 }, { "epoch": 0.7038821661282374, "grad_norm": 0.20632712543010712, "learning_rate": 4.252078685864936e-05, "loss": 0.2271, "step": 12855 }, { "epoch": 0.704155943711329, "grad_norm": 0.16669027507305145, "learning_rate": 4.251571689312513e-05, "loss": 0.219, "step": 12860 }, { "epoch": 0.7044297212944204, "grad_norm": 0.17100569605827332, "learning_rate": 4.25106469276009e-05, "loss": 0.2195, "step": 12865 }, { "epoch": 0.7047034988775119, "grad_norm": 0.1598571091890335, "learning_rate": 4.250557696207666e-05, "loss": 0.2151, "step": 12870 }, { "epoch": 0.7049772764606034, "grad_norm": 0.17128272354602814, "learning_rate": 4.250050699655242e-05, "loss": 0.2286, "step": 12875 }, { "epoch": 0.7052510540436949, "grad_norm": 0.23298001289367676, "learning_rate": 4.2495437031028193e-05, "loss": 0.2213, "step": 12880 }, { "epoch": 0.7055248316267864, "grad_norm": 0.18131278455257416, "learning_rate": 4.249036706550396e-05, "loss": 0.2307, "step": 12885 }, { "epoch": 0.7057986092098779, "grad_norm": 0.16617339849472046, "learning_rate": 4.248529709997972e-05, "loss": 0.2291, "step": 12890 }, { "epoch": 0.7060723867929694, "grad_norm": 0.1843181848526001, "learning_rate": 4.248022713445548e-05, "loss": 0.2212, "step": 12895 }, { "epoch": 0.7063461643760609, "grad_norm": 0.14962559938430786, "learning_rate": 4.2475157168931253e-05, "loss": 0.2254, "step": 12900 }, { "epoch": 0.7066199419591523, "grad_norm": 0.157479390501976, "learning_rate": 4.247008720340702e-05, "loss": 0.2247, "step": 12905 }, { "epoch": 0.7068937195422439, "grad_norm": 0.1619628220796585, "learning_rate": 4.246501723788279e-05, "loss": 0.2126, "step": 12910 }, { "epoch": 0.7071674971253353, "grad_norm": 0.1919460892677307, "learning_rate": 4.245994727235855e-05, "loss": 0.2338, "step": 12915 }, { "epoch": 0.7074412747084269, "grad_norm": 0.15672150254249573, "learning_rate": 4.245487730683432e-05, "loss": 0.2131, "step": 12920 }, { "epoch": 0.7077150522915183, "grad_norm": 0.16107146441936493, "learning_rate": 4.2449807341310083e-05, "loss": 0.2239, "step": 12925 }, { "epoch": 0.7079888298746099, "grad_norm": 0.1664184182882309, "learning_rate": 4.244473737578585e-05, "loss": 0.2178, "step": 12930 }, { "epoch": 0.7082626074577013, "grad_norm": 0.18003547191619873, "learning_rate": 4.243966741026161e-05, "loss": 0.2295, "step": 12935 }, { "epoch": 0.7085363850407929, "grad_norm": 0.17001506686210632, "learning_rate": 4.243459744473738e-05, "loss": 0.222, "step": 12940 }, { "epoch": 0.7088101626238844, "grad_norm": 0.16727042198181152, "learning_rate": 4.2429527479213143e-05, "loss": 0.2274, "step": 12945 }, { "epoch": 0.7090839402069758, "grad_norm": 0.16516587138175964, "learning_rate": 4.242445751368891e-05, "loss": 0.2132, "step": 12950 }, { "epoch": 0.7093577177900674, "grad_norm": 0.1664104461669922, "learning_rate": 4.241938754816468e-05, "loss": 0.2237, "step": 12955 }, { "epoch": 0.7096314953731588, "grad_norm": 0.16164125502109528, "learning_rate": 4.241431758264044e-05, "loss": 0.2188, "step": 12960 }, { "epoch": 0.7099052729562504, "grad_norm": 0.14379015564918518, "learning_rate": 4.2409247617116203e-05, "loss": 0.2292, "step": 12965 }, { "epoch": 0.7101790505393418, "grad_norm": 0.1571703851222992, "learning_rate": 4.240417765159197e-05, "loss": 0.2161, "step": 12970 }, { "epoch": 0.7104528281224334, "grad_norm": 0.21589995920658112, "learning_rate": 4.239910768606774e-05, "loss": 0.2222, "step": 12975 }, { "epoch": 0.7107266057055248, "grad_norm": 0.1955016404390335, "learning_rate": 4.23940377205435e-05, "loss": 0.2283, "step": 12980 }, { "epoch": 0.7110003832886164, "grad_norm": 0.17725136876106262, "learning_rate": 4.2388967755019263e-05, "loss": 0.2193, "step": 12985 }, { "epoch": 0.7112741608717078, "grad_norm": 0.1573362946510315, "learning_rate": 4.2383897789495034e-05, "loss": 0.221, "step": 12990 }, { "epoch": 0.7115479384547994, "grad_norm": 0.18656863272190094, "learning_rate": 4.2378827823970804e-05, "loss": 0.2263, "step": 12995 }, { "epoch": 0.7118217160378908, "grad_norm": 0.22729814052581787, "learning_rate": 4.237375785844657e-05, "loss": 0.2208, "step": 13000 }, { "epoch": 0.7120954936209823, "grad_norm": 0.18243369460105896, "learning_rate": 4.236868789292233e-05, "loss": 0.231, "step": 13005 }, { "epoch": 0.7123692712040738, "grad_norm": 0.21150276064872742, "learning_rate": 4.23636179273981e-05, "loss": 0.2336, "step": 13010 }, { "epoch": 0.7126430487871653, "grad_norm": 0.16411231458187103, "learning_rate": 4.2358547961873864e-05, "loss": 0.2187, "step": 13015 }, { "epoch": 0.7129168263702568, "grad_norm": 0.1904536634683609, "learning_rate": 4.235347799634963e-05, "loss": 0.2235, "step": 13020 }, { "epoch": 0.7131906039533483, "grad_norm": 0.1525496393442154, "learning_rate": 4.234840803082539e-05, "loss": 0.2247, "step": 13025 }, { "epoch": 0.7134643815364398, "grad_norm": 0.17160168290138245, "learning_rate": 4.234333806530116e-05, "loss": 0.2293, "step": 13030 }, { "epoch": 0.7137381591195313, "grad_norm": 0.17013469338417053, "learning_rate": 4.2338268099776924e-05, "loss": 0.213, "step": 13035 }, { "epoch": 0.7140119367026228, "grad_norm": 0.18887662887573242, "learning_rate": 4.233319813425269e-05, "loss": 0.2287, "step": 13040 }, { "epoch": 0.7142857142857143, "grad_norm": 0.20966169238090515, "learning_rate": 4.232812816872846e-05, "loss": 0.2244, "step": 13045 }, { "epoch": 0.7145594918688057, "grad_norm": 0.17091840505599976, "learning_rate": 4.232305820320422e-05, "loss": 0.2231, "step": 13050 }, { "epoch": 0.7148332694518973, "grad_norm": 0.20039695501327515, "learning_rate": 4.2317988237679984e-05, "loss": 0.2269, "step": 13055 }, { "epoch": 0.7151070470349887, "grad_norm": 0.16024380922317505, "learning_rate": 4.231291827215575e-05, "loss": 0.2235, "step": 13060 }, { "epoch": 0.7153808246180803, "grad_norm": 0.17102326452732086, "learning_rate": 4.230784830663152e-05, "loss": 0.2173, "step": 13065 }, { "epoch": 0.7156546022011717, "grad_norm": 0.1566060483455658, "learning_rate": 4.230277834110728e-05, "loss": 0.2182, "step": 13070 }, { "epoch": 0.7159283797842633, "grad_norm": 0.1595357060432434, "learning_rate": 4.229770837558305e-05, "loss": 0.2276, "step": 13075 }, { "epoch": 0.7162021573673547, "grad_norm": 0.19841817021369934, "learning_rate": 4.2292638410058814e-05, "loss": 0.2221, "step": 13080 }, { "epoch": 0.7164759349504463, "grad_norm": 0.2208450436592102, "learning_rate": 4.2287568444534584e-05, "loss": 0.2238, "step": 13085 }, { "epoch": 0.7167497125335378, "grad_norm": 0.1652340441942215, "learning_rate": 4.228249847901035e-05, "loss": 0.2255, "step": 13090 }, { "epoch": 0.7170234901166292, "grad_norm": 0.22636784613132477, "learning_rate": 4.227742851348611e-05, "loss": 0.2311, "step": 13095 }, { "epoch": 0.7172972676997208, "grad_norm": 0.20977997779846191, "learning_rate": 4.2272358547961874e-05, "loss": 0.226, "step": 13100 }, { "epoch": 0.7175710452828122, "grad_norm": 0.16378362476825714, "learning_rate": 4.2267288582437644e-05, "loss": 0.2289, "step": 13105 }, { "epoch": 0.7178448228659038, "grad_norm": 0.15053009986877441, "learning_rate": 4.226221861691341e-05, "loss": 0.233, "step": 13110 }, { "epoch": 0.7181186004489952, "grad_norm": 0.19982734322547913, "learning_rate": 4.225714865138917e-05, "loss": 0.223, "step": 13115 }, { "epoch": 0.7183923780320868, "grad_norm": 0.22916002571582794, "learning_rate": 4.225207868586494e-05, "loss": 0.2298, "step": 13120 }, { "epoch": 0.7186661556151782, "grad_norm": 0.17934609949588776, "learning_rate": 4.2247008720340704e-05, "loss": 0.2279, "step": 13125 }, { "epoch": 0.7189399331982698, "grad_norm": 0.15788927674293518, "learning_rate": 4.224193875481647e-05, "loss": 0.2275, "step": 13130 }, { "epoch": 0.7192137107813612, "grad_norm": 0.16416630148887634, "learning_rate": 4.223686878929223e-05, "loss": 0.2244, "step": 13135 }, { "epoch": 0.7194874883644528, "grad_norm": 0.16473522782325745, "learning_rate": 4.2231798823768e-05, "loss": 0.2291, "step": 13140 }, { "epoch": 0.7197612659475442, "grad_norm": 0.17508076131343842, "learning_rate": 4.2226728858243764e-05, "loss": 0.2211, "step": 13145 }, { "epoch": 0.7200350435306357, "grad_norm": 0.1709747314453125, "learning_rate": 4.222165889271953e-05, "loss": 0.2295, "step": 13150 }, { "epoch": 0.7203088211137272, "grad_norm": 0.2818590998649597, "learning_rate": 4.22165889271953e-05, "loss": 0.2215, "step": 13155 }, { "epoch": 0.7205825986968187, "grad_norm": 0.20586742460727692, "learning_rate": 4.221151896167107e-05, "loss": 0.2305, "step": 13160 }, { "epoch": 0.7208563762799102, "grad_norm": 0.14920839667320251, "learning_rate": 4.220644899614683e-05, "loss": 0.2363, "step": 13165 }, { "epoch": 0.7211301538630017, "grad_norm": 0.1473599523305893, "learning_rate": 4.2201379030622594e-05, "loss": 0.2283, "step": 13170 }, { "epoch": 0.7214039314460932, "grad_norm": 0.2222895622253418, "learning_rate": 4.2196309065098364e-05, "loss": 0.229, "step": 13175 }, { "epoch": 0.7216777090291847, "grad_norm": 0.1939231902360916, "learning_rate": 4.219123909957413e-05, "loss": 0.2195, "step": 13180 }, { "epoch": 0.7219514866122761, "grad_norm": 0.15888389945030212, "learning_rate": 4.218616913404989e-05, "loss": 0.2264, "step": 13185 }, { "epoch": 0.7222252641953677, "grad_norm": 0.16989344358444214, "learning_rate": 4.2181099168525654e-05, "loss": 0.2211, "step": 13190 }, { "epoch": 0.7224990417784591, "grad_norm": 0.19484087824821472, "learning_rate": 4.2176029203001424e-05, "loss": 0.2323, "step": 13195 }, { "epoch": 0.7227728193615507, "grad_norm": 0.1596769243478775, "learning_rate": 4.217095923747719e-05, "loss": 0.2377, "step": 13200 }, { "epoch": 0.7230465969446421, "grad_norm": 0.15973536670207977, "learning_rate": 4.216588927195295e-05, "loss": 0.2215, "step": 13205 }, { "epoch": 0.7233203745277337, "grad_norm": 0.17808304727077484, "learning_rate": 4.2160819306428714e-05, "loss": 0.2232, "step": 13210 }, { "epoch": 0.7235941521108251, "grad_norm": 0.16581077873706818, "learning_rate": 4.2155749340904484e-05, "loss": 0.2333, "step": 13215 }, { "epoch": 0.7238679296939167, "grad_norm": 0.16109400987625122, "learning_rate": 4.215067937538025e-05, "loss": 0.2255, "step": 13220 }, { "epoch": 0.7241417072770081, "grad_norm": 0.16547717154026031, "learning_rate": 4.214560940985601e-05, "loss": 0.2233, "step": 13225 }, { "epoch": 0.7244154848600997, "grad_norm": 0.15967781841754913, "learning_rate": 4.214053944433178e-05, "loss": 0.2272, "step": 13230 }, { "epoch": 0.7246892624431912, "grad_norm": 0.16226978600025177, "learning_rate": 4.213546947880755e-05, "loss": 0.2317, "step": 13235 }, { "epoch": 0.7249630400262826, "grad_norm": 0.1711771935224533, "learning_rate": 4.2130399513283314e-05, "loss": 0.233, "step": 13240 }, { "epoch": 0.7252368176093742, "grad_norm": 0.1825258433818817, "learning_rate": 4.212532954775908e-05, "loss": 0.2209, "step": 13245 }, { "epoch": 0.7255105951924656, "grad_norm": 0.1523265838623047, "learning_rate": 4.212025958223485e-05, "loss": 0.2266, "step": 13250 }, { "epoch": 0.7257843727755572, "grad_norm": 0.15417717397212982, "learning_rate": 4.211518961671061e-05, "loss": 0.2219, "step": 13255 }, { "epoch": 0.7260581503586486, "grad_norm": 0.1504202038049698, "learning_rate": 4.2110119651186374e-05, "loss": 0.2261, "step": 13260 }, { "epoch": 0.7263319279417402, "grad_norm": 0.1517956554889679, "learning_rate": 4.210504968566214e-05, "loss": 0.2289, "step": 13265 }, { "epoch": 0.7266057055248316, "grad_norm": 0.178424671292305, "learning_rate": 4.209997972013791e-05, "loss": 0.2228, "step": 13270 }, { "epoch": 0.7268794831079232, "grad_norm": 0.1719619482755661, "learning_rate": 4.209490975461367e-05, "loss": 0.2205, "step": 13275 }, { "epoch": 0.7271532606910146, "grad_norm": 0.16431070864200592, "learning_rate": 4.2089839789089434e-05, "loss": 0.2217, "step": 13280 }, { "epoch": 0.7274270382741062, "grad_norm": 0.2119227796792984, "learning_rate": 4.2084769823565204e-05, "loss": 0.2355, "step": 13285 }, { "epoch": 0.7277008158571976, "grad_norm": 0.2098151594400406, "learning_rate": 4.207969985804097e-05, "loss": 0.2204, "step": 13290 }, { "epoch": 0.7279745934402891, "grad_norm": 0.17609867453575134, "learning_rate": 4.207462989251673e-05, "loss": 0.2297, "step": 13295 }, { "epoch": 0.7282483710233806, "grad_norm": 0.18679581582546234, "learning_rate": 4.2069559926992494e-05, "loss": 0.2332, "step": 13300 }, { "epoch": 0.7285221486064721, "grad_norm": 0.15989169478416443, "learning_rate": 4.2064489961468264e-05, "loss": 0.2219, "step": 13305 }, { "epoch": 0.7287959261895636, "grad_norm": 0.17337626218795776, "learning_rate": 4.205941999594403e-05, "loss": 0.2325, "step": 13310 }, { "epoch": 0.7290697037726551, "grad_norm": 0.1940174698829651, "learning_rate": 4.20543500304198e-05, "loss": 0.2138, "step": 13315 }, { "epoch": 0.7293434813557466, "grad_norm": 0.16246052086353302, "learning_rate": 4.204928006489556e-05, "loss": 0.2259, "step": 13320 }, { "epoch": 0.7296172589388381, "grad_norm": 0.18396645784378052, "learning_rate": 4.204421009937133e-05, "loss": 0.2217, "step": 13325 }, { "epoch": 0.7298910365219295, "grad_norm": 0.1724519431591034, "learning_rate": 4.2039140133847094e-05, "loss": 0.2247, "step": 13330 }, { "epoch": 0.7301648141050211, "grad_norm": 0.167569100856781, "learning_rate": 4.203407016832286e-05, "loss": 0.2284, "step": 13335 }, { "epoch": 0.7304385916881125, "grad_norm": 0.17769396305084229, "learning_rate": 4.202900020279862e-05, "loss": 0.2273, "step": 13340 }, { "epoch": 0.7307123692712041, "grad_norm": 0.23459392786026, "learning_rate": 4.202393023727439e-05, "loss": 0.2187, "step": 13345 }, { "epoch": 0.7309861468542955, "grad_norm": 0.19826191663742065, "learning_rate": 4.2018860271750154e-05, "loss": 0.225, "step": 13350 }, { "epoch": 0.7312599244373871, "grad_norm": 0.16487132012844086, "learning_rate": 4.201379030622592e-05, "loss": 0.2258, "step": 13355 }, { "epoch": 0.7315337020204785, "grad_norm": 0.185324028134346, "learning_rate": 4.200872034070169e-05, "loss": 0.2239, "step": 13360 }, { "epoch": 0.7318074796035701, "grad_norm": 0.14686696231365204, "learning_rate": 4.200365037517745e-05, "loss": 0.226, "step": 13365 }, { "epoch": 0.7320812571866615, "grad_norm": 0.16913388669490814, "learning_rate": 4.1998580409653214e-05, "loss": 0.2216, "step": 13370 }, { "epoch": 0.732355034769753, "grad_norm": 0.16688013076782227, "learning_rate": 4.199351044412898e-05, "loss": 0.2361, "step": 13375 }, { "epoch": 0.7326288123528445, "grad_norm": 0.16249103844165802, "learning_rate": 4.198844047860475e-05, "loss": 0.2299, "step": 13380 }, { "epoch": 0.732902589935936, "grad_norm": 0.16769534349441528, "learning_rate": 4.198337051308051e-05, "loss": 0.2305, "step": 13385 }, { "epoch": 0.7331763675190276, "grad_norm": 0.16110798716545105, "learning_rate": 4.1978300547556274e-05, "loss": 0.2224, "step": 13390 }, { "epoch": 0.733450145102119, "grad_norm": 0.17605990171432495, "learning_rate": 4.1973230582032044e-05, "loss": 0.2222, "step": 13395 }, { "epoch": 0.7337239226852106, "grad_norm": 0.14169497787952423, "learning_rate": 4.1968160616507814e-05, "loss": 0.2203, "step": 13400 }, { "epoch": 0.733997700268302, "grad_norm": 0.23733025789260864, "learning_rate": 4.196309065098358e-05, "loss": 0.2305, "step": 13405 }, { "epoch": 0.7342714778513936, "grad_norm": 0.1630563586950302, "learning_rate": 4.195802068545934e-05, "loss": 0.221, "step": 13410 }, { "epoch": 0.734545255434485, "grad_norm": 0.19183295965194702, "learning_rate": 4.195295071993511e-05, "loss": 0.2323, "step": 13415 }, { "epoch": 0.7348190330175766, "grad_norm": 0.15800034999847412, "learning_rate": 4.1947880754410874e-05, "loss": 0.22, "step": 13420 }, { "epoch": 0.735092810600668, "grad_norm": 0.16881681978702545, "learning_rate": 4.194281078888664e-05, "loss": 0.2191, "step": 13425 }, { "epoch": 0.7353665881837596, "grad_norm": 0.14784522354602814, "learning_rate": 4.19377408233624e-05, "loss": 0.2257, "step": 13430 }, { "epoch": 0.735640365766851, "grad_norm": 0.16327497363090515, "learning_rate": 4.193267085783817e-05, "loss": 0.2286, "step": 13435 }, { "epoch": 0.7359141433499425, "grad_norm": 0.1781768798828125, "learning_rate": 4.1927600892313934e-05, "loss": 0.2189, "step": 13440 }, { "epoch": 0.736187920933034, "grad_norm": 0.17004168033599854, "learning_rate": 4.19225309267897e-05, "loss": 0.2241, "step": 13445 }, { "epoch": 0.7364616985161255, "grad_norm": 0.17765343189239502, "learning_rate": 4.191746096126547e-05, "loss": 0.2228, "step": 13450 }, { "epoch": 0.736735476099217, "grad_norm": 0.1787460595369339, "learning_rate": 4.191239099574123e-05, "loss": 0.2274, "step": 13455 }, { "epoch": 0.7370092536823085, "grad_norm": 0.18411974608898163, "learning_rate": 4.1907321030216994e-05, "loss": 0.2258, "step": 13460 }, { "epoch": 0.7372830312654, "grad_norm": 0.19882594048976898, "learning_rate": 4.190225106469276e-05, "loss": 0.2232, "step": 13465 }, { "epoch": 0.7375568088484915, "grad_norm": 0.16385890543460846, "learning_rate": 4.189718109916853e-05, "loss": 0.2231, "step": 13470 }, { "epoch": 0.737830586431583, "grad_norm": 0.17939111590385437, "learning_rate": 4.189211113364429e-05, "loss": 0.2265, "step": 13475 }, { "epoch": 0.7381043640146745, "grad_norm": 0.1559719443321228, "learning_rate": 4.188704116812006e-05, "loss": 0.2234, "step": 13480 }, { "epoch": 0.7383781415977659, "grad_norm": 0.21466688811779022, "learning_rate": 4.1881971202595824e-05, "loss": 0.2206, "step": 13485 }, { "epoch": 0.7386519191808575, "grad_norm": 0.1723637729883194, "learning_rate": 4.1876901237071594e-05, "loss": 0.2189, "step": 13490 }, { "epoch": 0.7389256967639489, "grad_norm": 0.183359295129776, "learning_rate": 4.187183127154736e-05, "loss": 0.2159, "step": 13495 }, { "epoch": 0.7391994743470405, "grad_norm": 0.17711837589740753, "learning_rate": 4.186676130602312e-05, "loss": 0.2294, "step": 13500 }, { "epoch": 0.7394732519301319, "grad_norm": 0.18625274300575256, "learning_rate": 4.1861691340498884e-05, "loss": 0.2134, "step": 13505 }, { "epoch": 0.7397470295132235, "grad_norm": 0.18850836157798767, "learning_rate": 4.1856621374974654e-05, "loss": 0.2168, "step": 13510 }, { "epoch": 0.7400208070963149, "grad_norm": 0.15739566087722778, "learning_rate": 4.185155140945042e-05, "loss": 0.2235, "step": 13515 }, { "epoch": 0.7402945846794065, "grad_norm": 0.18904215097427368, "learning_rate": 4.184648144392618e-05, "loss": 0.2216, "step": 13520 }, { "epoch": 0.7405683622624979, "grad_norm": 0.15327759087085724, "learning_rate": 4.184141147840195e-05, "loss": 0.2197, "step": 13525 }, { "epoch": 0.7408421398455894, "grad_norm": 0.1677713841199875, "learning_rate": 4.1836341512877714e-05, "loss": 0.2139, "step": 13530 }, { "epoch": 0.741115917428681, "grad_norm": 0.22640715539455414, "learning_rate": 4.183127154735348e-05, "loss": 0.2316, "step": 13535 }, { "epoch": 0.7413896950117724, "grad_norm": 0.1487283557653427, "learning_rate": 4.182620158182924e-05, "loss": 0.2201, "step": 13540 }, { "epoch": 0.741663472594864, "grad_norm": 0.17658081650733948, "learning_rate": 4.182113161630501e-05, "loss": 0.2233, "step": 13545 }, { "epoch": 0.7419372501779554, "grad_norm": 0.1774463802576065, "learning_rate": 4.1816061650780774e-05, "loss": 0.2203, "step": 13550 }, { "epoch": 0.742211027761047, "grad_norm": 0.19643919169902802, "learning_rate": 4.181099168525654e-05, "loss": 0.2143, "step": 13555 }, { "epoch": 0.7424848053441384, "grad_norm": 0.16526086628437042, "learning_rate": 4.180592171973231e-05, "loss": 0.2179, "step": 13560 }, { "epoch": 0.74275858292723, "grad_norm": 0.18882641196250916, "learning_rate": 4.180085175420808e-05, "loss": 0.2331, "step": 13565 }, { "epoch": 0.7430323605103214, "grad_norm": 0.18848782777786255, "learning_rate": 4.179578178868384e-05, "loss": 0.2225, "step": 13570 }, { "epoch": 0.743306138093413, "grad_norm": 0.19795480370521545, "learning_rate": 4.1790711823159604e-05, "loss": 0.2192, "step": 13575 }, { "epoch": 0.7435799156765044, "grad_norm": 0.20938147604465485, "learning_rate": 4.1785641857635375e-05, "loss": 0.2408, "step": 13580 }, { "epoch": 0.7438536932595959, "grad_norm": 0.16906775534152985, "learning_rate": 4.178057189211114e-05, "loss": 0.2243, "step": 13585 }, { "epoch": 0.7441274708426874, "grad_norm": 0.17995300889015198, "learning_rate": 4.17755019265869e-05, "loss": 0.2287, "step": 13590 }, { "epoch": 0.7444012484257789, "grad_norm": 0.13867217302322388, "learning_rate": 4.1770431961062664e-05, "loss": 0.22, "step": 13595 }, { "epoch": 0.7446750260088704, "grad_norm": 0.1725863218307495, "learning_rate": 4.1765361995538435e-05, "loss": 0.2307, "step": 13600 }, { "epoch": 0.7449488035919619, "grad_norm": 0.1758088618516922, "learning_rate": 4.17602920300142e-05, "loss": 0.2318, "step": 13605 }, { "epoch": 0.7452225811750534, "grad_norm": 0.16346366703510284, "learning_rate": 4.175522206448996e-05, "loss": 0.2201, "step": 13610 }, { "epoch": 0.7454963587581449, "grad_norm": 0.14662232995033264, "learning_rate": 4.175015209896573e-05, "loss": 0.2275, "step": 13615 }, { "epoch": 0.7457701363412363, "grad_norm": 0.1661667823791504, "learning_rate": 4.1745082133441495e-05, "loss": 0.2276, "step": 13620 }, { "epoch": 0.7460439139243279, "grad_norm": 0.17296910285949707, "learning_rate": 4.174001216791726e-05, "loss": 0.2238, "step": 13625 }, { "epoch": 0.7463176915074193, "grad_norm": 0.15815739333629608, "learning_rate": 4.173494220239302e-05, "loss": 0.2238, "step": 13630 }, { "epoch": 0.7465914690905109, "grad_norm": 0.1608763188123703, "learning_rate": 4.172987223686879e-05, "loss": 0.224, "step": 13635 }, { "epoch": 0.7468652466736023, "grad_norm": 0.15499451756477356, "learning_rate": 4.1724802271344555e-05, "loss": 0.2302, "step": 13640 }, { "epoch": 0.7471390242566939, "grad_norm": 0.16065378487110138, "learning_rate": 4.1719732305820325e-05, "loss": 0.2204, "step": 13645 }, { "epoch": 0.7474128018397853, "grad_norm": 0.14139971137046814, "learning_rate": 4.171466234029609e-05, "loss": 0.2176, "step": 13650 }, { "epoch": 0.7476865794228769, "grad_norm": 0.14750079810619354, "learning_rate": 4.170959237477186e-05, "loss": 0.2238, "step": 13655 }, { "epoch": 0.7479603570059683, "grad_norm": 0.16423077881336212, "learning_rate": 4.170452240924762e-05, "loss": 0.2173, "step": 13660 }, { "epoch": 0.7482341345890599, "grad_norm": 0.18766231834888458, "learning_rate": 4.1699452443723385e-05, "loss": 0.2244, "step": 13665 }, { "epoch": 0.7485079121721513, "grad_norm": 0.1590864211320877, "learning_rate": 4.169438247819915e-05, "loss": 0.2089, "step": 13670 }, { "epoch": 0.7487816897552428, "grad_norm": 0.17438948154449463, "learning_rate": 4.168931251267492e-05, "loss": 0.2177, "step": 13675 }, { "epoch": 0.7490554673383344, "grad_norm": 0.17160673439502716, "learning_rate": 4.168424254715068e-05, "loss": 0.2257, "step": 13680 }, { "epoch": 0.7493292449214258, "grad_norm": 0.18656130135059357, "learning_rate": 4.1679172581626445e-05, "loss": 0.2265, "step": 13685 }, { "epoch": 0.7496030225045174, "grad_norm": 0.17797330021858215, "learning_rate": 4.1674102616102215e-05, "loss": 0.2293, "step": 13690 }, { "epoch": 0.7498768000876088, "grad_norm": 0.1707361489534378, "learning_rate": 4.166903265057798e-05, "loss": 0.2255, "step": 13695 }, { "epoch": 0.7501505776707004, "grad_norm": 0.16251134872436523, "learning_rate": 4.166396268505374e-05, "loss": 0.2216, "step": 13700 }, { "epoch": 0.7504243552537918, "grad_norm": 0.16053199768066406, "learning_rate": 4.1658892719529505e-05, "loss": 0.2374, "step": 13705 }, { "epoch": 0.7506981328368834, "grad_norm": 0.16094717383384705, "learning_rate": 4.1653822754005275e-05, "loss": 0.237, "step": 13710 }, { "epoch": 0.7509719104199748, "grad_norm": 0.15030376613140106, "learning_rate": 4.164875278848104e-05, "loss": 0.2131, "step": 13715 }, { "epoch": 0.7512456880030663, "grad_norm": 0.2039029896259308, "learning_rate": 4.16436828229568e-05, "loss": 0.226, "step": 13720 }, { "epoch": 0.7515194655861578, "grad_norm": 0.18931007385253906, "learning_rate": 4.163861285743257e-05, "loss": 0.2211, "step": 13725 }, { "epoch": 0.7517932431692493, "grad_norm": 0.17629995942115784, "learning_rate": 4.163354289190834e-05, "loss": 0.2196, "step": 13730 }, { "epoch": 0.7520670207523408, "grad_norm": 0.15071269869804382, "learning_rate": 4.1628472926384105e-05, "loss": 0.2351, "step": 13735 }, { "epoch": 0.7523407983354323, "grad_norm": 0.18744643032550812, "learning_rate": 4.162340296085987e-05, "loss": 0.2256, "step": 13740 }, { "epoch": 0.7526145759185238, "grad_norm": 0.1634008139371872, "learning_rate": 4.161833299533564e-05, "loss": 0.2229, "step": 13745 }, { "epoch": 0.7528883535016153, "grad_norm": 0.1549721509218216, "learning_rate": 4.16132630298114e-05, "loss": 0.2264, "step": 13750 }, { "epoch": 0.7531621310847068, "grad_norm": 0.15767651796340942, "learning_rate": 4.1608193064287165e-05, "loss": 0.2305, "step": 13755 }, { "epoch": 0.7534359086677983, "grad_norm": 0.1800607591867447, "learning_rate": 4.160312309876293e-05, "loss": 0.2349, "step": 13760 }, { "epoch": 0.7537096862508897, "grad_norm": 0.17011146247386932, "learning_rate": 4.15980531332387e-05, "loss": 0.2407, "step": 13765 }, { "epoch": 0.7539834638339813, "grad_norm": 0.18149107694625854, "learning_rate": 4.159298316771446e-05, "loss": 0.2284, "step": 13770 }, { "epoch": 0.7542572414170727, "grad_norm": 0.15754660964012146, "learning_rate": 4.1587913202190225e-05, "loss": 0.2208, "step": 13775 }, { "epoch": 0.7545310190001643, "grad_norm": 0.16048982739448547, "learning_rate": 4.1582843236665995e-05, "loss": 0.2207, "step": 13780 }, { "epoch": 0.7548047965832557, "grad_norm": 0.15811270475387573, "learning_rate": 4.157777327114176e-05, "loss": 0.231, "step": 13785 }, { "epoch": 0.7550785741663473, "grad_norm": 0.2028486728668213, "learning_rate": 4.157270330561752e-05, "loss": 0.2267, "step": 13790 }, { "epoch": 0.7553523517494387, "grad_norm": 0.20582877099514008, "learning_rate": 4.1567633340093285e-05, "loss": 0.2234, "step": 13795 }, { "epoch": 0.7556261293325303, "grad_norm": 0.2134588658809662, "learning_rate": 4.1562563374569055e-05, "loss": 0.2335, "step": 13800 }, { "epoch": 0.7558999069156217, "grad_norm": 0.17186562716960907, "learning_rate": 4.1557493409044825e-05, "loss": 0.2239, "step": 13805 }, { "epoch": 0.7561736844987132, "grad_norm": 0.17578525841236115, "learning_rate": 4.155242344352059e-05, "loss": 0.2215, "step": 13810 }, { "epoch": 0.7564474620818047, "grad_norm": 0.14897659420967102, "learning_rate": 4.154735347799635e-05, "loss": 0.2249, "step": 13815 }, { "epoch": 0.7567212396648962, "grad_norm": 0.1717822402715683, "learning_rate": 4.154228351247212e-05, "loss": 0.2244, "step": 13820 }, { "epoch": 0.7569950172479878, "grad_norm": 0.1785632222890854, "learning_rate": 4.1537213546947885e-05, "loss": 0.2138, "step": 13825 }, { "epoch": 0.7572687948310792, "grad_norm": 0.18519766628742218, "learning_rate": 4.153214358142365e-05, "loss": 0.2237, "step": 13830 }, { "epoch": 0.7575425724141708, "grad_norm": 0.2122422754764557, "learning_rate": 4.152707361589941e-05, "loss": 0.2316, "step": 13835 }, { "epoch": 0.7578163499972622, "grad_norm": 0.17093607783317566, "learning_rate": 4.152200365037518e-05, "loss": 0.2326, "step": 13840 }, { "epoch": 0.7580901275803538, "grad_norm": 0.1560967117547989, "learning_rate": 4.1516933684850945e-05, "loss": 0.2206, "step": 13845 }, { "epoch": 0.7583639051634452, "grad_norm": 0.14964377880096436, "learning_rate": 4.151186371932671e-05, "loss": 0.2175, "step": 13850 }, { "epoch": 0.7586376827465368, "grad_norm": 0.13256677985191345, "learning_rate": 4.150679375380248e-05, "loss": 0.2212, "step": 13855 }, { "epoch": 0.7589114603296282, "grad_norm": 0.1661567986011505, "learning_rate": 4.150172378827824e-05, "loss": 0.2301, "step": 13860 }, { "epoch": 0.7591852379127197, "grad_norm": 0.17519955337047577, "learning_rate": 4.1496653822754005e-05, "loss": 0.2309, "step": 13865 }, { "epoch": 0.7594590154958112, "grad_norm": 0.1465652883052826, "learning_rate": 4.149158385722977e-05, "loss": 0.2147, "step": 13870 }, { "epoch": 0.7597327930789027, "grad_norm": 0.1481904834508896, "learning_rate": 4.148651389170554e-05, "loss": 0.2256, "step": 13875 }, { "epoch": 0.7600065706619942, "grad_norm": 0.174861341714859, "learning_rate": 4.14814439261813e-05, "loss": 0.2165, "step": 13880 }, { "epoch": 0.7602803482450857, "grad_norm": 0.1702394187450409, "learning_rate": 4.147637396065707e-05, "loss": 0.2298, "step": 13885 }, { "epoch": 0.7605541258281772, "grad_norm": 0.18385925889015198, "learning_rate": 4.1471303995132835e-05, "loss": 0.2279, "step": 13890 }, { "epoch": 0.7608279034112687, "grad_norm": 0.17987611889839172, "learning_rate": 4.1466234029608605e-05, "loss": 0.2295, "step": 13895 }, { "epoch": 0.7611016809943602, "grad_norm": 0.19211234152317047, "learning_rate": 4.146116406408437e-05, "loss": 0.216, "step": 13900 }, { "epoch": 0.7613754585774517, "grad_norm": 0.1796383410692215, "learning_rate": 4.145609409856013e-05, "loss": 0.2218, "step": 13905 }, { "epoch": 0.7616492361605431, "grad_norm": 0.1677517592906952, "learning_rate": 4.14510241330359e-05, "loss": 0.226, "step": 13910 }, { "epoch": 0.7619230137436347, "grad_norm": 0.1731880009174347, "learning_rate": 4.1445954167511665e-05, "loss": 0.2289, "step": 13915 }, { "epoch": 0.7621967913267261, "grad_norm": 0.19669367372989655, "learning_rate": 4.144088420198743e-05, "loss": 0.2192, "step": 13920 }, { "epoch": 0.7624705689098177, "grad_norm": 0.17470964789390564, "learning_rate": 4.143581423646319e-05, "loss": 0.2201, "step": 13925 }, { "epoch": 0.7627443464929091, "grad_norm": 0.2403358668088913, "learning_rate": 4.143074427093896e-05, "loss": 0.2281, "step": 13930 }, { "epoch": 0.7630181240760007, "grad_norm": 0.18046647310256958, "learning_rate": 4.1425674305414725e-05, "loss": 0.2296, "step": 13935 }, { "epoch": 0.7632919016590921, "grad_norm": 0.2247885763645172, "learning_rate": 4.142060433989049e-05, "loss": 0.2204, "step": 13940 }, { "epoch": 0.7635656792421837, "grad_norm": 0.21702758967876434, "learning_rate": 4.141553437436625e-05, "loss": 0.2198, "step": 13945 }, { "epoch": 0.7638394568252751, "grad_norm": 0.1980310082435608, "learning_rate": 4.141046440884202e-05, "loss": 0.2216, "step": 13950 }, { "epoch": 0.7641132344083666, "grad_norm": 0.14492951333522797, "learning_rate": 4.1405394443317785e-05, "loss": 0.2244, "step": 13955 }, { "epoch": 0.7643870119914581, "grad_norm": 0.17846296727657318, "learning_rate": 4.140032447779355e-05, "loss": 0.2174, "step": 13960 }, { "epoch": 0.7646607895745496, "grad_norm": 0.13783670961856842, "learning_rate": 4.139525451226932e-05, "loss": 0.2312, "step": 13965 }, { "epoch": 0.7649345671576412, "grad_norm": 0.17812742292881012, "learning_rate": 4.139018454674509e-05, "loss": 0.2272, "step": 13970 }, { "epoch": 0.7652083447407326, "grad_norm": 0.16430430114269257, "learning_rate": 4.138511458122085e-05, "loss": 0.2131, "step": 13975 }, { "epoch": 0.7654821223238242, "grad_norm": 0.19344830513000488, "learning_rate": 4.1380044615696615e-05, "loss": 0.2195, "step": 13980 }, { "epoch": 0.7657558999069156, "grad_norm": 0.21199822425842285, "learning_rate": 4.1374974650172385e-05, "loss": 0.222, "step": 13985 }, { "epoch": 0.7660296774900072, "grad_norm": 0.20410564541816711, "learning_rate": 4.136990468464815e-05, "loss": 0.2161, "step": 13990 }, { "epoch": 0.7663034550730986, "grad_norm": 0.18247951567173004, "learning_rate": 4.136483471912391e-05, "loss": 0.2303, "step": 13995 }, { "epoch": 0.7665772326561902, "grad_norm": 0.15586329996585846, "learning_rate": 4.1359764753599675e-05, "loss": 0.2272, "step": 14000 }, { "epoch": 0.7668510102392816, "grad_norm": 0.19138172268867493, "learning_rate": 4.1354694788075445e-05, "loss": 0.2249, "step": 14005 }, { "epoch": 0.7671247878223731, "grad_norm": 0.1639437973499298, "learning_rate": 4.134962482255121e-05, "loss": 0.2254, "step": 14010 }, { "epoch": 0.7673985654054646, "grad_norm": 0.167312890291214, "learning_rate": 4.134455485702697e-05, "loss": 0.2263, "step": 14015 }, { "epoch": 0.7676723429885561, "grad_norm": 0.18074874579906464, "learning_rate": 4.133948489150274e-05, "loss": 0.2228, "step": 14020 }, { "epoch": 0.7679461205716476, "grad_norm": 0.18502689898014069, "learning_rate": 4.1334414925978505e-05, "loss": 0.2201, "step": 14025 }, { "epoch": 0.7682198981547391, "grad_norm": 0.17369671165943146, "learning_rate": 4.132934496045427e-05, "loss": 0.2277, "step": 14030 }, { "epoch": 0.7684936757378306, "grad_norm": 0.153066024184227, "learning_rate": 4.132427499493003e-05, "loss": 0.2174, "step": 14035 }, { "epoch": 0.7687674533209221, "grad_norm": 0.15851861238479614, "learning_rate": 4.13192050294058e-05, "loss": 0.2284, "step": 14040 }, { "epoch": 0.7690412309040136, "grad_norm": 0.18500961363315582, "learning_rate": 4.1314135063881565e-05, "loss": 0.2224, "step": 14045 }, { "epoch": 0.7693150084871051, "grad_norm": 0.1769901067018509, "learning_rate": 4.1309065098357335e-05, "loss": 0.2272, "step": 14050 }, { "epoch": 0.7695887860701965, "grad_norm": 0.1672559380531311, "learning_rate": 4.13039951328331e-05, "loss": 0.2192, "step": 14055 }, { "epoch": 0.7698625636532881, "grad_norm": 0.1639028638601303, "learning_rate": 4.129892516730887e-05, "loss": 0.2178, "step": 14060 }, { "epoch": 0.7701363412363795, "grad_norm": 0.14595620334148407, "learning_rate": 4.129385520178463e-05, "loss": 0.2244, "step": 14065 }, { "epoch": 0.7704101188194711, "grad_norm": 0.1921313852071762, "learning_rate": 4.1288785236260395e-05, "loss": 0.226, "step": 14070 }, { "epoch": 0.7706838964025625, "grad_norm": 0.152192160487175, "learning_rate": 4.128371527073616e-05, "loss": 0.2235, "step": 14075 }, { "epoch": 0.7709576739856541, "grad_norm": 0.20524844527244568, "learning_rate": 4.127864530521193e-05, "loss": 0.2199, "step": 14080 }, { "epoch": 0.7712314515687455, "grad_norm": 0.16740556061267853, "learning_rate": 4.127357533968769e-05, "loss": 0.2161, "step": 14085 }, { "epoch": 0.7715052291518371, "grad_norm": 0.20160067081451416, "learning_rate": 4.1268505374163455e-05, "loss": 0.2277, "step": 14090 }, { "epoch": 0.7717790067349285, "grad_norm": 0.20181912183761597, "learning_rate": 4.1263435408639225e-05, "loss": 0.2207, "step": 14095 }, { "epoch": 0.77205278431802, "grad_norm": 0.16700774431228638, "learning_rate": 4.125836544311499e-05, "loss": 0.229, "step": 14100 }, { "epoch": 0.7723265619011115, "grad_norm": 0.16653890907764435, "learning_rate": 4.125329547759075e-05, "loss": 0.2263, "step": 14105 }, { "epoch": 0.772600339484203, "grad_norm": 0.1526593714952469, "learning_rate": 4.1248225512066515e-05, "loss": 0.2249, "step": 14110 }, { "epoch": 0.7728741170672945, "grad_norm": 0.13825128972530365, "learning_rate": 4.1243155546542285e-05, "loss": 0.2143, "step": 14115 }, { "epoch": 0.773147894650386, "grad_norm": 0.16572630405426025, "learning_rate": 4.123808558101805e-05, "loss": 0.2235, "step": 14120 }, { "epoch": 0.7734216722334776, "grad_norm": 0.17624935507774353, "learning_rate": 4.123301561549381e-05, "loss": 0.2209, "step": 14125 }, { "epoch": 0.773695449816569, "grad_norm": 0.18032589554786682, "learning_rate": 4.122794564996958e-05, "loss": 0.2203, "step": 14130 }, { "epoch": 0.7739692273996606, "grad_norm": 0.2086024284362793, "learning_rate": 4.122287568444535e-05, "loss": 0.2345, "step": 14135 }, { "epoch": 0.774243004982752, "grad_norm": 0.1565304398536682, "learning_rate": 4.1217805718921115e-05, "loss": 0.2245, "step": 14140 }, { "epoch": 0.7745167825658436, "grad_norm": 0.1613844484090805, "learning_rate": 4.121273575339688e-05, "loss": 0.2255, "step": 14145 }, { "epoch": 0.774790560148935, "grad_norm": 0.16621196269989014, "learning_rate": 4.120766578787265e-05, "loss": 0.2248, "step": 14150 }, { "epoch": 0.7750643377320265, "grad_norm": 0.18891890347003937, "learning_rate": 4.120259582234841e-05, "loss": 0.2119, "step": 14155 }, { "epoch": 0.775338115315118, "grad_norm": 0.1500297337770462, "learning_rate": 4.1197525856824175e-05, "loss": 0.2269, "step": 14160 }, { "epoch": 0.7756118928982095, "grad_norm": 0.1695835441350937, "learning_rate": 4.119245589129994e-05, "loss": 0.2278, "step": 14165 }, { "epoch": 0.775885670481301, "grad_norm": 0.19056342542171478, "learning_rate": 4.118738592577571e-05, "loss": 0.2206, "step": 14170 }, { "epoch": 0.7761594480643925, "grad_norm": 0.17608551681041718, "learning_rate": 4.118231596025147e-05, "loss": 0.2189, "step": 14175 }, { "epoch": 0.776433225647484, "grad_norm": 0.17288175225257874, "learning_rate": 4.1177245994727235e-05, "loss": 0.222, "step": 14180 }, { "epoch": 0.7767070032305755, "grad_norm": 0.15276457369327545, "learning_rate": 4.1172176029203005e-05, "loss": 0.2187, "step": 14185 }, { "epoch": 0.776980780813667, "grad_norm": 0.21976549923419952, "learning_rate": 4.116710606367877e-05, "loss": 0.2233, "step": 14190 }, { "epoch": 0.7772545583967585, "grad_norm": 0.18445727229118347, "learning_rate": 4.116203609815453e-05, "loss": 0.2146, "step": 14195 }, { "epoch": 0.7775283359798499, "grad_norm": 0.16172920167446136, "learning_rate": 4.1156966132630295e-05, "loss": 0.2154, "step": 14200 }, { "epoch": 0.7778021135629415, "grad_norm": 0.14939917623996735, "learning_rate": 4.1151896167106065e-05, "loss": 0.2203, "step": 14205 }, { "epoch": 0.7780758911460329, "grad_norm": 0.1435355842113495, "learning_rate": 4.1146826201581836e-05, "loss": 0.2271, "step": 14210 }, { "epoch": 0.7783496687291245, "grad_norm": 0.14750368893146515, "learning_rate": 4.11417562360576e-05, "loss": 0.2234, "step": 14215 }, { "epoch": 0.7786234463122159, "grad_norm": 0.16924019157886505, "learning_rate": 4.113668627053336e-05, "loss": 0.2331, "step": 14220 }, { "epoch": 0.7788972238953075, "grad_norm": 0.19593842327594757, "learning_rate": 4.113161630500913e-05, "loss": 0.2289, "step": 14225 }, { "epoch": 0.7791710014783989, "grad_norm": 0.16406939923763275, "learning_rate": 4.1126546339484896e-05, "loss": 0.225, "step": 14230 }, { "epoch": 0.7794447790614905, "grad_norm": 0.20893625915050507, "learning_rate": 4.112147637396066e-05, "loss": 0.2194, "step": 14235 }, { "epoch": 0.7797185566445819, "grad_norm": 0.17091719806194305, "learning_rate": 4.111640640843642e-05, "loss": 0.223, "step": 14240 }, { "epoch": 0.7799923342276734, "grad_norm": 0.20542621612548828, "learning_rate": 4.111133644291219e-05, "loss": 0.2346, "step": 14245 }, { "epoch": 0.7802661118107649, "grad_norm": 0.16649077832698822, "learning_rate": 4.1106266477387956e-05, "loss": 0.2221, "step": 14250 }, { "epoch": 0.7805398893938564, "grad_norm": 0.15461967885494232, "learning_rate": 4.110119651186372e-05, "loss": 0.2265, "step": 14255 }, { "epoch": 0.7808136669769479, "grad_norm": 0.16510224342346191, "learning_rate": 4.109612654633949e-05, "loss": 0.2225, "step": 14260 }, { "epoch": 0.7810874445600394, "grad_norm": 0.17353427410125732, "learning_rate": 4.109105658081525e-05, "loss": 0.232, "step": 14265 }, { "epoch": 0.781361222143131, "grad_norm": 0.15240949392318726, "learning_rate": 4.1085986615291016e-05, "loss": 0.2223, "step": 14270 }, { "epoch": 0.7816349997262224, "grad_norm": 0.15505678951740265, "learning_rate": 4.108091664976678e-05, "loss": 0.2279, "step": 14275 }, { "epoch": 0.781908777309314, "grad_norm": 0.2148774266242981, "learning_rate": 4.107584668424255e-05, "loss": 0.2317, "step": 14280 }, { "epoch": 0.7821825548924054, "grad_norm": 0.14839738607406616, "learning_rate": 4.107077671871831e-05, "loss": 0.2171, "step": 14285 }, { "epoch": 0.782456332475497, "grad_norm": 0.1660291999578476, "learning_rate": 4.1065706753194076e-05, "loss": 0.222, "step": 14290 }, { "epoch": 0.7827301100585884, "grad_norm": 0.15528078377246857, "learning_rate": 4.1060636787669846e-05, "loss": 0.235, "step": 14295 }, { "epoch": 0.7830038876416799, "grad_norm": 0.15748335421085358, "learning_rate": 4.1055566822145616e-05, "loss": 0.2293, "step": 14300 }, { "epoch": 0.7832776652247714, "grad_norm": 0.18770486116409302, "learning_rate": 4.105049685662138e-05, "loss": 0.2215, "step": 14305 }, { "epoch": 0.7835514428078629, "grad_norm": 0.18942782282829285, "learning_rate": 4.104542689109714e-05, "loss": 0.2279, "step": 14310 }, { "epoch": 0.7838252203909544, "grad_norm": 0.1847858428955078, "learning_rate": 4.104035692557291e-05, "loss": 0.229, "step": 14315 }, { "epoch": 0.7840989979740459, "grad_norm": 0.1738376021385193, "learning_rate": 4.1035286960048676e-05, "loss": 0.2338, "step": 14320 }, { "epoch": 0.7843727755571374, "grad_norm": 0.21252283453941345, "learning_rate": 4.103021699452444e-05, "loss": 0.2341, "step": 14325 }, { "epoch": 0.7846465531402289, "grad_norm": 0.14460524916648865, "learning_rate": 4.10251470290002e-05, "loss": 0.2225, "step": 14330 }, { "epoch": 0.7849203307233203, "grad_norm": 0.1884353905916214, "learning_rate": 4.102007706347597e-05, "loss": 0.23, "step": 14335 }, { "epoch": 0.7851941083064119, "grad_norm": 0.16442245244979858, "learning_rate": 4.1015007097951736e-05, "loss": 0.2225, "step": 14340 }, { "epoch": 0.7854678858895033, "grad_norm": 0.2091803103685379, "learning_rate": 4.10099371324275e-05, "loss": 0.2127, "step": 14345 }, { "epoch": 0.7857416634725949, "grad_norm": 0.15559075772762299, "learning_rate": 4.100486716690327e-05, "loss": 0.227, "step": 14350 }, { "epoch": 0.7860154410556863, "grad_norm": 0.17472434043884277, "learning_rate": 4.099979720137903e-05, "loss": 0.2214, "step": 14355 }, { "epoch": 0.7862892186387779, "grad_norm": 0.18481266498565674, "learning_rate": 4.0994727235854796e-05, "loss": 0.2394, "step": 14360 }, { "epoch": 0.7865629962218693, "grad_norm": 0.17177718877792358, "learning_rate": 4.098965727033056e-05, "loss": 0.2393, "step": 14365 }, { "epoch": 0.7868367738049609, "grad_norm": 0.19271473586559296, "learning_rate": 4.098458730480633e-05, "loss": 0.2335, "step": 14370 }, { "epoch": 0.7871105513880523, "grad_norm": 0.173203706741333, "learning_rate": 4.09795173392821e-05, "loss": 0.2222, "step": 14375 }, { "epoch": 0.7873843289711439, "grad_norm": 0.17422029376029968, "learning_rate": 4.097444737375786e-05, "loss": 0.2232, "step": 14380 }, { "epoch": 0.7876581065542353, "grad_norm": 0.1492665559053421, "learning_rate": 4.0969377408233626e-05, "loss": 0.2328, "step": 14385 }, { "epoch": 0.7879318841373268, "grad_norm": 0.16843555867671967, "learning_rate": 4.0964307442709396e-05, "loss": 0.2179, "step": 14390 }, { "epoch": 0.7882056617204183, "grad_norm": 0.19311830401420593, "learning_rate": 4.095923747718516e-05, "loss": 0.22, "step": 14395 }, { "epoch": 0.7884794393035098, "grad_norm": 0.16822463274002075, "learning_rate": 4.095416751166092e-05, "loss": 0.2269, "step": 14400 }, { "epoch": 0.7887532168866013, "grad_norm": 0.1790720820426941, "learning_rate": 4.0949097546136686e-05, "loss": 0.2218, "step": 14405 }, { "epoch": 0.7890269944696928, "grad_norm": 0.15223300457000732, "learning_rate": 4.0944027580612456e-05, "loss": 0.2211, "step": 14410 }, { "epoch": 0.7893007720527844, "grad_norm": 0.2086077779531479, "learning_rate": 4.093895761508822e-05, "loss": 0.218, "step": 14415 }, { "epoch": 0.7895745496358758, "grad_norm": 0.16145546734333038, "learning_rate": 4.093388764956398e-05, "loss": 0.2244, "step": 14420 }, { "epoch": 0.7898483272189674, "grad_norm": 0.16392548382282257, "learning_rate": 4.092881768403975e-05, "loss": 0.2253, "step": 14425 }, { "epoch": 0.7901221048020588, "grad_norm": 0.1693848967552185, "learning_rate": 4.0923747718515516e-05, "loss": 0.2217, "step": 14430 }, { "epoch": 0.7903958823851504, "grad_norm": 0.1627994179725647, "learning_rate": 4.091867775299128e-05, "loss": 0.2289, "step": 14435 }, { "epoch": 0.7906696599682418, "grad_norm": 0.16364778578281403, "learning_rate": 4.091360778746704e-05, "loss": 0.2362, "step": 14440 }, { "epoch": 0.7909434375513333, "grad_norm": 0.1778314709663391, "learning_rate": 4.090853782194281e-05, "loss": 0.225, "step": 14445 }, { "epoch": 0.7912172151344248, "grad_norm": 0.20128986239433289, "learning_rate": 4.0903467856418576e-05, "loss": 0.2322, "step": 14450 }, { "epoch": 0.7914909927175163, "grad_norm": 0.1603744924068451, "learning_rate": 4.0898397890894346e-05, "loss": 0.2262, "step": 14455 }, { "epoch": 0.7917647703006078, "grad_norm": 0.15944962203502655, "learning_rate": 4.089332792537011e-05, "loss": 0.2292, "step": 14460 }, { "epoch": 0.7920385478836993, "grad_norm": 0.15189169347286224, "learning_rate": 4.088825795984588e-05, "loss": 0.2197, "step": 14465 }, { "epoch": 0.7923123254667908, "grad_norm": 0.1453537493944168, "learning_rate": 4.088318799432164e-05, "loss": 0.2245, "step": 14470 }, { "epoch": 0.7925861030498823, "grad_norm": 0.13820631802082062, "learning_rate": 4.0878118028797406e-05, "loss": 0.2209, "step": 14475 }, { "epoch": 0.7928598806329737, "grad_norm": 0.17734010517597198, "learning_rate": 4.0873048063273176e-05, "loss": 0.2191, "step": 14480 }, { "epoch": 0.7931336582160653, "grad_norm": 0.19165794551372528, "learning_rate": 4.086797809774894e-05, "loss": 0.2218, "step": 14485 }, { "epoch": 0.7934074357991567, "grad_norm": 0.17018796503543854, "learning_rate": 4.08629081322247e-05, "loss": 0.2171, "step": 14490 }, { "epoch": 0.7936812133822483, "grad_norm": 0.1924801915884018, "learning_rate": 4.0857838166700466e-05, "loss": 0.2219, "step": 14495 }, { "epoch": 0.7939549909653397, "grad_norm": 0.17007218301296234, "learning_rate": 4.0852768201176236e-05, "loss": 0.225, "step": 14500 }, { "epoch": 0.7942287685484313, "grad_norm": 0.16999602317810059, "learning_rate": 4.0847698235652e-05, "loss": 0.221, "step": 14505 }, { "epoch": 0.7945025461315227, "grad_norm": 0.14754217863082886, "learning_rate": 4.084262827012776e-05, "loss": 0.2145, "step": 14510 }, { "epoch": 0.7947763237146143, "grad_norm": 0.20728543400764465, "learning_rate": 4.083755830460353e-05, "loss": 0.227, "step": 14515 }, { "epoch": 0.7950501012977057, "grad_norm": 0.16531743109226227, "learning_rate": 4.0832488339079296e-05, "loss": 0.2217, "step": 14520 }, { "epoch": 0.7953238788807973, "grad_norm": 0.17446178197860718, "learning_rate": 4.082741837355506e-05, "loss": 0.2141, "step": 14525 }, { "epoch": 0.7955976564638887, "grad_norm": 0.15577173233032227, "learning_rate": 4.082234840803082e-05, "loss": 0.2269, "step": 14530 }, { "epoch": 0.7958714340469802, "grad_norm": 0.17328426241874695, "learning_rate": 4.081727844250659e-05, "loss": 0.2155, "step": 14535 }, { "epoch": 0.7961452116300717, "grad_norm": 0.144614577293396, "learning_rate": 4.081220847698236e-05, "loss": 0.2113, "step": 14540 }, { "epoch": 0.7964189892131632, "grad_norm": 0.16208024322986603, "learning_rate": 4.0807138511458126e-05, "loss": 0.2274, "step": 14545 }, { "epoch": 0.7966927667962547, "grad_norm": 0.1492638736963272, "learning_rate": 4.080206854593389e-05, "loss": 0.2148, "step": 14550 }, { "epoch": 0.7969665443793462, "grad_norm": 0.17725344002246857, "learning_rate": 4.079699858040966e-05, "loss": 0.2209, "step": 14555 }, { "epoch": 0.7972403219624378, "grad_norm": 0.183041051030159, "learning_rate": 4.079192861488542e-05, "loss": 0.2246, "step": 14560 }, { "epoch": 0.7975140995455292, "grad_norm": 0.1619638353586197, "learning_rate": 4.0786858649361186e-05, "loss": 0.2241, "step": 14565 }, { "epoch": 0.7977878771286208, "grad_norm": 0.15360109508037567, "learning_rate": 4.078178868383695e-05, "loss": 0.2224, "step": 14570 }, { "epoch": 0.7980616547117122, "grad_norm": 0.15287841856479645, "learning_rate": 4.077671871831272e-05, "loss": 0.2172, "step": 14575 }, { "epoch": 0.7983354322948037, "grad_norm": 0.16152040660381317, "learning_rate": 4.077164875278848e-05, "loss": 0.2207, "step": 14580 }, { "epoch": 0.7986092098778952, "grad_norm": 0.19818530976772308, "learning_rate": 4.0766578787264246e-05, "loss": 0.2263, "step": 14585 }, { "epoch": 0.7988829874609867, "grad_norm": 0.14973154664039612, "learning_rate": 4.0761508821740016e-05, "loss": 0.2221, "step": 14590 }, { "epoch": 0.7991567650440782, "grad_norm": 0.18949399888515472, "learning_rate": 4.075643885621578e-05, "loss": 0.2274, "step": 14595 }, { "epoch": 0.7994305426271697, "grad_norm": 0.17224742472171783, "learning_rate": 4.075136889069154e-05, "loss": 0.2249, "step": 14600 }, { "epoch": 0.7997043202102612, "grad_norm": 0.16920463740825653, "learning_rate": 4.0746298925167306e-05, "loss": 0.2136, "step": 14605 }, { "epoch": 0.7999780977933527, "grad_norm": 0.17858447134494781, "learning_rate": 4.0741228959643076e-05, "loss": 0.224, "step": 14610 }, { "epoch": 0.8002518753764442, "grad_norm": 0.14916667342185974, "learning_rate": 4.073615899411884e-05, "loss": 0.2144, "step": 14615 }, { "epoch": 0.8005256529595357, "grad_norm": 0.13363513350486755, "learning_rate": 4.073108902859461e-05, "loss": 0.2152, "step": 14620 }, { "epoch": 0.8007994305426271, "grad_norm": 0.17475183308124542, "learning_rate": 4.072601906307037e-05, "loss": 0.2251, "step": 14625 }, { "epoch": 0.8010732081257187, "grad_norm": 0.15638884902000427, "learning_rate": 4.072094909754614e-05, "loss": 0.2238, "step": 14630 }, { "epoch": 0.8013469857088101, "grad_norm": 0.14696891605854034, "learning_rate": 4.0715879132021906e-05, "loss": 0.2183, "step": 14635 }, { "epoch": 0.8016207632919017, "grad_norm": 0.16320300102233887, "learning_rate": 4.071080916649767e-05, "loss": 0.2265, "step": 14640 }, { "epoch": 0.8018945408749931, "grad_norm": 0.13569821417331696, "learning_rate": 4.070573920097344e-05, "loss": 0.2271, "step": 14645 }, { "epoch": 0.8021683184580847, "grad_norm": 0.15547998249530792, "learning_rate": 4.07006692354492e-05, "loss": 0.2223, "step": 14650 }, { "epoch": 0.8024420960411761, "grad_norm": 0.1465386152267456, "learning_rate": 4.0695599269924966e-05, "loss": 0.226, "step": 14655 }, { "epoch": 0.8027158736242677, "grad_norm": 0.1747608333826065, "learning_rate": 4.069052930440073e-05, "loss": 0.2205, "step": 14660 }, { "epoch": 0.8029896512073591, "grad_norm": 0.14369609951972961, "learning_rate": 4.06854593388765e-05, "loss": 0.2204, "step": 14665 }, { "epoch": 0.8032634287904507, "grad_norm": 0.1738387495279312, "learning_rate": 4.068038937335226e-05, "loss": 0.221, "step": 14670 }, { "epoch": 0.8035372063735421, "grad_norm": 0.1870768815279007, "learning_rate": 4.0675319407828026e-05, "loss": 0.2227, "step": 14675 }, { "epoch": 0.8038109839566336, "grad_norm": 0.16090044379234314, "learning_rate": 4.067024944230379e-05, "loss": 0.2262, "step": 14680 }, { "epoch": 0.8040847615397251, "grad_norm": 0.17347221076488495, "learning_rate": 4.066517947677956e-05, "loss": 0.2222, "step": 14685 }, { "epoch": 0.8043585391228166, "grad_norm": 0.17541152238845825, "learning_rate": 4.066010951125532e-05, "loss": 0.2269, "step": 14690 }, { "epoch": 0.8046323167059081, "grad_norm": 0.14609035849571228, "learning_rate": 4.0655039545731086e-05, "loss": 0.2139, "step": 14695 }, { "epoch": 0.8049060942889996, "grad_norm": 0.20090718567371368, "learning_rate": 4.0649969580206856e-05, "loss": 0.2215, "step": 14700 }, { "epoch": 0.8051798718720912, "grad_norm": 0.1681567132472992, "learning_rate": 4.0644899614682626e-05, "loss": 0.2306, "step": 14705 }, { "epoch": 0.8054536494551826, "grad_norm": 0.17204350233078003, "learning_rate": 4.063982964915839e-05, "loss": 0.2125, "step": 14710 }, { "epoch": 0.8057274270382742, "grad_norm": 0.19132618606090546, "learning_rate": 4.063475968363415e-05, "loss": 0.2183, "step": 14715 }, { "epoch": 0.8060012046213656, "grad_norm": 0.17889092862606049, "learning_rate": 4.062968971810992e-05, "loss": 0.2155, "step": 14720 }, { "epoch": 0.8062749822044571, "grad_norm": 0.15149052441120148, "learning_rate": 4.0624619752585686e-05, "loss": 0.2211, "step": 14725 }, { "epoch": 0.8065487597875486, "grad_norm": 0.17441321909427643, "learning_rate": 4.061954978706145e-05, "loss": 0.2204, "step": 14730 }, { "epoch": 0.8068225373706401, "grad_norm": 0.14316198229789734, "learning_rate": 4.061447982153721e-05, "loss": 0.2238, "step": 14735 }, { "epoch": 0.8070963149537316, "grad_norm": 0.15461665391921997, "learning_rate": 4.060940985601298e-05, "loss": 0.2263, "step": 14740 }, { "epoch": 0.8073700925368231, "grad_norm": 0.14694878458976746, "learning_rate": 4.0604339890488746e-05, "loss": 0.2261, "step": 14745 }, { "epoch": 0.8076438701199146, "grad_norm": 0.14633670449256897, "learning_rate": 4.059926992496451e-05, "loss": 0.2271, "step": 14750 }, { "epoch": 0.8079176477030061, "grad_norm": 0.17008879780769348, "learning_rate": 4.059419995944028e-05, "loss": 0.2245, "step": 14755 }, { "epoch": 0.8081914252860976, "grad_norm": 0.13573604822158813, "learning_rate": 4.058912999391604e-05, "loss": 0.2188, "step": 14760 }, { "epoch": 0.8084652028691891, "grad_norm": 0.16947203874588013, "learning_rate": 4.0584060028391806e-05, "loss": 0.2321, "step": 14765 }, { "epoch": 0.8087389804522805, "grad_norm": 0.1603507548570633, "learning_rate": 4.057899006286757e-05, "loss": 0.2237, "step": 14770 }, { "epoch": 0.8090127580353721, "grad_norm": 0.16749420762062073, "learning_rate": 4.057392009734334e-05, "loss": 0.2224, "step": 14775 }, { "epoch": 0.8092865356184635, "grad_norm": 0.15266557037830353, "learning_rate": 4.056885013181911e-05, "loss": 0.2183, "step": 14780 }, { "epoch": 0.8095603132015551, "grad_norm": 0.1848883330821991, "learning_rate": 4.056378016629487e-05, "loss": 0.2136, "step": 14785 }, { "epoch": 0.8098340907846465, "grad_norm": 0.17066888511180878, "learning_rate": 4.0558710200770636e-05, "loss": 0.2196, "step": 14790 }, { "epoch": 0.8101078683677381, "grad_norm": 0.15416733920574188, "learning_rate": 4.0553640235246406e-05, "loss": 0.2291, "step": 14795 }, { "epoch": 0.8103816459508295, "grad_norm": 0.1926862895488739, "learning_rate": 4.054857026972217e-05, "loss": 0.224, "step": 14800 }, { "epoch": 0.8106554235339211, "grad_norm": 0.1796591877937317, "learning_rate": 4.054350030419793e-05, "loss": 0.2131, "step": 14805 }, { "epoch": 0.8109292011170125, "grad_norm": 0.16152742505073547, "learning_rate": 4.0538430338673696e-05, "loss": 0.213, "step": 14810 }, { "epoch": 0.811202978700104, "grad_norm": 0.18816012144088745, "learning_rate": 4.0533360373149466e-05, "loss": 0.2218, "step": 14815 }, { "epoch": 0.8114767562831955, "grad_norm": 0.15718087553977966, "learning_rate": 4.052829040762523e-05, "loss": 0.2234, "step": 14820 }, { "epoch": 0.811750533866287, "grad_norm": 0.1822943091392517, "learning_rate": 4.052322044210099e-05, "loss": 0.2245, "step": 14825 }, { "epoch": 0.8120243114493785, "grad_norm": 0.15343596041202545, "learning_rate": 4.051815047657676e-05, "loss": 0.2312, "step": 14830 }, { "epoch": 0.81229808903247, "grad_norm": 0.18710258603096008, "learning_rate": 4.0513080511052526e-05, "loss": 0.2173, "step": 14835 }, { "epoch": 0.8125718666155615, "grad_norm": 0.16118693351745605, "learning_rate": 4.050801054552829e-05, "loss": 0.2202, "step": 14840 }, { "epoch": 0.812845644198653, "grad_norm": 0.18782761693000793, "learning_rate": 4.050294058000405e-05, "loss": 0.2271, "step": 14845 }, { "epoch": 0.8131194217817445, "grad_norm": 0.16862238943576813, "learning_rate": 4.049787061447982e-05, "loss": 0.2255, "step": 14850 }, { "epoch": 0.813393199364836, "grad_norm": 0.18053099513053894, "learning_rate": 4.0492800648955586e-05, "loss": 0.2235, "step": 14855 }, { "epoch": 0.8136669769479276, "grad_norm": 0.150792196393013, "learning_rate": 4.048773068343135e-05, "loss": 0.2151, "step": 14860 }, { "epoch": 0.813940754531019, "grad_norm": 0.148281991481781, "learning_rate": 4.048266071790712e-05, "loss": 0.2273, "step": 14865 }, { "epoch": 0.8142145321141105, "grad_norm": 0.16282358765602112, "learning_rate": 4.047759075238289e-05, "loss": 0.2214, "step": 14870 }, { "epoch": 0.814488309697202, "grad_norm": 0.14628352224826813, "learning_rate": 4.047252078685865e-05, "loss": 0.2209, "step": 14875 }, { "epoch": 0.8147620872802935, "grad_norm": 0.1630326807498932, "learning_rate": 4.0467450821334417e-05, "loss": 0.2102, "step": 14880 }, { "epoch": 0.815035864863385, "grad_norm": 0.1868254393339157, "learning_rate": 4.0462380855810187e-05, "loss": 0.204, "step": 14885 }, { "epoch": 0.8153096424464765, "grad_norm": 0.14126895368099213, "learning_rate": 4.045731089028595e-05, "loss": 0.2234, "step": 14890 }, { "epoch": 0.815583420029568, "grad_norm": 0.1650109589099884, "learning_rate": 4.045224092476171e-05, "loss": 0.2264, "step": 14895 }, { "epoch": 0.8158571976126595, "grad_norm": 0.17103873193264008, "learning_rate": 4.0447170959237477e-05, "loss": 0.2154, "step": 14900 }, { "epoch": 0.816130975195751, "grad_norm": 0.17956088483333588, "learning_rate": 4.0442100993713247e-05, "loss": 0.2275, "step": 14905 }, { "epoch": 0.8164047527788425, "grad_norm": 0.24319592118263245, "learning_rate": 4.043703102818901e-05, "loss": 0.2252, "step": 14910 }, { "epoch": 0.8166785303619339, "grad_norm": 0.1765403300523758, "learning_rate": 4.043196106266477e-05, "loss": 0.2219, "step": 14915 }, { "epoch": 0.8169523079450255, "grad_norm": 0.17874407768249512, "learning_rate": 4.042689109714054e-05, "loss": 0.2164, "step": 14920 }, { "epoch": 0.8172260855281169, "grad_norm": 0.1593550741672516, "learning_rate": 4.0421821131616307e-05, "loss": 0.2277, "step": 14925 }, { "epoch": 0.8174998631112085, "grad_norm": 0.14714425802230835, "learning_rate": 4.041675116609207e-05, "loss": 0.2166, "step": 14930 }, { "epoch": 0.8177736406942999, "grad_norm": 0.14964702725410461, "learning_rate": 4.041168120056783e-05, "loss": 0.2117, "step": 14935 }, { "epoch": 0.8180474182773915, "grad_norm": 0.15346725285053253, "learning_rate": 4.04066112350436e-05, "loss": 0.2254, "step": 14940 }, { "epoch": 0.8183211958604829, "grad_norm": 0.14870710670948029, "learning_rate": 4.040154126951937e-05, "loss": 0.2216, "step": 14945 }, { "epoch": 0.8185949734435745, "grad_norm": 0.16081079840660095, "learning_rate": 4.039647130399514e-05, "loss": 0.2177, "step": 14950 }, { "epoch": 0.8188687510266659, "grad_norm": 0.14669562876224518, "learning_rate": 4.03914013384709e-05, "loss": 0.231, "step": 14955 }, { "epoch": 0.8191425286097574, "grad_norm": 0.16671280562877655, "learning_rate": 4.038633137294667e-05, "loss": 0.2242, "step": 14960 }, { "epoch": 0.8194163061928489, "grad_norm": 0.1812104433774948, "learning_rate": 4.038126140742243e-05, "loss": 0.2247, "step": 14965 }, { "epoch": 0.8196900837759404, "grad_norm": 0.16587349772453308, "learning_rate": 4.03761914418982e-05, "loss": 0.2265, "step": 14970 }, { "epoch": 0.8199638613590319, "grad_norm": 0.1497543454170227, "learning_rate": 4.037112147637396e-05, "loss": 0.2202, "step": 14975 }, { "epoch": 0.8202376389421234, "grad_norm": 0.19018517434597015, "learning_rate": 4.036605151084973e-05, "loss": 0.2314, "step": 14980 }, { "epoch": 0.8205114165252149, "grad_norm": 0.1556388884782791, "learning_rate": 4.036098154532549e-05, "loss": 0.236, "step": 14985 }, { "epoch": 0.8207851941083064, "grad_norm": 0.15061470866203308, "learning_rate": 4.035591157980126e-05, "loss": 0.2197, "step": 14990 }, { "epoch": 0.8210589716913979, "grad_norm": 0.1738547831773758, "learning_rate": 4.035084161427703e-05, "loss": 0.2223, "step": 14995 }, { "epoch": 0.8213327492744894, "grad_norm": 0.16938038170337677, "learning_rate": 4.034577164875279e-05, "loss": 0.2183, "step": 15000 }, { "epoch": 0.821606526857581, "grad_norm": 0.21184934675693512, "learning_rate": 4.034070168322855e-05, "loss": 0.2217, "step": 15005 }, { "epoch": 0.8218803044406724, "grad_norm": 0.1884184032678604, "learning_rate": 4.033563171770432e-05, "loss": 0.2289, "step": 15010 }, { "epoch": 0.8221540820237639, "grad_norm": 0.15617433190345764, "learning_rate": 4.033056175218009e-05, "loss": 0.2194, "step": 15015 }, { "epoch": 0.8224278596068554, "grad_norm": 0.14946863055229187, "learning_rate": 4.032549178665585e-05, "loss": 0.2235, "step": 15020 }, { "epoch": 0.8227016371899469, "grad_norm": 0.18838705122470856, "learning_rate": 4.032042182113162e-05, "loss": 0.2245, "step": 15025 }, { "epoch": 0.8229754147730384, "grad_norm": 0.150478333234787, "learning_rate": 4.0315351855607383e-05, "loss": 0.2168, "step": 15030 }, { "epoch": 0.8232491923561299, "grad_norm": 0.13545216619968414, "learning_rate": 4.0310281890083154e-05, "loss": 0.2097, "step": 15035 }, { "epoch": 0.8235229699392214, "grad_norm": 0.15135951340198517, "learning_rate": 4.030521192455892e-05, "loss": 0.2164, "step": 15040 }, { "epoch": 0.8237967475223129, "grad_norm": 0.166550874710083, "learning_rate": 4.030014195903468e-05, "loss": 0.2201, "step": 15045 }, { "epoch": 0.8240705251054044, "grad_norm": 0.16245073080062866, "learning_rate": 4.029507199351045e-05, "loss": 0.2244, "step": 15050 }, { "epoch": 0.8243443026884959, "grad_norm": 0.16984322667121887, "learning_rate": 4.0290002027986213e-05, "loss": 0.2294, "step": 15055 }, { "epoch": 0.8246180802715873, "grad_norm": 0.14570772647857666, "learning_rate": 4.028493206246198e-05, "loss": 0.2144, "step": 15060 }, { "epoch": 0.8248918578546789, "grad_norm": 0.1549348384141922, "learning_rate": 4.027986209693774e-05, "loss": 0.216, "step": 15065 }, { "epoch": 0.8251656354377703, "grad_norm": 0.13911011815071106, "learning_rate": 4.027479213141351e-05, "loss": 0.2227, "step": 15070 }, { "epoch": 0.8254394130208619, "grad_norm": 0.1630694568157196, "learning_rate": 4.0269722165889273e-05, "loss": 0.2251, "step": 15075 }, { "epoch": 0.8257131906039533, "grad_norm": 0.2072681337594986, "learning_rate": 4.026465220036504e-05, "loss": 0.2367, "step": 15080 }, { "epoch": 0.8259869681870449, "grad_norm": 0.17155010998249054, "learning_rate": 4.025958223484081e-05, "loss": 0.2318, "step": 15085 }, { "epoch": 0.8262607457701363, "grad_norm": 0.16933591663837433, "learning_rate": 4.025451226931657e-05, "loss": 0.2172, "step": 15090 }, { "epoch": 0.8265345233532279, "grad_norm": 0.1435484141111374, "learning_rate": 4.0249442303792333e-05, "loss": 0.2186, "step": 15095 }, { "epoch": 0.8268083009363193, "grad_norm": 0.1700490564107895, "learning_rate": 4.02443723382681e-05, "loss": 0.2228, "step": 15100 }, { "epoch": 0.8270820785194108, "grad_norm": 0.1677664965391159, "learning_rate": 4.023930237274387e-05, "loss": 0.2339, "step": 15105 }, { "epoch": 0.8273558561025023, "grad_norm": 0.14433172345161438, "learning_rate": 4.023423240721964e-05, "loss": 0.2188, "step": 15110 }, { "epoch": 0.8276296336855938, "grad_norm": 0.1433875560760498, "learning_rate": 4.02291624416954e-05, "loss": 0.2212, "step": 15115 }, { "epoch": 0.8279034112686853, "grad_norm": 0.1469411551952362, "learning_rate": 4.0224092476171164e-05, "loss": 0.2198, "step": 15120 }, { "epoch": 0.8281771888517768, "grad_norm": 0.15367743372917175, "learning_rate": 4.0219022510646934e-05, "loss": 0.225, "step": 15125 }, { "epoch": 0.8284509664348683, "grad_norm": 0.14780443906784058, "learning_rate": 4.02139525451227e-05, "loss": 0.2201, "step": 15130 }, { "epoch": 0.8287247440179598, "grad_norm": 0.18190841376781464, "learning_rate": 4.020888257959846e-05, "loss": 0.2202, "step": 15135 }, { "epoch": 0.8289985216010513, "grad_norm": 0.16921108961105347, "learning_rate": 4.0203812614074224e-05, "loss": 0.2281, "step": 15140 }, { "epoch": 0.8292722991841428, "grad_norm": 0.1481635719537735, "learning_rate": 4.0198742648549994e-05, "loss": 0.2249, "step": 15145 }, { "epoch": 0.8295460767672344, "grad_norm": 0.16817450523376465, "learning_rate": 4.019367268302576e-05, "loss": 0.214, "step": 15150 }, { "epoch": 0.8298198543503258, "grad_norm": 0.17212414741516113, "learning_rate": 4.018860271750152e-05, "loss": 0.2229, "step": 15155 }, { "epoch": 0.8300936319334173, "grad_norm": 0.15230414271354675, "learning_rate": 4.018353275197729e-05, "loss": 0.2311, "step": 15160 }, { "epoch": 0.8303674095165088, "grad_norm": 0.1668737828731537, "learning_rate": 4.0178462786453054e-05, "loss": 0.227, "step": 15165 }, { "epoch": 0.8306411870996003, "grad_norm": 0.17155757546424866, "learning_rate": 4.017339282092882e-05, "loss": 0.2204, "step": 15170 }, { "epoch": 0.8309149646826918, "grad_norm": 0.15344448387622833, "learning_rate": 4.016832285540458e-05, "loss": 0.2236, "step": 15175 }, { "epoch": 0.8311887422657833, "grad_norm": 0.16763967275619507, "learning_rate": 4.016325288988035e-05, "loss": 0.2224, "step": 15180 }, { "epoch": 0.8314625198488748, "grad_norm": 0.18426203727722168, "learning_rate": 4.0158182924356114e-05, "loss": 0.2332, "step": 15185 }, { "epoch": 0.8317362974319663, "grad_norm": 0.17914745211601257, "learning_rate": 4.0153112958831884e-05, "loss": 0.2105, "step": 15190 }, { "epoch": 0.8320100750150577, "grad_norm": 0.16951270401477814, "learning_rate": 4.014804299330765e-05, "loss": 0.2184, "step": 15195 }, { "epoch": 0.8322838525981493, "grad_norm": 0.16641108691692352, "learning_rate": 4.014297302778342e-05, "loss": 0.2189, "step": 15200 }, { "epoch": 0.8325576301812407, "grad_norm": 0.19760629534721375, "learning_rate": 4.013790306225918e-05, "loss": 0.2186, "step": 15205 }, { "epoch": 0.8328314077643323, "grad_norm": 0.22589612007141113, "learning_rate": 4.0132833096734944e-05, "loss": 0.2303, "step": 15210 }, { "epoch": 0.8331051853474237, "grad_norm": 0.22299310564994812, "learning_rate": 4.0127763131210714e-05, "loss": 0.2267, "step": 15215 }, { "epoch": 0.8333789629305153, "grad_norm": 0.19041435420513153, "learning_rate": 4.012269316568648e-05, "loss": 0.2235, "step": 15220 }, { "epoch": 0.8336527405136067, "grad_norm": 0.14738136529922485, "learning_rate": 4.011762320016224e-05, "loss": 0.2173, "step": 15225 }, { "epoch": 0.8339265180966983, "grad_norm": 0.14485874772071838, "learning_rate": 4.0112553234638004e-05, "loss": 0.2163, "step": 15230 }, { "epoch": 0.8342002956797897, "grad_norm": 0.15337160229682922, "learning_rate": 4.0107483269113774e-05, "loss": 0.2149, "step": 15235 }, { "epoch": 0.8344740732628813, "grad_norm": 0.20654650032520294, "learning_rate": 4.010241330358954e-05, "loss": 0.2213, "step": 15240 }, { "epoch": 0.8347478508459727, "grad_norm": 0.1716221123933792, "learning_rate": 4.00973433380653e-05, "loss": 0.2167, "step": 15245 }, { "epoch": 0.8350216284290642, "grad_norm": 0.1493944227695465, "learning_rate": 4.0092273372541064e-05, "loss": 0.2333, "step": 15250 }, { "epoch": 0.8352954060121557, "grad_norm": 0.16231264173984528, "learning_rate": 4.0087203407016834e-05, "loss": 0.2255, "step": 15255 }, { "epoch": 0.8355691835952472, "grad_norm": 0.18484389781951904, "learning_rate": 4.00821334414926e-05, "loss": 0.2157, "step": 15260 }, { "epoch": 0.8358429611783387, "grad_norm": 0.14631952345371246, "learning_rate": 4.007706347596836e-05, "loss": 0.2233, "step": 15265 }, { "epoch": 0.8361167387614302, "grad_norm": 0.16328680515289307, "learning_rate": 4.007199351044413e-05, "loss": 0.2203, "step": 15270 }, { "epoch": 0.8363905163445217, "grad_norm": 0.16414713859558105, "learning_rate": 4.00669235449199e-05, "loss": 0.2125, "step": 15275 }, { "epoch": 0.8366642939276132, "grad_norm": 0.16366659104824066, "learning_rate": 4.0061853579395664e-05, "loss": 0.2204, "step": 15280 }, { "epoch": 0.8369380715107047, "grad_norm": 0.170734703540802, "learning_rate": 4.005678361387143e-05, "loss": 0.2194, "step": 15285 }, { "epoch": 0.8372118490937962, "grad_norm": 0.15549162030220032, "learning_rate": 4.00517136483472e-05, "loss": 0.2223, "step": 15290 }, { "epoch": 0.8374856266768878, "grad_norm": 0.19756942987442017, "learning_rate": 4.004664368282296e-05, "loss": 0.2178, "step": 15295 }, { "epoch": 0.8377594042599792, "grad_norm": 0.18069370090961456, "learning_rate": 4.0041573717298724e-05, "loss": 0.2245, "step": 15300 }, { "epoch": 0.8380331818430707, "grad_norm": 0.1719038039445877, "learning_rate": 4.003650375177449e-05, "loss": 0.228, "step": 15305 }, { "epoch": 0.8383069594261622, "grad_norm": 0.1564861536026001, "learning_rate": 4.003143378625026e-05, "loss": 0.2127, "step": 15310 }, { "epoch": 0.8385807370092537, "grad_norm": 0.17939642071723938, "learning_rate": 4.002636382072602e-05, "loss": 0.2224, "step": 15315 }, { "epoch": 0.8388545145923452, "grad_norm": 0.170431450009346, "learning_rate": 4.0021293855201784e-05, "loss": 0.2239, "step": 15320 }, { "epoch": 0.8391282921754367, "grad_norm": 0.15254783630371094, "learning_rate": 4.0016223889677554e-05, "loss": 0.2244, "step": 15325 }, { "epoch": 0.8394020697585282, "grad_norm": 0.1646549552679062, "learning_rate": 4.001115392415332e-05, "loss": 0.2234, "step": 15330 }, { "epoch": 0.8396758473416197, "grad_norm": 0.17724491655826569, "learning_rate": 4.000608395862908e-05, "loss": 0.2189, "step": 15335 }, { "epoch": 0.8399496249247111, "grad_norm": 0.17715631425380707, "learning_rate": 4.0001013993104844e-05, "loss": 0.2223, "step": 15340 }, { "epoch": 0.8402234025078027, "grad_norm": 0.15918274223804474, "learning_rate": 3.9995944027580614e-05, "loss": 0.2291, "step": 15345 }, { "epoch": 0.8404971800908941, "grad_norm": 0.16597440838813782, "learning_rate": 3.9990874062056384e-05, "loss": 0.2232, "step": 15350 }, { "epoch": 0.8407709576739857, "grad_norm": 0.14793123304843903, "learning_rate": 3.998580409653215e-05, "loss": 0.2171, "step": 15355 }, { "epoch": 0.8410447352570771, "grad_norm": 0.1609436720609665, "learning_rate": 3.998073413100791e-05, "loss": 0.2199, "step": 15360 }, { "epoch": 0.8413185128401687, "grad_norm": 0.1587129831314087, "learning_rate": 3.997566416548368e-05, "loss": 0.2185, "step": 15365 }, { "epoch": 0.8415922904232601, "grad_norm": 0.14394065737724304, "learning_rate": 3.9970594199959444e-05, "loss": 0.2236, "step": 15370 }, { "epoch": 0.8418660680063517, "grad_norm": 0.15916545689105988, "learning_rate": 3.996552423443521e-05, "loss": 0.2179, "step": 15375 }, { "epoch": 0.8421398455894431, "grad_norm": 0.1809394210577011, "learning_rate": 3.996045426891098e-05, "loss": 0.2228, "step": 15380 }, { "epoch": 0.8424136231725347, "grad_norm": 0.20152275264263153, "learning_rate": 3.995538430338674e-05, "loss": 0.2139, "step": 15385 }, { "epoch": 0.8426874007556261, "grad_norm": 0.17557752132415771, "learning_rate": 3.9950314337862504e-05, "loss": 0.2317, "step": 15390 }, { "epoch": 0.8429611783387176, "grad_norm": 0.16094854474067688, "learning_rate": 3.994524437233827e-05, "loss": 0.2227, "step": 15395 }, { "epoch": 0.8432349559218091, "grad_norm": 0.1594294160604477, "learning_rate": 3.994017440681404e-05, "loss": 0.2171, "step": 15400 }, { "epoch": 0.8435087335049006, "grad_norm": 0.16499634087085724, "learning_rate": 3.99351044412898e-05, "loss": 0.2202, "step": 15405 }, { "epoch": 0.8437825110879921, "grad_norm": 0.18635965883731842, "learning_rate": 3.9930034475765564e-05, "loss": 0.2231, "step": 15410 }, { "epoch": 0.8440562886710836, "grad_norm": 0.18888364732265472, "learning_rate": 3.992496451024133e-05, "loss": 0.2222, "step": 15415 }, { "epoch": 0.8443300662541751, "grad_norm": 0.15148676931858063, "learning_rate": 3.99198945447171e-05, "loss": 0.2246, "step": 15420 }, { "epoch": 0.8446038438372666, "grad_norm": 0.1717880517244339, "learning_rate": 3.991482457919286e-05, "loss": 0.2276, "step": 15425 }, { "epoch": 0.844877621420358, "grad_norm": 0.1444482058286667, "learning_rate": 3.9909754613668624e-05, "loss": 0.2212, "step": 15430 }, { "epoch": 0.8451513990034496, "grad_norm": 0.16763027012348175, "learning_rate": 3.9904684648144394e-05, "loss": 0.2201, "step": 15435 }, { "epoch": 0.8454251765865411, "grad_norm": 0.1414741724729538, "learning_rate": 3.9899614682620164e-05, "loss": 0.2257, "step": 15440 }, { "epoch": 0.8456989541696326, "grad_norm": 0.17924709618091583, "learning_rate": 3.989454471709593e-05, "loss": 0.2195, "step": 15445 }, { "epoch": 0.8459727317527241, "grad_norm": 0.17425128817558289, "learning_rate": 3.988947475157169e-05, "loss": 0.2232, "step": 15450 }, { "epoch": 0.8462465093358156, "grad_norm": 0.15336973965168, "learning_rate": 3.988440478604746e-05, "loss": 0.2206, "step": 15455 }, { "epoch": 0.8465202869189071, "grad_norm": 0.15383383631706238, "learning_rate": 3.9879334820523224e-05, "loss": 0.2089, "step": 15460 }, { "epoch": 0.8467940645019986, "grad_norm": 0.1778469830751419, "learning_rate": 3.987426485499899e-05, "loss": 0.2241, "step": 15465 }, { "epoch": 0.8470678420850901, "grad_norm": 0.15400069952011108, "learning_rate": 3.986919488947475e-05, "loss": 0.2192, "step": 15470 }, { "epoch": 0.8473416196681816, "grad_norm": 0.16720014810562134, "learning_rate": 3.986412492395052e-05, "loss": 0.2148, "step": 15475 }, { "epoch": 0.8476153972512731, "grad_norm": 0.17097654938697815, "learning_rate": 3.9859054958426284e-05, "loss": 0.2209, "step": 15480 }, { "epoch": 0.8478891748343645, "grad_norm": 0.1540728211402893, "learning_rate": 3.985398499290205e-05, "loss": 0.2221, "step": 15485 }, { "epoch": 0.8481629524174561, "grad_norm": 0.17307691276073456, "learning_rate": 3.984891502737782e-05, "loss": 0.2283, "step": 15490 }, { "epoch": 0.8484367300005475, "grad_norm": 0.16903333365917206, "learning_rate": 3.984384506185358e-05, "loss": 0.2207, "step": 15495 }, { "epoch": 0.8487105075836391, "grad_norm": 0.17432928085327148, "learning_rate": 3.9838775096329344e-05, "loss": 0.2364, "step": 15500 }, { "epoch": 0.8489842851667305, "grad_norm": 0.17944903671741486, "learning_rate": 3.983370513080511e-05, "loss": 0.2245, "step": 15505 }, { "epoch": 0.8492580627498221, "grad_norm": 0.1618061661720276, "learning_rate": 3.982863516528088e-05, "loss": 0.2117, "step": 15510 }, { "epoch": 0.8495318403329135, "grad_norm": 0.14937913417816162, "learning_rate": 3.982356519975665e-05, "loss": 0.2232, "step": 15515 }, { "epoch": 0.8498056179160051, "grad_norm": 0.16988275945186615, "learning_rate": 3.981849523423241e-05, "loss": 0.2138, "step": 15520 }, { "epoch": 0.8500793954990965, "grad_norm": 0.16144995391368866, "learning_rate": 3.9813425268708174e-05, "loss": 0.2256, "step": 15525 }, { "epoch": 0.850353173082188, "grad_norm": 0.16082504391670227, "learning_rate": 3.9808355303183944e-05, "loss": 0.2207, "step": 15530 }, { "epoch": 0.8506269506652795, "grad_norm": 0.190180703997612, "learning_rate": 3.980328533765971e-05, "loss": 0.2304, "step": 15535 }, { "epoch": 0.850900728248371, "grad_norm": 0.16264384984970093, "learning_rate": 3.979821537213547e-05, "loss": 0.2198, "step": 15540 }, { "epoch": 0.8511745058314625, "grad_norm": 0.18792122602462769, "learning_rate": 3.9793145406611234e-05, "loss": 0.2261, "step": 15545 }, { "epoch": 0.851448283414554, "grad_norm": 0.16080205142498016, "learning_rate": 3.9788075441087004e-05, "loss": 0.2324, "step": 15550 }, { "epoch": 0.8517220609976455, "grad_norm": 0.14842283725738525, "learning_rate": 3.978300547556277e-05, "loss": 0.2179, "step": 15555 }, { "epoch": 0.851995838580737, "grad_norm": 0.142884761095047, "learning_rate": 3.977793551003853e-05, "loss": 0.2231, "step": 15560 }, { "epoch": 0.8522696161638285, "grad_norm": 0.18354284763336182, "learning_rate": 3.97728655445143e-05, "loss": 0.225, "step": 15565 }, { "epoch": 0.85254339374692, "grad_norm": 0.17002849280834198, "learning_rate": 3.9767795578990064e-05, "loss": 0.2192, "step": 15570 }, { "epoch": 0.8528171713300114, "grad_norm": 0.17876456677913666, "learning_rate": 3.976272561346583e-05, "loss": 0.2236, "step": 15575 }, { "epoch": 0.853090948913103, "grad_norm": 0.16425825655460358, "learning_rate": 3.975765564794159e-05, "loss": 0.218, "step": 15580 }, { "epoch": 0.8533647264961944, "grad_norm": 0.1716257929801941, "learning_rate": 3.975258568241736e-05, "loss": 0.2191, "step": 15585 }, { "epoch": 0.853638504079286, "grad_norm": 0.15242603421211243, "learning_rate": 3.9747515716893124e-05, "loss": 0.2201, "step": 15590 }, { "epoch": 0.8539122816623775, "grad_norm": 0.17459192872047424, "learning_rate": 3.9742445751368894e-05, "loss": 0.2237, "step": 15595 }, { "epoch": 0.854186059245469, "grad_norm": 0.15082424879074097, "learning_rate": 3.973737578584466e-05, "loss": 0.2233, "step": 15600 }, { "epoch": 0.8544598368285605, "grad_norm": 0.1657746136188507, "learning_rate": 3.973230582032043e-05, "loss": 0.2182, "step": 15605 }, { "epoch": 0.854733614411652, "grad_norm": 0.20998114347457886, "learning_rate": 3.972723585479619e-05, "loss": 0.2242, "step": 15610 }, { "epoch": 0.8550073919947435, "grad_norm": 0.16611316800117493, "learning_rate": 3.9722165889271954e-05, "loss": 0.2139, "step": 15615 }, { "epoch": 0.855281169577835, "grad_norm": 0.26052045822143555, "learning_rate": 3.9717095923747724e-05, "loss": 0.2198, "step": 15620 }, { "epoch": 0.8555549471609265, "grad_norm": 0.2150377333164215, "learning_rate": 3.971202595822349e-05, "loss": 0.2137, "step": 15625 }, { "epoch": 0.8558287247440179, "grad_norm": 0.16480416059494019, "learning_rate": 3.970695599269925e-05, "loss": 0.2259, "step": 15630 }, { "epoch": 0.8561025023271095, "grad_norm": 0.19686774909496307, "learning_rate": 3.9701886027175014e-05, "loss": 0.2265, "step": 15635 }, { "epoch": 0.8563762799102009, "grad_norm": 0.16548851132392883, "learning_rate": 3.9696816061650784e-05, "loss": 0.2186, "step": 15640 }, { "epoch": 0.8566500574932925, "grad_norm": 0.16294702887535095, "learning_rate": 3.969174609612655e-05, "loss": 0.2257, "step": 15645 }, { "epoch": 0.8569238350763839, "grad_norm": 0.1584261953830719, "learning_rate": 3.968667613060231e-05, "loss": 0.2226, "step": 15650 }, { "epoch": 0.8571976126594755, "grad_norm": 0.17887462675571442, "learning_rate": 3.968160616507808e-05, "loss": 0.2245, "step": 15655 }, { "epoch": 0.8574713902425669, "grad_norm": 0.15826432406902313, "learning_rate": 3.9676536199553844e-05, "loss": 0.2251, "step": 15660 }, { "epoch": 0.8577451678256585, "grad_norm": 0.16115319728851318, "learning_rate": 3.967146623402961e-05, "loss": 0.2197, "step": 15665 }, { "epoch": 0.8580189454087499, "grad_norm": 0.1556876003742218, "learning_rate": 3.966639626850537e-05, "loss": 0.2159, "step": 15670 }, { "epoch": 0.8582927229918415, "grad_norm": 0.15513509511947632, "learning_rate": 3.966132630298114e-05, "loss": 0.2124, "step": 15675 }, { "epoch": 0.8585665005749329, "grad_norm": 0.15990841388702393, "learning_rate": 3.965625633745691e-05, "loss": 0.2156, "step": 15680 }, { "epoch": 0.8588402781580244, "grad_norm": 0.16990415751934052, "learning_rate": 3.9651186371932674e-05, "loss": 0.2202, "step": 15685 }, { "epoch": 0.8591140557411159, "grad_norm": 0.14481598138809204, "learning_rate": 3.964611640640844e-05, "loss": 0.2127, "step": 15690 }, { "epoch": 0.8593878333242074, "grad_norm": 0.19731588661670685, "learning_rate": 3.964104644088421e-05, "loss": 0.2224, "step": 15695 }, { "epoch": 0.8596616109072989, "grad_norm": 0.16511763632297516, "learning_rate": 3.963597647535997e-05, "loss": 0.2265, "step": 15700 }, { "epoch": 0.8599353884903904, "grad_norm": 0.1539909392595291, "learning_rate": 3.9630906509835734e-05, "loss": 0.2285, "step": 15705 }, { "epoch": 0.8602091660734819, "grad_norm": 0.13777346909046173, "learning_rate": 3.96258365443115e-05, "loss": 0.2223, "step": 15710 }, { "epoch": 0.8604829436565734, "grad_norm": 0.15672792494297028, "learning_rate": 3.962076657878727e-05, "loss": 0.223, "step": 15715 }, { "epoch": 0.8607567212396648, "grad_norm": 0.16603031754493713, "learning_rate": 3.961569661326303e-05, "loss": 0.2267, "step": 15720 }, { "epoch": 0.8610304988227564, "grad_norm": 0.16857360303401947, "learning_rate": 3.9610626647738794e-05, "loss": 0.2228, "step": 15725 }, { "epoch": 0.8613042764058478, "grad_norm": 0.18166381120681763, "learning_rate": 3.9605556682214565e-05, "loss": 0.2217, "step": 15730 }, { "epoch": 0.8615780539889394, "grad_norm": 0.15300577878952026, "learning_rate": 3.960048671669033e-05, "loss": 0.2193, "step": 15735 }, { "epoch": 0.8618518315720309, "grad_norm": 0.20115652680397034, "learning_rate": 3.959541675116609e-05, "loss": 0.2296, "step": 15740 }, { "epoch": 0.8621256091551224, "grad_norm": 0.16501262784004211, "learning_rate": 3.9590346785641854e-05, "loss": 0.217, "step": 15745 }, { "epoch": 0.8623993867382139, "grad_norm": 0.18553827702999115, "learning_rate": 3.9585276820117625e-05, "loss": 0.2269, "step": 15750 }, { "epoch": 0.8626731643213054, "grad_norm": 0.16779768466949463, "learning_rate": 3.958020685459339e-05, "loss": 0.2152, "step": 15755 }, { "epoch": 0.8629469419043969, "grad_norm": 0.1716756522655487, "learning_rate": 3.957513688906916e-05, "loss": 0.2207, "step": 15760 }, { "epoch": 0.8632207194874884, "grad_norm": 0.157548189163208, "learning_rate": 3.957006692354492e-05, "loss": 0.2275, "step": 15765 }, { "epoch": 0.8634944970705799, "grad_norm": 0.15902118384838104, "learning_rate": 3.956499695802069e-05, "loss": 0.2222, "step": 15770 }, { "epoch": 0.8637682746536713, "grad_norm": 0.1611093133687973, "learning_rate": 3.9559926992496455e-05, "loss": 0.2151, "step": 15775 }, { "epoch": 0.8640420522367629, "grad_norm": 0.13530515134334564, "learning_rate": 3.955485702697222e-05, "loss": 0.2164, "step": 15780 }, { "epoch": 0.8643158298198543, "grad_norm": 0.2519269287586212, "learning_rate": 3.954978706144799e-05, "loss": 0.2187, "step": 15785 }, { "epoch": 0.8645896074029459, "grad_norm": 0.15961548686027527, "learning_rate": 3.954471709592375e-05, "loss": 0.2315, "step": 15790 }, { "epoch": 0.8648633849860373, "grad_norm": 0.16625608503818512, "learning_rate": 3.9539647130399515e-05, "loss": 0.2275, "step": 15795 }, { "epoch": 0.8651371625691289, "grad_norm": 0.14560678601264954, "learning_rate": 3.953457716487528e-05, "loss": 0.2257, "step": 15800 }, { "epoch": 0.8654109401522203, "grad_norm": 0.1669074445962906, "learning_rate": 3.952950719935105e-05, "loss": 0.2199, "step": 15805 }, { "epoch": 0.8656847177353119, "grad_norm": 0.17160175740718842, "learning_rate": 3.952443723382681e-05, "loss": 0.2101, "step": 15810 }, { "epoch": 0.8659584953184033, "grad_norm": 0.15773126482963562, "learning_rate": 3.9519367268302575e-05, "loss": 0.2221, "step": 15815 }, { "epoch": 0.8662322729014948, "grad_norm": 0.16806907951831818, "learning_rate": 3.9514297302778345e-05, "loss": 0.2188, "step": 15820 }, { "epoch": 0.8665060504845863, "grad_norm": 0.15913935005664825, "learning_rate": 3.950922733725411e-05, "loss": 0.2163, "step": 15825 }, { "epoch": 0.8667798280676778, "grad_norm": 0.14472326636314392, "learning_rate": 3.950415737172987e-05, "loss": 0.2272, "step": 15830 }, { "epoch": 0.8670536056507693, "grad_norm": 0.21982352435588837, "learning_rate": 3.9499087406205635e-05, "loss": 0.2244, "step": 15835 }, { "epoch": 0.8673273832338608, "grad_norm": 0.15873651206493378, "learning_rate": 3.9494017440681405e-05, "loss": 0.2219, "step": 15840 }, { "epoch": 0.8676011608169523, "grad_norm": 0.16503864526748657, "learning_rate": 3.9488947475157175e-05, "loss": 0.2304, "step": 15845 }, { "epoch": 0.8678749384000438, "grad_norm": 0.17791645228862762, "learning_rate": 3.948387750963294e-05, "loss": 0.2208, "step": 15850 }, { "epoch": 0.8681487159831353, "grad_norm": 0.14253585040569305, "learning_rate": 3.94788075441087e-05, "loss": 0.2176, "step": 15855 }, { "epoch": 0.8684224935662268, "grad_norm": 0.1644102782011032, "learning_rate": 3.947373757858447e-05, "loss": 0.2206, "step": 15860 }, { "epoch": 0.8686962711493182, "grad_norm": 0.1564023196697235, "learning_rate": 3.9468667613060235e-05, "loss": 0.2202, "step": 15865 }, { "epoch": 0.8689700487324098, "grad_norm": 0.1857893466949463, "learning_rate": 3.9463597647536e-05, "loss": 0.2157, "step": 15870 }, { "epoch": 0.8692438263155012, "grad_norm": 0.16908425092697144, "learning_rate": 3.945852768201176e-05, "loss": 0.2241, "step": 15875 }, { "epoch": 0.8695176038985928, "grad_norm": 0.1781388223171234, "learning_rate": 3.945345771648753e-05, "loss": 0.2234, "step": 15880 }, { "epoch": 0.8697913814816843, "grad_norm": 0.14852939546108246, "learning_rate": 3.9448387750963295e-05, "loss": 0.2226, "step": 15885 }, { "epoch": 0.8700651590647758, "grad_norm": 0.18562398850917816, "learning_rate": 3.944331778543906e-05, "loss": 0.2181, "step": 15890 }, { "epoch": 0.8703389366478673, "grad_norm": 0.18859244883060455, "learning_rate": 3.943824781991483e-05, "loss": 0.2263, "step": 15895 }, { "epoch": 0.8706127142309588, "grad_norm": 0.16601654887199402, "learning_rate": 3.943317785439059e-05, "loss": 0.2172, "step": 15900 }, { "epoch": 0.8708864918140503, "grad_norm": 0.15701569616794586, "learning_rate": 3.9428107888866355e-05, "loss": 0.2169, "step": 15905 }, { "epoch": 0.8711602693971418, "grad_norm": 0.17711463570594788, "learning_rate": 3.942303792334212e-05, "loss": 0.2187, "step": 15910 }, { "epoch": 0.8714340469802333, "grad_norm": 0.16424991190433502, "learning_rate": 3.941796795781789e-05, "loss": 0.2283, "step": 15915 }, { "epoch": 0.8717078245633247, "grad_norm": 0.16511507332324982, "learning_rate": 3.941289799229366e-05, "loss": 0.2214, "step": 15920 }, { "epoch": 0.8719816021464163, "grad_norm": 0.1652337908744812, "learning_rate": 3.940782802676942e-05, "loss": 0.2312, "step": 15925 }, { "epoch": 0.8722553797295077, "grad_norm": 0.16532935202121735, "learning_rate": 3.9402758061245185e-05, "loss": 0.2258, "step": 15930 }, { "epoch": 0.8725291573125993, "grad_norm": 0.16088493168354034, "learning_rate": 3.9397688095720955e-05, "loss": 0.2235, "step": 15935 }, { "epoch": 0.8728029348956907, "grad_norm": 0.1689336597919464, "learning_rate": 3.939261813019672e-05, "loss": 0.2221, "step": 15940 }, { "epoch": 0.8730767124787823, "grad_norm": 0.17203490436077118, "learning_rate": 3.938754816467248e-05, "loss": 0.2156, "step": 15945 }, { "epoch": 0.8733504900618737, "grad_norm": 0.14862509071826935, "learning_rate": 3.938247819914825e-05, "loss": 0.215, "step": 15950 }, { "epoch": 0.8736242676449653, "grad_norm": 0.17708911001682281, "learning_rate": 3.9377408233624015e-05, "loss": 0.2243, "step": 15955 }, { "epoch": 0.8738980452280567, "grad_norm": 0.1800120323896408, "learning_rate": 3.937233826809978e-05, "loss": 0.2269, "step": 15960 }, { "epoch": 0.8741718228111482, "grad_norm": 0.19892629981040955, "learning_rate": 3.936726830257554e-05, "loss": 0.2385, "step": 15965 }, { "epoch": 0.8744456003942397, "grad_norm": 0.16691450774669647, "learning_rate": 3.936219833705131e-05, "loss": 0.2158, "step": 15970 }, { "epoch": 0.8747193779773312, "grad_norm": 0.17827458679676056, "learning_rate": 3.9357128371527075e-05, "loss": 0.2278, "step": 15975 }, { "epoch": 0.8749931555604227, "grad_norm": 0.1513853669166565, "learning_rate": 3.935205840600284e-05, "loss": 0.2205, "step": 15980 }, { "epoch": 0.8752669331435142, "grad_norm": 0.14280124008655548, "learning_rate": 3.93469884404786e-05, "loss": 0.2263, "step": 15985 }, { "epoch": 0.8755407107266057, "grad_norm": 0.1513812392950058, "learning_rate": 3.934191847495437e-05, "loss": 0.2108, "step": 15990 }, { "epoch": 0.8758144883096972, "grad_norm": 0.15510207414627075, "learning_rate": 3.9336848509430135e-05, "loss": 0.2209, "step": 15995 }, { "epoch": 0.8760882658927887, "grad_norm": 0.14795461297035217, "learning_rate": 3.93317785439059e-05, "loss": 0.2263, "step": 16000 }, { "epoch": 0.8763620434758802, "grad_norm": 0.14337779581546783, "learning_rate": 3.932670857838167e-05, "loss": 0.2222, "step": 16005 }, { "epoch": 0.8766358210589716, "grad_norm": 0.15278218686580658, "learning_rate": 3.932163861285744e-05, "loss": 0.223, "step": 16010 }, { "epoch": 0.8769095986420632, "grad_norm": 0.15414008498191833, "learning_rate": 3.93165686473332e-05, "loss": 0.2271, "step": 16015 }, { "epoch": 0.8771833762251546, "grad_norm": 0.14628423750400543, "learning_rate": 3.9311498681808965e-05, "loss": 0.2171, "step": 16020 }, { "epoch": 0.8774571538082462, "grad_norm": 0.16519209742546082, "learning_rate": 3.9306428716284735e-05, "loss": 0.2214, "step": 16025 }, { "epoch": 0.8777309313913377, "grad_norm": 0.1583256870508194, "learning_rate": 3.93013587507605e-05, "loss": 0.2284, "step": 16030 }, { "epoch": 0.8780047089744292, "grad_norm": 0.15139856934547424, "learning_rate": 3.929628878523626e-05, "loss": 0.2179, "step": 16035 }, { "epoch": 0.8782784865575207, "grad_norm": 0.15890000760555267, "learning_rate": 3.9291218819712025e-05, "loss": 0.217, "step": 16040 }, { "epoch": 0.8785522641406122, "grad_norm": 0.19966502487659454, "learning_rate": 3.9286148854187795e-05, "loss": 0.2244, "step": 16045 }, { "epoch": 0.8788260417237037, "grad_norm": 0.20648813247680664, "learning_rate": 3.928107888866356e-05, "loss": 0.2287, "step": 16050 }, { "epoch": 0.8790998193067951, "grad_norm": 0.18046851456165314, "learning_rate": 3.927600892313932e-05, "loss": 0.2324, "step": 16055 }, { "epoch": 0.8793735968898867, "grad_norm": 0.2056431770324707, "learning_rate": 3.927093895761509e-05, "loss": 0.2201, "step": 16060 }, { "epoch": 0.8796473744729781, "grad_norm": 0.1473609060049057, "learning_rate": 3.9265868992090855e-05, "loss": 0.2314, "step": 16065 }, { "epoch": 0.8799211520560697, "grad_norm": 0.1708163470029831, "learning_rate": 3.926079902656662e-05, "loss": 0.2249, "step": 16070 }, { "epoch": 0.8801949296391611, "grad_norm": 0.18352605402469635, "learning_rate": 3.925572906104238e-05, "loss": 0.2242, "step": 16075 }, { "epoch": 0.8804687072222527, "grad_norm": 0.19874407351016998, "learning_rate": 3.925065909551815e-05, "loss": 0.2268, "step": 16080 }, { "epoch": 0.8807424848053441, "grad_norm": 0.15659131109714508, "learning_rate": 3.924558912999392e-05, "loss": 0.2227, "step": 16085 }, { "epoch": 0.8810162623884357, "grad_norm": 0.1859852820634842, "learning_rate": 3.9240519164469685e-05, "loss": 0.2268, "step": 16090 }, { "epoch": 0.8812900399715271, "grad_norm": 0.14580951631069183, "learning_rate": 3.923544919894545e-05, "loss": 0.223, "step": 16095 }, { "epoch": 0.8815638175546187, "grad_norm": 0.15975528955459595, "learning_rate": 3.923037923342122e-05, "loss": 0.2119, "step": 16100 }, { "epoch": 0.8818375951377101, "grad_norm": 0.18991661071777344, "learning_rate": 3.922530926789698e-05, "loss": 0.2209, "step": 16105 }, { "epoch": 0.8821113727208016, "grad_norm": 0.16393524408340454, "learning_rate": 3.9220239302372745e-05, "loss": 0.2175, "step": 16110 }, { "epoch": 0.8823851503038931, "grad_norm": 0.1328599900007248, "learning_rate": 3.921516933684851e-05, "loss": 0.2226, "step": 16115 }, { "epoch": 0.8826589278869846, "grad_norm": 0.1606399267911911, "learning_rate": 3.921009937132428e-05, "loss": 0.22, "step": 16120 }, { "epoch": 0.8829327054700761, "grad_norm": 0.1873934417963028, "learning_rate": 3.920502940580004e-05, "loss": 0.2351, "step": 16125 }, { "epoch": 0.8832064830531676, "grad_norm": 0.2253047227859497, "learning_rate": 3.9199959440275805e-05, "loss": 0.2125, "step": 16130 }, { "epoch": 0.8834802606362591, "grad_norm": 0.2255186289548874, "learning_rate": 3.9194889474751575e-05, "loss": 0.2277, "step": 16135 }, { "epoch": 0.8837540382193506, "grad_norm": 0.1600624918937683, "learning_rate": 3.918981950922734e-05, "loss": 0.2301, "step": 16140 }, { "epoch": 0.884027815802442, "grad_norm": 0.1613069474697113, "learning_rate": 3.91847495437031e-05, "loss": 0.212, "step": 16145 }, { "epoch": 0.8843015933855336, "grad_norm": 0.20788060128688812, "learning_rate": 3.9179679578178865e-05, "loss": 0.2325, "step": 16150 }, { "epoch": 0.884575370968625, "grad_norm": 0.19283342361450195, "learning_rate": 3.9174609612654635e-05, "loss": 0.2214, "step": 16155 }, { "epoch": 0.8848491485517166, "grad_norm": 0.16271181404590607, "learning_rate": 3.91695396471304e-05, "loss": 0.219, "step": 16160 }, { "epoch": 0.885122926134808, "grad_norm": 0.15358644723892212, "learning_rate": 3.916446968160617e-05, "loss": 0.2116, "step": 16165 }, { "epoch": 0.8853967037178996, "grad_norm": 0.14695905148983002, "learning_rate": 3.915939971608193e-05, "loss": 0.2276, "step": 16170 }, { "epoch": 0.8856704813009911, "grad_norm": 0.16811996698379517, "learning_rate": 3.91543297505577e-05, "loss": 0.2274, "step": 16175 }, { "epoch": 0.8859442588840826, "grad_norm": 0.16866648197174072, "learning_rate": 3.9149259785033465e-05, "loss": 0.2158, "step": 16180 }, { "epoch": 0.8862180364671741, "grad_norm": 0.15024594962596893, "learning_rate": 3.914418981950923e-05, "loss": 0.2141, "step": 16185 }, { "epoch": 0.8864918140502656, "grad_norm": 0.21342693269252777, "learning_rate": 3.9139119853985e-05, "loss": 0.2268, "step": 16190 }, { "epoch": 0.8867655916333571, "grad_norm": 0.17978507280349731, "learning_rate": 3.913404988846076e-05, "loss": 0.213, "step": 16195 }, { "epoch": 0.8870393692164485, "grad_norm": 0.17959009110927582, "learning_rate": 3.9128979922936525e-05, "loss": 0.2215, "step": 16200 }, { "epoch": 0.8873131467995401, "grad_norm": 0.2023646980524063, "learning_rate": 3.912390995741229e-05, "loss": 0.229, "step": 16205 }, { "epoch": 0.8875869243826315, "grad_norm": 0.17816415429115295, "learning_rate": 3.911883999188806e-05, "loss": 0.2229, "step": 16210 }, { "epoch": 0.8878607019657231, "grad_norm": 0.16607557237148285, "learning_rate": 3.911377002636382e-05, "loss": 0.223, "step": 16215 }, { "epoch": 0.8881344795488145, "grad_norm": 0.21192574501037598, "learning_rate": 3.9108700060839585e-05, "loss": 0.2287, "step": 16220 }, { "epoch": 0.8884082571319061, "grad_norm": 0.19160042703151703, "learning_rate": 3.9103630095315355e-05, "loss": 0.2207, "step": 16225 }, { "epoch": 0.8886820347149975, "grad_norm": 0.13837005198001862, "learning_rate": 3.909856012979112e-05, "loss": 0.2173, "step": 16230 }, { "epoch": 0.8889558122980891, "grad_norm": 0.20234808325767517, "learning_rate": 3.909349016426688e-05, "loss": 0.2193, "step": 16235 }, { "epoch": 0.8892295898811805, "grad_norm": 0.18963231146335602, "learning_rate": 3.9088420198742645e-05, "loss": 0.2159, "step": 16240 }, { "epoch": 0.889503367464272, "grad_norm": 0.1890767365694046, "learning_rate": 3.9083350233218415e-05, "loss": 0.2265, "step": 16245 }, { "epoch": 0.8897771450473635, "grad_norm": 0.18879342079162598, "learning_rate": 3.9078280267694185e-05, "loss": 0.2166, "step": 16250 }, { "epoch": 0.890050922630455, "grad_norm": 0.1635034829378128, "learning_rate": 3.907321030216995e-05, "loss": 0.2269, "step": 16255 }, { "epoch": 0.8903247002135465, "grad_norm": 0.16855664551258087, "learning_rate": 3.906814033664571e-05, "loss": 0.2175, "step": 16260 }, { "epoch": 0.890598477796638, "grad_norm": 0.17195704579353333, "learning_rate": 3.906307037112148e-05, "loss": 0.2162, "step": 16265 }, { "epoch": 0.8908722553797295, "grad_norm": 0.16179142892360687, "learning_rate": 3.9058000405597245e-05, "loss": 0.2198, "step": 16270 }, { "epoch": 0.891146032962821, "grad_norm": 0.17866453528404236, "learning_rate": 3.905293044007301e-05, "loss": 0.2306, "step": 16275 }, { "epoch": 0.8914198105459125, "grad_norm": 0.17953741550445557, "learning_rate": 3.904786047454877e-05, "loss": 0.2209, "step": 16280 }, { "epoch": 0.891693588129004, "grad_norm": 0.183711975812912, "learning_rate": 3.904279050902454e-05, "loss": 0.2252, "step": 16285 }, { "epoch": 0.8919673657120955, "grad_norm": 0.2035597562789917, "learning_rate": 3.9037720543500305e-05, "loss": 0.2261, "step": 16290 }, { "epoch": 0.892241143295187, "grad_norm": 0.13041995465755463, "learning_rate": 3.903265057797607e-05, "loss": 0.2138, "step": 16295 }, { "epoch": 0.8925149208782784, "grad_norm": 0.15408135950565338, "learning_rate": 3.902758061245184e-05, "loss": 0.2265, "step": 16300 }, { "epoch": 0.89278869846137, "grad_norm": 0.15037672221660614, "learning_rate": 3.90225106469276e-05, "loss": 0.2253, "step": 16305 }, { "epoch": 0.8930624760444614, "grad_norm": 0.14144253730773926, "learning_rate": 3.9017440681403365e-05, "loss": 0.2219, "step": 16310 }, { "epoch": 0.893336253627553, "grad_norm": 0.15446652472019196, "learning_rate": 3.901237071587913e-05, "loss": 0.2205, "step": 16315 }, { "epoch": 0.8936100312106444, "grad_norm": 0.1656261682510376, "learning_rate": 3.90073007503549e-05, "loss": 0.2177, "step": 16320 }, { "epoch": 0.893883808793736, "grad_norm": 0.18315768241882324, "learning_rate": 3.900223078483066e-05, "loss": 0.2259, "step": 16325 }, { "epoch": 0.8941575863768275, "grad_norm": 0.16698862612247467, "learning_rate": 3.899716081930643e-05, "loss": 0.2237, "step": 16330 }, { "epoch": 0.894431363959919, "grad_norm": 0.20110656321048737, "learning_rate": 3.8992090853782195e-05, "loss": 0.2152, "step": 16335 }, { "epoch": 0.8947051415430105, "grad_norm": 0.1875162273645401, "learning_rate": 3.8987020888257966e-05, "loss": 0.2281, "step": 16340 }, { "epoch": 0.894978919126102, "grad_norm": 0.1641092747449875, "learning_rate": 3.898195092273373e-05, "loss": 0.2126, "step": 16345 }, { "epoch": 0.8952526967091935, "grad_norm": 0.19535143673419952, "learning_rate": 3.897688095720949e-05, "loss": 0.226, "step": 16350 }, { "epoch": 0.8955264742922849, "grad_norm": 0.18442462384700775, "learning_rate": 3.897181099168526e-05, "loss": 0.2113, "step": 16355 }, { "epoch": 0.8958002518753765, "grad_norm": 0.1860193908214569, "learning_rate": 3.8966741026161026e-05, "loss": 0.2267, "step": 16360 }, { "epoch": 0.8960740294584679, "grad_norm": 0.16067448258399963, "learning_rate": 3.896167106063679e-05, "loss": 0.2111, "step": 16365 }, { "epoch": 0.8963478070415595, "grad_norm": 0.1977577805519104, "learning_rate": 3.895660109511255e-05, "loss": 0.2113, "step": 16370 }, { "epoch": 0.8966215846246509, "grad_norm": 0.16607847809791565, "learning_rate": 3.895153112958832e-05, "loss": 0.2237, "step": 16375 }, { "epoch": 0.8968953622077425, "grad_norm": 0.17488813400268555, "learning_rate": 3.8946461164064086e-05, "loss": 0.2319, "step": 16380 }, { "epoch": 0.8971691397908339, "grad_norm": 0.14808610081672668, "learning_rate": 3.894139119853985e-05, "loss": 0.218, "step": 16385 }, { "epoch": 0.8974429173739255, "grad_norm": 0.1638382226228714, "learning_rate": 3.893632123301562e-05, "loss": 0.2241, "step": 16390 }, { "epoch": 0.8977166949570169, "grad_norm": 0.20102879405021667, "learning_rate": 3.893125126749138e-05, "loss": 0.2137, "step": 16395 }, { "epoch": 0.8979904725401084, "grad_norm": 0.14032672345638275, "learning_rate": 3.8926181301967146e-05, "loss": 0.218, "step": 16400 }, { "epoch": 0.8982642501231999, "grad_norm": 0.15321137011051178, "learning_rate": 3.892111133644291e-05, "loss": 0.2158, "step": 16405 }, { "epoch": 0.8985380277062914, "grad_norm": 0.19509033858776093, "learning_rate": 3.891604137091868e-05, "loss": 0.2192, "step": 16410 }, { "epoch": 0.8988118052893829, "grad_norm": 0.1896442323923111, "learning_rate": 3.891097140539445e-05, "loss": 0.2197, "step": 16415 }, { "epoch": 0.8990855828724744, "grad_norm": 0.17349593341350555, "learning_rate": 3.890590143987021e-05, "loss": 0.2316, "step": 16420 }, { "epoch": 0.8993593604555659, "grad_norm": 0.15712468326091766, "learning_rate": 3.8900831474345976e-05, "loss": 0.2254, "step": 16425 }, { "epoch": 0.8996331380386574, "grad_norm": 0.1600867509841919, "learning_rate": 3.8895761508821746e-05, "loss": 0.2178, "step": 16430 }, { "epoch": 0.8999069156217488, "grad_norm": 0.15506435930728912, "learning_rate": 3.889069154329751e-05, "loss": 0.2251, "step": 16435 }, { "epoch": 0.9001806932048404, "grad_norm": 0.15460172295570374, "learning_rate": 3.888562157777327e-05, "loss": 0.2194, "step": 16440 }, { "epoch": 0.9004544707879318, "grad_norm": 0.2451135814189911, "learning_rate": 3.8880551612249036e-05, "loss": 0.2209, "step": 16445 }, { "epoch": 0.9007282483710234, "grad_norm": 0.17827856540679932, "learning_rate": 3.8875481646724806e-05, "loss": 0.218, "step": 16450 }, { "epoch": 0.9010020259541148, "grad_norm": 0.21503277122974396, "learning_rate": 3.887041168120057e-05, "loss": 0.2255, "step": 16455 }, { "epoch": 0.9012758035372064, "grad_norm": 0.1918146312236786, "learning_rate": 3.886534171567633e-05, "loss": 0.2228, "step": 16460 }, { "epoch": 0.9015495811202978, "grad_norm": 0.16835792362689972, "learning_rate": 3.88602717501521e-05, "loss": 0.2139, "step": 16465 }, { "epoch": 0.9018233587033894, "grad_norm": 0.22239434719085693, "learning_rate": 3.8855201784627866e-05, "loss": 0.2209, "step": 16470 }, { "epoch": 0.9020971362864809, "grad_norm": 0.29378530383110046, "learning_rate": 3.885013181910363e-05, "loss": 0.2192, "step": 16475 }, { "epoch": 0.9023709138695724, "grad_norm": 0.21169419586658478, "learning_rate": 3.884506185357939e-05, "loss": 0.2128, "step": 16480 }, { "epoch": 0.9026446914526639, "grad_norm": 0.28082507848739624, "learning_rate": 3.883999188805516e-05, "loss": 0.2155, "step": 16485 }, { "epoch": 0.9029184690357553, "grad_norm": 0.1933925300836563, "learning_rate": 3.883492192253093e-05, "loss": 0.2167, "step": 16490 }, { "epoch": 0.9031922466188469, "grad_norm": 0.13786669075489044, "learning_rate": 3.8829851957006696e-05, "loss": 0.2139, "step": 16495 }, { "epoch": 0.9034660242019383, "grad_norm": 0.16199491918087006, "learning_rate": 3.882478199148246e-05, "loss": 0.2153, "step": 16500 }, { "epoch": 0.9037398017850299, "grad_norm": 0.21977654099464417, "learning_rate": 3.881971202595823e-05, "loss": 0.2301, "step": 16505 }, { "epoch": 0.9040135793681213, "grad_norm": 0.16103319823741913, "learning_rate": 3.881464206043399e-05, "loss": 0.2217, "step": 16510 }, { "epoch": 0.9042873569512129, "grad_norm": 0.15593454241752625, "learning_rate": 3.8809572094909756e-05, "loss": 0.2168, "step": 16515 }, { "epoch": 0.9045611345343043, "grad_norm": 0.1765109896659851, "learning_rate": 3.8804502129385526e-05, "loss": 0.229, "step": 16520 }, { "epoch": 0.9048349121173959, "grad_norm": 0.16285623610019684, "learning_rate": 3.879943216386129e-05, "loss": 0.2312, "step": 16525 }, { "epoch": 0.9051086897004873, "grad_norm": 0.15595631301403046, "learning_rate": 3.879436219833705e-05, "loss": 0.224, "step": 16530 }, { "epoch": 0.9053824672835789, "grad_norm": 0.1608792543411255, "learning_rate": 3.8789292232812816e-05, "loss": 0.2194, "step": 16535 }, { "epoch": 0.9056562448666703, "grad_norm": 0.13465236127376556, "learning_rate": 3.8784222267288586e-05, "loss": 0.2186, "step": 16540 }, { "epoch": 0.9059300224497618, "grad_norm": 0.1459757387638092, "learning_rate": 3.877915230176435e-05, "loss": 0.22, "step": 16545 }, { "epoch": 0.9062038000328533, "grad_norm": 0.1624721735715866, "learning_rate": 3.877408233624011e-05, "loss": 0.2248, "step": 16550 }, { "epoch": 0.9064775776159448, "grad_norm": 0.14708992838859558, "learning_rate": 3.876901237071588e-05, "loss": 0.2269, "step": 16555 }, { "epoch": 0.9067513551990363, "grad_norm": 0.15020422637462616, "learning_rate": 3.8763942405191646e-05, "loss": 0.2208, "step": 16560 }, { "epoch": 0.9070251327821278, "grad_norm": 0.1735670566558838, "learning_rate": 3.875887243966741e-05, "loss": 0.2088, "step": 16565 }, { "epoch": 0.9072989103652193, "grad_norm": 0.1824474185705185, "learning_rate": 3.875380247414318e-05, "loss": 0.2195, "step": 16570 }, { "epoch": 0.9075726879483108, "grad_norm": 0.1396336853504181, "learning_rate": 3.874873250861894e-05, "loss": 0.2107, "step": 16575 }, { "epoch": 0.9078464655314022, "grad_norm": 0.1751578450202942, "learning_rate": 3.874366254309471e-05, "loss": 0.2151, "step": 16580 }, { "epoch": 0.9081202431144938, "grad_norm": 0.13810600340366364, "learning_rate": 3.8738592577570476e-05, "loss": 0.2177, "step": 16585 }, { "epoch": 0.9083940206975852, "grad_norm": 0.1391001045703888, "learning_rate": 3.873352261204624e-05, "loss": 0.2164, "step": 16590 }, { "epoch": 0.9086677982806768, "grad_norm": 0.15144596993923187, "learning_rate": 3.872845264652201e-05, "loss": 0.2228, "step": 16595 }, { "epoch": 0.9089415758637682, "grad_norm": 0.14972415566444397, "learning_rate": 3.872338268099777e-05, "loss": 0.2162, "step": 16600 }, { "epoch": 0.9092153534468598, "grad_norm": 0.19066151976585388, "learning_rate": 3.8718312715473536e-05, "loss": 0.2352, "step": 16605 }, { "epoch": 0.9094891310299512, "grad_norm": 0.18690729141235352, "learning_rate": 3.87132427499493e-05, "loss": 0.227, "step": 16610 }, { "epoch": 0.9097629086130428, "grad_norm": 0.15193147957324982, "learning_rate": 3.870817278442507e-05, "loss": 0.2269, "step": 16615 }, { "epoch": 0.9100366861961343, "grad_norm": 0.14962518215179443, "learning_rate": 3.870310281890083e-05, "loss": 0.2424, "step": 16620 }, { "epoch": 0.9103104637792258, "grad_norm": 0.16999344527721405, "learning_rate": 3.8698032853376596e-05, "loss": 0.2123, "step": 16625 }, { "epoch": 0.9105842413623173, "grad_norm": 0.15565000474452972, "learning_rate": 3.8692962887852366e-05, "loss": 0.2162, "step": 16630 }, { "epoch": 0.9108580189454087, "grad_norm": 0.17217925190925598, "learning_rate": 3.868789292232813e-05, "loss": 0.2187, "step": 16635 }, { "epoch": 0.9111317965285003, "grad_norm": 0.18517178297042847, "learning_rate": 3.868282295680389e-05, "loss": 0.2131, "step": 16640 }, { "epoch": 0.9114055741115917, "grad_norm": 0.17820590734481812, "learning_rate": 3.8677752991279656e-05, "loss": 0.2209, "step": 16645 }, { "epoch": 0.9116793516946833, "grad_norm": 0.16141587495803833, "learning_rate": 3.8672683025755426e-05, "loss": 0.2157, "step": 16650 }, { "epoch": 0.9119531292777747, "grad_norm": 0.14361898601055145, "learning_rate": 3.8667613060231196e-05, "loss": 0.2242, "step": 16655 }, { "epoch": 0.9122269068608663, "grad_norm": 0.17993824183940887, "learning_rate": 3.866254309470696e-05, "loss": 0.2254, "step": 16660 }, { "epoch": 0.9125006844439577, "grad_norm": 0.14942538738250732, "learning_rate": 3.865747312918272e-05, "loss": 0.2278, "step": 16665 }, { "epoch": 0.9127744620270493, "grad_norm": 0.1700957864522934, "learning_rate": 3.865240316365849e-05, "loss": 0.2154, "step": 16670 }, { "epoch": 0.9130482396101407, "grad_norm": 0.16219936311244965, "learning_rate": 3.8647333198134256e-05, "loss": 0.2239, "step": 16675 }, { "epoch": 0.9133220171932322, "grad_norm": 0.14586715400218964, "learning_rate": 3.864226323261002e-05, "loss": 0.2248, "step": 16680 }, { "epoch": 0.9135957947763237, "grad_norm": 0.17857761681079865, "learning_rate": 3.863719326708579e-05, "loss": 0.2203, "step": 16685 }, { "epoch": 0.9138695723594152, "grad_norm": 0.1613335758447647, "learning_rate": 3.863212330156155e-05, "loss": 0.2161, "step": 16690 }, { "epoch": 0.9141433499425067, "grad_norm": 0.14647835493087769, "learning_rate": 3.8627053336037316e-05, "loss": 0.213, "step": 16695 }, { "epoch": 0.9144171275255982, "grad_norm": 0.15805216133594513, "learning_rate": 3.862198337051308e-05, "loss": 0.2148, "step": 16700 }, { "epoch": 0.9146909051086897, "grad_norm": 0.15455250442028046, "learning_rate": 3.861691340498885e-05, "loss": 0.2208, "step": 16705 }, { "epoch": 0.9149646826917812, "grad_norm": 0.14134234189987183, "learning_rate": 3.861184343946461e-05, "loss": 0.2344, "step": 16710 }, { "epoch": 0.9152384602748727, "grad_norm": 0.16055823862552643, "learning_rate": 3.8606773473940376e-05, "loss": 0.2202, "step": 16715 }, { "epoch": 0.9155122378579642, "grad_norm": 0.1365785300731659, "learning_rate": 3.860170350841614e-05, "loss": 0.2224, "step": 16720 }, { "epoch": 0.9157860154410556, "grad_norm": 0.16451391577720642, "learning_rate": 3.859663354289191e-05, "loss": 0.2201, "step": 16725 }, { "epoch": 0.9160597930241472, "grad_norm": 0.1761467307806015, "learning_rate": 3.859156357736767e-05, "loss": 0.2203, "step": 16730 }, { "epoch": 0.9163335706072386, "grad_norm": 0.18964558839797974, "learning_rate": 3.858649361184344e-05, "loss": 0.2189, "step": 16735 }, { "epoch": 0.9166073481903302, "grad_norm": 0.18947401642799377, "learning_rate": 3.8581423646319206e-05, "loss": 0.2252, "step": 16740 }, { "epoch": 0.9168811257734216, "grad_norm": 0.1440955549478531, "learning_rate": 3.8576353680794976e-05, "loss": 0.2169, "step": 16745 }, { "epoch": 0.9171549033565132, "grad_norm": 0.1893339306116104, "learning_rate": 3.857128371527074e-05, "loss": 0.2311, "step": 16750 }, { "epoch": 0.9174286809396046, "grad_norm": 0.1435205340385437, "learning_rate": 3.85662137497465e-05, "loss": 0.2179, "step": 16755 }, { "epoch": 0.9177024585226962, "grad_norm": 0.16601352393627167, "learning_rate": 3.856114378422227e-05, "loss": 0.2238, "step": 16760 }, { "epoch": 0.9179762361057877, "grad_norm": 0.15546855330467224, "learning_rate": 3.8556073818698036e-05, "loss": 0.2257, "step": 16765 }, { "epoch": 0.9182500136888792, "grad_norm": 0.135462686419487, "learning_rate": 3.85510038531738e-05, "loss": 0.2213, "step": 16770 }, { "epoch": 0.9185237912719707, "grad_norm": 0.1544700562953949, "learning_rate": 3.854593388764956e-05, "loss": 0.2142, "step": 16775 }, { "epoch": 0.9187975688550621, "grad_norm": 0.1375010907649994, "learning_rate": 3.854086392212533e-05, "loss": 0.2132, "step": 16780 }, { "epoch": 0.9190713464381537, "grad_norm": 0.1375955492258072, "learning_rate": 3.8535793956601096e-05, "loss": 0.2266, "step": 16785 }, { "epoch": 0.9193451240212451, "grad_norm": 0.14819318056106567, "learning_rate": 3.853072399107686e-05, "loss": 0.2124, "step": 16790 }, { "epoch": 0.9196189016043367, "grad_norm": 0.17585067451000214, "learning_rate": 3.852565402555263e-05, "loss": 0.2283, "step": 16795 }, { "epoch": 0.9198926791874281, "grad_norm": 0.1625596135854721, "learning_rate": 3.852058406002839e-05, "loss": 0.2068, "step": 16800 }, { "epoch": 0.9201664567705197, "grad_norm": 0.14193762838840485, "learning_rate": 3.8515514094504156e-05, "loss": 0.2234, "step": 16805 }, { "epoch": 0.9204402343536111, "grad_norm": 0.1448332816362381, "learning_rate": 3.851044412897992e-05, "loss": 0.223, "step": 16810 }, { "epoch": 0.9207140119367027, "grad_norm": 0.1558855175971985, "learning_rate": 3.8505374163455696e-05, "loss": 0.2244, "step": 16815 }, { "epoch": 0.9209877895197941, "grad_norm": 0.1513526886701584, "learning_rate": 3.850030419793146e-05, "loss": 0.2162, "step": 16820 }, { "epoch": 0.9212615671028856, "grad_norm": 0.1408485323190689, "learning_rate": 3.849523423240722e-05, "loss": 0.2126, "step": 16825 }, { "epoch": 0.9215353446859771, "grad_norm": 0.161000594496727, "learning_rate": 3.8490164266882986e-05, "loss": 0.2209, "step": 16830 }, { "epoch": 0.9218091222690686, "grad_norm": 0.1645747274160385, "learning_rate": 3.8485094301358756e-05, "loss": 0.2189, "step": 16835 }, { "epoch": 0.9220828998521601, "grad_norm": 0.15217137336730957, "learning_rate": 3.848002433583452e-05, "loss": 0.2177, "step": 16840 }, { "epoch": 0.9223566774352516, "grad_norm": 0.16038459539413452, "learning_rate": 3.847495437031028e-05, "loss": 0.2307, "step": 16845 }, { "epoch": 0.9226304550183431, "grad_norm": 0.15415416657924652, "learning_rate": 3.8469884404786046e-05, "loss": 0.2141, "step": 16850 }, { "epoch": 0.9229042326014346, "grad_norm": 0.18815746903419495, "learning_rate": 3.8464814439261816e-05, "loss": 0.2197, "step": 16855 }, { "epoch": 0.923178010184526, "grad_norm": 0.15067970752716064, "learning_rate": 3.845974447373758e-05, "loss": 0.2145, "step": 16860 }, { "epoch": 0.9234517877676176, "grad_norm": 0.14332512021064758, "learning_rate": 3.845467450821334e-05, "loss": 0.2167, "step": 16865 }, { "epoch": 0.923725565350709, "grad_norm": 0.14693443477153778, "learning_rate": 3.844960454268911e-05, "loss": 0.2212, "step": 16870 }, { "epoch": 0.9239993429338006, "grad_norm": 0.1470099538564682, "learning_rate": 3.8444534577164876e-05, "loss": 0.2157, "step": 16875 }, { "epoch": 0.924273120516892, "grad_norm": 0.14326180517673492, "learning_rate": 3.843946461164064e-05, "loss": 0.2167, "step": 16880 }, { "epoch": 0.9245468980999836, "grad_norm": 0.19877628982067108, "learning_rate": 3.84343946461164e-05, "loss": 0.226, "step": 16885 }, { "epoch": 0.924820675683075, "grad_norm": 0.1273956447839737, "learning_rate": 3.842932468059217e-05, "loss": 0.2202, "step": 16890 }, { "epoch": 0.9250944532661666, "grad_norm": 0.18332229554653168, "learning_rate": 3.8424254715067936e-05, "loss": 0.2148, "step": 16895 }, { "epoch": 0.925368230849258, "grad_norm": 0.18742594122886658, "learning_rate": 3.8419184749543706e-05, "loss": 0.2298, "step": 16900 }, { "epoch": 0.9256420084323496, "grad_norm": 0.17931205034255981, "learning_rate": 3.841411478401947e-05, "loss": 0.2218, "step": 16905 }, { "epoch": 0.925915786015441, "grad_norm": 0.15822292864322662, "learning_rate": 3.840904481849524e-05, "loss": 0.2246, "step": 16910 }, { "epoch": 0.9261895635985326, "grad_norm": 0.17296791076660156, "learning_rate": 3.8403974852971e-05, "loss": 0.2169, "step": 16915 }, { "epoch": 0.9264633411816241, "grad_norm": 0.13785101473331451, "learning_rate": 3.8398904887446766e-05, "loss": 0.217, "step": 16920 }, { "epoch": 0.9267371187647155, "grad_norm": 0.15164630115032196, "learning_rate": 3.8393834921922536e-05, "loss": 0.2194, "step": 16925 }, { "epoch": 0.9270108963478071, "grad_norm": 0.14025737345218658, "learning_rate": 3.83887649563983e-05, "loss": 0.2264, "step": 16930 }, { "epoch": 0.9272846739308985, "grad_norm": 0.15749402344226837, "learning_rate": 3.838369499087406e-05, "loss": 0.2108, "step": 16935 }, { "epoch": 0.9275584515139901, "grad_norm": 0.18014036118984222, "learning_rate": 3.8378625025349826e-05, "loss": 0.2173, "step": 16940 }, { "epoch": 0.9278322290970815, "grad_norm": 0.17246171832084656, "learning_rate": 3.8373555059825596e-05, "loss": 0.2116, "step": 16945 }, { "epoch": 0.9281060066801731, "grad_norm": 0.19384120404720306, "learning_rate": 3.836848509430136e-05, "loss": 0.2202, "step": 16950 }, { "epoch": 0.9283797842632645, "grad_norm": 0.17172189056873322, "learning_rate": 3.836341512877712e-05, "loss": 0.2271, "step": 16955 }, { "epoch": 0.9286535618463561, "grad_norm": 0.1494697630405426, "learning_rate": 3.835834516325289e-05, "loss": 0.2199, "step": 16960 }, { "epoch": 0.9289273394294475, "grad_norm": 0.21113699674606323, "learning_rate": 3.8353275197728656e-05, "loss": 0.2198, "step": 16965 }, { "epoch": 0.929201117012539, "grad_norm": 0.17613175511360168, "learning_rate": 3.834820523220442e-05, "loss": 0.2209, "step": 16970 }, { "epoch": 0.9294748945956305, "grad_norm": 0.17902028560638428, "learning_rate": 3.834313526668018e-05, "loss": 0.2254, "step": 16975 }, { "epoch": 0.929748672178722, "grad_norm": 0.1863524466753006, "learning_rate": 3.833806530115595e-05, "loss": 0.2174, "step": 16980 }, { "epoch": 0.9300224497618135, "grad_norm": 0.1816716492176056, "learning_rate": 3.833299533563172e-05, "loss": 0.2189, "step": 16985 }, { "epoch": 0.930296227344905, "grad_norm": 0.2042967528104782, "learning_rate": 3.8327925370107487e-05, "loss": 0.2172, "step": 16990 }, { "epoch": 0.9305700049279965, "grad_norm": 0.17257508635520935, "learning_rate": 3.832285540458325e-05, "loss": 0.2235, "step": 16995 }, { "epoch": 0.930843782511088, "grad_norm": 0.1775352656841278, "learning_rate": 3.831778543905902e-05, "loss": 0.2149, "step": 17000 }, { "epoch": 0.9311175600941795, "grad_norm": 0.14929930865764618, "learning_rate": 3.831271547353478e-05, "loss": 0.2226, "step": 17005 }, { "epoch": 0.931391337677271, "grad_norm": 0.1794017106294632, "learning_rate": 3.8307645508010547e-05, "loss": 0.2167, "step": 17010 }, { "epoch": 0.9316651152603624, "grad_norm": 0.1517818570137024, "learning_rate": 3.830257554248631e-05, "loss": 0.22, "step": 17015 }, { "epoch": 0.931938892843454, "grad_norm": 0.15704724192619324, "learning_rate": 3.829750557696208e-05, "loss": 0.2181, "step": 17020 }, { "epoch": 0.9322126704265454, "grad_norm": 0.1533052772283554, "learning_rate": 3.829243561143784e-05, "loss": 0.2271, "step": 17025 }, { "epoch": 0.932486448009637, "grad_norm": 0.16134430468082428, "learning_rate": 3.8287365645913607e-05, "loss": 0.2262, "step": 17030 }, { "epoch": 0.9327602255927284, "grad_norm": 0.2080404907464981, "learning_rate": 3.8282295680389377e-05, "loss": 0.2304, "step": 17035 }, { "epoch": 0.93303400317582, "grad_norm": 0.1642976552248001, "learning_rate": 3.827722571486514e-05, "loss": 0.2186, "step": 17040 }, { "epoch": 0.9333077807589114, "grad_norm": 0.14601780474185944, "learning_rate": 3.82721557493409e-05, "loss": 0.2205, "step": 17045 }, { "epoch": 0.933581558342003, "grad_norm": 0.14133429527282715, "learning_rate": 3.8267085783816667e-05, "loss": 0.2137, "step": 17050 }, { "epoch": 0.9338553359250944, "grad_norm": 0.16432327032089233, "learning_rate": 3.8262015818292437e-05, "loss": 0.2147, "step": 17055 }, { "epoch": 0.934129113508186, "grad_norm": 0.14564144611358643, "learning_rate": 3.825694585276821e-05, "loss": 0.2191, "step": 17060 }, { "epoch": 0.9344028910912775, "grad_norm": 0.13931693136692047, "learning_rate": 3.825187588724397e-05, "loss": 0.2166, "step": 17065 }, { "epoch": 0.9346766686743689, "grad_norm": 0.14151841402053833, "learning_rate": 3.824680592171973e-05, "loss": 0.2202, "step": 17070 }, { "epoch": 0.9349504462574605, "grad_norm": 0.15303900837898254, "learning_rate": 3.82417359561955e-05, "loss": 0.2153, "step": 17075 }, { "epoch": 0.9352242238405519, "grad_norm": 0.1876765340566635, "learning_rate": 3.823666599067127e-05, "loss": 0.2219, "step": 17080 }, { "epoch": 0.9354980014236435, "grad_norm": 0.17708824574947357, "learning_rate": 3.823159602514703e-05, "loss": 0.2208, "step": 17085 }, { "epoch": 0.9357717790067349, "grad_norm": 0.14044158160686493, "learning_rate": 3.82265260596228e-05, "loss": 0.2179, "step": 17090 }, { "epoch": 0.9360455565898265, "grad_norm": 0.1669730246067047, "learning_rate": 3.822145609409856e-05, "loss": 0.2153, "step": 17095 }, { "epoch": 0.9363193341729179, "grad_norm": 0.14708451926708221, "learning_rate": 3.821638612857433e-05, "loss": 0.2131, "step": 17100 }, { "epoch": 0.9365931117560095, "grad_norm": 0.15761645138263702, "learning_rate": 3.821131616305009e-05, "loss": 0.213, "step": 17105 }, { "epoch": 0.9368668893391009, "grad_norm": 0.14622437953948975, "learning_rate": 3.820624619752586e-05, "loss": 0.2128, "step": 17110 }, { "epoch": 0.9371406669221924, "grad_norm": 0.14618661999702454, "learning_rate": 3.820117623200162e-05, "loss": 0.2207, "step": 17115 }, { "epoch": 0.9374144445052839, "grad_norm": 0.15544261038303375, "learning_rate": 3.819610626647739e-05, "loss": 0.2183, "step": 17120 }, { "epoch": 0.9376882220883754, "grad_norm": 0.1688200682401657, "learning_rate": 3.819103630095316e-05, "loss": 0.224, "step": 17125 }, { "epoch": 0.9379619996714669, "grad_norm": 0.15950226783752441, "learning_rate": 3.818596633542892e-05, "loss": 0.2227, "step": 17130 }, { "epoch": 0.9382357772545584, "grad_norm": 0.17740362882614136, "learning_rate": 3.818089636990468e-05, "loss": 0.2205, "step": 17135 }, { "epoch": 0.9385095548376499, "grad_norm": 0.1560971438884735, "learning_rate": 3.8175826404380453e-05, "loss": 0.2073, "step": 17140 }, { "epoch": 0.9387833324207414, "grad_norm": 0.1943795382976532, "learning_rate": 3.817075643885622e-05, "loss": 0.2242, "step": 17145 }, { "epoch": 0.9390571100038329, "grad_norm": 0.15874360501766205, "learning_rate": 3.816568647333199e-05, "loss": 0.2222, "step": 17150 }, { "epoch": 0.9393308875869244, "grad_norm": 0.20010462403297424, "learning_rate": 3.816061650780775e-05, "loss": 0.2282, "step": 17155 }, { "epoch": 0.9396046651700158, "grad_norm": 0.18936768174171448, "learning_rate": 3.8155546542283513e-05, "loss": 0.2272, "step": 17160 }, { "epoch": 0.9398784427531074, "grad_norm": 0.15122397243976593, "learning_rate": 3.8150476576759284e-05, "loss": 0.2154, "step": 17165 }, { "epoch": 0.9401522203361988, "grad_norm": 0.15102918446063995, "learning_rate": 3.814540661123505e-05, "loss": 0.2145, "step": 17170 }, { "epoch": 0.9404259979192904, "grad_norm": 0.15328861773014069, "learning_rate": 3.814033664571081e-05, "loss": 0.2218, "step": 17175 }, { "epoch": 0.9406997755023818, "grad_norm": 0.16373899579048157, "learning_rate": 3.8135266680186573e-05, "loss": 0.2213, "step": 17180 }, { "epoch": 0.9409735530854734, "grad_norm": 0.17001718282699585, "learning_rate": 3.8130196714662344e-05, "loss": 0.2214, "step": 17185 }, { "epoch": 0.9412473306685648, "grad_norm": 0.1408540904521942, "learning_rate": 3.812512674913811e-05, "loss": 0.2219, "step": 17190 }, { "epoch": 0.9415211082516564, "grad_norm": 0.17729105055332184, "learning_rate": 3.812005678361387e-05, "loss": 0.2154, "step": 17195 }, { "epoch": 0.9417948858347478, "grad_norm": 0.16451764106750488, "learning_rate": 3.811498681808964e-05, "loss": 0.2284, "step": 17200 }, { "epoch": 0.9420686634178393, "grad_norm": 0.1477985829114914, "learning_rate": 3.8109916852565404e-05, "loss": 0.2164, "step": 17205 }, { "epoch": 0.9423424410009309, "grad_norm": 0.1499282270669937, "learning_rate": 3.810484688704117e-05, "loss": 0.2226, "step": 17210 }, { "epoch": 0.9426162185840223, "grad_norm": 0.16411036252975464, "learning_rate": 3.809977692151693e-05, "loss": 0.2262, "step": 17215 }, { "epoch": 0.9428899961671139, "grad_norm": 0.16457432508468628, "learning_rate": 3.80947069559927e-05, "loss": 0.2168, "step": 17220 }, { "epoch": 0.9431637737502053, "grad_norm": 0.17211848497390747, "learning_rate": 3.808963699046847e-05, "loss": 0.2221, "step": 17225 }, { "epoch": 0.9434375513332969, "grad_norm": 0.1634446233510971, "learning_rate": 3.8084567024944234e-05, "loss": 0.2146, "step": 17230 }, { "epoch": 0.9437113289163883, "grad_norm": 0.18015100061893463, "learning_rate": 3.807949705942e-05, "loss": 0.2275, "step": 17235 }, { "epoch": 0.9439851064994799, "grad_norm": 0.17137426137924194, "learning_rate": 3.807442709389577e-05, "loss": 0.2198, "step": 17240 }, { "epoch": 0.9442588840825713, "grad_norm": 0.14703647792339325, "learning_rate": 3.806935712837153e-05, "loss": 0.2236, "step": 17245 }, { "epoch": 0.9445326616656629, "grad_norm": 0.13660366833209991, "learning_rate": 3.8064287162847294e-05, "loss": 0.2171, "step": 17250 }, { "epoch": 0.9448064392487543, "grad_norm": 0.15261884033679962, "learning_rate": 3.8059217197323064e-05, "loss": 0.2283, "step": 17255 }, { "epoch": 0.9450802168318458, "grad_norm": 0.14106927812099457, "learning_rate": 3.805414723179883e-05, "loss": 0.2161, "step": 17260 }, { "epoch": 0.9453539944149373, "grad_norm": 0.15302923321723938, "learning_rate": 3.804907726627459e-05, "loss": 0.2254, "step": 17265 }, { "epoch": 0.9456277719980288, "grad_norm": 0.16887861490249634, "learning_rate": 3.8044007300750354e-05, "loss": 0.2239, "step": 17270 }, { "epoch": 0.9459015495811203, "grad_norm": 0.16512712836265564, "learning_rate": 3.8038937335226124e-05, "loss": 0.2236, "step": 17275 }, { "epoch": 0.9461753271642118, "grad_norm": 0.17622345685958862, "learning_rate": 3.803386736970189e-05, "loss": 0.2194, "step": 17280 }, { "epoch": 0.9464491047473033, "grad_norm": 0.17943502962589264, "learning_rate": 3.802879740417765e-05, "loss": 0.2182, "step": 17285 }, { "epoch": 0.9467228823303948, "grad_norm": 0.20226804912090302, "learning_rate": 3.802372743865342e-05, "loss": 0.219, "step": 17290 }, { "epoch": 0.9469966599134862, "grad_norm": 0.2160041481256485, "learning_rate": 3.8018657473129184e-05, "loss": 0.2297, "step": 17295 }, { "epoch": 0.9472704374965778, "grad_norm": 0.14722593128681183, "learning_rate": 3.801358750760495e-05, "loss": 0.2195, "step": 17300 }, { "epoch": 0.9475442150796692, "grad_norm": 0.1697334349155426, "learning_rate": 3.800851754208072e-05, "loss": 0.2209, "step": 17305 }, { "epoch": 0.9478179926627608, "grad_norm": 0.15879423916339874, "learning_rate": 3.800344757655648e-05, "loss": 0.2289, "step": 17310 }, { "epoch": 0.9480917702458522, "grad_norm": 0.14943021535873413, "learning_rate": 3.799837761103225e-05, "loss": 0.2191, "step": 17315 }, { "epoch": 0.9483655478289438, "grad_norm": 0.15521639585494995, "learning_rate": 3.7993307645508014e-05, "loss": 0.2229, "step": 17320 }, { "epoch": 0.9486393254120352, "grad_norm": 0.13600902259349823, "learning_rate": 3.798823767998378e-05, "loss": 0.2155, "step": 17325 }, { "epoch": 0.9489131029951268, "grad_norm": 0.15283262729644775, "learning_rate": 3.798316771445955e-05, "loss": 0.2246, "step": 17330 }, { "epoch": 0.9491868805782182, "grad_norm": 0.13515795767307281, "learning_rate": 3.797809774893531e-05, "loss": 0.2104, "step": 17335 }, { "epoch": 0.9494606581613098, "grad_norm": 0.15399986505508423, "learning_rate": 3.7973027783411074e-05, "loss": 0.2162, "step": 17340 }, { "epoch": 0.9497344357444012, "grad_norm": 0.15978658199310303, "learning_rate": 3.796795781788684e-05, "loss": 0.2211, "step": 17345 }, { "epoch": 0.9500082133274927, "grad_norm": 0.15285614132881165, "learning_rate": 3.796288785236261e-05, "loss": 0.2161, "step": 17350 }, { "epoch": 0.9502819909105843, "grad_norm": 0.13593097031116486, "learning_rate": 3.795781788683837e-05, "loss": 0.2174, "step": 17355 }, { "epoch": 0.9505557684936757, "grad_norm": 0.15564805269241333, "learning_rate": 3.7952747921314134e-05, "loss": 0.2125, "step": 17360 }, { "epoch": 0.9508295460767673, "grad_norm": 0.16648846864700317, "learning_rate": 3.7947677955789904e-05, "loss": 0.2223, "step": 17365 }, { "epoch": 0.9511033236598587, "grad_norm": 0.1549735963344574, "learning_rate": 3.794260799026567e-05, "loss": 0.2348, "step": 17370 }, { "epoch": 0.9513771012429503, "grad_norm": 0.14890412986278534, "learning_rate": 3.793753802474143e-05, "loss": 0.2176, "step": 17375 }, { "epoch": 0.9516508788260417, "grad_norm": 0.18490014970302582, "learning_rate": 3.7932468059217194e-05, "loss": 0.2205, "step": 17380 }, { "epoch": 0.9519246564091333, "grad_norm": 0.15746381878852844, "learning_rate": 3.792739809369297e-05, "loss": 0.2203, "step": 17385 }, { "epoch": 0.9521984339922247, "grad_norm": 0.17983831465244293, "learning_rate": 3.7922328128168734e-05, "loss": 0.232, "step": 17390 }, { "epoch": 0.9524722115753163, "grad_norm": 0.15297327935695648, "learning_rate": 3.79172581626445e-05, "loss": 0.2109, "step": 17395 }, { "epoch": 0.9527459891584077, "grad_norm": 0.1395716518163681, "learning_rate": 3.791218819712026e-05, "loss": 0.2231, "step": 17400 }, { "epoch": 0.9530197667414992, "grad_norm": 0.15560898184776306, "learning_rate": 3.790711823159603e-05, "loss": 0.2255, "step": 17405 }, { "epoch": 0.9532935443245907, "grad_norm": 0.15877006947994232, "learning_rate": 3.7902048266071794e-05, "loss": 0.2163, "step": 17410 }, { "epoch": 0.9535673219076822, "grad_norm": 0.20811912417411804, "learning_rate": 3.789697830054756e-05, "loss": 0.2256, "step": 17415 }, { "epoch": 0.9538410994907737, "grad_norm": 0.17555956542491913, "learning_rate": 3.789190833502333e-05, "loss": 0.2199, "step": 17420 }, { "epoch": 0.9541148770738652, "grad_norm": 0.15132634341716766, "learning_rate": 3.788683836949909e-05, "loss": 0.2146, "step": 17425 }, { "epoch": 0.9543886546569567, "grad_norm": 0.15761230885982513, "learning_rate": 3.7881768403974854e-05, "loss": 0.2158, "step": 17430 }, { "epoch": 0.9546624322400482, "grad_norm": 0.1720925122499466, "learning_rate": 3.787669843845062e-05, "loss": 0.2116, "step": 17435 }, { "epoch": 0.9549362098231396, "grad_norm": 0.16379009187221527, "learning_rate": 3.787162847292639e-05, "loss": 0.2239, "step": 17440 }, { "epoch": 0.9552099874062312, "grad_norm": 0.19459791481494904, "learning_rate": 3.786655850740215e-05, "loss": 0.2183, "step": 17445 }, { "epoch": 0.9554837649893226, "grad_norm": 0.15793393552303314, "learning_rate": 3.7861488541877914e-05, "loss": 0.219, "step": 17450 }, { "epoch": 0.9557575425724142, "grad_norm": 0.1442631632089615, "learning_rate": 3.785641857635368e-05, "loss": 0.2089, "step": 17455 }, { "epoch": 0.9560313201555056, "grad_norm": 0.1827803999185562, "learning_rate": 3.785134861082945e-05, "loss": 0.2243, "step": 17460 }, { "epoch": 0.9563050977385972, "grad_norm": 0.17695395648479462, "learning_rate": 3.784627864530522e-05, "loss": 0.2162, "step": 17465 }, { "epoch": 0.9565788753216886, "grad_norm": 0.14198751747608185, "learning_rate": 3.784120867978098e-05, "loss": 0.2165, "step": 17470 }, { "epoch": 0.9568526529047802, "grad_norm": 0.14239336550235748, "learning_rate": 3.7836138714256744e-05, "loss": 0.2248, "step": 17475 }, { "epoch": 0.9571264304878716, "grad_norm": 0.15699176490306854, "learning_rate": 3.7831068748732514e-05, "loss": 0.2195, "step": 17480 }, { "epoch": 0.9574002080709632, "grad_norm": 0.16021676361560822, "learning_rate": 3.782599878320828e-05, "loss": 0.232, "step": 17485 }, { "epoch": 0.9576739856540546, "grad_norm": 0.13097859919071198, "learning_rate": 3.782092881768404e-05, "loss": 0.2206, "step": 17490 }, { "epoch": 0.9579477632371461, "grad_norm": 0.16650864481925964, "learning_rate": 3.781585885215981e-05, "loss": 0.2262, "step": 17495 }, { "epoch": 0.9582215408202377, "grad_norm": 0.17375797033309937, "learning_rate": 3.7810788886635574e-05, "loss": 0.2221, "step": 17500 }, { "epoch": 0.9584953184033291, "grad_norm": 0.1981925219297409, "learning_rate": 3.780571892111134e-05, "loss": 0.2275, "step": 17505 }, { "epoch": 0.9587690959864207, "grad_norm": 0.1572742462158203, "learning_rate": 3.78006489555871e-05, "loss": 0.221, "step": 17510 }, { "epoch": 0.9590428735695121, "grad_norm": 0.14567522704601288, "learning_rate": 3.779557899006287e-05, "loss": 0.2189, "step": 17515 }, { "epoch": 0.9593166511526037, "grad_norm": 0.1699562668800354, "learning_rate": 3.7790509024538634e-05, "loss": 0.2301, "step": 17520 }, { "epoch": 0.9595904287356951, "grad_norm": 0.1579858660697937, "learning_rate": 3.77854390590144e-05, "loss": 0.2096, "step": 17525 }, { "epoch": 0.9598642063187867, "grad_norm": 0.14722193777561188, "learning_rate": 3.778036909349017e-05, "loss": 0.2205, "step": 17530 }, { "epoch": 0.9601379839018781, "grad_norm": 0.170019268989563, "learning_rate": 3.777529912796593e-05, "loss": 0.2204, "step": 17535 }, { "epoch": 0.9604117614849697, "grad_norm": 0.16679325699806213, "learning_rate": 3.7770229162441694e-05, "loss": 0.2233, "step": 17540 }, { "epoch": 0.9606855390680611, "grad_norm": 0.16785085201263428, "learning_rate": 3.776515919691746e-05, "loss": 0.2358, "step": 17545 }, { "epoch": 0.9609593166511526, "grad_norm": 0.14601215720176697, "learning_rate": 3.7760089231393234e-05, "loss": 0.2192, "step": 17550 }, { "epoch": 0.9612330942342441, "grad_norm": 0.1581791490316391, "learning_rate": 3.7755019265869e-05, "loss": 0.2111, "step": 17555 }, { "epoch": 0.9615068718173356, "grad_norm": 0.18167492747306824, "learning_rate": 3.774994930034476e-05, "loss": 0.2272, "step": 17560 }, { "epoch": 0.9617806494004271, "grad_norm": 0.15985003113746643, "learning_rate": 3.7744879334820524e-05, "loss": 0.2187, "step": 17565 }, { "epoch": 0.9620544269835186, "grad_norm": 0.17504025995731354, "learning_rate": 3.7739809369296294e-05, "loss": 0.2163, "step": 17570 }, { "epoch": 0.9623282045666101, "grad_norm": 0.1998526155948639, "learning_rate": 3.773473940377206e-05, "loss": 0.2172, "step": 17575 }, { "epoch": 0.9626019821497016, "grad_norm": 0.1664537787437439, "learning_rate": 3.772966943824782e-05, "loss": 0.2241, "step": 17580 }, { "epoch": 0.962875759732793, "grad_norm": 0.15310919284820557, "learning_rate": 3.7724599472723584e-05, "loss": 0.214, "step": 17585 }, { "epoch": 0.9631495373158846, "grad_norm": 0.15299086272716522, "learning_rate": 3.7719529507199354e-05, "loss": 0.2221, "step": 17590 }, { "epoch": 0.963423314898976, "grad_norm": 0.1669958531856537, "learning_rate": 3.771445954167512e-05, "loss": 0.2201, "step": 17595 }, { "epoch": 0.9636970924820676, "grad_norm": 0.16600266098976135, "learning_rate": 3.770938957615088e-05, "loss": 0.2182, "step": 17600 }, { "epoch": 0.963970870065159, "grad_norm": 0.16512100398540497, "learning_rate": 3.770431961062665e-05, "loss": 0.2225, "step": 17605 }, { "epoch": 0.9642446476482506, "grad_norm": 0.1637863963842392, "learning_rate": 3.7699249645102414e-05, "loss": 0.2146, "step": 17610 }, { "epoch": 0.964518425231342, "grad_norm": 0.16664908826351166, "learning_rate": 3.769417967957818e-05, "loss": 0.2119, "step": 17615 }, { "epoch": 0.9647922028144336, "grad_norm": 0.15068776905536652, "learning_rate": 3.768910971405394e-05, "loss": 0.2159, "step": 17620 }, { "epoch": 0.965065980397525, "grad_norm": 0.15045922994613647, "learning_rate": 3.768403974852971e-05, "loss": 0.2107, "step": 17625 }, { "epoch": 0.9653397579806166, "grad_norm": 0.14522093534469604, "learning_rate": 3.767896978300548e-05, "loss": 0.224, "step": 17630 }, { "epoch": 0.965613535563708, "grad_norm": 0.1751692295074463, "learning_rate": 3.7673899817481244e-05, "loss": 0.217, "step": 17635 }, { "epoch": 0.9658873131467995, "grad_norm": 0.13319018483161926, "learning_rate": 3.766882985195701e-05, "loss": 0.2223, "step": 17640 }, { "epoch": 0.966161090729891, "grad_norm": 0.13844990730285645, "learning_rate": 3.766375988643278e-05, "loss": 0.2223, "step": 17645 }, { "epoch": 0.9664348683129825, "grad_norm": 0.14829619228839874, "learning_rate": 3.765868992090854e-05, "loss": 0.2167, "step": 17650 }, { "epoch": 0.9667086458960741, "grad_norm": 0.1553245186805725, "learning_rate": 3.7653619955384304e-05, "loss": 0.22, "step": 17655 }, { "epoch": 0.9669824234791655, "grad_norm": 0.16156917810440063, "learning_rate": 3.7648549989860074e-05, "loss": 0.2209, "step": 17660 }, { "epoch": 0.9672562010622571, "grad_norm": 0.19150587916374207, "learning_rate": 3.764348002433584e-05, "loss": 0.2229, "step": 17665 }, { "epoch": 0.9675299786453485, "grad_norm": 0.2545674741268158, "learning_rate": 3.76384100588116e-05, "loss": 0.2238, "step": 17670 }, { "epoch": 0.9678037562284401, "grad_norm": 0.19454552233219147, "learning_rate": 3.7633340093287364e-05, "loss": 0.2257, "step": 17675 }, { "epoch": 0.9680775338115315, "grad_norm": 0.16092070937156677, "learning_rate": 3.7628270127763134e-05, "loss": 0.2146, "step": 17680 }, { "epoch": 0.968351311394623, "grad_norm": 0.15331920981407166, "learning_rate": 3.76232001622389e-05, "loss": 0.2162, "step": 17685 }, { "epoch": 0.9686250889777145, "grad_norm": 0.16036708652973175, "learning_rate": 3.761813019671466e-05, "loss": 0.21, "step": 17690 }, { "epoch": 0.968898866560806, "grad_norm": 0.18539012968540192, "learning_rate": 3.761306023119043e-05, "loss": 0.2209, "step": 17695 }, { "epoch": 0.9691726441438975, "grad_norm": 0.17379812896251678, "learning_rate": 3.7607990265666194e-05, "loss": 0.2175, "step": 17700 }, { "epoch": 0.969446421726989, "grad_norm": 0.14293405413627625, "learning_rate": 3.760292030014196e-05, "loss": 0.213, "step": 17705 }, { "epoch": 0.9697201993100805, "grad_norm": 0.14555484056472778, "learning_rate": 3.759785033461773e-05, "loss": 0.223, "step": 17710 }, { "epoch": 0.969993976893172, "grad_norm": 0.18002648651599884, "learning_rate": 3.759278036909349e-05, "loss": 0.2204, "step": 17715 }, { "epoch": 0.9702677544762635, "grad_norm": 0.19145917892456055, "learning_rate": 3.758771040356926e-05, "loss": 0.2181, "step": 17720 }, { "epoch": 0.970541532059355, "grad_norm": 0.1389811784029007, "learning_rate": 3.7582640438045024e-05, "loss": 0.2126, "step": 17725 }, { "epoch": 0.9708153096424464, "grad_norm": 0.15269090235233307, "learning_rate": 3.757757047252079e-05, "loss": 0.227, "step": 17730 }, { "epoch": 0.971089087225538, "grad_norm": 0.16735774278640747, "learning_rate": 3.757250050699656e-05, "loss": 0.2182, "step": 17735 }, { "epoch": 0.9713628648086294, "grad_norm": 0.24789884686470032, "learning_rate": 3.756743054147232e-05, "loss": 0.2215, "step": 17740 }, { "epoch": 0.971636642391721, "grad_norm": 0.185353085398674, "learning_rate": 3.7562360575948084e-05, "loss": 0.2196, "step": 17745 }, { "epoch": 0.9719104199748124, "grad_norm": 0.16652125120162964, "learning_rate": 3.755729061042385e-05, "loss": 0.2247, "step": 17750 }, { "epoch": 0.972184197557904, "grad_norm": 0.18080858886241913, "learning_rate": 3.755222064489962e-05, "loss": 0.2213, "step": 17755 }, { "epoch": 0.9724579751409954, "grad_norm": 0.1468535214662552, "learning_rate": 3.754715067937538e-05, "loss": 0.2251, "step": 17760 }, { "epoch": 0.972731752724087, "grad_norm": 0.14117096364498138, "learning_rate": 3.7542080713851144e-05, "loss": 0.2147, "step": 17765 }, { "epoch": 0.9730055303071784, "grad_norm": 0.1493716984987259, "learning_rate": 3.7537010748326914e-05, "loss": 0.218, "step": 17770 }, { "epoch": 0.97327930789027, "grad_norm": 0.1585056036710739, "learning_rate": 3.753194078280268e-05, "loss": 0.2114, "step": 17775 }, { "epoch": 0.9735530854733614, "grad_norm": 0.14442779123783112, "learning_rate": 3.752687081727844e-05, "loss": 0.2107, "step": 17780 }, { "epoch": 0.9738268630564529, "grad_norm": 0.1544923186302185, "learning_rate": 3.7521800851754204e-05, "loss": 0.2122, "step": 17785 }, { "epoch": 0.9741006406395444, "grad_norm": 0.1479603499174118, "learning_rate": 3.7516730886229974e-05, "loss": 0.2109, "step": 17790 }, { "epoch": 0.9743744182226359, "grad_norm": 0.14190955460071564, "learning_rate": 3.7511660920705745e-05, "loss": 0.2198, "step": 17795 }, { "epoch": 0.9746481958057275, "grad_norm": 0.16141395270824432, "learning_rate": 3.750659095518151e-05, "loss": 0.2206, "step": 17800 }, { "epoch": 0.9749219733888189, "grad_norm": 0.12451034784317017, "learning_rate": 3.750152098965727e-05, "loss": 0.2203, "step": 17805 }, { "epoch": 0.9751957509719105, "grad_norm": 0.14667847752571106, "learning_rate": 3.749645102413304e-05, "loss": 0.2098, "step": 17810 }, { "epoch": 0.9754695285550019, "grad_norm": 0.15082868933677673, "learning_rate": 3.7491381058608804e-05, "loss": 0.2214, "step": 17815 }, { "epoch": 0.9757433061380935, "grad_norm": 0.1542237102985382, "learning_rate": 3.748631109308457e-05, "loss": 0.2226, "step": 17820 }, { "epoch": 0.9760170837211849, "grad_norm": 0.15477365255355835, "learning_rate": 3.748124112756034e-05, "loss": 0.2248, "step": 17825 }, { "epoch": 0.9762908613042764, "grad_norm": 0.15437163412570953, "learning_rate": 3.74761711620361e-05, "loss": 0.2218, "step": 17830 }, { "epoch": 0.9765646388873679, "grad_norm": 0.16451425850391388, "learning_rate": 3.7471101196511864e-05, "loss": 0.218, "step": 17835 }, { "epoch": 0.9768384164704594, "grad_norm": 0.18930675089359283, "learning_rate": 3.746603123098763e-05, "loss": 0.2139, "step": 17840 }, { "epoch": 0.9771121940535509, "grad_norm": 0.14752890169620514, "learning_rate": 3.74609612654634e-05, "loss": 0.2254, "step": 17845 }, { "epoch": 0.9773859716366424, "grad_norm": 0.15823090076446533, "learning_rate": 3.745589129993916e-05, "loss": 0.2142, "step": 17850 }, { "epoch": 0.9776597492197339, "grad_norm": 0.1425502896308899, "learning_rate": 3.7450821334414924e-05, "loss": 0.2238, "step": 17855 }, { "epoch": 0.9779335268028254, "grad_norm": 0.15524759888648987, "learning_rate": 3.7445751368890695e-05, "loss": 0.2165, "step": 17860 }, { "epoch": 0.9782073043859169, "grad_norm": 0.16323363780975342, "learning_rate": 3.744068140336646e-05, "loss": 0.2203, "step": 17865 }, { "epoch": 0.9784810819690084, "grad_norm": 0.17903761565685272, "learning_rate": 3.743561143784222e-05, "loss": 0.2128, "step": 17870 }, { "epoch": 0.9787548595520998, "grad_norm": 0.1553817093372345, "learning_rate": 3.743054147231799e-05, "loss": 0.2158, "step": 17875 }, { "epoch": 0.9790286371351914, "grad_norm": 0.17783136665821075, "learning_rate": 3.7425471506793755e-05, "loss": 0.2284, "step": 17880 }, { "epoch": 0.9793024147182828, "grad_norm": 0.14779534935951233, "learning_rate": 3.7420401541269525e-05, "loss": 0.2141, "step": 17885 }, { "epoch": 0.9795761923013744, "grad_norm": 0.13955026865005493, "learning_rate": 3.741533157574529e-05, "loss": 0.2131, "step": 17890 }, { "epoch": 0.9798499698844658, "grad_norm": 0.14674124121665955, "learning_rate": 3.741026161022105e-05, "loss": 0.2119, "step": 17895 }, { "epoch": 0.9801237474675574, "grad_norm": 0.15992853045463562, "learning_rate": 3.740519164469682e-05, "loss": 0.2227, "step": 17900 }, { "epoch": 0.9803975250506488, "grad_norm": 0.16866141557693481, "learning_rate": 3.7400121679172585e-05, "loss": 0.2188, "step": 17905 }, { "epoch": 0.9806713026337404, "grad_norm": 0.1868792176246643, "learning_rate": 3.739505171364835e-05, "loss": 0.2213, "step": 17910 }, { "epoch": 0.9809450802168318, "grad_norm": 0.14971686899662018, "learning_rate": 3.738998174812411e-05, "loss": 0.2171, "step": 17915 }, { "epoch": 0.9812188577999233, "grad_norm": 0.17040060460567474, "learning_rate": 3.738491178259988e-05, "loss": 0.2264, "step": 17920 }, { "epoch": 0.9814926353830148, "grad_norm": 0.13638006150722504, "learning_rate": 3.7379841817075645e-05, "loss": 0.2089, "step": 17925 }, { "epoch": 0.9817664129661063, "grad_norm": 0.17221282422542572, "learning_rate": 3.737477185155141e-05, "loss": 0.2255, "step": 17930 }, { "epoch": 0.9820401905491978, "grad_norm": 0.16975903511047363, "learning_rate": 3.736970188602718e-05, "loss": 0.2149, "step": 17935 }, { "epoch": 0.9823139681322893, "grad_norm": 0.15960456430912018, "learning_rate": 3.736463192050294e-05, "loss": 0.2184, "step": 17940 }, { "epoch": 0.9825877457153809, "grad_norm": 0.1688055396080017, "learning_rate": 3.7359561954978705e-05, "loss": 0.2219, "step": 17945 }, { "epoch": 0.9828615232984723, "grad_norm": 0.1590014100074768, "learning_rate": 3.735449198945447e-05, "loss": 0.2245, "step": 17950 }, { "epoch": 0.9831353008815639, "grad_norm": 0.17506621778011322, "learning_rate": 3.7349422023930245e-05, "loss": 0.2117, "step": 17955 }, { "epoch": 0.9834090784646553, "grad_norm": 0.19404847919940948, "learning_rate": 3.734435205840601e-05, "loss": 0.2128, "step": 17960 }, { "epoch": 0.9836828560477469, "grad_norm": 0.17199741303920746, "learning_rate": 3.733928209288177e-05, "loss": 0.2213, "step": 17965 }, { "epoch": 0.9839566336308383, "grad_norm": 0.17071788012981415, "learning_rate": 3.7334212127357535e-05, "loss": 0.2174, "step": 17970 }, { "epoch": 0.9842304112139298, "grad_norm": 0.1631534844636917, "learning_rate": 3.7329142161833305e-05, "loss": 0.2169, "step": 17975 }, { "epoch": 0.9845041887970213, "grad_norm": 0.1432487666606903, "learning_rate": 3.732407219630907e-05, "loss": 0.2161, "step": 17980 }, { "epoch": 0.9847779663801128, "grad_norm": 0.14593994617462158, "learning_rate": 3.731900223078483e-05, "loss": 0.2287, "step": 17985 }, { "epoch": 0.9850517439632043, "grad_norm": 0.1221262589097023, "learning_rate": 3.73139322652606e-05, "loss": 0.2147, "step": 17990 }, { "epoch": 0.9853255215462958, "grad_norm": 0.14316079020500183, "learning_rate": 3.7308862299736365e-05, "loss": 0.2266, "step": 17995 }, { "epoch": 0.9855992991293873, "grad_norm": 0.14504212141036987, "learning_rate": 3.730379233421213e-05, "loss": 0.2312, "step": 18000 }, { "epoch": 0.9858730767124788, "grad_norm": 0.12713511288166046, "learning_rate": 3.729872236868789e-05, "loss": 0.2144, "step": 18005 }, { "epoch": 0.9861468542955703, "grad_norm": 0.14315994083881378, "learning_rate": 3.729365240316366e-05, "loss": 0.2173, "step": 18010 }, { "epoch": 0.9864206318786618, "grad_norm": 0.17369534075260162, "learning_rate": 3.7288582437639425e-05, "loss": 0.2106, "step": 18015 }, { "epoch": 0.9866944094617532, "grad_norm": 0.1820080578327179, "learning_rate": 3.728351247211519e-05, "loss": 0.227, "step": 18020 }, { "epoch": 0.9869681870448448, "grad_norm": 0.15530598163604736, "learning_rate": 3.727844250659096e-05, "loss": 0.218, "step": 18025 }, { "epoch": 0.9872419646279362, "grad_norm": 0.18312899768352509, "learning_rate": 3.727337254106672e-05, "loss": 0.2356, "step": 18030 }, { "epoch": 0.9875157422110278, "grad_norm": 0.15390995144844055, "learning_rate": 3.726830257554249e-05, "loss": 0.2151, "step": 18035 }, { "epoch": 0.9877895197941192, "grad_norm": 0.17401279509067535, "learning_rate": 3.7263232610018255e-05, "loss": 0.2252, "step": 18040 }, { "epoch": 0.9880632973772108, "grad_norm": 0.17349550127983093, "learning_rate": 3.725816264449402e-05, "loss": 0.2197, "step": 18045 }, { "epoch": 0.9883370749603022, "grad_norm": 0.13893990218639374, "learning_rate": 3.725309267896979e-05, "loss": 0.208, "step": 18050 }, { "epoch": 0.9886108525433938, "grad_norm": 0.1604415476322174, "learning_rate": 3.724802271344555e-05, "loss": 0.2182, "step": 18055 }, { "epoch": 0.9888846301264852, "grad_norm": 0.16614265739917755, "learning_rate": 3.7242952747921315e-05, "loss": 0.2207, "step": 18060 }, { "epoch": 0.9891584077095767, "grad_norm": 0.14716626703739166, "learning_rate": 3.7237882782397085e-05, "loss": 0.216, "step": 18065 }, { "epoch": 0.9894321852926682, "grad_norm": 0.13162265717983246, "learning_rate": 3.723281281687285e-05, "loss": 0.2144, "step": 18070 }, { "epoch": 0.9897059628757597, "grad_norm": 0.13948954641819, "learning_rate": 3.722774285134861e-05, "loss": 0.2246, "step": 18075 }, { "epoch": 0.9899797404588512, "grad_norm": 0.1503428965806961, "learning_rate": 3.7222672885824375e-05, "loss": 0.222, "step": 18080 }, { "epoch": 0.9902535180419427, "grad_norm": 0.17745837569236755, "learning_rate": 3.7217602920300145e-05, "loss": 0.2155, "step": 18085 }, { "epoch": 0.9905272956250343, "grad_norm": 0.20850731432437897, "learning_rate": 3.721253295477591e-05, "loss": 0.2155, "step": 18090 }, { "epoch": 0.9908010732081257, "grad_norm": 0.1863258332014084, "learning_rate": 3.720746298925167e-05, "loss": 0.2187, "step": 18095 }, { "epoch": 0.9910748507912173, "grad_norm": 0.1474943906068802, "learning_rate": 3.720239302372744e-05, "loss": 0.2065, "step": 18100 }, { "epoch": 0.9913486283743087, "grad_norm": 0.15810401737689972, "learning_rate": 3.7197323058203205e-05, "loss": 0.2247, "step": 18105 }, { "epoch": 0.9916224059574003, "grad_norm": 0.19687169790267944, "learning_rate": 3.719225309267897e-05, "loss": 0.2257, "step": 18110 }, { "epoch": 0.9918961835404917, "grad_norm": 0.1499086320400238, "learning_rate": 3.718718312715473e-05, "loss": 0.2121, "step": 18115 }, { "epoch": 0.9921699611235832, "grad_norm": 0.15015040338039398, "learning_rate": 3.718211316163051e-05, "loss": 0.2226, "step": 18120 }, { "epoch": 0.9924437387066747, "grad_norm": 0.16505759954452515, "learning_rate": 3.717704319610627e-05, "loss": 0.226, "step": 18125 }, { "epoch": 0.9927175162897662, "grad_norm": 0.14342671632766724, "learning_rate": 3.7171973230582035e-05, "loss": 0.2115, "step": 18130 }, { "epoch": 0.9929912938728577, "grad_norm": 0.15615695714950562, "learning_rate": 3.71669032650578e-05, "loss": 0.2234, "step": 18135 }, { "epoch": 0.9932650714559492, "grad_norm": 0.1549229770898819, "learning_rate": 3.716183329953357e-05, "loss": 0.2168, "step": 18140 }, { "epoch": 0.9935388490390407, "grad_norm": 0.14987163245677948, "learning_rate": 3.715676333400933e-05, "loss": 0.2223, "step": 18145 }, { "epoch": 0.9938126266221322, "grad_norm": 0.1730414181947708, "learning_rate": 3.7151693368485095e-05, "loss": 0.226, "step": 18150 }, { "epoch": 0.9940864042052237, "grad_norm": 0.15811602771282196, "learning_rate": 3.7146623402960865e-05, "loss": 0.2204, "step": 18155 }, { "epoch": 0.9943601817883152, "grad_norm": 0.16567541658878326, "learning_rate": 3.714155343743663e-05, "loss": 0.228, "step": 18160 }, { "epoch": 0.9946339593714066, "grad_norm": 0.19178390502929688, "learning_rate": 3.713648347191239e-05, "loss": 0.2136, "step": 18165 }, { "epoch": 0.9949077369544982, "grad_norm": 0.1682128757238388, "learning_rate": 3.7131413506388155e-05, "loss": 0.2251, "step": 18170 }, { "epoch": 0.9951815145375896, "grad_norm": 0.16027940809726715, "learning_rate": 3.7126343540863925e-05, "loss": 0.2159, "step": 18175 }, { "epoch": 0.9954552921206812, "grad_norm": 0.16098453104496002, "learning_rate": 3.712127357533969e-05, "loss": 0.2234, "step": 18180 }, { "epoch": 0.9957290697037726, "grad_norm": 0.14860285818576813, "learning_rate": 3.711620360981545e-05, "loss": 0.2139, "step": 18185 }, { "epoch": 0.9960028472868642, "grad_norm": 0.15215067565441132, "learning_rate": 3.7111133644291215e-05, "loss": 0.2188, "step": 18190 }, { "epoch": 0.9962766248699556, "grad_norm": 0.15094195306301117, "learning_rate": 3.7106063678766985e-05, "loss": 0.2201, "step": 18195 }, { "epoch": 0.9965504024530472, "grad_norm": 0.1393614113330841, "learning_rate": 3.7100993713242755e-05, "loss": 0.2122, "step": 18200 }, { "epoch": 0.9968241800361386, "grad_norm": 0.1538146287202835, "learning_rate": 3.709592374771852e-05, "loss": 0.2184, "step": 18205 }, { "epoch": 0.9970979576192301, "grad_norm": 0.15885667502880096, "learning_rate": 3.709085378219428e-05, "loss": 0.2151, "step": 18210 }, { "epoch": 0.9973717352023216, "grad_norm": 0.15696464478969574, "learning_rate": 3.708578381667005e-05, "loss": 0.2214, "step": 18215 }, { "epoch": 0.9976455127854131, "grad_norm": 0.14793801307678223, "learning_rate": 3.7080713851145815e-05, "loss": 0.2263, "step": 18220 }, { "epoch": 0.9979192903685046, "grad_norm": 0.1266399770975113, "learning_rate": 3.707564388562158e-05, "loss": 0.2146, "step": 18225 }, { "epoch": 0.9981930679515961, "grad_norm": 0.16167579591274261, "learning_rate": 3.707057392009735e-05, "loss": 0.2186, "step": 18230 }, { "epoch": 0.9984668455346877, "grad_norm": 0.13550521433353424, "learning_rate": 3.706550395457311e-05, "loss": 0.2111, "step": 18235 }, { "epoch": 0.9987406231177791, "grad_norm": 0.155539408326149, "learning_rate": 3.7060433989048875e-05, "loss": 0.2119, "step": 18240 }, { "epoch": 0.9990144007008707, "grad_norm": 0.1475057452917099, "learning_rate": 3.705536402352464e-05, "loss": 0.2146, "step": 18245 }, { "epoch": 0.9992881782839621, "grad_norm": 0.14857400953769684, "learning_rate": 3.705029405800041e-05, "loss": 0.2251, "step": 18250 }, { "epoch": 0.9995619558670537, "grad_norm": 0.1443057507276535, "learning_rate": 3.704522409247617e-05, "loss": 0.2199, "step": 18255 }, { "epoch": 0.9998357334501451, "grad_norm": 0.14729659259319305, "learning_rate": 3.7040154126951935e-05, "loss": 0.2192, "step": 18260 }, { "epoch": 1.0001095110332365, "grad_norm": 0.16777586936950684, "learning_rate": 3.7035084161427705e-05, "loss": 0.2258, "step": 18265 }, { "epoch": 1.0003832886163282, "grad_norm": 0.16856320202350616, "learning_rate": 3.703001419590347e-05, "loss": 0.2032, "step": 18270 }, { "epoch": 1.0006570661994196, "grad_norm": 0.16187945008277893, "learning_rate": 3.702494423037923e-05, "loss": 0.2114, "step": 18275 }, { "epoch": 1.000930843782511, "grad_norm": 0.1532398760318756, "learning_rate": 3.7019874264855e-05, "loss": 0.2087, "step": 18280 }, { "epoch": 1.0012046213656025, "grad_norm": 0.15616607666015625, "learning_rate": 3.701480429933077e-05, "loss": 0.2092, "step": 18285 }, { "epoch": 1.0014783989486942, "grad_norm": 0.12265722453594208, "learning_rate": 3.7009734333806535e-05, "loss": 0.1953, "step": 18290 }, { "epoch": 1.0017521765317856, "grad_norm": 0.14180618524551392, "learning_rate": 3.70046643682823e-05, "loss": 0.2044, "step": 18295 }, { "epoch": 1.002025954114877, "grad_norm": 0.16516220569610596, "learning_rate": 3.699959440275806e-05, "loss": 0.2112, "step": 18300 }, { "epoch": 1.0022997316979685, "grad_norm": 0.13626329600811005, "learning_rate": 3.699452443723383e-05, "loss": 0.2048, "step": 18305 }, { "epoch": 1.0025735092810601, "grad_norm": 0.15853090584278107, "learning_rate": 3.6989454471709595e-05, "loss": 0.2122, "step": 18310 }, { "epoch": 1.0028472868641516, "grad_norm": 0.15302379429340363, "learning_rate": 3.698438450618536e-05, "loss": 0.2178, "step": 18315 }, { "epoch": 1.003121064447243, "grad_norm": 0.15159624814987183, "learning_rate": 3.697931454066112e-05, "loss": 0.209, "step": 18320 }, { "epoch": 1.0033948420303345, "grad_norm": 0.12110910564661026, "learning_rate": 3.697424457513689e-05, "loss": 0.1994, "step": 18325 }, { "epoch": 1.0036686196134261, "grad_norm": 0.13199487328529358, "learning_rate": 3.6969174609612655e-05, "loss": 0.2005, "step": 18330 }, { "epoch": 1.0039423971965176, "grad_norm": 0.1688588559627533, "learning_rate": 3.696410464408842e-05, "loss": 0.2116, "step": 18335 }, { "epoch": 1.004216174779609, "grad_norm": 0.13383859395980835, "learning_rate": 3.695903467856419e-05, "loss": 0.2012, "step": 18340 }, { "epoch": 1.0044899523627004, "grad_norm": 0.15115483105182648, "learning_rate": 3.695396471303995e-05, "loss": 0.2019, "step": 18345 }, { "epoch": 1.004763729945792, "grad_norm": 0.1472679227590561, "learning_rate": 3.6948894747515715e-05, "loss": 0.21, "step": 18350 }, { "epoch": 1.0050375075288835, "grad_norm": 0.11360350251197815, "learning_rate": 3.694382478199148e-05, "loss": 0.2058, "step": 18355 }, { "epoch": 1.005311285111975, "grad_norm": 0.1279279738664627, "learning_rate": 3.6938754816467255e-05, "loss": 0.2103, "step": 18360 }, { "epoch": 1.0055850626950664, "grad_norm": 0.12444118410348892, "learning_rate": 3.693368485094302e-05, "loss": 0.2108, "step": 18365 }, { "epoch": 1.005858840278158, "grad_norm": 0.14699965715408325, "learning_rate": 3.692861488541878e-05, "loss": 0.2051, "step": 18370 }, { "epoch": 1.0061326178612495, "grad_norm": 0.1389477401971817, "learning_rate": 3.6923544919894545e-05, "loss": 0.2052, "step": 18375 }, { "epoch": 1.006406395444341, "grad_norm": 0.12104221433401108, "learning_rate": 3.6918474954370315e-05, "loss": 0.2072, "step": 18380 }, { "epoch": 1.0066801730274326, "grad_norm": 0.1404060274362564, "learning_rate": 3.691340498884608e-05, "loss": 0.2117, "step": 18385 }, { "epoch": 1.006953950610524, "grad_norm": 0.16037507355213165, "learning_rate": 3.690833502332184e-05, "loss": 0.2092, "step": 18390 }, { "epoch": 1.0072277281936155, "grad_norm": 0.13531485199928284, "learning_rate": 3.690326505779761e-05, "loss": 0.2046, "step": 18395 }, { "epoch": 1.007501505776707, "grad_norm": 0.1406669020652771, "learning_rate": 3.6898195092273375e-05, "loss": 0.2201, "step": 18400 }, { "epoch": 1.0077752833597986, "grad_norm": 0.13714036345481873, "learning_rate": 3.689312512674914e-05, "loss": 0.1959, "step": 18405 }, { "epoch": 1.00804906094289, "grad_norm": 0.14791980385780334, "learning_rate": 3.68880551612249e-05, "loss": 0.2012, "step": 18410 }, { "epoch": 1.0083228385259815, "grad_norm": 0.15179075300693512, "learning_rate": 3.688298519570067e-05, "loss": 0.2122, "step": 18415 }, { "epoch": 1.008596616109073, "grad_norm": 0.20948199927806854, "learning_rate": 3.6877915230176435e-05, "loss": 0.2069, "step": 18420 }, { "epoch": 1.0088703936921646, "grad_norm": 0.16656258702278137, "learning_rate": 3.68728452646522e-05, "loss": 0.2108, "step": 18425 }, { "epoch": 1.009144171275256, "grad_norm": 0.1477055847644806, "learning_rate": 3.686777529912797e-05, "loss": 0.2067, "step": 18430 }, { "epoch": 1.0094179488583475, "grad_norm": 0.14299114048480988, "learning_rate": 3.686270533360373e-05, "loss": 0.2112, "step": 18435 }, { "epoch": 1.009691726441439, "grad_norm": 0.17512738704681396, "learning_rate": 3.6857635368079495e-05, "loss": 0.208, "step": 18440 }, { "epoch": 1.0099655040245306, "grad_norm": 0.17456930875778198, "learning_rate": 3.6852565402555265e-05, "loss": 0.2077, "step": 18445 }, { "epoch": 1.010239281607622, "grad_norm": 0.14912256598472595, "learning_rate": 3.684749543703103e-05, "loss": 0.2063, "step": 18450 }, { "epoch": 1.0105130591907134, "grad_norm": 0.14231987297534943, "learning_rate": 3.68424254715068e-05, "loss": 0.2032, "step": 18455 }, { "epoch": 1.0107868367738049, "grad_norm": 0.1558835357427597, "learning_rate": 3.683735550598256e-05, "loss": 0.2158, "step": 18460 }, { "epoch": 1.0110606143568965, "grad_norm": 0.13967055082321167, "learning_rate": 3.6832285540458325e-05, "loss": 0.1956, "step": 18465 }, { "epoch": 1.011334391939988, "grad_norm": 0.16307418048381805, "learning_rate": 3.6827215574934096e-05, "loss": 0.2021, "step": 18470 }, { "epoch": 1.0116081695230794, "grad_norm": 0.11960191279649734, "learning_rate": 3.682214560940986e-05, "loss": 0.2035, "step": 18475 }, { "epoch": 1.0118819471061709, "grad_norm": 0.1572525054216385, "learning_rate": 3.681707564388562e-05, "loss": 0.1981, "step": 18480 }, { "epoch": 1.0121557246892625, "grad_norm": 0.1396472156047821, "learning_rate": 3.6812005678361385e-05, "loss": 0.2035, "step": 18485 }, { "epoch": 1.012429502272354, "grad_norm": 0.15012119710445404, "learning_rate": 3.6806935712837156e-05, "loss": 0.21, "step": 18490 }, { "epoch": 1.0127032798554454, "grad_norm": 0.14912483096122742, "learning_rate": 3.680186574731292e-05, "loss": 0.2052, "step": 18495 }, { "epoch": 1.0129770574385368, "grad_norm": 0.13140396773815155, "learning_rate": 3.679679578178868e-05, "loss": 0.2032, "step": 18500 }, { "epoch": 1.0132508350216285, "grad_norm": 0.13762223720550537, "learning_rate": 3.679172581626445e-05, "loss": 0.2081, "step": 18505 }, { "epoch": 1.01352461260472, "grad_norm": 0.13131089508533478, "learning_rate": 3.6786655850740216e-05, "loss": 0.2002, "step": 18510 }, { "epoch": 1.0137983901878114, "grad_norm": 0.16487038135528564, "learning_rate": 3.678158588521598e-05, "loss": 0.2067, "step": 18515 }, { "epoch": 1.0140721677709028, "grad_norm": 0.14064787328243256, "learning_rate": 3.677651591969174e-05, "loss": 0.2116, "step": 18520 }, { "epoch": 1.0143459453539945, "grad_norm": 0.15435636043548584, "learning_rate": 3.677144595416752e-05, "loss": 0.2063, "step": 18525 }, { "epoch": 1.014619722937086, "grad_norm": 0.16731925308704376, "learning_rate": 3.676637598864328e-05, "loss": 0.2067, "step": 18530 }, { "epoch": 1.0148935005201773, "grad_norm": 0.14527012407779694, "learning_rate": 3.6761306023119046e-05, "loss": 0.2104, "step": 18535 }, { "epoch": 1.015167278103269, "grad_norm": 0.15301866829395294, "learning_rate": 3.675623605759481e-05, "loss": 0.2031, "step": 18540 }, { "epoch": 1.0154410556863604, "grad_norm": 0.16032902896404266, "learning_rate": 3.675116609207058e-05, "loss": 0.204, "step": 18545 }, { "epoch": 1.0157148332694519, "grad_norm": 0.14000341296195984, "learning_rate": 3.674609612654634e-05, "loss": 0.2061, "step": 18550 }, { "epoch": 1.0159886108525433, "grad_norm": 0.1619243174791336, "learning_rate": 3.6741026161022106e-05, "loss": 0.2097, "step": 18555 }, { "epoch": 1.016262388435635, "grad_norm": 0.15460924804210663, "learning_rate": 3.6735956195497876e-05, "loss": 0.2024, "step": 18560 }, { "epoch": 1.0165361660187264, "grad_norm": 0.162000373005867, "learning_rate": 3.673088622997364e-05, "loss": 0.2078, "step": 18565 }, { "epoch": 1.0168099436018179, "grad_norm": 0.13190124928951263, "learning_rate": 3.67258162644494e-05, "loss": 0.2053, "step": 18570 }, { "epoch": 1.0170837211849093, "grad_norm": 0.1537695676088333, "learning_rate": 3.6720746298925166e-05, "loss": 0.2017, "step": 18575 }, { "epoch": 1.017357498768001, "grad_norm": 0.21559584140777588, "learning_rate": 3.6715676333400936e-05, "loss": 0.2112, "step": 18580 }, { "epoch": 1.0176312763510924, "grad_norm": 0.1516471952199936, "learning_rate": 3.67106063678767e-05, "loss": 0.2034, "step": 18585 }, { "epoch": 1.0179050539341838, "grad_norm": 0.12671098113059998, "learning_rate": 3.670553640235246e-05, "loss": 0.2066, "step": 18590 }, { "epoch": 1.0181788315172753, "grad_norm": 0.1392461508512497, "learning_rate": 3.670046643682823e-05, "loss": 0.2069, "step": 18595 }, { "epoch": 1.018452609100367, "grad_norm": 0.15835785865783691, "learning_rate": 3.6695396471303996e-05, "loss": 0.2096, "step": 18600 }, { "epoch": 1.0187263866834584, "grad_norm": 0.1580328494310379, "learning_rate": 3.6690326505779766e-05, "loss": 0.2075, "step": 18605 }, { "epoch": 1.0190001642665498, "grad_norm": 0.1486247032880783, "learning_rate": 3.668525654025553e-05, "loss": 0.204, "step": 18610 }, { "epoch": 1.0192739418496413, "grad_norm": 0.15802675485610962, "learning_rate": 3.668018657473129e-05, "loss": 0.2087, "step": 18615 }, { "epoch": 1.019547719432733, "grad_norm": 0.143171489238739, "learning_rate": 3.667511660920706e-05, "loss": 0.2064, "step": 18620 }, { "epoch": 1.0198214970158244, "grad_norm": 0.1243283599615097, "learning_rate": 3.6670046643682826e-05, "loss": 0.2053, "step": 18625 }, { "epoch": 1.0200952745989158, "grad_norm": 0.14466547966003418, "learning_rate": 3.666497667815859e-05, "loss": 0.1981, "step": 18630 }, { "epoch": 1.0203690521820072, "grad_norm": 0.156173974275589, "learning_rate": 3.665990671263436e-05, "loss": 0.2022, "step": 18635 }, { "epoch": 1.020642829765099, "grad_norm": 0.13861772418022156, "learning_rate": 3.665483674711012e-05, "loss": 0.2131, "step": 18640 }, { "epoch": 1.0209166073481903, "grad_norm": 0.14775125682353973, "learning_rate": 3.6649766781585886e-05, "loss": 0.2185, "step": 18645 }, { "epoch": 1.0211903849312818, "grad_norm": 0.13440604507923126, "learning_rate": 3.664469681606165e-05, "loss": 0.2103, "step": 18650 }, { "epoch": 1.0214641625143732, "grad_norm": 0.1297799050807953, "learning_rate": 3.663962685053742e-05, "loss": 0.2036, "step": 18655 }, { "epoch": 1.0217379400974649, "grad_norm": 0.135748952627182, "learning_rate": 3.663455688501318e-05, "loss": 0.2007, "step": 18660 }, { "epoch": 1.0220117176805563, "grad_norm": 0.14561782777309418, "learning_rate": 3.6629486919488946e-05, "loss": 0.2018, "step": 18665 }, { "epoch": 1.0222854952636478, "grad_norm": 0.14544403553009033, "learning_rate": 3.6624416953964716e-05, "loss": 0.197, "step": 18670 }, { "epoch": 1.0225592728467392, "grad_norm": 0.15765605866909027, "learning_rate": 3.661934698844048e-05, "loss": 0.2071, "step": 18675 }, { "epoch": 1.0228330504298309, "grad_norm": 0.1693091094493866, "learning_rate": 3.661427702291624e-05, "loss": 0.2064, "step": 18680 }, { "epoch": 1.0231068280129223, "grad_norm": 0.13012662529945374, "learning_rate": 3.6609207057392006e-05, "loss": 0.202, "step": 18685 }, { "epoch": 1.0233806055960137, "grad_norm": 0.1513783484697342, "learning_rate": 3.660413709186778e-05, "loss": 0.2037, "step": 18690 }, { "epoch": 1.0236543831791054, "grad_norm": 0.14828583598136902, "learning_rate": 3.6599067126343546e-05, "loss": 0.2068, "step": 18695 }, { "epoch": 1.0239281607621968, "grad_norm": 0.15859778225421906, "learning_rate": 3.659399716081931e-05, "loss": 0.1962, "step": 18700 }, { "epoch": 1.0242019383452883, "grad_norm": 0.17081578075885773, "learning_rate": 3.658892719529507e-05, "loss": 0.2094, "step": 18705 }, { "epoch": 1.0244757159283797, "grad_norm": 0.17053017020225525, "learning_rate": 3.658385722977084e-05, "loss": 0.2074, "step": 18710 }, { "epoch": 1.0247494935114714, "grad_norm": 0.14462518692016602, "learning_rate": 3.6578787264246606e-05, "loss": 0.1998, "step": 18715 }, { "epoch": 1.0250232710945628, "grad_norm": 0.12438582628965378, "learning_rate": 3.657371729872237e-05, "loss": 0.2067, "step": 18720 }, { "epoch": 1.0252970486776543, "grad_norm": 0.13638712465763092, "learning_rate": 3.656864733319814e-05, "loss": 0.2121, "step": 18725 }, { "epoch": 1.0255708262607457, "grad_norm": 0.14048507809638977, "learning_rate": 3.65635773676739e-05, "loss": 0.209, "step": 18730 }, { "epoch": 1.0258446038438374, "grad_norm": 0.11694271862506866, "learning_rate": 3.6558507402149666e-05, "loss": 0.2002, "step": 18735 }, { "epoch": 1.0261183814269288, "grad_norm": 0.13932648301124573, "learning_rate": 3.655343743662543e-05, "loss": 0.2112, "step": 18740 }, { "epoch": 1.0263921590100202, "grad_norm": 0.1477651745080948, "learning_rate": 3.65483674711012e-05, "loss": 0.2138, "step": 18745 }, { "epoch": 1.0266659365931117, "grad_norm": 0.16646060347557068, "learning_rate": 3.654329750557696e-05, "loss": 0.2149, "step": 18750 }, { "epoch": 1.0269397141762033, "grad_norm": 0.1487075239419937, "learning_rate": 3.6538227540052726e-05, "loss": 0.2103, "step": 18755 }, { "epoch": 1.0272134917592948, "grad_norm": 0.12482494860887527, "learning_rate": 3.6533157574528496e-05, "loss": 0.2032, "step": 18760 }, { "epoch": 1.0274872693423862, "grad_norm": 0.1402807980775833, "learning_rate": 3.652808760900426e-05, "loss": 0.2035, "step": 18765 }, { "epoch": 1.0277610469254777, "grad_norm": 0.16905051469802856, "learning_rate": 3.652301764348003e-05, "loss": 0.2089, "step": 18770 }, { "epoch": 1.0280348245085693, "grad_norm": 0.15983349084854126, "learning_rate": 3.651794767795579e-05, "loss": 0.2141, "step": 18775 }, { "epoch": 1.0283086020916608, "grad_norm": 0.14347553253173828, "learning_rate": 3.6512877712431556e-05, "loss": 0.2017, "step": 18780 }, { "epoch": 1.0285823796747522, "grad_norm": 0.1374213695526123, "learning_rate": 3.6507807746907326e-05, "loss": 0.2013, "step": 18785 }, { "epoch": 1.0288561572578436, "grad_norm": 0.1371190845966339, "learning_rate": 3.650273778138309e-05, "loss": 0.2075, "step": 18790 }, { "epoch": 1.0291299348409353, "grad_norm": 0.1595194935798645, "learning_rate": 3.649766781585885e-05, "loss": 0.1985, "step": 18795 }, { "epoch": 1.0294037124240267, "grad_norm": 0.14497238397598267, "learning_rate": 3.649259785033462e-05, "loss": 0.2142, "step": 18800 }, { "epoch": 1.0296774900071182, "grad_norm": 0.14632026851177216, "learning_rate": 3.6487527884810386e-05, "loss": 0.211, "step": 18805 }, { "epoch": 1.0299512675902096, "grad_norm": 0.14851436018943787, "learning_rate": 3.648245791928615e-05, "loss": 0.2053, "step": 18810 }, { "epoch": 1.0302250451733013, "grad_norm": 0.13360098004341125, "learning_rate": 3.647738795376191e-05, "loss": 0.197, "step": 18815 }, { "epoch": 1.0304988227563927, "grad_norm": 0.1422709822654724, "learning_rate": 3.647231798823768e-05, "loss": 0.2021, "step": 18820 }, { "epoch": 1.0307726003394841, "grad_norm": 0.14647619426250458, "learning_rate": 3.6467248022713446e-05, "loss": 0.1953, "step": 18825 }, { "epoch": 1.0310463779225758, "grad_norm": 0.13732847571372986, "learning_rate": 3.646217805718921e-05, "loss": 0.2068, "step": 18830 }, { "epoch": 1.0313201555056672, "grad_norm": 0.1286146491765976, "learning_rate": 3.645710809166498e-05, "loss": 0.212, "step": 18835 }, { "epoch": 1.0315939330887587, "grad_norm": 0.16035489737987518, "learning_rate": 3.645203812614074e-05, "loss": 0.2062, "step": 18840 }, { "epoch": 1.0318677106718501, "grad_norm": 0.1893140971660614, "learning_rate": 3.6446968160616506e-05, "loss": 0.2183, "step": 18845 }, { "epoch": 1.0321414882549418, "grad_norm": 0.13673391938209534, "learning_rate": 3.6441898195092276e-05, "loss": 0.21, "step": 18850 }, { "epoch": 1.0324152658380332, "grad_norm": 0.13034041225910187, "learning_rate": 3.6436828229568046e-05, "loss": 0.2084, "step": 18855 }, { "epoch": 1.0326890434211247, "grad_norm": 0.12866255640983582, "learning_rate": 3.643175826404381e-05, "loss": 0.1958, "step": 18860 }, { "epoch": 1.032962821004216, "grad_norm": 0.14989235997200012, "learning_rate": 3.642668829851957e-05, "loss": 0.2101, "step": 18865 }, { "epoch": 1.0332365985873078, "grad_norm": 0.13463009893894196, "learning_rate": 3.6421618332995336e-05, "loss": 0.2055, "step": 18870 }, { "epoch": 1.0335103761703992, "grad_norm": 0.14239566028118134, "learning_rate": 3.6416548367471106e-05, "loss": 0.1967, "step": 18875 }, { "epoch": 1.0337841537534906, "grad_norm": 0.13368506729602814, "learning_rate": 3.641147840194687e-05, "loss": 0.1988, "step": 18880 }, { "epoch": 1.034057931336582, "grad_norm": 0.20776519179344177, "learning_rate": 3.640640843642263e-05, "loss": 0.2096, "step": 18885 }, { "epoch": 1.0343317089196737, "grad_norm": 0.16228827834129333, "learning_rate": 3.64013384708984e-05, "loss": 0.2028, "step": 18890 }, { "epoch": 1.0346054865027652, "grad_norm": 0.16319593787193298, "learning_rate": 3.6396268505374166e-05, "loss": 0.2176, "step": 18895 }, { "epoch": 1.0348792640858566, "grad_norm": 0.1436714380979538, "learning_rate": 3.639119853984993e-05, "loss": 0.2035, "step": 18900 }, { "epoch": 1.035153041668948, "grad_norm": 0.14988164603710175, "learning_rate": 3.638612857432569e-05, "loss": 0.1957, "step": 18905 }, { "epoch": 1.0354268192520397, "grad_norm": 0.1385890692472458, "learning_rate": 3.638105860880146e-05, "loss": 0.2015, "step": 18910 }, { "epoch": 1.0357005968351312, "grad_norm": 0.143193319439888, "learning_rate": 3.6375988643277226e-05, "loss": 0.2079, "step": 18915 }, { "epoch": 1.0359743744182226, "grad_norm": 0.1304965764284134, "learning_rate": 3.637091867775299e-05, "loss": 0.2044, "step": 18920 }, { "epoch": 1.036248152001314, "grad_norm": 0.15698789060115814, "learning_rate": 3.636584871222875e-05, "loss": 0.1996, "step": 18925 }, { "epoch": 1.0365219295844057, "grad_norm": 0.13802029192447662, "learning_rate": 3.636077874670453e-05, "loss": 0.2044, "step": 18930 }, { "epoch": 1.0367957071674971, "grad_norm": 0.17372006177902222, "learning_rate": 3.635570878118029e-05, "loss": 0.2147, "step": 18935 }, { "epoch": 1.0370694847505886, "grad_norm": 0.16933295130729675, "learning_rate": 3.6350638815656056e-05, "loss": 0.2126, "step": 18940 }, { "epoch": 1.03734326233368, "grad_norm": 0.14304906129837036, "learning_rate": 3.634556885013182e-05, "loss": 0.1934, "step": 18945 }, { "epoch": 1.0376170399167717, "grad_norm": 0.16112130880355835, "learning_rate": 3.634049888460759e-05, "loss": 0.2091, "step": 18950 }, { "epoch": 1.0378908174998631, "grad_norm": 0.1622675657272339, "learning_rate": 3.633542891908335e-05, "loss": 0.2129, "step": 18955 }, { "epoch": 1.0381645950829546, "grad_norm": 0.1360224336385727, "learning_rate": 3.6330358953559116e-05, "loss": 0.2083, "step": 18960 }, { "epoch": 1.0384383726660462, "grad_norm": 0.1334448605775833, "learning_rate": 3.6325288988034886e-05, "loss": 0.2002, "step": 18965 }, { "epoch": 1.0387121502491377, "grad_norm": 0.14703282713890076, "learning_rate": 3.632021902251065e-05, "loss": 0.2046, "step": 18970 }, { "epoch": 1.038985927832229, "grad_norm": 0.13058246672153473, "learning_rate": 3.631514905698641e-05, "loss": 0.2039, "step": 18975 }, { "epoch": 1.0392597054153205, "grad_norm": 0.1238250657916069, "learning_rate": 3.6310079091462176e-05, "loss": 0.206, "step": 18980 }, { "epoch": 1.0395334829984122, "grad_norm": 0.12379197031259537, "learning_rate": 3.6305009125937946e-05, "loss": 0.1948, "step": 18985 }, { "epoch": 1.0398072605815036, "grad_norm": 0.14417409896850586, "learning_rate": 3.629993916041371e-05, "loss": 0.1995, "step": 18990 }, { "epoch": 1.040081038164595, "grad_norm": 0.12638117372989655, "learning_rate": 3.629486919488947e-05, "loss": 0.201, "step": 18995 }, { "epoch": 1.0403548157476865, "grad_norm": 0.13562533259391785, "learning_rate": 3.628979922936524e-05, "loss": 0.2032, "step": 19000 }, { "epoch": 1.0406285933307782, "grad_norm": 0.14219249784946442, "learning_rate": 3.6284729263841006e-05, "loss": 0.2044, "step": 19005 }, { "epoch": 1.0409023709138696, "grad_norm": 0.13085268437862396, "learning_rate": 3.627965929831677e-05, "loss": 0.2085, "step": 19010 }, { "epoch": 1.041176148496961, "grad_norm": 0.1364579051733017, "learning_rate": 3.627458933279254e-05, "loss": 0.1967, "step": 19015 }, { "epoch": 1.0414499260800525, "grad_norm": 0.1426170915365219, "learning_rate": 3.626951936726831e-05, "loss": 0.2035, "step": 19020 }, { "epoch": 1.0417237036631442, "grad_norm": 0.12408050149679184, "learning_rate": 3.626444940174407e-05, "loss": 0.2026, "step": 19025 }, { "epoch": 1.0419974812462356, "grad_norm": 0.13991022109985352, "learning_rate": 3.6259379436219836e-05, "loss": 0.2042, "step": 19030 }, { "epoch": 1.042271258829327, "grad_norm": 0.12717309594154358, "learning_rate": 3.62543094706956e-05, "loss": 0.206, "step": 19035 }, { "epoch": 1.0425450364124185, "grad_norm": 0.13519492745399475, "learning_rate": 3.624923950517137e-05, "loss": 0.2082, "step": 19040 }, { "epoch": 1.0428188139955101, "grad_norm": 0.13699868321418762, "learning_rate": 3.624416953964713e-05, "loss": 0.2058, "step": 19045 }, { "epoch": 1.0430925915786016, "grad_norm": 0.14713184535503387, "learning_rate": 3.6239099574122896e-05, "loss": 0.2126, "step": 19050 }, { "epoch": 1.043366369161693, "grad_norm": 0.1510896384716034, "learning_rate": 3.623402960859866e-05, "loss": 0.2062, "step": 19055 }, { "epoch": 1.0436401467447844, "grad_norm": 0.19785679876804352, "learning_rate": 3.622895964307443e-05, "loss": 0.2109, "step": 19060 }, { "epoch": 1.043913924327876, "grad_norm": 0.14504510164260864, "learning_rate": 3.622388967755019e-05, "loss": 0.2159, "step": 19065 }, { "epoch": 1.0441877019109675, "grad_norm": 0.1369822472333908, "learning_rate": 3.6218819712025956e-05, "loss": 0.2063, "step": 19070 }, { "epoch": 1.044461479494059, "grad_norm": 0.13281512260437012, "learning_rate": 3.6213749746501726e-05, "loss": 0.2102, "step": 19075 }, { "epoch": 1.0447352570771504, "grad_norm": 0.12296779453754425, "learning_rate": 3.620867978097749e-05, "loss": 0.2019, "step": 19080 }, { "epoch": 1.045009034660242, "grad_norm": 0.13091681897640228, "learning_rate": 3.620360981545325e-05, "loss": 0.2124, "step": 19085 }, { "epoch": 1.0452828122433335, "grad_norm": 0.1336006224155426, "learning_rate": 3.6198539849929016e-05, "loss": 0.2083, "step": 19090 }, { "epoch": 1.045556589826425, "grad_norm": 0.12995365262031555, "learning_rate": 3.619346988440479e-05, "loss": 0.2125, "step": 19095 }, { "epoch": 1.0458303674095164, "grad_norm": 0.1176975890994072, "learning_rate": 3.6188399918880557e-05, "loss": 0.2037, "step": 19100 }, { "epoch": 1.046104144992608, "grad_norm": 0.16116447746753693, "learning_rate": 3.618332995335632e-05, "loss": 0.2054, "step": 19105 }, { "epoch": 1.0463779225756995, "grad_norm": 0.1312125325202942, "learning_rate": 3.617825998783208e-05, "loss": 0.1983, "step": 19110 }, { "epoch": 1.046651700158791, "grad_norm": 0.13323016464710236, "learning_rate": 3.617319002230785e-05, "loss": 0.2074, "step": 19115 }, { "epoch": 1.0469254777418824, "grad_norm": 0.1572709083557129, "learning_rate": 3.6168120056783617e-05, "loss": 0.2063, "step": 19120 }, { "epoch": 1.047199255324974, "grad_norm": 0.14193195104599, "learning_rate": 3.616305009125938e-05, "loss": 0.2071, "step": 19125 }, { "epoch": 1.0474730329080655, "grad_norm": 0.14207710325717926, "learning_rate": 3.615798012573515e-05, "loss": 0.2077, "step": 19130 }, { "epoch": 1.047746810491157, "grad_norm": 0.14431171119213104, "learning_rate": 3.615291016021091e-05, "loss": 0.199, "step": 19135 }, { "epoch": 1.0480205880742486, "grad_norm": 0.1510278880596161, "learning_rate": 3.6147840194686677e-05, "loss": 0.2073, "step": 19140 }, { "epoch": 1.04829436565734, "grad_norm": 0.1502266824245453, "learning_rate": 3.614277022916244e-05, "loss": 0.2035, "step": 19145 }, { "epoch": 1.0485681432404315, "grad_norm": 0.1387127935886383, "learning_rate": 3.613770026363821e-05, "loss": 0.2002, "step": 19150 }, { "epoch": 1.048841920823523, "grad_norm": 0.16845203936100006, "learning_rate": 3.613263029811397e-05, "loss": 0.2025, "step": 19155 }, { "epoch": 1.0491156984066146, "grad_norm": 0.21279557049274445, "learning_rate": 3.6127560332589737e-05, "loss": 0.2151, "step": 19160 }, { "epoch": 1.049389475989706, "grad_norm": 0.13507185876369476, "learning_rate": 3.612249036706551e-05, "loss": 0.2017, "step": 19165 }, { "epoch": 1.0496632535727974, "grad_norm": 0.15202677249908447, "learning_rate": 3.611742040154127e-05, "loss": 0.2143, "step": 19170 }, { "epoch": 1.0499370311558889, "grad_norm": 0.13598407804965973, "learning_rate": 3.611235043601704e-05, "loss": 0.2004, "step": 19175 }, { "epoch": 1.0502108087389805, "grad_norm": 0.1558222472667694, "learning_rate": 3.61072804704928e-05, "loss": 0.2189, "step": 19180 }, { "epoch": 1.050484586322072, "grad_norm": 0.11776795983314514, "learning_rate": 3.6102210504968567e-05, "loss": 0.203, "step": 19185 }, { "epoch": 1.0507583639051634, "grad_norm": 0.1538064330816269, "learning_rate": 3.609714053944434e-05, "loss": 0.2128, "step": 19190 }, { "epoch": 1.0510321414882549, "grad_norm": 0.15459249913692474, "learning_rate": 3.60920705739201e-05, "loss": 0.2116, "step": 19195 }, { "epoch": 1.0513059190713465, "grad_norm": 0.13325007259845734, "learning_rate": 3.608700060839586e-05, "loss": 0.2057, "step": 19200 }, { "epoch": 1.051579696654438, "grad_norm": 0.1417926549911499, "learning_rate": 3.608193064287163e-05, "loss": 0.2065, "step": 19205 }, { "epoch": 1.0518534742375294, "grad_norm": 0.17645637691020966, "learning_rate": 3.60768606773474e-05, "loss": 0.2108, "step": 19210 }, { "epoch": 1.0521272518206208, "grad_norm": 0.13931284844875336, "learning_rate": 3.607179071182316e-05, "loss": 0.1948, "step": 19215 }, { "epoch": 1.0524010294037125, "grad_norm": 0.13118235766887665, "learning_rate": 3.606672074629892e-05, "loss": 0.2135, "step": 19220 }, { "epoch": 1.052674806986804, "grad_norm": 0.11811834573745728, "learning_rate": 3.606165078077469e-05, "loss": 0.2026, "step": 19225 }, { "epoch": 1.0529485845698954, "grad_norm": 0.1404995173215866, "learning_rate": 3.605658081525046e-05, "loss": 0.2029, "step": 19230 }, { "epoch": 1.0532223621529868, "grad_norm": 0.13981306552886963, "learning_rate": 3.605151084972622e-05, "loss": 0.2065, "step": 19235 }, { "epoch": 1.0534961397360785, "grad_norm": 0.14641182124614716, "learning_rate": 3.604644088420199e-05, "loss": 0.2004, "step": 19240 }, { "epoch": 1.05376991731917, "grad_norm": 0.14480096101760864, "learning_rate": 3.604137091867775e-05, "loss": 0.2126, "step": 19245 }, { "epoch": 1.0540436949022614, "grad_norm": 0.14534644782543182, "learning_rate": 3.603630095315352e-05, "loss": 0.2078, "step": 19250 }, { "epoch": 1.0543174724853528, "grad_norm": 0.14920197427272797, "learning_rate": 3.603123098762929e-05, "loss": 0.2102, "step": 19255 }, { "epoch": 1.0545912500684445, "grad_norm": 0.13006794452667236, "learning_rate": 3.602616102210506e-05, "loss": 0.1976, "step": 19260 }, { "epoch": 1.054865027651536, "grad_norm": 0.13770779967308044, "learning_rate": 3.602109105658082e-05, "loss": 0.1987, "step": 19265 }, { "epoch": 1.0551388052346273, "grad_norm": 0.12107174098491669, "learning_rate": 3.6016021091056583e-05, "loss": 0.2, "step": 19270 }, { "epoch": 1.055412582817719, "grad_norm": 0.1358148604631424, "learning_rate": 3.601095112553235e-05, "loss": 0.2097, "step": 19275 }, { "epoch": 1.0556863604008104, "grad_norm": 0.15156754851341248, "learning_rate": 3.600588116000812e-05, "loss": 0.2016, "step": 19280 }, { "epoch": 1.0559601379839019, "grad_norm": 0.13974258303642273, "learning_rate": 3.600081119448388e-05, "loss": 0.2091, "step": 19285 }, { "epoch": 1.0562339155669933, "grad_norm": 0.13845786452293396, "learning_rate": 3.5995741228959643e-05, "loss": 0.2071, "step": 19290 }, { "epoch": 1.056507693150085, "grad_norm": 0.13854101300239563, "learning_rate": 3.5990671263435414e-05, "loss": 0.2093, "step": 19295 }, { "epoch": 1.0567814707331764, "grad_norm": 0.16243192553520203, "learning_rate": 3.598560129791118e-05, "loss": 0.2066, "step": 19300 }, { "epoch": 1.0570552483162678, "grad_norm": 0.13347385823726654, "learning_rate": 3.598053133238694e-05, "loss": 0.2052, "step": 19305 }, { "epoch": 1.0573290258993593, "grad_norm": 0.13005809485912323, "learning_rate": 3.5975461366862703e-05, "loss": 0.2109, "step": 19310 }, { "epoch": 1.057602803482451, "grad_norm": 0.12953247129917145, "learning_rate": 3.5970391401338474e-05, "loss": 0.2017, "step": 19315 }, { "epoch": 1.0578765810655424, "grad_norm": 0.14543378353118896, "learning_rate": 3.596532143581424e-05, "loss": 0.213, "step": 19320 }, { "epoch": 1.0581503586486338, "grad_norm": 0.1550387293100357, "learning_rate": 3.596025147029e-05, "loss": 0.2016, "step": 19325 }, { "epoch": 1.0584241362317253, "grad_norm": 0.1534850150346756, "learning_rate": 3.595518150476577e-05, "loss": 0.209, "step": 19330 }, { "epoch": 1.058697913814817, "grad_norm": 0.15202030539512634, "learning_rate": 3.5950111539241534e-05, "loss": 0.2164, "step": 19335 }, { "epoch": 1.0589716913979084, "grad_norm": 0.12511785328388214, "learning_rate": 3.5945041573717304e-05, "loss": 0.2113, "step": 19340 }, { "epoch": 1.0592454689809998, "grad_norm": 0.13321490585803986, "learning_rate": 3.593997160819307e-05, "loss": 0.2056, "step": 19345 }, { "epoch": 1.0595192465640912, "grad_norm": 0.17792566120624542, "learning_rate": 3.593490164266883e-05, "loss": 0.2129, "step": 19350 }, { "epoch": 1.059793024147183, "grad_norm": 0.13869673013687134, "learning_rate": 3.59298316771446e-05, "loss": 0.2078, "step": 19355 }, { "epoch": 1.0600668017302743, "grad_norm": 0.15258614718914032, "learning_rate": 3.5924761711620364e-05, "loss": 0.214, "step": 19360 }, { "epoch": 1.0603405793133658, "grad_norm": 0.13542616367340088, "learning_rate": 3.591969174609613e-05, "loss": 0.2206, "step": 19365 }, { "epoch": 1.0606143568964572, "grad_norm": 0.12701670825481415, "learning_rate": 3.59146217805719e-05, "loss": 0.2102, "step": 19370 }, { "epoch": 1.0608881344795489, "grad_norm": 0.13823208212852478, "learning_rate": 3.590955181504766e-05, "loss": 0.2023, "step": 19375 }, { "epoch": 1.0611619120626403, "grad_norm": 0.1530332863330841, "learning_rate": 3.5904481849523424e-05, "loss": 0.2125, "step": 19380 }, { "epoch": 1.0614356896457318, "grad_norm": 0.1295684278011322, "learning_rate": 3.589941188399919e-05, "loss": 0.2041, "step": 19385 }, { "epoch": 1.0617094672288232, "grad_norm": 0.15135322511196136, "learning_rate": 3.589434191847496e-05, "loss": 0.2046, "step": 19390 }, { "epoch": 1.0619832448119149, "grad_norm": 0.17279861867427826, "learning_rate": 3.588927195295072e-05, "loss": 0.211, "step": 19395 }, { "epoch": 1.0622570223950063, "grad_norm": 0.16522563993930817, "learning_rate": 3.5884201987426484e-05, "loss": 0.1988, "step": 19400 }, { "epoch": 1.0625307999780977, "grad_norm": 0.1369892656803131, "learning_rate": 3.5879132021902254e-05, "loss": 0.1979, "step": 19405 }, { "epoch": 1.0628045775611894, "grad_norm": 0.14385156333446503, "learning_rate": 3.587406205637802e-05, "loss": 0.2113, "step": 19410 }, { "epoch": 1.0630783551442808, "grad_norm": 0.1290636658668518, "learning_rate": 3.586899209085378e-05, "loss": 0.2027, "step": 19415 }, { "epoch": 1.0633521327273723, "grad_norm": 0.16981685161590576, "learning_rate": 3.586392212532955e-05, "loss": 0.2137, "step": 19420 }, { "epoch": 1.0636259103104637, "grad_norm": 0.13036204874515533, "learning_rate": 3.585885215980532e-05, "loss": 0.1967, "step": 19425 }, { "epoch": 1.0638996878935554, "grad_norm": 0.13516667485237122, "learning_rate": 3.5853782194281084e-05, "loss": 0.2119, "step": 19430 }, { "epoch": 1.0641734654766468, "grad_norm": 0.14066748321056366, "learning_rate": 3.584871222875685e-05, "loss": 0.2085, "step": 19435 }, { "epoch": 1.0644472430597383, "grad_norm": 0.13182300329208374, "learning_rate": 3.584364226323261e-05, "loss": 0.2021, "step": 19440 }, { "epoch": 1.0647210206428297, "grad_norm": 0.15772859752178192, "learning_rate": 3.583857229770838e-05, "loss": 0.2102, "step": 19445 }, { "epoch": 1.0649947982259214, "grad_norm": 0.15234240889549255, "learning_rate": 3.5833502332184144e-05, "loss": 0.2128, "step": 19450 }, { "epoch": 1.0652685758090128, "grad_norm": 0.13226273655891418, "learning_rate": 3.582843236665991e-05, "loss": 0.2071, "step": 19455 }, { "epoch": 1.0655423533921042, "grad_norm": 0.13551786541938782, "learning_rate": 3.582336240113568e-05, "loss": 0.2039, "step": 19460 }, { "epoch": 1.0658161309751957, "grad_norm": 0.17316773533821106, "learning_rate": 3.581829243561144e-05, "loss": 0.2198, "step": 19465 }, { "epoch": 1.0660899085582873, "grad_norm": 0.14292259514331818, "learning_rate": 3.5813222470087204e-05, "loss": 0.1984, "step": 19470 }, { "epoch": 1.0663636861413788, "grad_norm": 0.15339456498622894, "learning_rate": 3.580815250456297e-05, "loss": 0.2079, "step": 19475 }, { "epoch": 1.0666374637244702, "grad_norm": 0.12175725400447845, "learning_rate": 3.580308253903874e-05, "loss": 0.2167, "step": 19480 }, { "epoch": 1.0669112413075617, "grad_norm": 0.14579445123672485, "learning_rate": 3.57980125735145e-05, "loss": 0.2058, "step": 19485 }, { "epoch": 1.0671850188906533, "grad_norm": 0.1555018275976181, "learning_rate": 3.5792942607990264e-05, "loss": 0.2047, "step": 19490 }, { "epoch": 1.0674587964737448, "grad_norm": 0.14519467949867249, "learning_rate": 3.5787872642466034e-05, "loss": 0.218, "step": 19495 }, { "epoch": 1.0677325740568362, "grad_norm": 0.13497528433799744, "learning_rate": 3.5782802676941804e-05, "loss": 0.2024, "step": 19500 }, { "epoch": 1.0680063516399276, "grad_norm": 0.11876572668552399, "learning_rate": 3.577773271141757e-05, "loss": 0.202, "step": 19505 }, { "epoch": 1.0682801292230193, "grad_norm": 0.14277325570583344, "learning_rate": 3.577266274589333e-05, "loss": 0.207, "step": 19510 }, { "epoch": 1.0685539068061107, "grad_norm": 0.13976874947547913, "learning_rate": 3.5767592780369094e-05, "loss": 0.2072, "step": 19515 }, { "epoch": 1.0688276843892022, "grad_norm": 0.12161148339509964, "learning_rate": 3.5762522814844864e-05, "loss": 0.2125, "step": 19520 }, { "epoch": 1.0691014619722936, "grad_norm": 0.12759770452976227, "learning_rate": 3.575745284932063e-05, "loss": 0.2004, "step": 19525 }, { "epoch": 1.0693752395553853, "grad_norm": 0.12677596509456635, "learning_rate": 3.575238288379639e-05, "loss": 0.2065, "step": 19530 }, { "epoch": 1.0696490171384767, "grad_norm": 0.1488841474056244, "learning_rate": 3.574731291827216e-05, "loss": 0.2061, "step": 19535 }, { "epoch": 1.0699227947215681, "grad_norm": 0.13250099122524261, "learning_rate": 3.5742242952747924e-05, "loss": 0.2061, "step": 19540 }, { "epoch": 1.0701965723046598, "grad_norm": 0.14069144427776337, "learning_rate": 3.573717298722369e-05, "loss": 0.203, "step": 19545 }, { "epoch": 1.0704703498877512, "grad_norm": 0.13428114354610443, "learning_rate": 3.573210302169945e-05, "loss": 0.2068, "step": 19550 }, { "epoch": 1.0707441274708427, "grad_norm": 0.1417442113161087, "learning_rate": 3.572703305617522e-05, "loss": 0.207, "step": 19555 }, { "epoch": 1.0710179050539341, "grad_norm": 0.145883247256279, "learning_rate": 3.5721963090650984e-05, "loss": 0.1963, "step": 19560 }, { "epoch": 1.0712916826370256, "grad_norm": 0.13853400945663452, "learning_rate": 3.571689312512675e-05, "loss": 0.2013, "step": 19565 }, { "epoch": 1.0715654602201172, "grad_norm": 0.13845431804656982, "learning_rate": 3.571182315960252e-05, "loss": 0.2038, "step": 19570 }, { "epoch": 1.0718392378032087, "grad_norm": 0.14673098921775818, "learning_rate": 3.570675319407828e-05, "loss": 0.2034, "step": 19575 }, { "epoch": 1.0721130153863, "grad_norm": 0.1394110769033432, "learning_rate": 3.5701683228554044e-05, "loss": 0.2115, "step": 19580 }, { "epoch": 1.0723867929693918, "grad_norm": 0.18630722165107727, "learning_rate": 3.5696613263029814e-05, "loss": 0.1986, "step": 19585 }, { "epoch": 1.0726605705524832, "grad_norm": 0.14848864078521729, "learning_rate": 3.5691543297505584e-05, "loss": 0.2104, "step": 19590 }, { "epoch": 1.0729343481355746, "grad_norm": 0.1586410105228424, "learning_rate": 3.568647333198135e-05, "loss": 0.2127, "step": 19595 }, { "epoch": 1.073208125718666, "grad_norm": 0.1305687576532364, "learning_rate": 3.568140336645711e-05, "loss": 0.2056, "step": 19600 }, { "epoch": 1.0734819033017577, "grad_norm": 0.11700206249952316, "learning_rate": 3.5676333400932874e-05, "loss": 0.2076, "step": 19605 }, { "epoch": 1.0737556808848492, "grad_norm": 0.1428544819355011, "learning_rate": 3.5671263435408644e-05, "loss": 0.2021, "step": 19610 }, { "epoch": 1.0740294584679406, "grad_norm": 0.14470547437667847, "learning_rate": 3.566619346988441e-05, "loss": 0.2112, "step": 19615 }, { "epoch": 1.074303236051032, "grad_norm": 0.14608827233314514, "learning_rate": 3.566112350436017e-05, "loss": 0.2023, "step": 19620 }, { "epoch": 1.0745770136341237, "grad_norm": 0.12338021397590637, "learning_rate": 3.565605353883594e-05, "loss": 0.1977, "step": 19625 }, { "epoch": 1.0748507912172152, "grad_norm": 0.17371225357055664, "learning_rate": 3.5650983573311704e-05, "loss": 0.2033, "step": 19630 }, { "epoch": 1.0751245688003066, "grad_norm": 0.14237526059150696, "learning_rate": 3.564591360778747e-05, "loss": 0.2073, "step": 19635 }, { "epoch": 1.075398346383398, "grad_norm": 0.1416151225566864, "learning_rate": 3.564084364226323e-05, "loss": 0.2089, "step": 19640 }, { "epoch": 1.0756721239664897, "grad_norm": 0.1619439423084259, "learning_rate": 3.5635773676739e-05, "loss": 0.2035, "step": 19645 }, { "epoch": 1.0759459015495811, "grad_norm": 0.14072850346565247, "learning_rate": 3.5630703711214764e-05, "loss": 0.1996, "step": 19650 }, { "epoch": 1.0762196791326726, "grad_norm": 0.14169079065322876, "learning_rate": 3.562563374569053e-05, "loss": 0.1996, "step": 19655 }, { "epoch": 1.076493456715764, "grad_norm": 0.13660910725593567, "learning_rate": 3.562056378016629e-05, "loss": 0.2058, "step": 19660 }, { "epoch": 1.0767672342988557, "grad_norm": 0.14440877735614777, "learning_rate": 3.561549381464207e-05, "loss": 0.1992, "step": 19665 }, { "epoch": 1.0770410118819471, "grad_norm": 0.1491224765777588, "learning_rate": 3.561042384911783e-05, "loss": 0.197, "step": 19670 }, { "epoch": 1.0773147894650386, "grad_norm": 0.1474621742963791, "learning_rate": 3.5605353883593594e-05, "loss": 0.217, "step": 19675 }, { "epoch": 1.07758856704813, "grad_norm": 0.13784757256507874, "learning_rate": 3.560028391806936e-05, "loss": 0.2074, "step": 19680 }, { "epoch": 1.0778623446312217, "grad_norm": 0.1368209570646286, "learning_rate": 3.559521395254513e-05, "loss": 0.2124, "step": 19685 }, { "epoch": 1.078136122214313, "grad_norm": 0.17290179431438446, "learning_rate": 3.559014398702089e-05, "loss": 0.2077, "step": 19690 }, { "epoch": 1.0784098997974045, "grad_norm": 0.1482270210981369, "learning_rate": 3.5585074021496654e-05, "loss": 0.2066, "step": 19695 }, { "epoch": 1.078683677380496, "grad_norm": 0.13380976021289825, "learning_rate": 3.5580004055972424e-05, "loss": 0.2013, "step": 19700 }, { "epoch": 1.0789574549635876, "grad_norm": 0.14750263094902039, "learning_rate": 3.557493409044819e-05, "loss": 0.2062, "step": 19705 }, { "epoch": 1.079231232546679, "grad_norm": 0.17478814721107483, "learning_rate": 3.556986412492395e-05, "loss": 0.2049, "step": 19710 }, { "epoch": 1.0795050101297705, "grad_norm": 0.1479145586490631, "learning_rate": 3.5564794159399714e-05, "loss": 0.2111, "step": 19715 }, { "epoch": 1.0797787877128622, "grad_norm": 0.12076869606971741, "learning_rate": 3.5559724193875484e-05, "loss": 0.2004, "step": 19720 }, { "epoch": 1.0800525652959536, "grad_norm": 0.148179292678833, "learning_rate": 3.555465422835125e-05, "loss": 0.2091, "step": 19725 }, { "epoch": 1.080326342879045, "grad_norm": 0.1641780138015747, "learning_rate": 3.554958426282701e-05, "loss": 0.2078, "step": 19730 }, { "epoch": 1.0806001204621365, "grad_norm": 0.17463098466396332, "learning_rate": 3.554451429730278e-05, "loss": 0.2097, "step": 19735 }, { "epoch": 1.0808738980452282, "grad_norm": 0.14339062571525574, "learning_rate": 3.5539444331778544e-05, "loss": 0.206, "step": 19740 }, { "epoch": 1.0811476756283196, "grad_norm": 0.14996476471424103, "learning_rate": 3.5534374366254314e-05, "loss": 0.21, "step": 19745 }, { "epoch": 1.081421453211411, "grad_norm": 0.14380709826946259, "learning_rate": 3.552930440073008e-05, "loss": 0.209, "step": 19750 }, { "epoch": 1.0816952307945025, "grad_norm": 0.15391652286052704, "learning_rate": 3.552423443520585e-05, "loss": 0.2074, "step": 19755 }, { "epoch": 1.0819690083775941, "grad_norm": 0.13142836093902588, "learning_rate": 3.551916446968161e-05, "loss": 0.2002, "step": 19760 }, { "epoch": 1.0822427859606856, "grad_norm": 0.12359295040369034, "learning_rate": 3.5514094504157374e-05, "loss": 0.2002, "step": 19765 }, { "epoch": 1.082516563543777, "grad_norm": 0.1303107589483261, "learning_rate": 3.550902453863314e-05, "loss": 0.2128, "step": 19770 }, { "epoch": 1.0827903411268684, "grad_norm": 0.13687019050121307, "learning_rate": 3.550395457310891e-05, "loss": 0.2084, "step": 19775 }, { "epoch": 1.08306411870996, "grad_norm": 0.13394619524478912, "learning_rate": 3.549888460758467e-05, "loss": 0.2091, "step": 19780 }, { "epoch": 1.0833378962930515, "grad_norm": 0.15159916877746582, "learning_rate": 3.5493814642060434e-05, "loss": 0.2, "step": 19785 }, { "epoch": 1.083611673876143, "grad_norm": 0.1598416417837143, "learning_rate": 3.54887446765362e-05, "loss": 0.2138, "step": 19790 }, { "epoch": 1.0838854514592344, "grad_norm": 0.1534590721130371, "learning_rate": 3.548367471101197e-05, "loss": 0.2042, "step": 19795 }, { "epoch": 1.084159229042326, "grad_norm": 0.12632596492767334, "learning_rate": 3.547860474548773e-05, "loss": 0.2035, "step": 19800 }, { "epoch": 1.0844330066254175, "grad_norm": 0.1370188146829605, "learning_rate": 3.5473534779963494e-05, "loss": 0.2022, "step": 19805 }, { "epoch": 1.084706784208509, "grad_norm": 0.132577583193779, "learning_rate": 3.5468464814439264e-05, "loss": 0.2004, "step": 19810 }, { "epoch": 1.0849805617916004, "grad_norm": 0.16204479336738586, "learning_rate": 3.546339484891503e-05, "loss": 0.1998, "step": 19815 }, { "epoch": 1.085254339374692, "grad_norm": 0.1562078297138214, "learning_rate": 3.545832488339079e-05, "loss": 0.2086, "step": 19820 }, { "epoch": 1.0855281169577835, "grad_norm": 0.13759927451610565, "learning_rate": 3.545325491786656e-05, "loss": 0.2138, "step": 19825 }, { "epoch": 1.085801894540875, "grad_norm": 0.1416471153497696, "learning_rate": 3.544818495234233e-05, "loss": 0.2108, "step": 19830 }, { "epoch": 1.0860756721239664, "grad_norm": 0.138203427195549, "learning_rate": 3.5443114986818094e-05, "loss": 0.2111, "step": 19835 }, { "epoch": 1.086349449707058, "grad_norm": 0.1496323198080063, "learning_rate": 3.543804502129386e-05, "loss": 0.2066, "step": 19840 }, { "epoch": 1.0866232272901495, "grad_norm": 0.14425835013389587, "learning_rate": 3.543297505576962e-05, "loss": 0.2036, "step": 19845 }, { "epoch": 1.086897004873241, "grad_norm": 0.1448919177055359, "learning_rate": 3.542790509024539e-05, "loss": 0.2052, "step": 19850 }, { "epoch": 1.0871707824563326, "grad_norm": 0.15504679083824158, "learning_rate": 3.5422835124721154e-05, "loss": 0.2004, "step": 19855 }, { "epoch": 1.087444560039424, "grad_norm": 0.14279355108737946, "learning_rate": 3.541776515919692e-05, "loss": 0.2072, "step": 19860 }, { "epoch": 1.0877183376225155, "grad_norm": 0.1374443769454956, "learning_rate": 3.541269519367269e-05, "loss": 0.2101, "step": 19865 }, { "epoch": 1.087992115205607, "grad_norm": 0.13538186252117157, "learning_rate": 3.540762522814845e-05, "loss": 0.2123, "step": 19870 }, { "epoch": 1.0882658927886986, "grad_norm": 0.15467296540737152, "learning_rate": 3.5402555262624214e-05, "loss": 0.2046, "step": 19875 }, { "epoch": 1.08853967037179, "grad_norm": 0.13989832997322083, "learning_rate": 3.539748529709998e-05, "loss": 0.2165, "step": 19880 }, { "epoch": 1.0888134479548814, "grad_norm": 0.14650961756706238, "learning_rate": 3.539241533157575e-05, "loss": 0.2139, "step": 19885 }, { "epoch": 1.0890872255379729, "grad_norm": 0.15109241008758545, "learning_rate": 3.538734536605151e-05, "loss": 0.1998, "step": 19890 }, { "epoch": 1.0893610031210645, "grad_norm": 0.13829733431339264, "learning_rate": 3.5382275400527274e-05, "loss": 0.2131, "step": 19895 }, { "epoch": 1.089634780704156, "grad_norm": 0.12896783649921417, "learning_rate": 3.5377205435003044e-05, "loss": 0.2088, "step": 19900 }, { "epoch": 1.0899085582872474, "grad_norm": 0.17437583208084106, "learning_rate": 3.537213546947881e-05, "loss": 0.2071, "step": 19905 }, { "epoch": 1.0901823358703389, "grad_norm": 0.16423116624355316, "learning_rate": 3.536706550395458e-05, "loss": 0.2066, "step": 19910 }, { "epoch": 1.0904561134534305, "grad_norm": 0.16261228919029236, "learning_rate": 3.536199553843034e-05, "loss": 0.2037, "step": 19915 }, { "epoch": 1.090729891036522, "grad_norm": 0.15680353343486786, "learning_rate": 3.5356925572906104e-05, "loss": 0.2102, "step": 19920 }, { "epoch": 1.0910036686196134, "grad_norm": 0.14240005612373352, "learning_rate": 3.5351855607381875e-05, "loss": 0.2102, "step": 19925 }, { "epoch": 1.0912774462027048, "grad_norm": 0.1332026571035385, "learning_rate": 3.534678564185764e-05, "loss": 0.2086, "step": 19930 }, { "epoch": 1.0915512237857965, "grad_norm": 0.1319408416748047, "learning_rate": 3.53417156763334e-05, "loss": 0.2102, "step": 19935 }, { "epoch": 1.091825001368888, "grad_norm": 0.13756078481674194, "learning_rate": 3.533664571080917e-05, "loss": 0.204, "step": 19940 }, { "epoch": 1.0920987789519794, "grad_norm": 0.1365317702293396, "learning_rate": 3.5331575745284935e-05, "loss": 0.2052, "step": 19945 }, { "epoch": 1.0923725565350708, "grad_norm": 0.169624462723732, "learning_rate": 3.53265057797607e-05, "loss": 0.2063, "step": 19950 }, { "epoch": 1.0926463341181625, "grad_norm": 0.1870940625667572, "learning_rate": 3.532143581423646e-05, "loss": 0.2056, "step": 19955 }, { "epoch": 1.092920111701254, "grad_norm": 0.142098531126976, "learning_rate": 3.531636584871223e-05, "loss": 0.2028, "step": 19960 }, { "epoch": 1.0931938892843454, "grad_norm": 0.15552054345607758, "learning_rate": 3.5311295883187995e-05, "loss": 0.2173, "step": 19965 }, { "epoch": 1.0934676668674368, "grad_norm": 0.13996370136737823, "learning_rate": 3.530622591766376e-05, "loss": 0.212, "step": 19970 }, { "epoch": 1.0937414444505285, "grad_norm": 0.14249083399772644, "learning_rate": 3.530115595213953e-05, "loss": 0.2091, "step": 19975 }, { "epoch": 1.09401522203362, "grad_norm": 0.13172288239002228, "learning_rate": 3.529608598661529e-05, "loss": 0.2005, "step": 19980 }, { "epoch": 1.0942889996167113, "grad_norm": 0.15650293231010437, "learning_rate": 3.5291016021091054e-05, "loss": 0.2103, "step": 19985 }, { "epoch": 1.094562777199803, "grad_norm": 0.15529164671897888, "learning_rate": 3.5285946055566825e-05, "loss": 0.2183, "step": 19990 }, { "epoch": 1.0948365547828944, "grad_norm": 0.13974352180957794, "learning_rate": 3.5280876090042595e-05, "loss": 0.203, "step": 19995 }, { "epoch": 1.0951103323659859, "grad_norm": 0.12361663579940796, "learning_rate": 3.527580612451836e-05, "loss": 0.2097, "step": 20000 }, { "epoch": 1.0953841099490773, "grad_norm": 0.15915021300315857, "learning_rate": 3.527073615899412e-05, "loss": 0.2063, "step": 20005 }, { "epoch": 1.0956578875321688, "grad_norm": 0.13851910829544067, "learning_rate": 3.5265666193469885e-05, "loss": 0.2039, "step": 20010 }, { "epoch": 1.0959316651152604, "grad_norm": 0.16254357993602753, "learning_rate": 3.5260596227945655e-05, "loss": 0.2092, "step": 20015 }, { "epoch": 1.0962054426983519, "grad_norm": 0.15284964442253113, "learning_rate": 3.525552626242142e-05, "loss": 0.1998, "step": 20020 }, { "epoch": 1.0964792202814433, "grad_norm": 0.12541329860687256, "learning_rate": 3.525045629689718e-05, "loss": 0.2017, "step": 20025 }, { "epoch": 1.096752997864535, "grad_norm": 0.14457033574581146, "learning_rate": 3.524538633137295e-05, "loss": 0.2049, "step": 20030 }, { "epoch": 1.0970267754476264, "grad_norm": 0.13508564233779907, "learning_rate": 3.5240316365848715e-05, "loss": 0.2054, "step": 20035 }, { "epoch": 1.0973005530307178, "grad_norm": 0.13919898867607117, "learning_rate": 3.523524640032448e-05, "loss": 0.2021, "step": 20040 }, { "epoch": 1.0975743306138093, "grad_norm": 0.1475847214460373, "learning_rate": 3.523017643480024e-05, "loss": 0.2066, "step": 20045 }, { "epoch": 1.097848108196901, "grad_norm": 0.17610503733158112, "learning_rate": 3.522510646927601e-05, "loss": 0.2225, "step": 20050 }, { "epoch": 1.0981218857799924, "grad_norm": 0.15097388625144958, "learning_rate": 3.5220036503751775e-05, "loss": 0.2002, "step": 20055 }, { "epoch": 1.0983956633630838, "grad_norm": 0.15005798637866974, "learning_rate": 3.521496653822754e-05, "loss": 0.1995, "step": 20060 }, { "epoch": 1.0986694409461752, "grad_norm": 0.15294000506401062, "learning_rate": 3.520989657270331e-05, "loss": 0.2051, "step": 20065 }, { "epoch": 1.098943218529267, "grad_norm": 0.143091082572937, "learning_rate": 3.520482660717908e-05, "loss": 0.2022, "step": 20070 }, { "epoch": 1.0992169961123583, "grad_norm": 0.14510495960712433, "learning_rate": 3.519975664165484e-05, "loss": 0.2162, "step": 20075 }, { "epoch": 1.0994907736954498, "grad_norm": 0.136628195643425, "learning_rate": 3.5194686676130605e-05, "loss": 0.2082, "step": 20080 }, { "epoch": 1.0997645512785412, "grad_norm": 0.1434275507926941, "learning_rate": 3.518961671060637e-05, "loss": 0.2135, "step": 20085 }, { "epoch": 1.1000383288616329, "grad_norm": 0.14511524140834808, "learning_rate": 3.518454674508214e-05, "loss": 0.2152, "step": 20090 }, { "epoch": 1.1003121064447243, "grad_norm": 0.13425733149051666, "learning_rate": 3.51794767795579e-05, "loss": 0.2067, "step": 20095 }, { "epoch": 1.1005858840278158, "grad_norm": 0.14087393879890442, "learning_rate": 3.5174406814033665e-05, "loss": 0.1973, "step": 20100 }, { "epoch": 1.1008596616109072, "grad_norm": 0.1281580626964569, "learning_rate": 3.5169336848509435e-05, "loss": 0.2141, "step": 20105 }, { "epoch": 1.1011334391939989, "grad_norm": 0.13869234919548035, "learning_rate": 3.51642668829852e-05, "loss": 0.2053, "step": 20110 }, { "epoch": 1.1014072167770903, "grad_norm": 0.14363490045070648, "learning_rate": 3.515919691746096e-05, "loss": 0.2091, "step": 20115 }, { "epoch": 1.1016809943601817, "grad_norm": 0.13206970691680908, "learning_rate": 3.5154126951936725e-05, "loss": 0.2098, "step": 20120 }, { "epoch": 1.1019547719432732, "grad_norm": 0.151541605591774, "learning_rate": 3.5149056986412495e-05, "loss": 0.2001, "step": 20125 }, { "epoch": 1.1022285495263648, "grad_norm": 0.1284824013710022, "learning_rate": 3.514398702088826e-05, "loss": 0.2085, "step": 20130 }, { "epoch": 1.1025023271094563, "grad_norm": 0.15590935945510864, "learning_rate": 3.513891705536402e-05, "loss": 0.2088, "step": 20135 }, { "epoch": 1.1027761046925477, "grad_norm": 0.16604916751384735, "learning_rate": 3.513384708983979e-05, "loss": 0.2061, "step": 20140 }, { "epoch": 1.1030498822756392, "grad_norm": 0.1692882478237152, "learning_rate": 3.5128777124315555e-05, "loss": 0.2022, "step": 20145 }, { "epoch": 1.1033236598587308, "grad_norm": 0.12700828909873962, "learning_rate": 3.5123707158791325e-05, "loss": 0.2065, "step": 20150 }, { "epoch": 1.1035974374418223, "grad_norm": 0.14688646793365479, "learning_rate": 3.511863719326709e-05, "loss": 0.2082, "step": 20155 }, { "epoch": 1.1038712150249137, "grad_norm": 0.13022847473621368, "learning_rate": 3.511356722774286e-05, "loss": 0.2083, "step": 20160 }, { "epoch": 1.1041449926080054, "grad_norm": 0.13388723134994507, "learning_rate": 3.510849726221862e-05, "loss": 0.2176, "step": 20165 }, { "epoch": 1.1044187701910968, "grad_norm": 0.16264784336090088, "learning_rate": 3.5103427296694385e-05, "loss": 0.1953, "step": 20170 }, { "epoch": 1.1046925477741882, "grad_norm": 0.17553240060806274, "learning_rate": 3.509835733117015e-05, "loss": 0.2119, "step": 20175 }, { "epoch": 1.1049663253572797, "grad_norm": 0.153874471783638, "learning_rate": 3.509328736564592e-05, "loss": 0.2069, "step": 20180 }, { "epoch": 1.1052401029403713, "grad_norm": 0.14193783700466156, "learning_rate": 3.508821740012168e-05, "loss": 0.2089, "step": 20185 }, { "epoch": 1.1055138805234628, "grad_norm": 0.1314793825149536, "learning_rate": 3.5083147434597445e-05, "loss": 0.2073, "step": 20190 }, { "epoch": 1.1057876581065542, "grad_norm": 0.13251902163028717, "learning_rate": 3.5078077469073215e-05, "loss": 0.2065, "step": 20195 }, { "epoch": 1.1060614356896457, "grad_norm": 0.14104443788528442, "learning_rate": 3.507300750354898e-05, "loss": 0.2113, "step": 20200 }, { "epoch": 1.1063352132727373, "grad_norm": 0.13735611736774445, "learning_rate": 3.506793753802474e-05, "loss": 0.2052, "step": 20205 }, { "epoch": 1.1066089908558288, "grad_norm": 0.14540022611618042, "learning_rate": 3.5062867572500505e-05, "loss": 0.2214, "step": 20210 }, { "epoch": 1.1068827684389202, "grad_norm": 0.1536705493927002, "learning_rate": 3.5057797606976275e-05, "loss": 0.204, "step": 20215 }, { "epoch": 1.1071565460220116, "grad_norm": 0.13478027284145355, "learning_rate": 3.505272764145204e-05, "loss": 0.2076, "step": 20220 }, { "epoch": 1.1074303236051033, "grad_norm": 0.13134564459323883, "learning_rate": 3.50476576759278e-05, "loss": 0.2076, "step": 20225 }, { "epoch": 1.1077041011881947, "grad_norm": 0.13529914617538452, "learning_rate": 3.504258771040357e-05, "loss": 0.2076, "step": 20230 }, { "epoch": 1.1079778787712862, "grad_norm": 0.14351613819599152, "learning_rate": 3.503751774487934e-05, "loss": 0.2018, "step": 20235 }, { "epoch": 1.1082516563543776, "grad_norm": 0.17424985766410828, "learning_rate": 3.5032447779355105e-05, "loss": 0.2112, "step": 20240 }, { "epoch": 1.1085254339374693, "grad_norm": 0.15672986209392548, "learning_rate": 3.502737781383087e-05, "loss": 0.2058, "step": 20245 }, { "epoch": 1.1087992115205607, "grad_norm": 0.1348431259393692, "learning_rate": 3.502230784830663e-05, "loss": 0.2043, "step": 20250 }, { "epoch": 1.1090729891036522, "grad_norm": 0.13518056273460388, "learning_rate": 3.50172378827824e-05, "loss": 0.1997, "step": 20255 }, { "epoch": 1.1093467666867436, "grad_norm": 0.12923575937747955, "learning_rate": 3.5012167917258165e-05, "loss": 0.2137, "step": 20260 }, { "epoch": 1.1096205442698353, "grad_norm": 0.1707616150379181, "learning_rate": 3.500709795173393e-05, "loss": 0.209, "step": 20265 }, { "epoch": 1.1098943218529267, "grad_norm": 0.1342719942331314, "learning_rate": 3.50020279862097e-05, "loss": 0.2157, "step": 20270 }, { "epoch": 1.1101680994360181, "grad_norm": 0.14704589545726776, "learning_rate": 3.499695802068546e-05, "loss": 0.2138, "step": 20275 }, { "epoch": 1.1104418770191096, "grad_norm": 0.12507040798664093, "learning_rate": 3.4991888055161225e-05, "loss": 0.2094, "step": 20280 }, { "epoch": 1.1107156546022012, "grad_norm": 0.1577831357717514, "learning_rate": 3.498681808963699e-05, "loss": 0.2017, "step": 20285 }, { "epoch": 1.1109894321852927, "grad_norm": 0.13826708495616913, "learning_rate": 3.498174812411276e-05, "loss": 0.2075, "step": 20290 }, { "epoch": 1.111263209768384, "grad_norm": 0.13573578000068665, "learning_rate": 3.497667815858852e-05, "loss": 0.2044, "step": 20295 }, { "epoch": 1.1115369873514758, "grad_norm": 0.132771298289299, "learning_rate": 3.4971608193064285e-05, "loss": 0.2085, "step": 20300 }, { "epoch": 1.1118107649345672, "grad_norm": 0.13864611089229584, "learning_rate": 3.4966538227540055e-05, "loss": 0.2048, "step": 20305 }, { "epoch": 1.1120845425176586, "grad_norm": 0.15181134641170502, "learning_rate": 3.496146826201582e-05, "loss": 0.198, "step": 20310 }, { "epoch": 1.11235832010075, "grad_norm": 0.1299225091934204, "learning_rate": 3.495639829649159e-05, "loss": 0.2056, "step": 20315 }, { "epoch": 1.1126320976838417, "grad_norm": 0.13412068784236908, "learning_rate": 3.495132833096735e-05, "loss": 0.2012, "step": 20320 }, { "epoch": 1.1129058752669332, "grad_norm": 0.14241528511047363, "learning_rate": 3.494625836544312e-05, "loss": 0.205, "step": 20325 }, { "epoch": 1.1131796528500246, "grad_norm": 0.14254029095172882, "learning_rate": 3.4941188399918885e-05, "loss": 0.215, "step": 20330 }, { "epoch": 1.113453430433116, "grad_norm": 0.133841872215271, "learning_rate": 3.493611843439465e-05, "loss": 0.2076, "step": 20335 }, { "epoch": 1.1137272080162077, "grad_norm": 0.140638068318367, "learning_rate": 3.493104846887041e-05, "loss": 0.2123, "step": 20340 }, { "epoch": 1.1140009855992992, "grad_norm": 0.16556084156036377, "learning_rate": 3.492597850334618e-05, "loss": 0.2039, "step": 20345 }, { "epoch": 1.1142747631823906, "grad_norm": 0.12852619588375092, "learning_rate": 3.4920908537821945e-05, "loss": 0.2024, "step": 20350 }, { "epoch": 1.114548540765482, "grad_norm": 0.12694242596626282, "learning_rate": 3.491583857229771e-05, "loss": 0.1954, "step": 20355 }, { "epoch": 1.1148223183485737, "grad_norm": 0.12524466216564178, "learning_rate": 3.491076860677348e-05, "loss": 0.1986, "step": 20360 }, { "epoch": 1.1150960959316651, "grad_norm": 0.15901702642440796, "learning_rate": 3.490569864124924e-05, "loss": 0.2104, "step": 20365 }, { "epoch": 1.1153698735147566, "grad_norm": 0.1476699858903885, "learning_rate": 3.4900628675725005e-05, "loss": 0.2166, "step": 20370 }, { "epoch": 1.115643651097848, "grad_norm": 0.14682509005069733, "learning_rate": 3.489555871020077e-05, "loss": 0.2013, "step": 20375 }, { "epoch": 1.1159174286809397, "grad_norm": 0.12308543175458908, "learning_rate": 3.489048874467654e-05, "loss": 0.2132, "step": 20380 }, { "epoch": 1.1161912062640311, "grad_norm": 0.14706958830356598, "learning_rate": 3.48854187791523e-05, "loss": 0.2059, "step": 20385 }, { "epoch": 1.1164649838471226, "grad_norm": 0.12060993164777756, "learning_rate": 3.4880348813628065e-05, "loss": 0.2126, "step": 20390 }, { "epoch": 1.116738761430214, "grad_norm": 0.1372430920600891, "learning_rate": 3.4875278848103835e-05, "loss": 0.2034, "step": 20395 }, { "epoch": 1.1170125390133057, "grad_norm": 0.14545032382011414, "learning_rate": 3.4870208882579605e-05, "loss": 0.2042, "step": 20400 }, { "epoch": 1.117286316596397, "grad_norm": 0.1348140388727188, "learning_rate": 3.486513891705537e-05, "loss": 0.2108, "step": 20405 }, { "epoch": 1.1175600941794885, "grad_norm": 0.16180531680583954, "learning_rate": 3.486006895153113e-05, "loss": 0.2029, "step": 20410 }, { "epoch": 1.11783387176258, "grad_norm": 0.11762421578168869, "learning_rate": 3.4854998986006895e-05, "loss": 0.2023, "step": 20415 }, { "epoch": 1.1181076493456716, "grad_norm": 0.12711715698242188, "learning_rate": 3.4849929020482665e-05, "loss": 0.2117, "step": 20420 }, { "epoch": 1.118381426928763, "grad_norm": 0.12627534568309784, "learning_rate": 3.484485905495843e-05, "loss": 0.2055, "step": 20425 }, { "epoch": 1.1186552045118545, "grad_norm": 0.14076437056064606, "learning_rate": 3.483978908943419e-05, "loss": 0.2005, "step": 20430 }, { "epoch": 1.1189289820949462, "grad_norm": 0.16258437931537628, "learning_rate": 3.483471912390996e-05, "loss": 0.2116, "step": 20435 }, { "epoch": 1.1192027596780376, "grad_norm": 0.18447482585906982, "learning_rate": 3.4829649158385725e-05, "loss": 0.2094, "step": 20440 }, { "epoch": 1.119476537261129, "grad_norm": 0.13244310021400452, "learning_rate": 3.482457919286149e-05, "loss": 0.2101, "step": 20445 }, { "epoch": 1.1197503148442205, "grad_norm": 0.13643527030944824, "learning_rate": 3.481950922733725e-05, "loss": 0.1965, "step": 20450 }, { "epoch": 1.120024092427312, "grad_norm": 0.15896402299404144, "learning_rate": 3.481443926181302e-05, "loss": 0.2152, "step": 20455 }, { "epoch": 1.1202978700104036, "grad_norm": 0.1620342880487442, "learning_rate": 3.4809369296288785e-05, "loss": 0.2099, "step": 20460 }, { "epoch": 1.120571647593495, "grad_norm": 0.17350663244724274, "learning_rate": 3.480429933076455e-05, "loss": 0.2075, "step": 20465 }, { "epoch": 1.1208454251765865, "grad_norm": 0.2035517394542694, "learning_rate": 3.479922936524032e-05, "loss": 0.2109, "step": 20470 }, { "epoch": 1.1211192027596781, "grad_norm": 0.14270929992198944, "learning_rate": 3.479415939971608e-05, "loss": 0.2009, "step": 20475 }, { "epoch": 1.1213929803427696, "grad_norm": 0.1268605887889862, "learning_rate": 3.478908943419185e-05, "loss": 0.2033, "step": 20480 }, { "epoch": 1.121666757925861, "grad_norm": 0.12400338798761368, "learning_rate": 3.4784019468667615e-05, "loss": 0.205, "step": 20485 }, { "epoch": 1.1219405355089525, "grad_norm": 0.14714205265045166, "learning_rate": 3.4778949503143385e-05, "loss": 0.2038, "step": 20490 }, { "epoch": 1.1222143130920441, "grad_norm": 0.12603451311588287, "learning_rate": 3.477387953761915e-05, "loss": 0.204, "step": 20495 }, { "epoch": 1.1224880906751356, "grad_norm": 0.1313803493976593, "learning_rate": 3.476880957209491e-05, "loss": 0.2143, "step": 20500 }, { "epoch": 1.122761868258227, "grad_norm": 0.14690852165222168, "learning_rate": 3.4763739606570675e-05, "loss": 0.1911, "step": 20505 }, { "epoch": 1.1230356458413184, "grad_norm": 0.13807012140750885, "learning_rate": 3.4758669641046445e-05, "loss": 0.2016, "step": 20510 }, { "epoch": 1.12330942342441, "grad_norm": 0.17054924368858337, "learning_rate": 3.475359967552221e-05, "loss": 0.2064, "step": 20515 }, { "epoch": 1.1235832010075015, "grad_norm": 0.1557554453611374, "learning_rate": 3.474852970999797e-05, "loss": 0.1952, "step": 20520 }, { "epoch": 1.123856978590593, "grad_norm": 0.16040687263011932, "learning_rate": 3.4743459744473735e-05, "loss": 0.2112, "step": 20525 }, { "epoch": 1.1241307561736844, "grad_norm": 0.11745990067720413, "learning_rate": 3.4738389778949505e-05, "loss": 0.2043, "step": 20530 }, { "epoch": 1.124404533756776, "grad_norm": 0.13090479373931885, "learning_rate": 3.473331981342527e-05, "loss": 0.1979, "step": 20535 }, { "epoch": 1.1246783113398675, "grad_norm": 0.13248661160469055, "learning_rate": 3.472824984790103e-05, "loss": 0.2105, "step": 20540 }, { "epoch": 1.124952088922959, "grad_norm": 0.17882364988327026, "learning_rate": 3.47231798823768e-05, "loss": 0.2078, "step": 20545 }, { "epoch": 1.1252258665060504, "grad_norm": 0.19845059514045715, "learning_rate": 3.4718109916852565e-05, "loss": 0.2011, "step": 20550 }, { "epoch": 1.125499644089142, "grad_norm": 0.14629177749156952, "learning_rate": 3.471303995132833e-05, "loss": 0.2063, "step": 20555 }, { "epoch": 1.1257734216722335, "grad_norm": 0.14495304226875305, "learning_rate": 3.47079699858041e-05, "loss": 0.2041, "step": 20560 }, { "epoch": 1.126047199255325, "grad_norm": 0.1747528463602066, "learning_rate": 3.470290002027987e-05, "loss": 0.2121, "step": 20565 }, { "epoch": 1.1263209768384166, "grad_norm": 0.16390720009803772, "learning_rate": 3.469783005475563e-05, "loss": 0.2111, "step": 20570 }, { "epoch": 1.126594754421508, "grad_norm": 0.14454945921897888, "learning_rate": 3.4692760089231395e-05, "loss": 0.2007, "step": 20575 }, { "epoch": 1.1268685320045995, "grad_norm": 0.1561494916677475, "learning_rate": 3.468769012370716e-05, "loss": 0.2076, "step": 20580 }, { "epoch": 1.127142309587691, "grad_norm": 0.19284893572330475, "learning_rate": 3.468262015818293e-05, "loss": 0.2028, "step": 20585 }, { "epoch": 1.1274160871707823, "grad_norm": 0.18406987190246582, "learning_rate": 3.467755019265869e-05, "loss": 0.2138, "step": 20590 }, { "epoch": 1.127689864753874, "grad_norm": 0.13653531670570374, "learning_rate": 3.4672480227134455e-05, "loss": 0.2115, "step": 20595 }, { "epoch": 1.1279636423369654, "grad_norm": 0.1332596242427826, "learning_rate": 3.4667410261610226e-05, "loss": 0.1953, "step": 20600 }, { "epoch": 1.1282374199200569, "grad_norm": 0.13649512827396393, "learning_rate": 3.466234029608599e-05, "loss": 0.2049, "step": 20605 }, { "epoch": 1.1285111975031485, "grad_norm": 0.134761244058609, "learning_rate": 3.465727033056175e-05, "loss": 0.202, "step": 20610 }, { "epoch": 1.12878497508624, "grad_norm": 0.12021488696336746, "learning_rate": 3.4652200365037515e-05, "loss": 0.2081, "step": 20615 }, { "epoch": 1.1290587526693314, "grad_norm": 0.19465066492557526, "learning_rate": 3.4647130399513286e-05, "loss": 0.2065, "step": 20620 }, { "epoch": 1.1293325302524229, "grad_norm": 0.12046148627996445, "learning_rate": 3.464206043398905e-05, "loss": 0.2071, "step": 20625 }, { "epoch": 1.1296063078355145, "grad_norm": 0.1709190160036087, "learning_rate": 3.463699046846481e-05, "loss": 0.2124, "step": 20630 }, { "epoch": 1.129880085418606, "grad_norm": 0.23418498039245605, "learning_rate": 3.463192050294058e-05, "loss": 0.2029, "step": 20635 }, { "epoch": 1.1301538630016974, "grad_norm": 0.1240362823009491, "learning_rate": 3.462685053741635e-05, "loss": 0.2024, "step": 20640 }, { "epoch": 1.1304276405847888, "grad_norm": 0.1242770105600357, "learning_rate": 3.4621780571892116e-05, "loss": 0.2015, "step": 20645 }, { "epoch": 1.1307014181678805, "grad_norm": 0.1816519796848297, "learning_rate": 3.461671060636788e-05, "loss": 0.2096, "step": 20650 }, { "epoch": 1.130975195750972, "grad_norm": 0.23351186513900757, "learning_rate": 3.461164064084364e-05, "loss": 0.2133, "step": 20655 }, { "epoch": 1.1312489733340634, "grad_norm": 0.23824277520179749, "learning_rate": 3.460657067531941e-05, "loss": 0.1955, "step": 20660 }, { "epoch": 1.1315227509171548, "grad_norm": 0.1505732536315918, "learning_rate": 3.4601500709795176e-05, "loss": 0.2042, "step": 20665 }, { "epoch": 1.1317965285002465, "grad_norm": 0.14914269745349884, "learning_rate": 3.459643074427094e-05, "loss": 0.206, "step": 20670 }, { "epoch": 1.132070306083338, "grad_norm": 0.1284157633781433, "learning_rate": 3.459136077874671e-05, "loss": 0.2045, "step": 20675 }, { "epoch": 1.1323440836664294, "grad_norm": 0.16208121180534363, "learning_rate": 3.458629081322247e-05, "loss": 0.2124, "step": 20680 }, { "epoch": 1.1326178612495208, "grad_norm": 0.17021137475967407, "learning_rate": 3.4581220847698236e-05, "loss": 0.1942, "step": 20685 }, { "epoch": 1.1328916388326125, "grad_norm": 0.12783809006214142, "learning_rate": 3.4576150882174e-05, "loss": 0.206, "step": 20690 }, { "epoch": 1.133165416415704, "grad_norm": 0.13040003180503845, "learning_rate": 3.457108091664977e-05, "loss": 0.2012, "step": 20695 }, { "epoch": 1.1334391939987953, "grad_norm": 0.14186570048332214, "learning_rate": 3.456601095112553e-05, "loss": 0.2016, "step": 20700 }, { "epoch": 1.133712971581887, "grad_norm": 0.13351236283779144, "learning_rate": 3.4560940985601296e-05, "loss": 0.2035, "step": 20705 }, { "epoch": 1.1339867491649784, "grad_norm": 0.14111605286598206, "learning_rate": 3.4555871020077066e-05, "loss": 0.2079, "step": 20710 }, { "epoch": 1.1342605267480699, "grad_norm": 0.17440609633922577, "learning_rate": 3.455080105455283e-05, "loss": 0.2052, "step": 20715 }, { "epoch": 1.1345343043311613, "grad_norm": 0.13520368933677673, "learning_rate": 3.45457310890286e-05, "loss": 0.2032, "step": 20720 }, { "epoch": 1.1348080819142528, "grad_norm": 0.18018865585327148, "learning_rate": 3.454066112350436e-05, "loss": 0.2048, "step": 20725 }, { "epoch": 1.1350818594973444, "grad_norm": 0.17156533896923065, "learning_rate": 3.453559115798013e-05, "loss": 0.2077, "step": 20730 }, { "epoch": 1.1353556370804359, "grad_norm": 0.13510408997535706, "learning_rate": 3.4530521192455896e-05, "loss": 0.2096, "step": 20735 }, { "epoch": 1.1356294146635273, "grad_norm": 0.13755406439304352, "learning_rate": 3.452545122693166e-05, "loss": 0.2058, "step": 20740 }, { "epoch": 1.135903192246619, "grad_norm": 0.1530783325433731, "learning_rate": 3.452038126140742e-05, "loss": 0.2105, "step": 20745 }, { "epoch": 1.1361769698297104, "grad_norm": 0.1330023854970932, "learning_rate": 3.451531129588319e-05, "loss": 0.2005, "step": 20750 }, { "epoch": 1.1364507474128018, "grad_norm": 0.2234307825565338, "learning_rate": 3.4510241330358956e-05, "loss": 0.2008, "step": 20755 }, { "epoch": 1.1367245249958933, "grad_norm": 0.13978292047977448, "learning_rate": 3.450517136483472e-05, "loss": 0.2104, "step": 20760 }, { "epoch": 1.1369983025789847, "grad_norm": 0.12891779839992523, "learning_rate": 3.450010139931049e-05, "loss": 0.2028, "step": 20765 }, { "epoch": 1.1372720801620764, "grad_norm": 0.1204245388507843, "learning_rate": 3.449503143378625e-05, "loss": 0.2128, "step": 20770 }, { "epoch": 1.1375458577451678, "grad_norm": 0.14791233837604523, "learning_rate": 3.4489961468262016e-05, "loss": 0.2028, "step": 20775 }, { "epoch": 1.1378196353282592, "grad_norm": 0.13684463500976562, "learning_rate": 3.448489150273778e-05, "loss": 0.2064, "step": 20780 }, { "epoch": 1.138093412911351, "grad_norm": 0.1204998567700386, "learning_rate": 3.447982153721355e-05, "loss": 0.2018, "step": 20785 }, { "epoch": 1.1383671904944423, "grad_norm": 0.15901529788970947, "learning_rate": 3.447475157168931e-05, "loss": 0.2084, "step": 20790 }, { "epoch": 1.1386409680775338, "grad_norm": 0.1356266736984253, "learning_rate": 3.4469681606165076e-05, "loss": 0.2028, "step": 20795 }, { "epoch": 1.1389147456606252, "grad_norm": 0.12505385279655457, "learning_rate": 3.4464611640640846e-05, "loss": 0.2084, "step": 20800 }, { "epoch": 1.1391885232437169, "grad_norm": 0.13809719681739807, "learning_rate": 3.4459541675116616e-05, "loss": 0.2012, "step": 20805 }, { "epoch": 1.1394623008268083, "grad_norm": 0.12394936382770538, "learning_rate": 3.445447170959238e-05, "loss": 0.2153, "step": 20810 }, { "epoch": 1.1397360784098998, "grad_norm": 0.1774217039346695, "learning_rate": 3.444940174406814e-05, "loss": 0.1971, "step": 20815 }, { "epoch": 1.1400098559929912, "grad_norm": 0.15554587543010712, "learning_rate": 3.4444331778543906e-05, "loss": 0.2107, "step": 20820 }, { "epoch": 1.1402836335760829, "grad_norm": 0.12698571383953094, "learning_rate": 3.4439261813019676e-05, "loss": 0.2011, "step": 20825 }, { "epoch": 1.1405574111591743, "grad_norm": 0.12925903499126434, "learning_rate": 3.443419184749544e-05, "loss": 0.2036, "step": 20830 }, { "epoch": 1.1408311887422657, "grad_norm": 0.15464548766613007, "learning_rate": 3.44291218819712e-05, "loss": 0.2122, "step": 20835 }, { "epoch": 1.1411049663253572, "grad_norm": 0.14751040935516357, "learning_rate": 3.442405191644697e-05, "loss": 0.2067, "step": 20840 }, { "epoch": 1.1413787439084488, "grad_norm": 0.12616539001464844, "learning_rate": 3.4418981950922736e-05, "loss": 0.2043, "step": 20845 }, { "epoch": 1.1416525214915403, "grad_norm": 0.14923207461833954, "learning_rate": 3.44139119853985e-05, "loss": 0.1949, "step": 20850 }, { "epoch": 1.1419262990746317, "grad_norm": 0.1272999495267868, "learning_rate": 3.440884201987426e-05, "loss": 0.2026, "step": 20855 }, { "epoch": 1.1422000766577232, "grad_norm": 0.14065438508987427, "learning_rate": 3.440377205435003e-05, "loss": 0.2042, "step": 20860 }, { "epoch": 1.1424738542408148, "grad_norm": 0.13573862612247467, "learning_rate": 3.4398702088825796e-05, "loss": 0.2062, "step": 20865 }, { "epoch": 1.1427476318239063, "grad_norm": 0.14527392387390137, "learning_rate": 3.439363212330156e-05, "loss": 0.2077, "step": 20870 }, { "epoch": 1.1430214094069977, "grad_norm": 0.1433231383562088, "learning_rate": 3.438856215777733e-05, "loss": 0.215, "step": 20875 }, { "epoch": 1.1432951869900894, "grad_norm": 0.1403825283050537, "learning_rate": 3.438349219225309e-05, "loss": 0.1978, "step": 20880 }, { "epoch": 1.1435689645731808, "grad_norm": 0.13456127047538757, "learning_rate": 3.437842222672886e-05, "loss": 0.2033, "step": 20885 }, { "epoch": 1.1438427421562722, "grad_norm": 0.12288115173578262, "learning_rate": 3.4373352261204626e-05, "loss": 0.1997, "step": 20890 }, { "epoch": 1.1441165197393637, "grad_norm": 0.12873460352420807, "learning_rate": 3.4368282295680396e-05, "loss": 0.2105, "step": 20895 }, { "epoch": 1.1443902973224551, "grad_norm": 0.14406821131706238, "learning_rate": 3.436321233015616e-05, "loss": 0.1983, "step": 20900 }, { "epoch": 1.1446640749055468, "grad_norm": 0.13596060872077942, "learning_rate": 3.435814236463192e-05, "loss": 0.2017, "step": 20905 }, { "epoch": 1.1449378524886382, "grad_norm": 0.14460285007953644, "learning_rate": 3.4353072399107686e-05, "loss": 0.2054, "step": 20910 }, { "epoch": 1.1452116300717297, "grad_norm": 0.1492876559495926, "learning_rate": 3.4348002433583456e-05, "loss": 0.2077, "step": 20915 }, { "epoch": 1.1454854076548213, "grad_norm": 0.1383437067270279, "learning_rate": 3.434293246805922e-05, "loss": 0.2004, "step": 20920 }, { "epoch": 1.1457591852379128, "grad_norm": 0.13031333684921265, "learning_rate": 3.433786250253498e-05, "loss": 0.2195, "step": 20925 }, { "epoch": 1.1460329628210042, "grad_norm": 0.12112107127904892, "learning_rate": 3.433279253701075e-05, "loss": 0.2109, "step": 20930 }, { "epoch": 1.1463067404040956, "grad_norm": 0.13269676268100739, "learning_rate": 3.4327722571486516e-05, "loss": 0.1997, "step": 20935 }, { "epoch": 1.1465805179871873, "grad_norm": 0.12859350442886353, "learning_rate": 3.432265260596228e-05, "loss": 0.2053, "step": 20940 }, { "epoch": 1.1468542955702787, "grad_norm": 0.12851841747760773, "learning_rate": 3.431758264043804e-05, "loss": 0.2068, "step": 20945 }, { "epoch": 1.1471280731533702, "grad_norm": 0.12557817995548248, "learning_rate": 3.431251267491381e-05, "loss": 0.1962, "step": 20950 }, { "epoch": 1.1474018507364616, "grad_norm": 0.13782605528831482, "learning_rate": 3.4307442709389576e-05, "loss": 0.2104, "step": 20955 }, { "epoch": 1.1476756283195533, "grad_norm": 0.15202710032463074, "learning_rate": 3.430237274386534e-05, "loss": 0.2045, "step": 20960 }, { "epoch": 1.1479494059026447, "grad_norm": 0.1593223363161087, "learning_rate": 3.429730277834111e-05, "loss": 0.2108, "step": 20965 }, { "epoch": 1.1482231834857362, "grad_norm": 0.12704268097877502, "learning_rate": 3.429223281281688e-05, "loss": 0.204, "step": 20970 }, { "epoch": 1.1484969610688276, "grad_norm": 0.11943478137254715, "learning_rate": 3.428716284729264e-05, "loss": 0.2042, "step": 20975 }, { "epoch": 1.1487707386519193, "grad_norm": 0.14859290421009064, "learning_rate": 3.4282092881768406e-05, "loss": 0.2164, "step": 20980 }, { "epoch": 1.1490445162350107, "grad_norm": 0.1511809527873993, "learning_rate": 3.427702291624417e-05, "loss": 0.2009, "step": 20985 }, { "epoch": 1.1493182938181021, "grad_norm": 0.17393015325069427, "learning_rate": 3.427195295071994e-05, "loss": 0.2095, "step": 20990 }, { "epoch": 1.1495920714011936, "grad_norm": 0.13141129910945892, "learning_rate": 3.42668829851957e-05, "loss": 0.2092, "step": 20995 }, { "epoch": 1.1498658489842852, "grad_norm": 0.1196923777461052, "learning_rate": 3.4261813019671466e-05, "loss": 0.2023, "step": 21000 }, { "epoch": 1.1501396265673767, "grad_norm": 0.1227194294333458, "learning_rate": 3.4256743054147236e-05, "loss": 0.1998, "step": 21005 }, { "epoch": 1.150413404150468, "grad_norm": 0.13945046067237854, "learning_rate": 3.4251673088623e-05, "loss": 0.1986, "step": 21010 }, { "epoch": 1.1506871817335598, "grad_norm": 0.13225595653057098, "learning_rate": 3.424660312309876e-05, "loss": 0.2006, "step": 21015 }, { "epoch": 1.1509609593166512, "grad_norm": 0.14215728640556335, "learning_rate": 3.4241533157574526e-05, "loss": 0.2139, "step": 21020 }, { "epoch": 1.1512347368997426, "grad_norm": 0.15899941325187683, "learning_rate": 3.4236463192050296e-05, "loss": 0.2141, "step": 21025 }, { "epoch": 1.151508514482834, "grad_norm": 0.13202285766601562, "learning_rate": 3.423139322652606e-05, "loss": 0.2052, "step": 21030 }, { "epoch": 1.1517822920659255, "grad_norm": 0.12649932503700256, "learning_rate": 3.422632326100182e-05, "loss": 0.1962, "step": 21035 }, { "epoch": 1.1520560696490172, "grad_norm": 0.14432567358016968, "learning_rate": 3.422125329547759e-05, "loss": 0.2082, "step": 21040 }, { "epoch": 1.1523298472321086, "grad_norm": 0.12357821315526962, "learning_rate": 3.4216183329953356e-05, "loss": 0.2012, "step": 21045 }, { "epoch": 1.1526036248152, "grad_norm": 0.14553745090961456, "learning_rate": 3.4211113364429126e-05, "loss": 0.2028, "step": 21050 }, { "epoch": 1.1528774023982917, "grad_norm": 0.13671916723251343, "learning_rate": 3.420604339890489e-05, "loss": 0.1953, "step": 21055 }, { "epoch": 1.1531511799813832, "grad_norm": 0.13063476979732513, "learning_rate": 3.420097343338066e-05, "loss": 0.2104, "step": 21060 }, { "epoch": 1.1534249575644746, "grad_norm": 0.13404937088489532, "learning_rate": 3.419590346785642e-05, "loss": 0.2023, "step": 21065 }, { "epoch": 1.153698735147566, "grad_norm": 0.14308324456214905, "learning_rate": 3.4190833502332186e-05, "loss": 0.2111, "step": 21070 }, { "epoch": 1.1539725127306577, "grad_norm": 0.13762302696704865, "learning_rate": 3.418576353680795e-05, "loss": 0.2006, "step": 21075 }, { "epoch": 1.1542462903137491, "grad_norm": 0.13256201148033142, "learning_rate": 3.418069357128372e-05, "loss": 0.1999, "step": 21080 }, { "epoch": 1.1545200678968406, "grad_norm": 0.156844824552536, "learning_rate": 3.417562360575948e-05, "loss": 0.2035, "step": 21085 }, { "epoch": 1.154793845479932, "grad_norm": 0.12872083485126495, "learning_rate": 3.4170553640235246e-05, "loss": 0.1941, "step": 21090 }, { "epoch": 1.1550676230630237, "grad_norm": 0.12474821507930756, "learning_rate": 3.4165483674711016e-05, "loss": 0.204, "step": 21095 }, { "epoch": 1.1553414006461151, "grad_norm": 0.15644632279872894, "learning_rate": 3.416041370918678e-05, "loss": 0.2118, "step": 21100 }, { "epoch": 1.1556151782292066, "grad_norm": 0.12767651677131653, "learning_rate": 3.415534374366254e-05, "loss": 0.2047, "step": 21105 }, { "epoch": 1.155888955812298, "grad_norm": 0.1594633162021637, "learning_rate": 3.4150273778138306e-05, "loss": 0.2031, "step": 21110 }, { "epoch": 1.1561627333953897, "grad_norm": 0.11869830638170242, "learning_rate": 3.4145203812614076e-05, "loss": 0.2026, "step": 21115 }, { "epoch": 1.156436510978481, "grad_norm": 0.1457512229681015, "learning_rate": 3.414013384708984e-05, "loss": 0.2005, "step": 21120 }, { "epoch": 1.1567102885615725, "grad_norm": 0.138307124376297, "learning_rate": 3.41350638815656e-05, "loss": 0.2004, "step": 21125 }, { "epoch": 1.156984066144664, "grad_norm": 0.12538285553455353, "learning_rate": 3.412999391604137e-05, "loss": 0.2057, "step": 21130 }, { "epoch": 1.1572578437277556, "grad_norm": 0.14474843442440033, "learning_rate": 3.412492395051714e-05, "loss": 0.2118, "step": 21135 }, { "epoch": 1.157531621310847, "grad_norm": 0.14615173637866974, "learning_rate": 3.4119853984992906e-05, "loss": 0.2106, "step": 21140 }, { "epoch": 1.1578053988939385, "grad_norm": 0.14827249944210052, "learning_rate": 3.411478401946867e-05, "loss": 0.2076, "step": 21145 }, { "epoch": 1.1580791764770302, "grad_norm": 0.1340751051902771, "learning_rate": 3.410971405394443e-05, "loss": 0.1988, "step": 21150 }, { "epoch": 1.1583529540601216, "grad_norm": 0.14769399166107178, "learning_rate": 3.41046440884202e-05, "loss": 0.2147, "step": 21155 }, { "epoch": 1.158626731643213, "grad_norm": 0.13749966025352478, "learning_rate": 3.4099574122895966e-05, "loss": 0.2048, "step": 21160 }, { "epoch": 1.1589005092263045, "grad_norm": 0.1332816481590271, "learning_rate": 3.409450415737173e-05, "loss": 0.2013, "step": 21165 }, { "epoch": 1.159174286809396, "grad_norm": 0.138020321726799, "learning_rate": 3.40894341918475e-05, "loss": 0.2116, "step": 21170 }, { "epoch": 1.1594480643924876, "grad_norm": 0.133638396859169, "learning_rate": 3.408436422632326e-05, "loss": 0.205, "step": 21175 }, { "epoch": 1.159721841975579, "grad_norm": 0.14491766691207886, "learning_rate": 3.4079294260799026e-05, "loss": 0.2029, "step": 21180 }, { "epoch": 1.1599956195586705, "grad_norm": 0.15305307507514954, "learning_rate": 3.407422429527479e-05, "loss": 0.2047, "step": 21185 }, { "epoch": 1.1602693971417621, "grad_norm": 0.1814524233341217, "learning_rate": 3.406915432975056e-05, "loss": 0.2012, "step": 21190 }, { "epoch": 1.1605431747248536, "grad_norm": 0.1372700333595276, "learning_rate": 3.406408436422632e-05, "loss": 0.1999, "step": 21195 }, { "epoch": 1.160816952307945, "grad_norm": 0.13991789519786835, "learning_rate": 3.4059014398702086e-05, "loss": 0.2014, "step": 21200 }, { "epoch": 1.1610907298910365, "grad_norm": 0.13845287263393402, "learning_rate": 3.4053944433177856e-05, "loss": 0.2076, "step": 21205 }, { "epoch": 1.161364507474128, "grad_norm": 0.1561756730079651, "learning_rate": 3.4048874467653627e-05, "loss": 0.2032, "step": 21210 }, { "epoch": 1.1616382850572196, "grad_norm": 0.1559910923242569, "learning_rate": 3.404380450212939e-05, "loss": 0.2004, "step": 21215 }, { "epoch": 1.161912062640311, "grad_norm": 0.14693470299243927, "learning_rate": 3.403873453660515e-05, "loss": 0.2014, "step": 21220 }, { "epoch": 1.1621858402234024, "grad_norm": 0.12669867277145386, "learning_rate": 3.403366457108092e-05, "loss": 0.2048, "step": 21225 }, { "epoch": 1.162459617806494, "grad_norm": 0.1329999566078186, "learning_rate": 3.4028594605556687e-05, "loss": 0.1996, "step": 21230 }, { "epoch": 1.1627333953895855, "grad_norm": 0.15233659744262695, "learning_rate": 3.402352464003245e-05, "loss": 0.2016, "step": 21235 }, { "epoch": 1.163007172972677, "grad_norm": 0.12058207392692566, "learning_rate": 3.401845467450821e-05, "loss": 0.2092, "step": 21240 }, { "epoch": 1.1632809505557684, "grad_norm": 0.1325415074825287, "learning_rate": 3.401338470898398e-05, "loss": 0.1949, "step": 21245 }, { "epoch": 1.16355472813886, "grad_norm": 0.11665078997612, "learning_rate": 3.4008314743459747e-05, "loss": 0.2034, "step": 21250 }, { "epoch": 1.1638285057219515, "grad_norm": 0.13341568410396576, "learning_rate": 3.400324477793551e-05, "loss": 0.2056, "step": 21255 }, { "epoch": 1.164102283305043, "grad_norm": 0.13877153396606445, "learning_rate": 3.399817481241127e-05, "loss": 0.2064, "step": 21260 }, { "epoch": 1.1643760608881344, "grad_norm": 0.11756399273872375, "learning_rate": 3.399310484688704e-05, "loss": 0.2031, "step": 21265 }, { "epoch": 1.164649838471226, "grad_norm": 0.1277185082435608, "learning_rate": 3.3988034881362807e-05, "loss": 0.1912, "step": 21270 }, { "epoch": 1.1649236160543175, "grad_norm": 0.11918089538812637, "learning_rate": 3.398296491583857e-05, "loss": 0.2106, "step": 21275 }, { "epoch": 1.165197393637409, "grad_norm": 0.12393093854188919, "learning_rate": 3.397789495031434e-05, "loss": 0.208, "step": 21280 }, { "epoch": 1.1654711712205006, "grad_norm": 0.12014389783143997, "learning_rate": 3.39728249847901e-05, "loss": 0.1995, "step": 21285 }, { "epoch": 1.165744948803592, "grad_norm": 0.12819910049438477, "learning_rate": 3.396775501926587e-05, "loss": 0.2072, "step": 21290 }, { "epoch": 1.1660187263866835, "grad_norm": 0.12668664753437042, "learning_rate": 3.396268505374164e-05, "loss": 0.2073, "step": 21295 }, { "epoch": 1.166292503969775, "grad_norm": 0.12477272003889084, "learning_rate": 3.395761508821741e-05, "loss": 0.2035, "step": 21300 }, { "epoch": 1.1665662815528663, "grad_norm": 0.14670245349407196, "learning_rate": 3.395254512269317e-05, "loss": 0.2151, "step": 21305 }, { "epoch": 1.166840059135958, "grad_norm": 0.16816918551921844, "learning_rate": 3.394747515716893e-05, "loss": 0.2103, "step": 21310 }, { "epoch": 1.1671138367190494, "grad_norm": 0.1399264931678772, "learning_rate": 3.39424051916447e-05, "loss": 0.2096, "step": 21315 }, { "epoch": 1.1673876143021409, "grad_norm": 0.12483557313680649, "learning_rate": 3.393733522612047e-05, "loss": 0.204, "step": 21320 }, { "epoch": 1.1676613918852325, "grad_norm": 0.14334851503372192, "learning_rate": 3.393226526059623e-05, "loss": 0.203, "step": 21325 }, { "epoch": 1.167935169468324, "grad_norm": 0.13715963065624237, "learning_rate": 3.392719529507199e-05, "loss": 0.2067, "step": 21330 }, { "epoch": 1.1682089470514154, "grad_norm": 0.1360819935798645, "learning_rate": 3.3922125329547763e-05, "loss": 0.2085, "step": 21335 }, { "epoch": 1.1684827246345069, "grad_norm": 0.15064647793769836, "learning_rate": 3.391705536402353e-05, "loss": 0.1949, "step": 21340 }, { "epoch": 1.1687565022175983, "grad_norm": 0.12868733704090118, "learning_rate": 3.391198539849929e-05, "loss": 0.206, "step": 21345 }, { "epoch": 1.16903027980069, "grad_norm": 0.12466838955879211, "learning_rate": 3.390691543297505e-05, "loss": 0.2028, "step": 21350 }, { "epoch": 1.1693040573837814, "grad_norm": 0.14938849210739136, "learning_rate": 3.3901845467450823e-05, "loss": 0.2051, "step": 21355 }, { "epoch": 1.1695778349668728, "grad_norm": 0.12798742949962616, "learning_rate": 3.389677550192659e-05, "loss": 0.1983, "step": 21360 }, { "epoch": 1.1698516125499645, "grad_norm": 0.15165863931179047, "learning_rate": 3.389170553640235e-05, "loss": 0.2072, "step": 21365 }, { "epoch": 1.170125390133056, "grad_norm": 0.15674446523189545, "learning_rate": 3.388663557087812e-05, "loss": 0.2072, "step": 21370 }, { "epoch": 1.1703991677161474, "grad_norm": 0.14649920165538788, "learning_rate": 3.388156560535389e-05, "loss": 0.2003, "step": 21375 }, { "epoch": 1.1706729452992388, "grad_norm": 0.15970122814178467, "learning_rate": 3.3876495639829653e-05, "loss": 0.2041, "step": 21380 }, { "epoch": 1.1709467228823305, "grad_norm": 0.14529716968536377, "learning_rate": 3.387142567430542e-05, "loss": 0.2068, "step": 21385 }, { "epoch": 1.171220500465422, "grad_norm": 0.13538731634616852, "learning_rate": 3.386635570878118e-05, "loss": 0.2061, "step": 21390 }, { "epoch": 1.1714942780485134, "grad_norm": 0.13443708419799805, "learning_rate": 3.386128574325695e-05, "loss": 0.2061, "step": 21395 }, { "epoch": 1.1717680556316048, "grad_norm": 0.14023815095424652, "learning_rate": 3.3856215777732713e-05, "loss": 0.2147, "step": 21400 }, { "epoch": 1.1720418332146965, "grad_norm": 0.13763217628002167, "learning_rate": 3.385114581220848e-05, "loss": 0.2023, "step": 21405 }, { "epoch": 1.172315610797788, "grad_norm": 0.13571085035800934, "learning_rate": 3.384607584668425e-05, "loss": 0.2082, "step": 21410 }, { "epoch": 1.1725893883808793, "grad_norm": 0.1495649218559265, "learning_rate": 3.384100588116001e-05, "loss": 0.2066, "step": 21415 }, { "epoch": 1.1728631659639708, "grad_norm": 0.13767145574092865, "learning_rate": 3.3835935915635773e-05, "loss": 0.21, "step": 21420 }, { "epoch": 1.1731369435470624, "grad_norm": 0.13686911761760712, "learning_rate": 3.383086595011154e-05, "loss": 0.1988, "step": 21425 }, { "epoch": 1.1734107211301539, "grad_norm": 0.1420099288225174, "learning_rate": 3.382579598458731e-05, "loss": 0.2121, "step": 21430 }, { "epoch": 1.1736844987132453, "grad_norm": 0.13371142745018005, "learning_rate": 3.382072601906307e-05, "loss": 0.2024, "step": 21435 }, { "epoch": 1.1739582762963368, "grad_norm": 0.1428297609090805, "learning_rate": 3.3815656053538833e-05, "loss": 0.2123, "step": 21440 }, { "epoch": 1.1742320538794284, "grad_norm": 0.11614900827407837, "learning_rate": 3.3810586088014604e-05, "loss": 0.2122, "step": 21445 }, { "epoch": 1.1745058314625199, "grad_norm": 0.12243736535310745, "learning_rate": 3.380551612249037e-05, "loss": 0.2046, "step": 21450 }, { "epoch": 1.1747796090456113, "grad_norm": 0.1293898969888687, "learning_rate": 3.380044615696614e-05, "loss": 0.2148, "step": 21455 }, { "epoch": 1.175053386628703, "grad_norm": 0.1327681690454483, "learning_rate": 3.37953761914419e-05, "loss": 0.2088, "step": 21460 }, { "epoch": 1.1753271642117944, "grad_norm": 0.14320796728134155, "learning_rate": 3.379030622591767e-05, "loss": 0.2008, "step": 21465 }, { "epoch": 1.1756009417948858, "grad_norm": 0.15565617382526398, "learning_rate": 3.3785236260393434e-05, "loss": 0.2014, "step": 21470 }, { "epoch": 1.1758747193779773, "grad_norm": 0.12814101576805115, "learning_rate": 3.37801662948692e-05, "loss": 0.2054, "step": 21475 }, { "epoch": 1.1761484969610687, "grad_norm": 0.11852279305458069, "learning_rate": 3.377509632934496e-05, "loss": 0.2009, "step": 21480 }, { "epoch": 1.1764222745441604, "grad_norm": 0.12390473484992981, "learning_rate": 3.377002636382073e-05, "loss": 0.2043, "step": 21485 }, { "epoch": 1.1766960521272518, "grad_norm": 0.1366187036037445, "learning_rate": 3.3764956398296494e-05, "loss": 0.2104, "step": 21490 }, { "epoch": 1.1769698297103433, "grad_norm": 0.13436608016490936, "learning_rate": 3.375988643277226e-05, "loss": 0.2082, "step": 21495 }, { "epoch": 1.177243607293435, "grad_norm": 0.14914894104003906, "learning_rate": 3.375481646724803e-05, "loss": 0.2095, "step": 21500 }, { "epoch": 1.1775173848765264, "grad_norm": 0.11931350827217102, "learning_rate": 3.374974650172379e-05, "loss": 0.2018, "step": 21505 }, { "epoch": 1.1777911624596178, "grad_norm": 0.17121830582618713, "learning_rate": 3.3744676536199554e-05, "loss": 0.2085, "step": 21510 }, { "epoch": 1.1780649400427092, "grad_norm": 0.17728231847286224, "learning_rate": 3.373960657067532e-05, "loss": 0.2158, "step": 21515 }, { "epoch": 1.178338717625801, "grad_norm": 0.1667204648256302, "learning_rate": 3.373453660515109e-05, "loss": 0.2031, "step": 21520 }, { "epoch": 1.1786124952088923, "grad_norm": 0.1492314636707306, "learning_rate": 3.372946663962685e-05, "loss": 0.2078, "step": 21525 }, { "epoch": 1.1788862727919838, "grad_norm": 0.13354331254959106, "learning_rate": 3.3724396674102614e-05, "loss": 0.2148, "step": 21530 }, { "epoch": 1.1791600503750752, "grad_norm": 0.15659500658512115, "learning_rate": 3.3719326708578384e-05, "loss": 0.2057, "step": 21535 }, { "epoch": 1.1794338279581669, "grad_norm": 0.11767139285802841, "learning_rate": 3.3714256743054154e-05, "loss": 0.2131, "step": 21540 }, { "epoch": 1.1797076055412583, "grad_norm": 0.13573437929153442, "learning_rate": 3.370918677752992e-05, "loss": 0.1998, "step": 21545 }, { "epoch": 1.1799813831243497, "grad_norm": 0.14394348859786987, "learning_rate": 3.370411681200568e-05, "loss": 0.2109, "step": 21550 }, { "epoch": 1.1802551607074412, "grad_norm": 0.13432545959949493, "learning_rate": 3.3699046846481444e-05, "loss": 0.2139, "step": 21555 }, { "epoch": 1.1805289382905328, "grad_norm": 0.1346694529056549, "learning_rate": 3.3693976880957214e-05, "loss": 0.2167, "step": 21560 }, { "epoch": 1.1808027158736243, "grad_norm": 0.1274946630001068, "learning_rate": 3.368890691543298e-05, "loss": 0.2036, "step": 21565 }, { "epoch": 1.1810764934567157, "grad_norm": 0.1446676403284073, "learning_rate": 3.368383694990874e-05, "loss": 0.2192, "step": 21570 }, { "epoch": 1.1813502710398072, "grad_norm": 0.13914406299591064, "learning_rate": 3.367876698438451e-05, "loss": 0.2071, "step": 21575 }, { "epoch": 1.1816240486228988, "grad_norm": 0.1139739528298378, "learning_rate": 3.3673697018860274e-05, "loss": 0.2008, "step": 21580 }, { "epoch": 1.1818978262059903, "grad_norm": 0.14969941973686218, "learning_rate": 3.366862705333604e-05, "loss": 0.22, "step": 21585 }, { "epoch": 1.1821716037890817, "grad_norm": 0.12419084459543228, "learning_rate": 3.36635570878118e-05, "loss": 0.2139, "step": 21590 }, { "epoch": 1.1824453813721734, "grad_norm": 0.13191048800945282, "learning_rate": 3.365848712228757e-05, "loss": 0.2136, "step": 21595 }, { "epoch": 1.1827191589552648, "grad_norm": 0.14226342737674713, "learning_rate": 3.3653417156763334e-05, "loss": 0.2088, "step": 21600 }, { "epoch": 1.1829929365383562, "grad_norm": 0.13855943083763123, "learning_rate": 3.36483471912391e-05, "loss": 0.2009, "step": 21605 }, { "epoch": 1.1832667141214477, "grad_norm": 0.13786624372005463, "learning_rate": 3.364327722571487e-05, "loss": 0.2099, "step": 21610 }, { "epoch": 1.1835404917045391, "grad_norm": 0.12910324335098267, "learning_rate": 3.363820726019064e-05, "loss": 0.2071, "step": 21615 }, { "epoch": 1.1838142692876308, "grad_norm": 0.10574104636907578, "learning_rate": 3.36331372946664e-05, "loss": 0.1962, "step": 21620 }, { "epoch": 1.1840880468707222, "grad_norm": 0.12637928128242493, "learning_rate": 3.3628067329142164e-05, "loss": 0.2, "step": 21625 }, { "epoch": 1.1843618244538137, "grad_norm": 0.11801373213529587, "learning_rate": 3.3622997363617934e-05, "loss": 0.2135, "step": 21630 }, { "epoch": 1.1846356020369053, "grad_norm": 0.17518050968647003, "learning_rate": 3.36179273980937e-05, "loss": 0.2107, "step": 21635 }, { "epoch": 1.1849093796199968, "grad_norm": 0.11892491579055786, "learning_rate": 3.361285743256946e-05, "loss": 0.2037, "step": 21640 }, { "epoch": 1.1851831572030882, "grad_norm": 0.1437147706747055, "learning_rate": 3.3607787467045224e-05, "loss": 0.2071, "step": 21645 }, { "epoch": 1.1854569347861796, "grad_norm": 0.13943080604076385, "learning_rate": 3.3602717501520994e-05, "loss": 0.205, "step": 21650 }, { "epoch": 1.1857307123692713, "grad_norm": 0.13151662051677704, "learning_rate": 3.359764753599676e-05, "loss": 0.2017, "step": 21655 }, { "epoch": 1.1860044899523627, "grad_norm": 0.13298647105693817, "learning_rate": 3.359257757047252e-05, "loss": 0.2095, "step": 21660 }, { "epoch": 1.1862782675354542, "grad_norm": 0.13840843737125397, "learning_rate": 3.358750760494829e-05, "loss": 0.2073, "step": 21665 }, { "epoch": 1.1865520451185456, "grad_norm": 0.12959349155426025, "learning_rate": 3.3582437639424054e-05, "loss": 0.2043, "step": 21670 }, { "epoch": 1.1868258227016373, "grad_norm": 0.16629375517368317, "learning_rate": 3.357736767389982e-05, "loss": 0.2083, "step": 21675 }, { "epoch": 1.1870996002847287, "grad_norm": 0.13847146928310394, "learning_rate": 3.357229770837558e-05, "loss": 0.2067, "step": 21680 }, { "epoch": 1.1873733778678202, "grad_norm": 0.12497575581073761, "learning_rate": 3.356722774285135e-05, "loss": 0.2086, "step": 21685 }, { "epoch": 1.1876471554509116, "grad_norm": 0.16079171001911163, "learning_rate": 3.3562157777327114e-05, "loss": 0.2027, "step": 21690 }, { "epoch": 1.1879209330340033, "grad_norm": 0.1421753317117691, "learning_rate": 3.355708781180288e-05, "loss": 0.2112, "step": 21695 }, { "epoch": 1.1881947106170947, "grad_norm": 0.1453370749950409, "learning_rate": 3.355201784627865e-05, "loss": 0.2046, "step": 21700 }, { "epoch": 1.1884684882001861, "grad_norm": 0.13079603016376495, "learning_rate": 3.354694788075442e-05, "loss": 0.2085, "step": 21705 }, { "epoch": 1.1887422657832776, "grad_norm": 0.1412345916032791, "learning_rate": 3.354187791523018e-05, "loss": 0.2136, "step": 21710 }, { "epoch": 1.1890160433663692, "grad_norm": 0.13467390835285187, "learning_rate": 3.3536807949705944e-05, "loss": 0.1976, "step": 21715 }, { "epoch": 1.1892898209494607, "grad_norm": 0.1343894749879837, "learning_rate": 3.353173798418171e-05, "loss": 0.1989, "step": 21720 }, { "epoch": 1.1895635985325521, "grad_norm": 0.12894591689109802, "learning_rate": 3.352666801865748e-05, "loss": 0.2008, "step": 21725 }, { "epoch": 1.1898373761156438, "grad_norm": 0.14232702553272247, "learning_rate": 3.352159805313324e-05, "loss": 0.2007, "step": 21730 }, { "epoch": 1.1901111536987352, "grad_norm": 0.15683013200759888, "learning_rate": 3.3516528087609004e-05, "loss": 0.2147, "step": 21735 }, { "epoch": 1.1903849312818267, "grad_norm": 0.12513233721256256, "learning_rate": 3.3511458122084774e-05, "loss": 0.1986, "step": 21740 }, { "epoch": 1.190658708864918, "grad_norm": 0.15100215375423431, "learning_rate": 3.350638815656054e-05, "loss": 0.2062, "step": 21745 }, { "epoch": 1.1909324864480095, "grad_norm": 0.1319979429244995, "learning_rate": 3.35013181910363e-05, "loss": 0.2073, "step": 21750 }, { "epoch": 1.1912062640311012, "grad_norm": 0.11539535224437714, "learning_rate": 3.3496248225512064e-05, "loss": 0.2075, "step": 21755 }, { "epoch": 1.1914800416141926, "grad_norm": 0.17017947137355804, "learning_rate": 3.3491178259987834e-05, "loss": 0.2018, "step": 21760 }, { "epoch": 1.191753819197284, "grad_norm": 0.14147864282131195, "learning_rate": 3.34861082944636e-05, "loss": 0.2082, "step": 21765 }, { "epoch": 1.1920275967803757, "grad_norm": 0.12959668040275574, "learning_rate": 3.348103832893936e-05, "loss": 0.2083, "step": 21770 }, { "epoch": 1.1923013743634672, "grad_norm": 0.13276419043540955, "learning_rate": 3.347596836341513e-05, "loss": 0.2026, "step": 21775 }, { "epoch": 1.1925751519465586, "grad_norm": 0.13849692046642303, "learning_rate": 3.34708983978909e-05, "loss": 0.2011, "step": 21780 }, { "epoch": 1.19284892952965, "grad_norm": 0.14986655116081238, "learning_rate": 3.3465828432366664e-05, "loss": 0.2083, "step": 21785 }, { "epoch": 1.1931227071127415, "grad_norm": 0.13082487881183624, "learning_rate": 3.346075846684243e-05, "loss": 0.2026, "step": 21790 }, { "epoch": 1.1933964846958331, "grad_norm": 0.14008723199367523, "learning_rate": 3.34556885013182e-05, "loss": 0.2022, "step": 21795 }, { "epoch": 1.1936702622789246, "grad_norm": 0.14675180613994598, "learning_rate": 3.345061853579396e-05, "loss": 0.201, "step": 21800 }, { "epoch": 1.193944039862016, "grad_norm": 0.1573682576417923, "learning_rate": 3.3445548570269724e-05, "loss": 0.202, "step": 21805 }, { "epoch": 1.1942178174451077, "grad_norm": 0.12577444314956665, "learning_rate": 3.344047860474549e-05, "loss": 0.2073, "step": 21810 }, { "epoch": 1.1944915950281991, "grad_norm": 0.13478082418441772, "learning_rate": 3.343540863922126e-05, "loss": 0.2084, "step": 21815 }, { "epoch": 1.1947653726112906, "grad_norm": 0.13246804475784302, "learning_rate": 3.343033867369702e-05, "loss": 0.2199, "step": 21820 }, { "epoch": 1.195039150194382, "grad_norm": 0.12135814130306244, "learning_rate": 3.3425268708172784e-05, "loss": 0.2043, "step": 21825 }, { "epoch": 1.1953129277774737, "grad_norm": 0.12231235206127167, "learning_rate": 3.342019874264855e-05, "loss": 0.1995, "step": 21830 }, { "epoch": 1.195586705360565, "grad_norm": 0.11623754352331161, "learning_rate": 3.341512877712432e-05, "loss": 0.2034, "step": 21835 }, { "epoch": 1.1958604829436565, "grad_norm": 0.12322402745485306, "learning_rate": 3.341005881160008e-05, "loss": 0.2108, "step": 21840 }, { "epoch": 1.196134260526748, "grad_norm": 0.12024208903312683, "learning_rate": 3.3404988846075844e-05, "loss": 0.1969, "step": 21845 }, { "epoch": 1.1964080381098396, "grad_norm": 0.15071028470993042, "learning_rate": 3.3399918880551614e-05, "loss": 0.2087, "step": 21850 }, { "epoch": 1.196681815692931, "grad_norm": 0.15217819809913635, "learning_rate": 3.339484891502738e-05, "loss": 0.2002, "step": 21855 }, { "epoch": 1.1969555932760225, "grad_norm": 0.13614697754383087, "learning_rate": 3.338977894950315e-05, "loss": 0.21, "step": 21860 }, { "epoch": 1.197229370859114, "grad_norm": 0.13461169600486755, "learning_rate": 3.338470898397891e-05, "loss": 0.207, "step": 21865 }, { "epoch": 1.1975031484422056, "grad_norm": 0.12449248135089874, "learning_rate": 3.337963901845468e-05, "loss": 0.2012, "step": 21870 }, { "epoch": 1.197776926025297, "grad_norm": 0.11989214271306992, "learning_rate": 3.3374569052930444e-05, "loss": 0.205, "step": 21875 }, { "epoch": 1.1980507036083885, "grad_norm": 0.1354391723871231, "learning_rate": 3.336949908740621e-05, "loss": 0.2056, "step": 21880 }, { "epoch": 1.19832448119148, "grad_norm": 0.11882974207401276, "learning_rate": 3.336442912188197e-05, "loss": 0.2069, "step": 21885 }, { "epoch": 1.1985982587745716, "grad_norm": 0.12114046514034271, "learning_rate": 3.335935915635774e-05, "loss": 0.2038, "step": 21890 }, { "epoch": 1.198872036357663, "grad_norm": 0.12832513451576233, "learning_rate": 3.3354289190833504e-05, "loss": 0.2047, "step": 21895 }, { "epoch": 1.1991458139407545, "grad_norm": 0.1262146234512329, "learning_rate": 3.334921922530927e-05, "loss": 0.2029, "step": 21900 }, { "epoch": 1.1994195915238461, "grad_norm": 0.13144487142562866, "learning_rate": 3.334414925978504e-05, "loss": 0.2059, "step": 21905 }, { "epoch": 1.1996933691069376, "grad_norm": 0.13212837278842926, "learning_rate": 3.33390792942608e-05, "loss": 0.2075, "step": 21910 }, { "epoch": 1.199967146690029, "grad_norm": 0.12171527743339539, "learning_rate": 3.3334009328736564e-05, "loss": 0.2059, "step": 21915 }, { "epoch": 1.2002409242731205, "grad_norm": 0.12860536575317383, "learning_rate": 3.332893936321233e-05, "loss": 0.1987, "step": 21920 }, { "epoch": 1.200514701856212, "grad_norm": 0.13002733886241913, "learning_rate": 3.33238693976881e-05, "loss": 0.2066, "step": 21925 }, { "epoch": 1.2007884794393036, "grad_norm": 0.12926389276981354, "learning_rate": 3.331879943216386e-05, "loss": 0.2011, "step": 21930 }, { "epoch": 1.201062257022395, "grad_norm": 0.12939342856407166, "learning_rate": 3.3313729466639624e-05, "loss": 0.2001, "step": 21935 }, { "epoch": 1.2013360346054864, "grad_norm": 0.14668408036231995, "learning_rate": 3.3308659501115394e-05, "loss": 0.2016, "step": 21940 }, { "epoch": 1.201609812188578, "grad_norm": 0.14006276428699493, "learning_rate": 3.3303589535591164e-05, "loss": 0.2123, "step": 21945 }, { "epoch": 1.2018835897716695, "grad_norm": 0.13781502842903137, "learning_rate": 3.329851957006693e-05, "loss": 0.2086, "step": 21950 }, { "epoch": 1.202157367354761, "grad_norm": 0.12471330165863037, "learning_rate": 3.329344960454269e-05, "loss": 0.2059, "step": 21955 }, { "epoch": 1.2024311449378524, "grad_norm": 0.1904207020998001, "learning_rate": 3.3288379639018454e-05, "loss": 0.2133, "step": 21960 }, { "epoch": 1.202704922520944, "grad_norm": 0.1766560673713684, "learning_rate": 3.3283309673494224e-05, "loss": 0.2072, "step": 21965 }, { "epoch": 1.2029787001040355, "grad_norm": 0.1642163097858429, "learning_rate": 3.327823970796999e-05, "loss": 0.2074, "step": 21970 }, { "epoch": 1.203252477687127, "grad_norm": 0.1456887573003769, "learning_rate": 3.327316974244575e-05, "loss": 0.201, "step": 21975 }, { "epoch": 1.2035262552702184, "grad_norm": 0.11677060276269913, "learning_rate": 3.326809977692152e-05, "loss": 0.1928, "step": 21980 }, { "epoch": 1.20380003285331, "grad_norm": 0.11447372287511826, "learning_rate": 3.3263029811397284e-05, "loss": 0.1995, "step": 21985 }, { "epoch": 1.2040738104364015, "grad_norm": 0.12227175384759903, "learning_rate": 3.325795984587305e-05, "loss": 0.2101, "step": 21990 }, { "epoch": 1.204347588019493, "grad_norm": 0.14395055174827576, "learning_rate": 3.325288988034881e-05, "loss": 0.1946, "step": 21995 }, { "epoch": 1.2046213656025844, "grad_norm": 0.13499018549919128, "learning_rate": 3.324781991482458e-05, "loss": 0.2071, "step": 22000 }, { "epoch": 1.204895143185676, "grad_norm": 0.11808647215366364, "learning_rate": 3.3242749949300344e-05, "loss": 0.2099, "step": 22005 }, { "epoch": 1.2051689207687675, "grad_norm": 0.13662764430046082, "learning_rate": 3.323767998377611e-05, "loss": 0.2166, "step": 22010 }, { "epoch": 1.205442698351859, "grad_norm": 0.15164560079574585, "learning_rate": 3.323261001825188e-05, "loss": 0.2112, "step": 22015 }, { "epoch": 1.2057164759349503, "grad_norm": 0.13650591671466827, "learning_rate": 3.322754005272764e-05, "loss": 0.2049, "step": 22020 }, { "epoch": 1.205990253518042, "grad_norm": 0.12332616746425629, "learning_rate": 3.322247008720341e-05, "loss": 0.2074, "step": 22025 }, { "epoch": 1.2062640311011334, "grad_norm": 0.13085311651229858, "learning_rate": 3.3217400121679174e-05, "loss": 0.2103, "step": 22030 }, { "epoch": 1.2065378086842249, "grad_norm": 0.15177467465400696, "learning_rate": 3.3212330156154945e-05, "loss": 0.2002, "step": 22035 }, { "epoch": 1.2068115862673165, "grad_norm": 0.1494828462600708, "learning_rate": 3.320726019063071e-05, "loss": 0.21, "step": 22040 }, { "epoch": 1.207085363850408, "grad_norm": 0.1459151655435562, "learning_rate": 3.320219022510647e-05, "loss": 0.2057, "step": 22045 }, { "epoch": 1.2073591414334994, "grad_norm": 0.14143183827400208, "learning_rate": 3.3197120259582234e-05, "loss": 0.2106, "step": 22050 }, { "epoch": 1.2076329190165909, "grad_norm": 0.11883697658777237, "learning_rate": 3.3192050294058005e-05, "loss": 0.2011, "step": 22055 }, { "epoch": 1.2079066965996823, "grad_norm": 0.13699141144752502, "learning_rate": 3.318698032853377e-05, "loss": 0.198, "step": 22060 }, { "epoch": 1.208180474182774, "grad_norm": 0.12692895531654358, "learning_rate": 3.318191036300953e-05, "loss": 0.2031, "step": 22065 }, { "epoch": 1.2084542517658654, "grad_norm": 0.1288258284330368, "learning_rate": 3.31768403974853e-05, "loss": 0.2086, "step": 22070 }, { "epoch": 1.2087280293489568, "grad_norm": 0.14448903501033783, "learning_rate": 3.3171770431961065e-05, "loss": 0.2018, "step": 22075 }, { "epoch": 1.2090018069320485, "grad_norm": 0.13552047312259674, "learning_rate": 3.316670046643683e-05, "loss": 0.2108, "step": 22080 }, { "epoch": 1.20927558451514, "grad_norm": 0.14488232135772705, "learning_rate": 3.316163050091259e-05, "loss": 0.2067, "step": 22085 }, { "epoch": 1.2095493620982314, "grad_norm": 0.12890276312828064, "learning_rate": 3.315656053538836e-05, "loss": 0.2132, "step": 22090 }, { "epoch": 1.2098231396813228, "grad_norm": 0.17550082504749298, "learning_rate": 3.3151490569864125e-05, "loss": 0.208, "step": 22095 }, { "epoch": 1.2100969172644145, "grad_norm": 0.12538187205791473, "learning_rate": 3.314642060433989e-05, "loss": 0.2001, "step": 22100 }, { "epoch": 1.210370694847506, "grad_norm": 0.14942552149295807, "learning_rate": 3.314135063881566e-05, "loss": 0.2045, "step": 22105 }, { "epoch": 1.2106444724305974, "grad_norm": 0.11544745415449142, "learning_rate": 3.313628067329143e-05, "loss": 0.1921, "step": 22110 }, { "epoch": 1.2109182500136888, "grad_norm": 0.12970781326293945, "learning_rate": 3.313121070776719e-05, "loss": 0.2101, "step": 22115 }, { "epoch": 1.2111920275967805, "grad_norm": 0.1500472128391266, "learning_rate": 3.3126140742242955e-05, "loss": 0.2045, "step": 22120 }, { "epoch": 1.211465805179872, "grad_norm": 0.1228092834353447, "learning_rate": 3.312107077671872e-05, "loss": 0.1997, "step": 22125 }, { "epoch": 1.2117395827629633, "grad_norm": 0.13298626244068146, "learning_rate": 3.311600081119449e-05, "loss": 0.2025, "step": 22130 }, { "epoch": 1.2120133603460548, "grad_norm": 0.15953639149665833, "learning_rate": 3.311093084567025e-05, "loss": 0.2165, "step": 22135 }, { "epoch": 1.2122871379291464, "grad_norm": 0.18351112306118011, "learning_rate": 3.3105860880146015e-05, "loss": 0.2089, "step": 22140 }, { "epoch": 1.2125609155122379, "grad_norm": 0.14113959670066833, "learning_rate": 3.3100790914621785e-05, "loss": 0.1996, "step": 22145 }, { "epoch": 1.2128346930953293, "grad_norm": 0.19259488582611084, "learning_rate": 3.309572094909755e-05, "loss": 0.2074, "step": 22150 }, { "epoch": 1.2131084706784208, "grad_norm": 0.12945441901683807, "learning_rate": 3.309065098357331e-05, "loss": 0.2209, "step": 22155 }, { "epoch": 1.2133822482615124, "grad_norm": 0.11557850241661072, "learning_rate": 3.3085581018049075e-05, "loss": 0.19, "step": 22160 }, { "epoch": 1.2136560258446039, "grad_norm": 0.13884888589382172, "learning_rate": 3.3080511052524845e-05, "loss": 0.2078, "step": 22165 }, { "epoch": 1.2139298034276953, "grad_norm": 0.19758735597133636, "learning_rate": 3.307544108700061e-05, "loss": 0.2125, "step": 22170 }, { "epoch": 1.214203581010787, "grad_norm": 0.13026009500026703, "learning_rate": 3.307037112147637e-05, "loss": 0.1981, "step": 22175 }, { "epoch": 1.2144773585938784, "grad_norm": 0.12577217817306519, "learning_rate": 3.306530115595214e-05, "loss": 0.2038, "step": 22180 }, { "epoch": 1.2147511361769698, "grad_norm": 0.12148655205965042, "learning_rate": 3.306023119042791e-05, "loss": 0.2002, "step": 22185 }, { "epoch": 1.2150249137600613, "grad_norm": 0.13176971673965454, "learning_rate": 3.3055161224903675e-05, "loss": 0.2043, "step": 22190 }, { "epoch": 1.2152986913431527, "grad_norm": 0.11932389438152313, "learning_rate": 3.305009125937944e-05, "loss": 0.2025, "step": 22195 }, { "epoch": 1.2155724689262444, "grad_norm": 0.13650546967983246, "learning_rate": 3.304502129385521e-05, "loss": 0.2052, "step": 22200 }, { "epoch": 1.2158462465093358, "grad_norm": 0.1709403544664383, "learning_rate": 3.303995132833097e-05, "loss": 0.2124, "step": 22205 }, { "epoch": 1.2161200240924273, "grad_norm": 0.12095949053764343, "learning_rate": 3.3034881362806735e-05, "loss": 0.2002, "step": 22210 }, { "epoch": 1.216393801675519, "grad_norm": 0.14063581824302673, "learning_rate": 3.30298113972825e-05, "loss": 0.2029, "step": 22215 }, { "epoch": 1.2166675792586104, "grad_norm": 0.15020759403705597, "learning_rate": 3.302474143175827e-05, "loss": 0.2078, "step": 22220 }, { "epoch": 1.2169413568417018, "grad_norm": 0.12004609405994415, "learning_rate": 3.301967146623403e-05, "loss": 0.2026, "step": 22225 }, { "epoch": 1.2172151344247932, "grad_norm": 0.14218354225158691, "learning_rate": 3.3014601500709795e-05, "loss": 0.2059, "step": 22230 }, { "epoch": 1.2174889120078847, "grad_norm": 0.14021757245063782, "learning_rate": 3.3009531535185565e-05, "loss": 0.2095, "step": 22235 }, { "epoch": 1.2177626895909763, "grad_norm": 0.15895240008831024, "learning_rate": 3.300446156966133e-05, "loss": 0.2058, "step": 22240 }, { "epoch": 1.2180364671740678, "grad_norm": 0.14183107018470764, "learning_rate": 3.299939160413709e-05, "loss": 0.2088, "step": 22245 }, { "epoch": 1.2183102447571592, "grad_norm": 0.12519483268260956, "learning_rate": 3.2994321638612855e-05, "loss": 0.2023, "step": 22250 }, { "epoch": 1.2185840223402509, "grad_norm": 0.1292644590139389, "learning_rate": 3.2989251673088625e-05, "loss": 0.2058, "step": 22255 }, { "epoch": 1.2188577999233423, "grad_norm": 0.12721765041351318, "learning_rate": 3.298418170756439e-05, "loss": 0.2104, "step": 22260 }, { "epoch": 1.2191315775064338, "grad_norm": 0.14846237003803253, "learning_rate": 3.297911174204015e-05, "loss": 0.2157, "step": 22265 }, { "epoch": 1.2194053550895252, "grad_norm": 0.1447577327489853, "learning_rate": 3.297404177651592e-05, "loss": 0.2037, "step": 22270 }, { "epoch": 1.2196791326726169, "grad_norm": 0.1327894628047943, "learning_rate": 3.296897181099169e-05, "loss": 0.206, "step": 22275 }, { "epoch": 1.2199529102557083, "grad_norm": 0.12241573631763458, "learning_rate": 3.2963901845467455e-05, "loss": 0.1959, "step": 22280 }, { "epoch": 1.2202266878387997, "grad_norm": 0.14263926446437836, "learning_rate": 3.295883187994322e-05, "loss": 0.2032, "step": 22285 }, { "epoch": 1.2205004654218912, "grad_norm": 0.1136494055390358, "learning_rate": 3.295376191441898e-05, "loss": 0.2081, "step": 22290 }, { "epoch": 1.2207742430049828, "grad_norm": 0.14278040826320648, "learning_rate": 3.294869194889475e-05, "loss": 0.2031, "step": 22295 }, { "epoch": 1.2210480205880743, "grad_norm": 0.11312547326087952, "learning_rate": 3.2943621983370515e-05, "loss": 0.204, "step": 22300 }, { "epoch": 1.2213217981711657, "grad_norm": 0.14265653491020203, "learning_rate": 3.293855201784628e-05, "loss": 0.1968, "step": 22305 }, { "epoch": 1.2215955757542571, "grad_norm": 0.19318418204784393, "learning_rate": 3.293348205232205e-05, "loss": 0.2057, "step": 22310 }, { "epoch": 1.2218693533373488, "grad_norm": 0.14244884252548218, "learning_rate": 3.292841208679781e-05, "loss": 0.2068, "step": 22315 }, { "epoch": 1.2221431309204402, "grad_norm": 0.15322016179561615, "learning_rate": 3.2923342121273575e-05, "loss": 0.2107, "step": 22320 }, { "epoch": 1.2224169085035317, "grad_norm": 0.1318577080965042, "learning_rate": 3.291827215574934e-05, "loss": 0.2054, "step": 22325 }, { "epoch": 1.2226906860866231, "grad_norm": 0.13562272489070892, "learning_rate": 3.291320219022511e-05, "loss": 0.2141, "step": 22330 }, { "epoch": 1.2229644636697148, "grad_norm": 0.1341470330953598, "learning_rate": 3.290813222470087e-05, "loss": 0.2079, "step": 22335 }, { "epoch": 1.2232382412528062, "grad_norm": 0.1408136487007141, "learning_rate": 3.2903062259176635e-05, "loss": 0.2018, "step": 22340 }, { "epoch": 1.2235120188358977, "grad_norm": 0.13513168692588806, "learning_rate": 3.2897992293652405e-05, "loss": 0.1962, "step": 22345 }, { "epoch": 1.2237857964189893, "grad_norm": 0.15267328917980194, "learning_rate": 3.2892922328128175e-05, "loss": 0.203, "step": 22350 }, { "epoch": 1.2240595740020808, "grad_norm": 0.11815514415502548, "learning_rate": 3.288785236260394e-05, "loss": 0.1988, "step": 22355 }, { "epoch": 1.2243333515851722, "grad_norm": 0.16015607118606567, "learning_rate": 3.28827823970797e-05, "loss": 0.2114, "step": 22360 }, { "epoch": 1.2246071291682636, "grad_norm": 0.13988728821277618, "learning_rate": 3.287771243155547e-05, "loss": 0.2002, "step": 22365 }, { "epoch": 1.224880906751355, "grad_norm": 0.14857536554336548, "learning_rate": 3.2872642466031235e-05, "loss": 0.196, "step": 22370 }, { "epoch": 1.2251546843344467, "grad_norm": 0.15320563316345215, "learning_rate": 3.2867572500507e-05, "loss": 0.2016, "step": 22375 }, { "epoch": 1.2254284619175382, "grad_norm": 0.12398382276296616, "learning_rate": 3.286250253498276e-05, "loss": 0.2024, "step": 22380 }, { "epoch": 1.2257022395006296, "grad_norm": 0.11384514719247818, "learning_rate": 3.285743256945853e-05, "loss": 0.1968, "step": 22385 }, { "epoch": 1.2259760170837213, "grad_norm": 0.12962566316127777, "learning_rate": 3.2852362603934295e-05, "loss": 0.2077, "step": 22390 }, { "epoch": 1.2262497946668127, "grad_norm": 0.131920725107193, "learning_rate": 3.284729263841006e-05, "loss": 0.2035, "step": 22395 }, { "epoch": 1.2265235722499042, "grad_norm": 0.15527550876140594, "learning_rate": 3.284222267288583e-05, "loss": 0.2149, "step": 22400 }, { "epoch": 1.2267973498329956, "grad_norm": 0.14809049665927887, "learning_rate": 3.283715270736159e-05, "loss": 0.2104, "step": 22405 }, { "epoch": 1.2270711274160873, "grad_norm": 0.16305212676525116, "learning_rate": 3.2832082741837355e-05, "loss": 0.208, "step": 22410 }, { "epoch": 1.2273449049991787, "grad_norm": 0.14507633447647095, "learning_rate": 3.282701277631312e-05, "loss": 0.2026, "step": 22415 }, { "epoch": 1.2276186825822701, "grad_norm": 0.1339862048625946, "learning_rate": 3.282194281078889e-05, "loss": 0.2011, "step": 22420 }, { "epoch": 1.2278924601653616, "grad_norm": 0.12717600166797638, "learning_rate": 3.281687284526465e-05, "loss": 0.2063, "step": 22425 }, { "epoch": 1.2281662377484532, "grad_norm": 0.1390095353126526, "learning_rate": 3.281180287974042e-05, "loss": 0.2068, "step": 22430 }, { "epoch": 1.2284400153315447, "grad_norm": 0.14103761315345764, "learning_rate": 3.2806732914216185e-05, "loss": 0.1979, "step": 22435 }, { "epoch": 1.2287137929146361, "grad_norm": 0.15382634103298187, "learning_rate": 3.2801662948691955e-05, "loss": 0.216, "step": 22440 }, { "epoch": 1.2289875704977276, "grad_norm": 0.14969684183597565, "learning_rate": 3.279659298316772e-05, "loss": 0.2056, "step": 22445 }, { "epoch": 1.2292613480808192, "grad_norm": 0.11773042380809784, "learning_rate": 3.279152301764348e-05, "loss": 0.2154, "step": 22450 }, { "epoch": 1.2295351256639107, "grad_norm": 0.15424373745918274, "learning_rate": 3.2786453052119245e-05, "loss": 0.201, "step": 22455 }, { "epoch": 1.229808903247002, "grad_norm": 0.1316666454076767, "learning_rate": 3.2781383086595015e-05, "loss": 0.2058, "step": 22460 }, { "epoch": 1.2300826808300935, "grad_norm": 0.16442932188510895, "learning_rate": 3.277631312107078e-05, "loss": 0.2142, "step": 22465 }, { "epoch": 1.2303564584131852, "grad_norm": 0.1385732889175415, "learning_rate": 3.277124315554654e-05, "loss": 0.205, "step": 22470 }, { "epoch": 1.2306302359962766, "grad_norm": 0.11571764945983887, "learning_rate": 3.276617319002231e-05, "loss": 0.1986, "step": 22475 }, { "epoch": 1.230904013579368, "grad_norm": 0.1579684019088745, "learning_rate": 3.2761103224498075e-05, "loss": 0.2037, "step": 22480 }, { "epoch": 1.2311777911624597, "grad_norm": 0.1553940325975418, "learning_rate": 3.275603325897384e-05, "loss": 0.1996, "step": 22485 }, { "epoch": 1.2314515687455512, "grad_norm": 0.13290677964687347, "learning_rate": 3.27509632934496e-05, "loss": 0.2044, "step": 22490 }, { "epoch": 1.2317253463286426, "grad_norm": 0.1356683373451233, "learning_rate": 3.274589332792537e-05, "loss": 0.2068, "step": 22495 }, { "epoch": 1.231999123911734, "grad_norm": 0.13971398770809174, "learning_rate": 3.2740823362401135e-05, "loss": 0.1976, "step": 22500 }, { "epoch": 1.2322729014948255, "grad_norm": 0.15170474350452423, "learning_rate": 3.27357533968769e-05, "loss": 0.2144, "step": 22505 }, { "epoch": 1.2325466790779172, "grad_norm": 0.12851165235042572, "learning_rate": 3.273068343135267e-05, "loss": 0.2063, "step": 22510 }, { "epoch": 1.2328204566610086, "grad_norm": 0.12375231087207794, "learning_rate": 3.272561346582844e-05, "loss": 0.2037, "step": 22515 }, { "epoch": 1.2330942342441, "grad_norm": 0.13786742091178894, "learning_rate": 3.27205435003042e-05, "loss": 0.1971, "step": 22520 }, { "epoch": 1.2333680118271917, "grad_norm": 0.12405025213956833, "learning_rate": 3.2715473534779965e-05, "loss": 0.2053, "step": 22525 }, { "epoch": 1.2336417894102831, "grad_norm": 0.14487065374851227, "learning_rate": 3.2710403569255735e-05, "loss": 0.2064, "step": 22530 }, { "epoch": 1.2339155669933746, "grad_norm": 0.14249221980571747, "learning_rate": 3.27053336037315e-05, "loss": 0.1971, "step": 22535 }, { "epoch": 1.234189344576466, "grad_norm": 0.14508092403411865, "learning_rate": 3.270026363820726e-05, "loss": 0.2062, "step": 22540 }, { "epoch": 1.2344631221595577, "grad_norm": 0.11046384274959564, "learning_rate": 3.2695193672683025e-05, "loss": 0.1926, "step": 22545 }, { "epoch": 1.234736899742649, "grad_norm": 0.15707895159721375, "learning_rate": 3.2690123707158795e-05, "loss": 0.2076, "step": 22550 }, { "epoch": 1.2350106773257405, "grad_norm": 0.14632895588874817, "learning_rate": 3.268505374163456e-05, "loss": 0.2021, "step": 22555 }, { "epoch": 1.235284454908832, "grad_norm": 0.13345341384410858, "learning_rate": 3.267998377611032e-05, "loss": 0.2063, "step": 22560 }, { "epoch": 1.2355582324919236, "grad_norm": 0.1242687925696373, "learning_rate": 3.2674913810586085e-05, "loss": 0.2087, "step": 22565 }, { "epoch": 1.235832010075015, "grad_norm": 0.13583512604236603, "learning_rate": 3.2669843845061855e-05, "loss": 0.2073, "step": 22570 }, { "epoch": 1.2361057876581065, "grad_norm": 0.14620137214660645, "learning_rate": 3.266477387953762e-05, "loss": 0.2022, "step": 22575 }, { "epoch": 1.236379565241198, "grad_norm": 0.12644197046756744, "learning_rate": 3.265970391401338e-05, "loss": 0.215, "step": 22580 }, { "epoch": 1.2366533428242896, "grad_norm": 0.16553384065628052, "learning_rate": 3.265463394848915e-05, "loss": 0.2116, "step": 22585 }, { "epoch": 1.236927120407381, "grad_norm": 0.12644176185131073, "learning_rate": 3.2649563982964915e-05, "loss": 0.2066, "step": 22590 }, { "epoch": 1.2372008979904725, "grad_norm": 0.13270123302936554, "learning_rate": 3.2644494017440685e-05, "loss": 0.2069, "step": 22595 }, { "epoch": 1.237474675573564, "grad_norm": 0.12840835750102997, "learning_rate": 3.263942405191645e-05, "loss": 0.2065, "step": 22600 }, { "epoch": 1.2377484531566556, "grad_norm": 0.1526268869638443, "learning_rate": 3.263435408639222e-05, "loss": 0.2058, "step": 22605 }, { "epoch": 1.238022230739747, "grad_norm": 0.1415071189403534, "learning_rate": 3.262928412086798e-05, "loss": 0.2044, "step": 22610 }, { "epoch": 1.2382960083228385, "grad_norm": 0.1296793520450592, "learning_rate": 3.2624214155343745e-05, "loss": 0.2081, "step": 22615 }, { "epoch": 1.2385697859059301, "grad_norm": 0.11574339121580124, "learning_rate": 3.261914418981951e-05, "loss": 0.1969, "step": 22620 }, { "epoch": 1.2388435634890216, "grad_norm": 0.138322114944458, "learning_rate": 3.261407422429528e-05, "loss": 0.2043, "step": 22625 }, { "epoch": 1.239117341072113, "grad_norm": 0.1486145555973053, "learning_rate": 3.260900425877104e-05, "loss": 0.2051, "step": 22630 }, { "epoch": 1.2393911186552045, "grad_norm": 0.13491888344287872, "learning_rate": 3.2603934293246805e-05, "loss": 0.2031, "step": 22635 }, { "epoch": 1.239664896238296, "grad_norm": 0.10702048987150192, "learning_rate": 3.2598864327722575e-05, "loss": 0.1987, "step": 22640 }, { "epoch": 1.2399386738213876, "grad_norm": 0.13508224487304688, "learning_rate": 3.259379436219834e-05, "loss": 0.208, "step": 22645 }, { "epoch": 1.240212451404479, "grad_norm": 0.14615711569786072, "learning_rate": 3.25887243966741e-05, "loss": 0.2005, "step": 22650 }, { "epoch": 1.2404862289875704, "grad_norm": 0.14078156650066376, "learning_rate": 3.2583654431149865e-05, "loss": 0.1987, "step": 22655 }, { "epoch": 1.240760006570662, "grad_norm": 0.12404147535562515, "learning_rate": 3.2578584465625635e-05, "loss": 0.199, "step": 22660 }, { "epoch": 1.2410337841537535, "grad_norm": 0.1335224211215973, "learning_rate": 3.25735145001014e-05, "loss": 0.2103, "step": 22665 }, { "epoch": 1.241307561736845, "grad_norm": 0.12655754387378693, "learning_rate": 3.256844453457716e-05, "loss": 0.2047, "step": 22670 }, { "epoch": 1.2415813393199364, "grad_norm": 0.11666934937238693, "learning_rate": 3.256337456905293e-05, "loss": 0.2089, "step": 22675 }, { "epoch": 1.2418551169030279, "grad_norm": 0.12620894610881805, "learning_rate": 3.25583046035287e-05, "loss": 0.2045, "step": 22680 }, { "epoch": 1.2421288944861195, "grad_norm": 0.13813090324401855, "learning_rate": 3.2553234638004466e-05, "loss": 0.2077, "step": 22685 }, { "epoch": 1.242402672069211, "grad_norm": 0.16471658647060394, "learning_rate": 3.254816467248023e-05, "loss": 0.2076, "step": 22690 }, { "epoch": 1.2426764496523024, "grad_norm": 0.15356646478176117, "learning_rate": 3.254309470695599e-05, "loss": 0.2018, "step": 22695 }, { "epoch": 1.242950227235394, "grad_norm": 0.1538439691066742, "learning_rate": 3.253802474143176e-05, "loss": 0.2077, "step": 22700 }, { "epoch": 1.2432240048184855, "grad_norm": 0.13304735720157623, "learning_rate": 3.2532954775907526e-05, "loss": 0.219, "step": 22705 }, { "epoch": 1.243497782401577, "grad_norm": 0.13894592225551605, "learning_rate": 3.252788481038329e-05, "loss": 0.2006, "step": 22710 }, { "epoch": 1.2437715599846684, "grad_norm": 0.16948901116847992, "learning_rate": 3.252281484485906e-05, "loss": 0.2043, "step": 22715 }, { "epoch": 1.24404533756776, "grad_norm": 0.11465252935886383, "learning_rate": 3.251774487933482e-05, "loss": 0.2098, "step": 22720 }, { "epoch": 1.2443191151508515, "grad_norm": 0.1805533766746521, "learning_rate": 3.2512674913810586e-05, "loss": 0.2114, "step": 22725 }, { "epoch": 1.244592892733943, "grad_norm": 0.12144701927900314, "learning_rate": 3.250760494828635e-05, "loss": 0.2152, "step": 22730 }, { "epoch": 1.2448666703170344, "grad_norm": 0.16035547852516174, "learning_rate": 3.250253498276212e-05, "loss": 0.218, "step": 22735 }, { "epoch": 1.245140447900126, "grad_norm": 0.12591247260570526, "learning_rate": 3.249746501723788e-05, "loss": 0.1996, "step": 22740 }, { "epoch": 1.2454142254832175, "grad_norm": 0.12044569104909897, "learning_rate": 3.2492395051713645e-05, "loss": 0.2029, "step": 22745 }, { "epoch": 1.245688003066309, "grad_norm": 0.13782277703285217, "learning_rate": 3.2487325086189416e-05, "loss": 0.2028, "step": 22750 }, { "epoch": 1.2459617806494003, "grad_norm": 0.16416527330875397, "learning_rate": 3.2482255120665186e-05, "loss": 0.2097, "step": 22755 }, { "epoch": 1.246235558232492, "grad_norm": 0.1300307810306549, "learning_rate": 3.247718515514095e-05, "loss": 0.2019, "step": 22760 }, { "epoch": 1.2465093358155834, "grad_norm": 0.13553421199321747, "learning_rate": 3.247211518961671e-05, "loss": 0.2114, "step": 22765 }, { "epoch": 1.2467831133986749, "grad_norm": 0.1655520796775818, "learning_rate": 3.246704522409248e-05, "loss": 0.1922, "step": 22770 }, { "epoch": 1.2470568909817663, "grad_norm": 0.16019676625728607, "learning_rate": 3.2461975258568246e-05, "loss": 0.2055, "step": 22775 }, { "epoch": 1.247330668564858, "grad_norm": 0.13503044843673706, "learning_rate": 3.245690529304401e-05, "loss": 0.2019, "step": 22780 }, { "epoch": 1.2476044461479494, "grad_norm": 0.15130002796649933, "learning_rate": 3.245183532751977e-05, "loss": 0.2076, "step": 22785 }, { "epoch": 1.2478782237310408, "grad_norm": 0.16080494225025177, "learning_rate": 3.244676536199554e-05, "loss": 0.2112, "step": 22790 }, { "epoch": 1.2481520013141325, "grad_norm": 0.1260124295949936, "learning_rate": 3.2441695396471306e-05, "loss": 0.2066, "step": 22795 }, { "epoch": 1.248425778897224, "grad_norm": 0.13781408965587616, "learning_rate": 3.243662543094707e-05, "loss": 0.213, "step": 22800 }, { "epoch": 1.2486995564803154, "grad_norm": 0.13559919595718384, "learning_rate": 3.243155546542284e-05, "loss": 0.2049, "step": 22805 }, { "epoch": 1.2489733340634068, "grad_norm": 0.13251133263111115, "learning_rate": 3.24264854998986e-05, "loss": 0.2026, "step": 22810 }, { "epoch": 1.2492471116464983, "grad_norm": 0.14812563359737396, "learning_rate": 3.2421415534374366e-05, "loss": 0.1976, "step": 22815 }, { "epoch": 1.24952088922959, "grad_norm": 0.12237999588251114, "learning_rate": 3.241634556885013e-05, "loss": 0.2013, "step": 22820 }, { "epoch": 1.2497946668126814, "grad_norm": 0.13690058887004852, "learning_rate": 3.24112756033259e-05, "loss": 0.2157, "step": 22825 }, { "epoch": 1.2500684443957728, "grad_norm": 0.12533707916736603, "learning_rate": 3.240620563780166e-05, "loss": 0.2113, "step": 22830 }, { "epoch": 1.2503422219788645, "grad_norm": 0.1289815753698349, "learning_rate": 3.2401135672277426e-05, "loss": 0.2078, "step": 22835 }, { "epoch": 1.250615999561956, "grad_norm": 0.12396100908517838, "learning_rate": 3.2396065706753196e-05, "loss": 0.2069, "step": 22840 }, { "epoch": 1.2508897771450473, "grad_norm": 0.14157713949680328, "learning_rate": 3.2390995741228966e-05, "loss": 0.2094, "step": 22845 }, { "epoch": 1.2511635547281388, "grad_norm": 0.12525267899036407, "learning_rate": 3.238592577570473e-05, "loss": 0.2053, "step": 22850 }, { "epoch": 1.2514373323112302, "grad_norm": 0.11680102348327637, "learning_rate": 3.238085581018049e-05, "loss": 0.1933, "step": 22855 }, { "epoch": 1.2517111098943219, "grad_norm": 0.12785881757736206, "learning_rate": 3.2375785844656256e-05, "loss": 0.2088, "step": 22860 }, { "epoch": 1.2519848874774133, "grad_norm": 0.1249578669667244, "learning_rate": 3.2370715879132026e-05, "loss": 0.215, "step": 22865 }, { "epoch": 1.2522586650605048, "grad_norm": 0.12627451121807098, "learning_rate": 3.236564591360779e-05, "loss": 0.2123, "step": 22870 }, { "epoch": 1.2525324426435964, "grad_norm": 0.14691631495952606, "learning_rate": 3.236057594808355e-05, "loss": 0.2179, "step": 22875 }, { "epoch": 1.2528062202266879, "grad_norm": 0.11515316367149353, "learning_rate": 3.235550598255932e-05, "loss": 0.2051, "step": 22880 }, { "epoch": 1.2530799978097793, "grad_norm": 0.11971895396709442, "learning_rate": 3.2350436017035086e-05, "loss": 0.1991, "step": 22885 }, { "epoch": 1.253353775392871, "grad_norm": 0.14165736734867096, "learning_rate": 3.234536605151085e-05, "loss": 0.2076, "step": 22890 }, { "epoch": 1.2536275529759624, "grad_norm": 0.1162286102771759, "learning_rate": 3.234029608598661e-05, "loss": 0.2055, "step": 22895 }, { "epoch": 1.2539013305590538, "grad_norm": 0.15174317359924316, "learning_rate": 3.233522612046238e-05, "loss": 0.2113, "step": 22900 }, { "epoch": 1.2541751081421453, "grad_norm": 0.14318057894706726, "learning_rate": 3.2330156154938146e-05, "loss": 0.1992, "step": 22905 }, { "epoch": 1.2544488857252367, "grad_norm": 0.12686270475387573, "learning_rate": 3.232508618941391e-05, "loss": 0.1981, "step": 22910 }, { "epoch": 1.2547226633083284, "grad_norm": 0.12356618046760559, "learning_rate": 3.232001622388968e-05, "loss": 0.2007, "step": 22915 }, { "epoch": 1.2549964408914198, "grad_norm": 0.14434058964252472, "learning_rate": 3.231494625836545e-05, "loss": 0.2075, "step": 22920 }, { "epoch": 1.2552702184745113, "grad_norm": 0.15361979603767395, "learning_rate": 3.230987629284121e-05, "loss": 0.2004, "step": 22925 }, { "epoch": 1.255543996057603, "grad_norm": 0.12313107401132584, "learning_rate": 3.2304806327316976e-05, "loss": 0.2042, "step": 22930 }, { "epoch": 1.2558177736406944, "grad_norm": 0.13313519954681396, "learning_rate": 3.2299736361792746e-05, "loss": 0.2045, "step": 22935 }, { "epoch": 1.2560915512237858, "grad_norm": 0.13184209167957306, "learning_rate": 3.229466639626851e-05, "loss": 0.2093, "step": 22940 }, { "epoch": 1.2563653288068772, "grad_norm": 0.1335357427597046, "learning_rate": 3.228959643074427e-05, "loss": 0.2082, "step": 22945 }, { "epoch": 1.2566391063899687, "grad_norm": 0.1429542899131775, "learning_rate": 3.2284526465220036e-05, "loss": 0.1989, "step": 22950 }, { "epoch": 1.2569128839730603, "grad_norm": 0.18004052340984344, "learning_rate": 3.2279456499695806e-05, "loss": 0.2074, "step": 22955 }, { "epoch": 1.2571866615561518, "grad_norm": 0.1572851687669754, "learning_rate": 3.227438653417157e-05, "loss": 0.2005, "step": 22960 }, { "epoch": 1.2574604391392432, "grad_norm": 0.13927055895328522, "learning_rate": 3.226931656864733e-05, "loss": 0.2093, "step": 22965 }, { "epoch": 1.2577342167223349, "grad_norm": 0.13476642966270447, "learning_rate": 3.22642466031231e-05, "loss": 0.197, "step": 22970 }, { "epoch": 1.2580079943054263, "grad_norm": 0.13532067835330963, "learning_rate": 3.2259176637598866e-05, "loss": 0.2056, "step": 22975 }, { "epoch": 1.2582817718885178, "grad_norm": 0.13291892409324646, "learning_rate": 3.225410667207463e-05, "loss": 0.2114, "step": 22980 }, { "epoch": 1.2585555494716092, "grad_norm": 0.148359477519989, "learning_rate": 3.224903670655039e-05, "loss": 0.217, "step": 22985 }, { "epoch": 1.2588293270547006, "grad_norm": 0.13118502497673035, "learning_rate": 3.224396674102616e-05, "loss": 0.2015, "step": 22990 }, { "epoch": 1.2591031046377923, "grad_norm": 0.12028107047080994, "learning_rate": 3.2238896775501926e-05, "loss": 0.1989, "step": 22995 }, { "epoch": 1.2593768822208837, "grad_norm": 0.15701454877853394, "learning_rate": 3.2233826809977696e-05, "loss": 0.2028, "step": 23000 }, { "epoch": 1.2596506598039752, "grad_norm": 0.16942958533763885, "learning_rate": 3.222875684445346e-05, "loss": 0.2027, "step": 23005 }, { "epoch": 1.2599244373870668, "grad_norm": 0.14415137469768524, "learning_rate": 3.222368687892923e-05, "loss": 0.2058, "step": 23010 }, { "epoch": 1.2601982149701583, "grad_norm": 0.15761050581932068, "learning_rate": 3.221861691340499e-05, "loss": 0.2128, "step": 23015 }, { "epoch": 1.2604719925532497, "grad_norm": 0.14198541641235352, "learning_rate": 3.2213546947880756e-05, "loss": 0.2022, "step": 23020 }, { "epoch": 1.2607457701363414, "grad_norm": 0.13119462132453918, "learning_rate": 3.220847698235652e-05, "loss": 0.2039, "step": 23025 }, { "epoch": 1.2610195477194328, "grad_norm": 0.1379460245370865, "learning_rate": 3.220340701683229e-05, "loss": 0.2181, "step": 23030 }, { "epoch": 1.2612933253025242, "grad_norm": 0.1415489912033081, "learning_rate": 3.219833705130805e-05, "loss": 0.2046, "step": 23035 }, { "epoch": 1.2615671028856157, "grad_norm": 0.13884934782981873, "learning_rate": 3.2193267085783816e-05, "loss": 0.2108, "step": 23040 }, { "epoch": 1.2618408804687071, "grad_norm": 0.13846485316753387, "learning_rate": 3.2188197120259586e-05, "loss": 0.1956, "step": 23045 }, { "epoch": 1.2621146580517988, "grad_norm": 0.14476501941680908, "learning_rate": 3.218312715473535e-05, "loss": 0.2021, "step": 23050 }, { "epoch": 1.2623884356348902, "grad_norm": 0.12747901678085327, "learning_rate": 3.217805718921111e-05, "loss": 0.1988, "step": 23055 }, { "epoch": 1.2626622132179817, "grad_norm": 0.14307357370853424, "learning_rate": 3.2172987223686876e-05, "loss": 0.207, "step": 23060 }, { "epoch": 1.2629359908010733, "grad_norm": 0.1450308859348297, "learning_rate": 3.2167917258162646e-05, "loss": 0.2045, "step": 23065 }, { "epoch": 1.2632097683841648, "grad_norm": 0.1551457792520523, "learning_rate": 3.216284729263841e-05, "loss": 0.2037, "step": 23070 }, { "epoch": 1.2634835459672562, "grad_norm": 0.17663711309432983, "learning_rate": 3.215777732711417e-05, "loss": 0.2059, "step": 23075 }, { "epoch": 1.2637573235503476, "grad_norm": 0.13655222952365875, "learning_rate": 3.215270736158994e-05, "loss": 0.2015, "step": 23080 }, { "epoch": 1.264031101133439, "grad_norm": 0.14656053483486176, "learning_rate": 3.214763739606571e-05, "loss": 0.2024, "step": 23085 }, { "epoch": 1.2643048787165307, "grad_norm": 0.11933375895023346, "learning_rate": 3.2142567430541476e-05, "loss": 0.2065, "step": 23090 }, { "epoch": 1.2645786562996222, "grad_norm": 0.13584506511688232, "learning_rate": 3.213749746501724e-05, "loss": 0.1989, "step": 23095 }, { "epoch": 1.2648524338827136, "grad_norm": 0.14434263110160828, "learning_rate": 3.213242749949301e-05, "loss": 0.2119, "step": 23100 }, { "epoch": 1.2651262114658053, "grad_norm": 0.1292484998703003, "learning_rate": 3.212735753396877e-05, "loss": 0.1985, "step": 23105 }, { "epoch": 1.2653999890488967, "grad_norm": 0.11499408632516861, "learning_rate": 3.2122287568444536e-05, "loss": 0.1946, "step": 23110 }, { "epoch": 1.2656737666319882, "grad_norm": 0.11887980997562408, "learning_rate": 3.21172176029203e-05, "loss": 0.2123, "step": 23115 }, { "epoch": 1.2659475442150796, "grad_norm": 0.13291998207569122, "learning_rate": 3.211214763739607e-05, "loss": 0.2002, "step": 23120 }, { "epoch": 1.266221321798171, "grad_norm": 0.13044793903827667, "learning_rate": 3.210707767187183e-05, "loss": 0.2064, "step": 23125 }, { "epoch": 1.2664950993812627, "grad_norm": 0.14875690639019012, "learning_rate": 3.2102007706347596e-05, "loss": 0.205, "step": 23130 }, { "epoch": 1.2667688769643541, "grad_norm": 0.12783421576023102, "learning_rate": 3.2096937740823366e-05, "loss": 0.2043, "step": 23135 }, { "epoch": 1.2670426545474456, "grad_norm": 0.13560523092746735, "learning_rate": 3.209186777529913e-05, "loss": 0.2054, "step": 23140 }, { "epoch": 1.2673164321305372, "grad_norm": 0.14745967090129852, "learning_rate": 3.208679780977489e-05, "loss": 0.2063, "step": 23145 }, { "epoch": 1.2675902097136287, "grad_norm": 0.1449231058359146, "learning_rate": 3.2081727844250656e-05, "loss": 0.1962, "step": 23150 }, { "epoch": 1.2678639872967201, "grad_norm": 0.1130312830209732, "learning_rate": 3.2076657878726426e-05, "loss": 0.1977, "step": 23155 }, { "epoch": 1.2681377648798118, "grad_norm": 0.1422487497329712, "learning_rate": 3.207158791320219e-05, "loss": 0.2054, "step": 23160 }, { "epoch": 1.2684115424629032, "grad_norm": 0.12128762155771255, "learning_rate": 3.206651794767796e-05, "loss": 0.2017, "step": 23165 }, { "epoch": 1.2686853200459947, "grad_norm": 0.12673918902873993, "learning_rate": 3.206144798215372e-05, "loss": 0.2069, "step": 23170 }, { "epoch": 1.268959097629086, "grad_norm": 0.12522771954536438, "learning_rate": 3.205637801662949e-05, "loss": 0.2088, "step": 23175 }, { "epoch": 1.2692328752121775, "grad_norm": 0.13880391418933868, "learning_rate": 3.2051308051105256e-05, "loss": 0.2013, "step": 23180 }, { "epoch": 1.2695066527952692, "grad_norm": 0.1460336446762085, "learning_rate": 3.204623808558102e-05, "loss": 0.2098, "step": 23185 }, { "epoch": 1.2697804303783606, "grad_norm": 0.13603366911411285, "learning_rate": 3.204116812005678e-05, "loss": 0.2068, "step": 23190 }, { "epoch": 1.270054207961452, "grad_norm": 0.14278124272823334, "learning_rate": 3.203609815453255e-05, "loss": 0.2039, "step": 23195 }, { "epoch": 1.2703279855445437, "grad_norm": 0.12602819502353668, "learning_rate": 3.2031028189008316e-05, "loss": 0.209, "step": 23200 }, { "epoch": 1.2706017631276352, "grad_norm": 0.13040035963058472, "learning_rate": 3.202595822348408e-05, "loss": 0.2027, "step": 23205 }, { "epoch": 1.2708755407107266, "grad_norm": 0.13040080666542053, "learning_rate": 3.202088825795985e-05, "loss": 0.2067, "step": 23210 }, { "epoch": 1.271149318293818, "grad_norm": 0.1475447416305542, "learning_rate": 3.201581829243561e-05, "loss": 0.2106, "step": 23215 }, { "epoch": 1.2714230958769095, "grad_norm": 0.13944250345230103, "learning_rate": 3.2010748326911376e-05, "loss": 0.2045, "step": 23220 }, { "epoch": 1.2716968734600012, "grad_norm": 0.1342269331216812, "learning_rate": 3.200567836138714e-05, "loss": 0.2103, "step": 23225 }, { "epoch": 1.2719706510430926, "grad_norm": 0.1439346969127655, "learning_rate": 3.200060839586291e-05, "loss": 0.2096, "step": 23230 }, { "epoch": 1.272244428626184, "grad_norm": 0.13847878575325012, "learning_rate": 3.199553843033867e-05, "loss": 0.2043, "step": 23235 }, { "epoch": 1.2725182062092757, "grad_norm": 0.14951390027999878, "learning_rate": 3.1990468464814436e-05, "loss": 0.2074, "step": 23240 }, { "epoch": 1.2727919837923671, "grad_norm": 0.1231059804558754, "learning_rate": 3.1985398499290206e-05, "loss": 0.2013, "step": 23245 }, { "epoch": 1.2730657613754586, "grad_norm": 0.13722266256809235, "learning_rate": 3.1980328533765976e-05, "loss": 0.2095, "step": 23250 }, { "epoch": 1.27333953895855, "grad_norm": 0.12649722397327423, "learning_rate": 3.197525856824174e-05, "loss": 0.1987, "step": 23255 }, { "epoch": 1.2736133165416414, "grad_norm": 0.13005571067333221, "learning_rate": 3.19701886027175e-05, "loss": 0.2075, "step": 23260 }, { "epoch": 1.273887094124733, "grad_norm": 0.1320899873971939, "learning_rate": 3.196511863719327e-05, "loss": 0.205, "step": 23265 }, { "epoch": 1.2741608717078245, "grad_norm": 0.12382737547159195, "learning_rate": 3.1960048671669036e-05, "loss": 0.1936, "step": 23270 }, { "epoch": 1.274434649290916, "grad_norm": 0.15567070245742798, "learning_rate": 3.19549787061448e-05, "loss": 0.1986, "step": 23275 }, { "epoch": 1.2747084268740076, "grad_norm": 0.15100790560245514, "learning_rate": 3.194990874062056e-05, "loss": 0.2117, "step": 23280 }, { "epoch": 1.274982204457099, "grad_norm": 0.12873265147209167, "learning_rate": 3.194483877509633e-05, "loss": 0.2048, "step": 23285 }, { "epoch": 1.2752559820401905, "grad_norm": 0.12155379354953766, "learning_rate": 3.1939768809572096e-05, "loss": 0.2036, "step": 23290 }, { "epoch": 1.275529759623282, "grad_norm": 0.13878996670246124, "learning_rate": 3.193469884404786e-05, "loss": 0.2052, "step": 23295 }, { "epoch": 1.2758035372063734, "grad_norm": 0.1343514323234558, "learning_rate": 3.192962887852362e-05, "loss": 0.1968, "step": 23300 }, { "epoch": 1.276077314789465, "grad_norm": 0.1271924376487732, "learning_rate": 3.192455891299939e-05, "loss": 0.2004, "step": 23305 }, { "epoch": 1.2763510923725565, "grad_norm": 0.1572229564189911, "learning_rate": 3.1919488947475156e-05, "loss": 0.2039, "step": 23310 }, { "epoch": 1.276624869955648, "grad_norm": 0.12930358946323395, "learning_rate": 3.191441898195092e-05, "loss": 0.1987, "step": 23315 }, { "epoch": 1.2768986475387396, "grad_norm": 0.16032449901103973, "learning_rate": 3.190934901642669e-05, "loss": 0.2053, "step": 23320 }, { "epoch": 1.277172425121831, "grad_norm": 0.12935195863246918, "learning_rate": 3.190427905090246e-05, "loss": 0.2038, "step": 23325 }, { "epoch": 1.2774462027049225, "grad_norm": 0.1132582500576973, "learning_rate": 3.189920908537822e-05, "loss": 0.2039, "step": 23330 }, { "epoch": 1.2777199802880141, "grad_norm": 0.12099280953407288, "learning_rate": 3.1894139119853986e-05, "loss": 0.2093, "step": 23335 }, { "epoch": 1.2779937578711056, "grad_norm": 0.12420020252466202, "learning_rate": 3.1889069154329757e-05, "loss": 0.2116, "step": 23340 }, { "epoch": 1.278267535454197, "grad_norm": 0.1372021734714508, "learning_rate": 3.188399918880552e-05, "loss": 0.2069, "step": 23345 }, { "epoch": 1.2785413130372885, "grad_norm": 0.14230851829051971, "learning_rate": 3.187892922328128e-05, "loss": 0.2073, "step": 23350 }, { "epoch": 1.27881509062038, "grad_norm": 0.15267224609851837, "learning_rate": 3.1873859257757046e-05, "loss": 0.2072, "step": 23355 }, { "epoch": 1.2790888682034716, "grad_norm": 0.13444559276103973, "learning_rate": 3.1868789292232817e-05, "loss": 0.2046, "step": 23360 }, { "epoch": 1.279362645786563, "grad_norm": 0.14273908734321594, "learning_rate": 3.186371932670858e-05, "loss": 0.2054, "step": 23365 }, { "epoch": 1.2796364233696544, "grad_norm": 0.13182729482650757, "learning_rate": 3.185864936118434e-05, "loss": 0.2078, "step": 23370 }, { "epoch": 1.279910200952746, "grad_norm": 0.1339770257472992, "learning_rate": 3.185357939566011e-05, "loss": 0.2057, "step": 23375 }, { "epoch": 1.2801839785358375, "grad_norm": 0.18769855797290802, "learning_rate": 3.1848509430135877e-05, "loss": 0.2207, "step": 23380 }, { "epoch": 1.280457756118929, "grad_norm": 0.1405806690454483, "learning_rate": 3.184343946461164e-05, "loss": 0.1997, "step": 23385 }, { "epoch": 1.2807315337020204, "grad_norm": 0.11637305468320847, "learning_rate": 3.18383694990874e-05, "loss": 0.1961, "step": 23390 }, { "epoch": 1.2810053112851119, "grad_norm": 0.13875712454319, "learning_rate": 3.183329953356317e-05, "loss": 0.1999, "step": 23395 }, { "epoch": 1.2812790888682035, "grad_norm": 0.13193432986736298, "learning_rate": 3.1828229568038937e-05, "loss": 0.2114, "step": 23400 }, { "epoch": 1.281552866451295, "grad_norm": 0.1185179278254509, "learning_rate": 3.182315960251471e-05, "loss": 0.1965, "step": 23405 }, { "epoch": 1.2818266440343864, "grad_norm": 0.13725432753562927, "learning_rate": 3.181808963699047e-05, "loss": 0.2053, "step": 23410 }, { "epoch": 1.282100421617478, "grad_norm": 0.12558944523334503, "learning_rate": 3.181301967146624e-05, "loss": 0.1994, "step": 23415 }, { "epoch": 1.2823741992005695, "grad_norm": 0.1605759859085083, "learning_rate": 3.1807949705942e-05, "loss": 0.2034, "step": 23420 }, { "epoch": 1.282647976783661, "grad_norm": 0.11798980087041855, "learning_rate": 3.180287974041777e-05, "loss": 0.2046, "step": 23425 }, { "epoch": 1.2829217543667524, "grad_norm": 0.14841218292713165, "learning_rate": 3.179780977489353e-05, "loss": 0.2117, "step": 23430 }, { "epoch": 1.2831955319498438, "grad_norm": 0.1405918151140213, "learning_rate": 3.17927398093693e-05, "loss": 0.2076, "step": 23435 }, { "epoch": 1.2834693095329355, "grad_norm": 0.1380549520254135, "learning_rate": 3.178766984384506e-05, "loss": 0.2124, "step": 23440 }, { "epoch": 1.283743087116027, "grad_norm": 0.1283455342054367, "learning_rate": 3.178259987832083e-05, "loss": 0.1975, "step": 23445 }, { "epoch": 1.2840168646991184, "grad_norm": 0.11021313816308975, "learning_rate": 3.17775299127966e-05, "loss": 0.2069, "step": 23450 }, { "epoch": 1.28429064228221, "grad_norm": 0.1273367702960968, "learning_rate": 3.177245994727236e-05, "loss": 0.203, "step": 23455 }, { "epoch": 1.2845644198653015, "grad_norm": 0.13432541489601135, "learning_rate": 3.176738998174812e-05, "loss": 0.1987, "step": 23460 }, { "epoch": 1.284838197448393, "grad_norm": 0.1290106624364853, "learning_rate": 3.176232001622389e-05, "loss": 0.2078, "step": 23465 }, { "epoch": 1.2851119750314846, "grad_norm": 0.15723201632499695, "learning_rate": 3.175725005069966e-05, "loss": 0.1965, "step": 23470 }, { "epoch": 1.285385752614576, "grad_norm": 0.13048118352890015, "learning_rate": 3.175218008517542e-05, "loss": 0.2113, "step": 23475 }, { "epoch": 1.2856595301976674, "grad_norm": 0.13205759227275848, "learning_rate": 3.174711011965118e-05, "loss": 0.2063, "step": 23480 }, { "epoch": 1.2859333077807589, "grad_norm": 0.1270013451576233, "learning_rate": 3.1742040154126953e-05, "loss": 0.188, "step": 23485 }, { "epoch": 1.2862070853638503, "grad_norm": 0.1219596341252327, "learning_rate": 3.1736970188602723e-05, "loss": 0.2005, "step": 23490 }, { "epoch": 1.286480862946942, "grad_norm": 0.15740931034088135, "learning_rate": 3.173190022307849e-05, "loss": 0.2061, "step": 23495 }, { "epoch": 1.2867546405300334, "grad_norm": 0.14371967315673828, "learning_rate": 3.172683025755425e-05, "loss": 0.202, "step": 23500 }, { "epoch": 1.2870284181131249, "grad_norm": 0.12309244275093079, "learning_rate": 3.172176029203002e-05, "loss": 0.2053, "step": 23505 }, { "epoch": 1.2873021956962165, "grad_norm": 0.1243031919002533, "learning_rate": 3.1716690326505783e-05, "loss": 0.198, "step": 23510 }, { "epoch": 1.287575973279308, "grad_norm": 0.12849122285842896, "learning_rate": 3.171162036098155e-05, "loss": 0.2088, "step": 23515 }, { "epoch": 1.2878497508623994, "grad_norm": 0.13779190182685852, "learning_rate": 3.170655039545731e-05, "loss": 0.1986, "step": 23520 }, { "epoch": 1.2881235284454908, "grad_norm": 0.13363197445869446, "learning_rate": 3.170148042993308e-05, "loss": 0.2103, "step": 23525 }, { "epoch": 1.2883973060285823, "grad_norm": 0.1463559865951538, "learning_rate": 3.1696410464408843e-05, "loss": 0.2092, "step": 23530 }, { "epoch": 1.288671083611674, "grad_norm": 0.1385507732629776, "learning_rate": 3.169134049888461e-05, "loss": 0.2122, "step": 23535 }, { "epoch": 1.2889448611947654, "grad_norm": 0.13413861393928528, "learning_rate": 3.168627053336038e-05, "loss": 0.2032, "step": 23540 }, { "epoch": 1.2892186387778568, "grad_norm": 0.12019757926464081, "learning_rate": 3.168120056783614e-05, "loss": 0.2041, "step": 23545 }, { "epoch": 1.2894924163609485, "grad_norm": 0.13021251559257507, "learning_rate": 3.1676130602311903e-05, "loss": 0.199, "step": 23550 }, { "epoch": 1.28976619394404, "grad_norm": 0.13545255362987518, "learning_rate": 3.167106063678767e-05, "loss": 0.1988, "step": 23555 }, { "epoch": 1.2900399715271313, "grad_norm": 0.12301548570394516, "learning_rate": 3.166599067126344e-05, "loss": 0.1982, "step": 23560 }, { "epoch": 1.2903137491102228, "grad_norm": 0.15771283209323883, "learning_rate": 3.16609207057392e-05, "loss": 0.2064, "step": 23565 }, { "epoch": 1.2905875266933142, "grad_norm": 0.128336101770401, "learning_rate": 3.165585074021497e-05, "loss": 0.2081, "step": 23570 }, { "epoch": 1.2908613042764059, "grad_norm": 0.15228548645973206, "learning_rate": 3.1650780774690734e-05, "loss": 0.2098, "step": 23575 }, { "epoch": 1.2911350818594973, "grad_norm": 0.12456351518630981, "learning_rate": 3.1645710809166504e-05, "loss": 0.2043, "step": 23580 }, { "epoch": 1.2914088594425888, "grad_norm": 0.12569379806518555, "learning_rate": 3.164064084364227e-05, "loss": 0.2005, "step": 23585 }, { "epoch": 1.2916826370256804, "grad_norm": 0.14448612928390503, "learning_rate": 3.163557087811803e-05, "loss": 0.2167, "step": 23590 }, { "epoch": 1.2919564146087719, "grad_norm": 0.11544372141361237, "learning_rate": 3.1630500912593794e-05, "loss": 0.2026, "step": 23595 }, { "epoch": 1.2922301921918633, "grad_norm": 0.12795929610729218, "learning_rate": 3.1625430947069564e-05, "loss": 0.2027, "step": 23600 }, { "epoch": 1.292503969774955, "grad_norm": 0.14712198078632355, "learning_rate": 3.162036098154533e-05, "loss": 0.2041, "step": 23605 }, { "epoch": 1.2927777473580464, "grad_norm": 0.13557001948356628, "learning_rate": 3.161529101602109e-05, "loss": 0.1991, "step": 23610 }, { "epoch": 1.2930515249411378, "grad_norm": 0.12625503540039062, "learning_rate": 3.161022105049686e-05, "loss": 0.2014, "step": 23615 }, { "epoch": 1.2933253025242293, "grad_norm": 0.11744146794080734, "learning_rate": 3.1605151084972624e-05, "loss": 0.2086, "step": 23620 }, { "epoch": 1.2935990801073207, "grad_norm": 0.14812539517879486, "learning_rate": 3.160008111944839e-05, "loss": 0.1998, "step": 23625 }, { "epoch": 1.2938728576904124, "grad_norm": 0.13462743163108826, "learning_rate": 3.159501115392415e-05, "loss": 0.2092, "step": 23630 }, { "epoch": 1.2941466352735038, "grad_norm": 0.14567045867443085, "learning_rate": 3.158994118839992e-05, "loss": 0.1929, "step": 23635 }, { "epoch": 1.2944204128565953, "grad_norm": 0.18330056965351105, "learning_rate": 3.1584871222875684e-05, "loss": 0.2093, "step": 23640 }, { "epoch": 1.294694190439687, "grad_norm": 0.11666838079690933, "learning_rate": 3.157980125735145e-05, "loss": 0.1956, "step": 23645 }, { "epoch": 1.2949679680227784, "grad_norm": 0.13667161762714386, "learning_rate": 3.157473129182722e-05, "loss": 0.2017, "step": 23650 }, { "epoch": 1.2952417456058698, "grad_norm": 0.13888640701770782, "learning_rate": 3.156966132630299e-05, "loss": 0.1938, "step": 23655 }, { "epoch": 1.2955155231889612, "grad_norm": 0.12604911625385284, "learning_rate": 3.156459136077875e-05, "loss": 0.1991, "step": 23660 }, { "epoch": 1.2957893007720527, "grad_norm": 0.12726882100105286, "learning_rate": 3.1559521395254514e-05, "loss": 0.1964, "step": 23665 }, { "epoch": 1.2960630783551443, "grad_norm": 0.13017426431179047, "learning_rate": 3.1554451429730284e-05, "loss": 0.2146, "step": 23670 }, { "epoch": 1.2963368559382358, "grad_norm": 0.1263498216867447, "learning_rate": 3.154938146420605e-05, "loss": 0.1971, "step": 23675 }, { "epoch": 1.2966106335213272, "grad_norm": 0.13446125388145447, "learning_rate": 3.154431149868181e-05, "loss": 0.1979, "step": 23680 }, { "epoch": 1.2968844111044189, "grad_norm": 0.14552852511405945, "learning_rate": 3.1539241533157574e-05, "loss": 0.2091, "step": 23685 }, { "epoch": 1.2971581886875103, "grad_norm": 0.1184336245059967, "learning_rate": 3.1534171567633344e-05, "loss": 0.2014, "step": 23690 }, { "epoch": 1.2974319662706018, "grad_norm": 0.1656537652015686, "learning_rate": 3.152910160210911e-05, "loss": 0.2105, "step": 23695 }, { "epoch": 1.2977057438536932, "grad_norm": 0.12499986588954926, "learning_rate": 3.152403163658487e-05, "loss": 0.2057, "step": 23700 }, { "epoch": 1.2979795214367846, "grad_norm": 0.15355588495731354, "learning_rate": 3.151896167106064e-05, "loss": 0.2059, "step": 23705 }, { "epoch": 1.2982532990198763, "grad_norm": 0.12003158777952194, "learning_rate": 3.1513891705536404e-05, "loss": 0.2105, "step": 23710 }, { "epoch": 1.2985270766029677, "grad_norm": 0.13628649711608887, "learning_rate": 3.150882174001217e-05, "loss": 0.2107, "step": 23715 }, { "epoch": 1.2988008541860592, "grad_norm": 0.11384966969490051, "learning_rate": 3.150375177448793e-05, "loss": 0.2082, "step": 23720 }, { "epoch": 1.2990746317691508, "grad_norm": 0.12467748671770096, "learning_rate": 3.14986818089637e-05, "loss": 0.2031, "step": 23725 }, { "epoch": 1.2993484093522423, "grad_norm": 0.11960909515619278, "learning_rate": 3.1493611843439464e-05, "loss": 0.203, "step": 23730 }, { "epoch": 1.2996221869353337, "grad_norm": 0.11425302922725677, "learning_rate": 3.1488541877915234e-05, "loss": 0.1995, "step": 23735 }, { "epoch": 1.2998959645184252, "grad_norm": 0.15865828096866608, "learning_rate": 3.1483471912391e-05, "loss": 0.2125, "step": 23740 }, { "epoch": 1.3001697421015166, "grad_norm": 0.12113827466964722, "learning_rate": 3.147840194686677e-05, "loss": 0.2031, "step": 23745 }, { "epoch": 1.3004435196846083, "grad_norm": 0.12139856070280075, "learning_rate": 3.147333198134253e-05, "loss": 0.2085, "step": 23750 }, { "epoch": 1.3007172972676997, "grad_norm": 0.1545022577047348, "learning_rate": 3.1468262015818294e-05, "loss": 0.2055, "step": 23755 }, { "epoch": 1.3009910748507911, "grad_norm": 0.13042740523815155, "learning_rate": 3.146319205029406e-05, "loss": 0.1942, "step": 23760 }, { "epoch": 1.3012648524338828, "grad_norm": 0.1501227468252182, "learning_rate": 3.145812208476983e-05, "loss": 0.2146, "step": 23765 }, { "epoch": 1.3015386300169742, "grad_norm": 0.13231338560581207, "learning_rate": 3.145305211924559e-05, "loss": 0.2047, "step": 23770 }, { "epoch": 1.3018124076000657, "grad_norm": 0.12883052229881287, "learning_rate": 3.1447982153721354e-05, "loss": 0.2008, "step": 23775 }, { "epoch": 1.3020861851831573, "grad_norm": 0.1320689618587494, "learning_rate": 3.1442912188197124e-05, "loss": 0.2, "step": 23780 }, { "epoch": 1.3023599627662488, "grad_norm": 0.12327057123184204, "learning_rate": 3.143784222267289e-05, "loss": 0.2038, "step": 23785 }, { "epoch": 1.3026337403493402, "grad_norm": 0.11172697693109512, "learning_rate": 3.143277225714865e-05, "loss": 0.202, "step": 23790 }, { "epoch": 1.3029075179324316, "grad_norm": 0.128970205783844, "learning_rate": 3.1427702291624414e-05, "loss": 0.2235, "step": 23795 }, { "epoch": 1.303181295515523, "grad_norm": 0.12561416625976562, "learning_rate": 3.1422632326100184e-05, "loss": 0.2106, "step": 23800 }, { "epoch": 1.3034550730986147, "grad_norm": 0.12182427197694778, "learning_rate": 3.141756236057595e-05, "loss": 0.2025, "step": 23805 }, { "epoch": 1.3037288506817062, "grad_norm": 0.14498819410800934, "learning_rate": 3.141249239505171e-05, "loss": 0.2007, "step": 23810 }, { "epoch": 1.3040026282647976, "grad_norm": 0.13231123983860016, "learning_rate": 3.140742242952748e-05, "loss": 0.195, "step": 23815 }, { "epoch": 1.3042764058478893, "grad_norm": 0.13521388173103333, "learning_rate": 3.140235246400325e-05, "loss": 0.2089, "step": 23820 }, { "epoch": 1.3045501834309807, "grad_norm": 0.13456223905086517, "learning_rate": 3.1397282498479014e-05, "loss": 0.2152, "step": 23825 }, { "epoch": 1.3048239610140722, "grad_norm": 0.12312902510166168, "learning_rate": 3.139221253295478e-05, "loss": 0.2055, "step": 23830 }, { "epoch": 1.3050977385971636, "grad_norm": 0.12690004706382751, "learning_rate": 3.138714256743055e-05, "loss": 0.2011, "step": 23835 }, { "epoch": 1.305371516180255, "grad_norm": 0.1447463184595108, "learning_rate": 3.138207260190631e-05, "loss": 0.2099, "step": 23840 }, { "epoch": 1.3056452937633467, "grad_norm": 0.12576407194137573, "learning_rate": 3.1377002636382074e-05, "loss": 0.2079, "step": 23845 }, { "epoch": 1.3059190713464381, "grad_norm": 0.14031806588172913, "learning_rate": 3.137193267085784e-05, "loss": 0.1969, "step": 23850 }, { "epoch": 1.3061928489295296, "grad_norm": 0.11635944992303848, "learning_rate": 3.136686270533361e-05, "loss": 0.203, "step": 23855 }, { "epoch": 1.3064666265126212, "grad_norm": 0.11312460899353027, "learning_rate": 3.136179273980937e-05, "loss": 0.2025, "step": 23860 }, { "epoch": 1.3067404040957127, "grad_norm": 0.11574015021324158, "learning_rate": 3.1356722774285134e-05, "loss": 0.1988, "step": 23865 }, { "epoch": 1.3070141816788041, "grad_norm": 0.1351296752691269, "learning_rate": 3.1351652808760904e-05, "loss": 0.2069, "step": 23870 }, { "epoch": 1.3072879592618956, "grad_norm": 0.13898998498916626, "learning_rate": 3.134658284323667e-05, "loss": 0.2014, "step": 23875 }, { "epoch": 1.307561736844987, "grad_norm": 0.19141578674316406, "learning_rate": 3.134151287771243e-05, "loss": 0.2243, "step": 23880 }, { "epoch": 1.3078355144280787, "grad_norm": 0.13914264738559723, "learning_rate": 3.1336442912188194e-05, "loss": 0.2099, "step": 23885 }, { "epoch": 1.30810929201117, "grad_norm": 0.148869127035141, "learning_rate": 3.1331372946663964e-05, "loss": 0.2131, "step": 23890 }, { "epoch": 1.3083830695942615, "grad_norm": 0.12017402052879333, "learning_rate": 3.1326302981139734e-05, "loss": 0.1996, "step": 23895 }, { "epoch": 1.3086568471773532, "grad_norm": 0.14252154529094696, "learning_rate": 3.13212330156155e-05, "loss": 0.2053, "step": 23900 }, { "epoch": 1.3089306247604446, "grad_norm": 0.1303432285785675, "learning_rate": 3.131616305009126e-05, "loss": 0.2089, "step": 23905 }, { "epoch": 1.309204402343536, "grad_norm": 0.13070736825466156, "learning_rate": 3.131109308456703e-05, "loss": 0.1994, "step": 23910 }, { "epoch": 1.3094781799266277, "grad_norm": 0.11819642037153244, "learning_rate": 3.1306023119042794e-05, "loss": 0.2064, "step": 23915 }, { "epoch": 1.3097519575097192, "grad_norm": 0.14298352599143982, "learning_rate": 3.130095315351856e-05, "loss": 0.2119, "step": 23920 }, { "epoch": 1.3100257350928106, "grad_norm": 0.13619275391101837, "learning_rate": 3.129588318799432e-05, "loss": 0.2149, "step": 23925 }, { "epoch": 1.310299512675902, "grad_norm": 0.14179064333438873, "learning_rate": 3.129081322247009e-05, "loss": 0.2039, "step": 23930 }, { "epoch": 1.3105732902589935, "grad_norm": 0.13030287623405457, "learning_rate": 3.1285743256945854e-05, "loss": 0.2049, "step": 23935 }, { "epoch": 1.3108470678420852, "grad_norm": 0.12098269164562225, "learning_rate": 3.128067329142162e-05, "loss": 0.1984, "step": 23940 }, { "epoch": 1.3111208454251766, "grad_norm": 0.14996661245822906, "learning_rate": 3.127560332589739e-05, "loss": 0.2104, "step": 23945 }, { "epoch": 1.311394623008268, "grad_norm": 0.16907238960266113, "learning_rate": 3.127053336037315e-05, "loss": 0.2058, "step": 23950 }, { "epoch": 1.3116684005913597, "grad_norm": 0.18228857219219208, "learning_rate": 3.1265463394848914e-05, "loss": 0.2052, "step": 23955 }, { "epoch": 1.3119421781744511, "grad_norm": 0.1435035765171051, "learning_rate": 3.126039342932468e-05, "loss": 0.206, "step": 23960 }, { "epoch": 1.3122159557575426, "grad_norm": 0.19441652297973633, "learning_rate": 3.125532346380045e-05, "loss": 0.2135, "step": 23965 }, { "epoch": 1.312489733340634, "grad_norm": 0.1389235407114029, "learning_rate": 3.125025349827621e-05, "loss": 0.2004, "step": 23970 }, { "epoch": 1.3127635109237255, "grad_norm": 0.13789619505405426, "learning_rate": 3.124518353275198e-05, "loss": 0.2036, "step": 23975 }, { "epoch": 1.3130372885068171, "grad_norm": 0.12824063003063202, "learning_rate": 3.1240113567227744e-05, "loss": 0.2026, "step": 23980 }, { "epoch": 1.3133110660899086, "grad_norm": 0.1392715722322464, "learning_rate": 3.1235043601703514e-05, "loss": 0.2146, "step": 23985 }, { "epoch": 1.313584843673, "grad_norm": 0.1136767715215683, "learning_rate": 3.122997363617928e-05, "loss": 0.1991, "step": 23990 }, { "epoch": 1.3138586212560917, "grad_norm": 0.1324659138917923, "learning_rate": 3.122490367065504e-05, "loss": 0.1996, "step": 23995 }, { "epoch": 1.314132398839183, "grad_norm": 0.15088151395320892, "learning_rate": 3.121983370513081e-05, "loss": 0.2033, "step": 24000 }, { "epoch": 1.3144061764222745, "grad_norm": 0.1425837278366089, "learning_rate": 3.1214763739606574e-05, "loss": 0.2058, "step": 24005 }, { "epoch": 1.314679954005366, "grad_norm": 0.1294994205236435, "learning_rate": 3.120969377408234e-05, "loss": 0.2053, "step": 24010 }, { "epoch": 1.3149537315884574, "grad_norm": 0.12374542653560638, "learning_rate": 3.12046238085581e-05, "loss": 0.2069, "step": 24015 }, { "epoch": 1.315227509171549, "grad_norm": 0.12783536314964294, "learning_rate": 3.119955384303387e-05, "loss": 0.2021, "step": 24020 }, { "epoch": 1.3155012867546405, "grad_norm": 0.12466610223054886, "learning_rate": 3.1194483877509634e-05, "loss": 0.1976, "step": 24025 }, { "epoch": 1.315775064337732, "grad_norm": 0.15905149281024933, "learning_rate": 3.11894139119854e-05, "loss": 0.2057, "step": 24030 }, { "epoch": 1.3160488419208236, "grad_norm": 0.11548259854316711, "learning_rate": 3.118434394646116e-05, "loss": 0.204, "step": 24035 }, { "epoch": 1.316322619503915, "grad_norm": 0.1412055641412735, "learning_rate": 3.117927398093693e-05, "loss": 0.2062, "step": 24040 }, { "epoch": 1.3165963970870065, "grad_norm": 0.14878727495670319, "learning_rate": 3.1174204015412694e-05, "loss": 0.199, "step": 24045 }, { "epoch": 1.3168701746700981, "grad_norm": 0.19333823025226593, "learning_rate": 3.116913404988846e-05, "loss": 0.2087, "step": 24050 }, { "epoch": 1.3171439522531896, "grad_norm": 0.14806486666202545, "learning_rate": 3.116406408436423e-05, "loss": 0.2013, "step": 24055 }, { "epoch": 1.317417729836281, "grad_norm": 0.1316903531551361, "learning_rate": 3.115899411884e-05, "loss": 0.202, "step": 24060 }, { "epoch": 1.3176915074193725, "grad_norm": 0.13534991443157196, "learning_rate": 3.115392415331576e-05, "loss": 0.2066, "step": 24065 }, { "epoch": 1.317965285002464, "grad_norm": 0.1357145607471466, "learning_rate": 3.1148854187791524e-05, "loss": 0.2004, "step": 24070 }, { "epoch": 1.3182390625855556, "grad_norm": 0.13205525279045105, "learning_rate": 3.1143784222267294e-05, "loss": 0.1964, "step": 24075 }, { "epoch": 1.318512840168647, "grad_norm": 0.12600335478782654, "learning_rate": 3.113871425674306e-05, "loss": 0.2083, "step": 24080 }, { "epoch": 1.3187866177517384, "grad_norm": 0.135155588388443, "learning_rate": 3.113364429121882e-05, "loss": 0.2101, "step": 24085 }, { "epoch": 1.31906039533483, "grad_norm": 0.13822636008262634, "learning_rate": 3.1128574325694584e-05, "loss": 0.2034, "step": 24090 }, { "epoch": 1.3193341729179215, "grad_norm": 0.11925073713064194, "learning_rate": 3.1123504360170354e-05, "loss": 0.202, "step": 24095 }, { "epoch": 1.319607950501013, "grad_norm": 0.14533740282058716, "learning_rate": 3.111843439464612e-05, "loss": 0.2014, "step": 24100 }, { "epoch": 1.3198817280841044, "grad_norm": 0.13457095623016357, "learning_rate": 3.111336442912188e-05, "loss": 0.2025, "step": 24105 }, { "epoch": 1.3201555056671959, "grad_norm": 0.13065285980701447, "learning_rate": 3.110829446359765e-05, "loss": 0.2023, "step": 24110 }, { "epoch": 1.3204292832502875, "grad_norm": 0.16118274629116058, "learning_rate": 3.1103224498073414e-05, "loss": 0.2192, "step": 24115 }, { "epoch": 1.320703060833379, "grad_norm": 0.12302467226982117, "learning_rate": 3.109815453254918e-05, "loss": 0.2039, "step": 24120 }, { "epoch": 1.3209768384164704, "grad_norm": 0.13865846395492554, "learning_rate": 3.109308456702494e-05, "loss": 0.2007, "step": 24125 }, { "epoch": 1.321250615999562, "grad_norm": 0.13834573328495026, "learning_rate": 3.108801460150071e-05, "loss": 0.2094, "step": 24130 }, { "epoch": 1.3215243935826535, "grad_norm": 0.1137184351682663, "learning_rate": 3.1082944635976474e-05, "loss": 0.2093, "step": 24135 }, { "epoch": 1.321798171165745, "grad_norm": 0.12360966950654984, "learning_rate": 3.1077874670452244e-05, "loss": 0.1931, "step": 24140 }, { "epoch": 1.3220719487488364, "grad_norm": 0.12103845924139023, "learning_rate": 3.107280470492801e-05, "loss": 0.2076, "step": 24145 }, { "epoch": 1.3223457263319278, "grad_norm": 0.12106449156999588, "learning_rate": 3.106773473940378e-05, "loss": 0.2104, "step": 24150 }, { "epoch": 1.3226195039150195, "grad_norm": 0.11062207818031311, "learning_rate": 3.106266477387954e-05, "loss": 0.2014, "step": 24155 }, { "epoch": 1.322893281498111, "grad_norm": 0.13522620499134064, "learning_rate": 3.1057594808355304e-05, "loss": 0.1998, "step": 24160 }, { "epoch": 1.3231670590812024, "grad_norm": 0.11743641644716263, "learning_rate": 3.105252484283107e-05, "loss": 0.2076, "step": 24165 }, { "epoch": 1.323440836664294, "grad_norm": 0.13863201439380646, "learning_rate": 3.104745487730684e-05, "loss": 0.2011, "step": 24170 }, { "epoch": 1.3237146142473855, "grad_norm": 0.13583146035671234, "learning_rate": 3.10423849117826e-05, "loss": 0.2057, "step": 24175 }, { "epoch": 1.323988391830477, "grad_norm": 0.11443033069372177, "learning_rate": 3.1037314946258364e-05, "loss": 0.1994, "step": 24180 }, { "epoch": 1.3242621694135683, "grad_norm": 0.146317258477211, "learning_rate": 3.1032244980734135e-05, "loss": 0.2028, "step": 24185 }, { "epoch": 1.32453594699666, "grad_norm": 0.1239156574010849, "learning_rate": 3.10271750152099e-05, "loss": 0.1969, "step": 24190 }, { "epoch": 1.3248097245797514, "grad_norm": 0.1496327668428421, "learning_rate": 3.102210504968566e-05, "loss": 0.201, "step": 24195 }, { "epoch": 1.3250835021628429, "grad_norm": 0.11808203160762787, "learning_rate": 3.1017035084161424e-05, "loss": 0.1971, "step": 24200 }, { "epoch": 1.3253572797459343, "grad_norm": 0.1273053139448166, "learning_rate": 3.1011965118637195e-05, "loss": 0.2003, "step": 24205 }, { "epoch": 1.325631057329026, "grad_norm": 0.11767402291297913, "learning_rate": 3.100689515311296e-05, "loss": 0.1999, "step": 24210 }, { "epoch": 1.3259048349121174, "grad_norm": 0.13393579423427582, "learning_rate": 3.100182518758872e-05, "loss": 0.2189, "step": 24215 }, { "epoch": 1.3261786124952089, "grad_norm": 0.1401025950908661, "learning_rate": 3.099675522206449e-05, "loss": 0.2017, "step": 24220 }, { "epoch": 1.3264523900783005, "grad_norm": 0.14182201027870178, "learning_rate": 3.099168525654026e-05, "loss": 0.203, "step": 24225 }, { "epoch": 1.326726167661392, "grad_norm": 0.1310279667377472, "learning_rate": 3.0986615291016025e-05, "loss": 0.2045, "step": 24230 }, { "epoch": 1.3269999452444834, "grad_norm": 0.1372610181570053, "learning_rate": 3.098154532549179e-05, "loss": 0.2091, "step": 24235 }, { "epoch": 1.3272737228275748, "grad_norm": 0.1494670957326889, "learning_rate": 3.097647535996756e-05, "loss": 0.2062, "step": 24240 }, { "epoch": 1.3275475004106663, "grad_norm": 0.12925702333450317, "learning_rate": 3.097140539444332e-05, "loss": 0.2002, "step": 24245 }, { "epoch": 1.327821277993758, "grad_norm": 0.14641998708248138, "learning_rate": 3.0966335428919085e-05, "loss": 0.2014, "step": 24250 }, { "epoch": 1.3280950555768494, "grad_norm": 0.1288626790046692, "learning_rate": 3.096126546339485e-05, "loss": 0.1977, "step": 24255 }, { "epoch": 1.3283688331599408, "grad_norm": 0.12560096383094788, "learning_rate": 3.095619549787062e-05, "loss": 0.2071, "step": 24260 }, { "epoch": 1.3286426107430325, "grad_norm": 0.13992848992347717, "learning_rate": 3.095112553234638e-05, "loss": 0.2004, "step": 24265 }, { "epoch": 1.328916388326124, "grad_norm": 0.12479788810014725, "learning_rate": 3.0946055566822145e-05, "loss": 0.2104, "step": 24270 }, { "epoch": 1.3291901659092153, "grad_norm": 0.14404574036598206, "learning_rate": 3.0940985601297915e-05, "loss": 0.2043, "step": 24275 }, { "epoch": 1.3294639434923068, "grad_norm": 0.12214723229408264, "learning_rate": 3.093591563577368e-05, "loss": 0.2113, "step": 24280 }, { "epoch": 1.3297377210753982, "grad_norm": 0.11869578808546066, "learning_rate": 3.093084567024944e-05, "loss": 0.2099, "step": 24285 }, { "epoch": 1.3300114986584899, "grad_norm": 0.14021503925323486, "learning_rate": 3.0925775704725205e-05, "loss": 0.2075, "step": 24290 }, { "epoch": 1.3302852762415813, "grad_norm": 0.14418552815914154, "learning_rate": 3.0920705739200975e-05, "loss": 0.2028, "step": 24295 }, { "epoch": 1.3305590538246728, "grad_norm": 0.20789970457553864, "learning_rate": 3.0915635773676745e-05, "loss": 0.2044, "step": 24300 }, { "epoch": 1.3308328314077644, "grad_norm": 0.1728091686964035, "learning_rate": 3.091056580815251e-05, "loss": 0.2069, "step": 24305 }, { "epoch": 1.3311066089908559, "grad_norm": 0.13560080528259277, "learning_rate": 3.090549584262827e-05, "loss": 0.2033, "step": 24310 }, { "epoch": 1.3313803865739473, "grad_norm": 0.1190541610121727, "learning_rate": 3.090042587710404e-05, "loss": 0.2004, "step": 24315 }, { "epoch": 1.3316541641570387, "grad_norm": 0.1496286541223526, "learning_rate": 3.0895355911579805e-05, "loss": 0.1979, "step": 24320 }, { "epoch": 1.3319279417401302, "grad_norm": 0.13852162659168243, "learning_rate": 3.089028594605557e-05, "loss": 0.1996, "step": 24325 }, { "epoch": 1.3322017193232218, "grad_norm": 0.1726474165916443, "learning_rate": 3.088521598053133e-05, "loss": 0.2095, "step": 24330 }, { "epoch": 1.3324754969063133, "grad_norm": 0.11955749243497849, "learning_rate": 3.08801460150071e-05, "loss": 0.2134, "step": 24335 }, { "epoch": 1.3327492744894047, "grad_norm": 0.13528284430503845, "learning_rate": 3.0875076049482865e-05, "loss": 0.2035, "step": 24340 }, { "epoch": 1.3330230520724964, "grad_norm": 0.12640392780303955, "learning_rate": 3.087000608395863e-05, "loss": 0.1992, "step": 24345 }, { "epoch": 1.3332968296555878, "grad_norm": 0.1360841989517212, "learning_rate": 3.08649361184344e-05, "loss": 0.2111, "step": 24350 }, { "epoch": 1.3335706072386793, "grad_norm": 0.13583114743232727, "learning_rate": 3.085986615291016e-05, "loss": 0.216, "step": 24355 }, { "epoch": 1.333844384821771, "grad_norm": 0.1833513230085373, "learning_rate": 3.0854796187385925e-05, "loss": 0.2092, "step": 24360 }, { "epoch": 1.3341181624048624, "grad_norm": 0.15174411237239838, "learning_rate": 3.084972622186169e-05, "loss": 0.2168, "step": 24365 }, { "epoch": 1.3343919399879538, "grad_norm": 0.1458849012851715, "learning_rate": 3.084465625633746e-05, "loss": 0.2054, "step": 24370 }, { "epoch": 1.3346657175710452, "grad_norm": 0.1589146852493286, "learning_rate": 3.083958629081322e-05, "loss": 0.2091, "step": 24375 }, { "epoch": 1.3349394951541367, "grad_norm": 0.14394932985305786, "learning_rate": 3.0834516325288985e-05, "loss": 0.2128, "step": 24380 }, { "epoch": 1.3352132727372283, "grad_norm": 0.12684345245361328, "learning_rate": 3.0829446359764755e-05, "loss": 0.202, "step": 24385 }, { "epoch": 1.3354870503203198, "grad_norm": 0.13257434964179993, "learning_rate": 3.0824376394240525e-05, "loss": 0.2009, "step": 24390 }, { "epoch": 1.3357608279034112, "grad_norm": 0.16765446960926056, "learning_rate": 3.081930642871629e-05, "loss": 0.2151, "step": 24395 }, { "epoch": 1.3360346054865029, "grad_norm": 0.12870803475379944, "learning_rate": 3.081423646319205e-05, "loss": 0.2067, "step": 24400 }, { "epoch": 1.3363083830695943, "grad_norm": 0.14786174893379211, "learning_rate": 3.080916649766782e-05, "loss": 0.2076, "step": 24405 }, { "epoch": 1.3365821606526858, "grad_norm": 0.13082173466682434, "learning_rate": 3.0804096532143585e-05, "loss": 0.2035, "step": 24410 }, { "epoch": 1.3368559382357772, "grad_norm": 0.13506703078746796, "learning_rate": 3.079902656661935e-05, "loss": 0.1965, "step": 24415 }, { "epoch": 1.3371297158188686, "grad_norm": 0.11442571878433228, "learning_rate": 3.079395660109511e-05, "loss": 0.1943, "step": 24420 }, { "epoch": 1.3374034934019603, "grad_norm": 0.1445186585187912, "learning_rate": 3.078888663557088e-05, "loss": 0.2085, "step": 24425 }, { "epoch": 1.3376772709850517, "grad_norm": 0.1252501904964447, "learning_rate": 3.0783816670046645e-05, "loss": 0.2009, "step": 24430 }, { "epoch": 1.3379510485681432, "grad_norm": 0.11890505254268646, "learning_rate": 3.077874670452241e-05, "loss": 0.2012, "step": 24435 }, { "epoch": 1.3382248261512348, "grad_norm": 0.16789712011814117, "learning_rate": 3.077367673899818e-05, "loss": 0.2064, "step": 24440 }, { "epoch": 1.3384986037343263, "grad_norm": 0.15106211602687836, "learning_rate": 3.076860677347394e-05, "loss": 0.2101, "step": 24445 }, { "epoch": 1.3387723813174177, "grad_norm": 0.1357617825269699, "learning_rate": 3.0763536807949705e-05, "loss": 0.2153, "step": 24450 }, { "epoch": 1.3390461589005092, "grad_norm": 0.1412949562072754, "learning_rate": 3.075846684242547e-05, "loss": 0.1994, "step": 24455 }, { "epoch": 1.3393199364836006, "grad_norm": 0.12685030698776245, "learning_rate": 3.075339687690124e-05, "loss": 0.1964, "step": 24460 }, { "epoch": 1.3395937140666923, "grad_norm": 0.18010573089122772, "learning_rate": 3.074832691137701e-05, "loss": 0.2064, "step": 24465 }, { "epoch": 1.3398674916497837, "grad_norm": 0.1319451779127121, "learning_rate": 3.074325694585277e-05, "loss": 0.2083, "step": 24470 }, { "epoch": 1.3401412692328751, "grad_norm": 0.1396511346101761, "learning_rate": 3.0738186980328535e-05, "loss": 0.2046, "step": 24475 }, { "epoch": 1.3404150468159668, "grad_norm": 0.1569657027721405, "learning_rate": 3.0733117014804305e-05, "loss": 0.2062, "step": 24480 }, { "epoch": 1.3406888243990582, "grad_norm": 0.1185554638504982, "learning_rate": 3.072804704928007e-05, "loss": 0.207, "step": 24485 }, { "epoch": 1.3409626019821497, "grad_norm": 0.129295215010643, "learning_rate": 3.072297708375583e-05, "loss": 0.2075, "step": 24490 }, { "epoch": 1.3412363795652413, "grad_norm": 0.12912069261074066, "learning_rate": 3.0717907118231595e-05, "loss": 0.2183, "step": 24495 }, { "epoch": 1.3415101571483328, "grad_norm": 0.14145122468471527, "learning_rate": 3.0712837152707365e-05, "loss": 0.2089, "step": 24500 }, { "epoch": 1.3417839347314242, "grad_norm": 0.12726466357707977, "learning_rate": 3.070776718718313e-05, "loss": 0.209, "step": 24505 }, { "epoch": 1.3420577123145156, "grad_norm": 0.1333082616329193, "learning_rate": 3.070269722165889e-05, "loss": 0.2078, "step": 24510 }, { "epoch": 1.342331489897607, "grad_norm": 0.12794159352779388, "learning_rate": 3.069762725613466e-05, "loss": 0.1994, "step": 24515 }, { "epoch": 1.3426052674806987, "grad_norm": 0.11432641744613647, "learning_rate": 3.0692557290610425e-05, "loss": 0.2036, "step": 24520 }, { "epoch": 1.3428790450637902, "grad_norm": 0.11727271974086761, "learning_rate": 3.068748732508619e-05, "loss": 0.201, "step": 24525 }, { "epoch": 1.3431528226468816, "grad_norm": 0.13251885771751404, "learning_rate": 3.068241735956195e-05, "loss": 0.2052, "step": 24530 }, { "epoch": 1.3434266002299733, "grad_norm": 0.13324476778507233, "learning_rate": 3.067734739403772e-05, "loss": 0.2029, "step": 24535 }, { "epoch": 1.3437003778130647, "grad_norm": 0.15080362558364868, "learning_rate": 3.0672277428513485e-05, "loss": 0.196, "step": 24540 }, { "epoch": 1.3439741553961562, "grad_norm": 0.12044297158718109, "learning_rate": 3.0667207462989255e-05, "loss": 0.2092, "step": 24545 }, { "epoch": 1.3442479329792476, "grad_norm": 0.1260644793510437, "learning_rate": 3.066213749746502e-05, "loss": 0.2014, "step": 24550 }, { "epoch": 1.344521710562339, "grad_norm": 0.14104746282100677, "learning_rate": 3.065706753194079e-05, "loss": 0.2039, "step": 24555 }, { "epoch": 1.3447954881454307, "grad_norm": 0.1347484588623047, "learning_rate": 3.065199756641655e-05, "loss": 0.1961, "step": 24560 }, { "epoch": 1.3450692657285221, "grad_norm": 0.12311667203903198, "learning_rate": 3.0646927600892315e-05, "loss": 0.2036, "step": 24565 }, { "epoch": 1.3453430433116136, "grad_norm": 0.11746416240930557, "learning_rate": 3.0641857635368085e-05, "loss": 0.2064, "step": 24570 }, { "epoch": 1.3456168208947052, "grad_norm": 0.144344761967659, "learning_rate": 3.063678766984385e-05, "loss": 0.2063, "step": 24575 }, { "epoch": 1.3458905984777967, "grad_norm": 0.12881720066070557, "learning_rate": 3.063171770431961e-05, "loss": 0.2002, "step": 24580 }, { "epoch": 1.3461643760608881, "grad_norm": 0.12133913487195969, "learning_rate": 3.0626647738795375e-05, "loss": 0.2059, "step": 24585 }, { "epoch": 1.3464381536439796, "grad_norm": 0.1287088245153427, "learning_rate": 3.0621577773271145e-05, "loss": 0.2186, "step": 24590 }, { "epoch": 1.346711931227071, "grad_norm": 0.12820281088352203, "learning_rate": 3.061650780774691e-05, "loss": 0.2075, "step": 24595 }, { "epoch": 1.3469857088101627, "grad_norm": 0.12216397374868393, "learning_rate": 3.061143784222267e-05, "loss": 0.2039, "step": 24600 }, { "epoch": 1.347259486393254, "grad_norm": 0.16058336198329926, "learning_rate": 3.060636787669844e-05, "loss": 0.2045, "step": 24605 }, { "epoch": 1.3475332639763455, "grad_norm": 0.1286533772945404, "learning_rate": 3.0601297911174205e-05, "loss": 0.2132, "step": 24610 }, { "epoch": 1.3478070415594372, "grad_norm": 0.11391352117061615, "learning_rate": 3.059622794564997e-05, "loss": 0.196, "step": 24615 }, { "epoch": 1.3480808191425286, "grad_norm": 0.12755310535430908, "learning_rate": 3.059115798012573e-05, "loss": 0.1987, "step": 24620 }, { "epoch": 1.34835459672562, "grad_norm": 0.13307207822799683, "learning_rate": 3.05860880146015e-05, "loss": 0.2003, "step": 24625 }, { "epoch": 1.3486283743087115, "grad_norm": 0.14414773881435394, "learning_rate": 3.058101804907727e-05, "loss": 0.2077, "step": 24630 }, { "epoch": 1.3489021518918032, "grad_norm": 0.12583543360233307, "learning_rate": 3.0575948083553035e-05, "loss": 0.2019, "step": 24635 }, { "epoch": 1.3491759294748946, "grad_norm": 0.13015629351139069, "learning_rate": 3.05708781180288e-05, "loss": 0.2121, "step": 24640 }, { "epoch": 1.349449707057986, "grad_norm": 0.14994651079177856, "learning_rate": 3.056580815250457e-05, "loss": 0.2003, "step": 24645 }, { "epoch": 1.3497234846410775, "grad_norm": 0.13541977107524872, "learning_rate": 3.056073818698033e-05, "loss": 0.2049, "step": 24650 }, { "epoch": 1.3499972622241692, "grad_norm": 0.14949405193328857, "learning_rate": 3.0555668221456095e-05, "loss": 0.2084, "step": 24655 }, { "epoch": 1.3502710398072606, "grad_norm": 0.14155784249305725, "learning_rate": 3.055059825593186e-05, "loss": 0.2056, "step": 24660 }, { "epoch": 1.350544817390352, "grad_norm": 0.12952782213687897, "learning_rate": 3.054552829040763e-05, "loss": 0.2025, "step": 24665 }, { "epoch": 1.3508185949734437, "grad_norm": 0.13745710253715515, "learning_rate": 3.054045832488339e-05, "loss": 0.2025, "step": 24670 }, { "epoch": 1.3510923725565351, "grad_norm": 0.14219605922698975, "learning_rate": 3.0535388359359155e-05, "loss": 0.2003, "step": 24675 }, { "epoch": 1.3513661501396266, "grad_norm": 0.11531440168619156, "learning_rate": 3.0530318393834925e-05, "loss": 0.1988, "step": 24680 }, { "epoch": 1.351639927722718, "grad_norm": 0.17900997400283813, "learning_rate": 3.052524842831069e-05, "loss": 0.2024, "step": 24685 }, { "epoch": 1.3519137053058095, "grad_norm": 0.12522971630096436, "learning_rate": 3.052017846278645e-05, "loss": 0.2041, "step": 24690 }, { "epoch": 1.3521874828889011, "grad_norm": 0.1265510469675064, "learning_rate": 3.051510849726222e-05, "loss": 0.2016, "step": 24695 }, { "epoch": 1.3524612604719926, "grad_norm": 0.1225028932094574, "learning_rate": 3.0510038531737982e-05, "loss": 0.1974, "step": 24700 }, { "epoch": 1.352735038055084, "grad_norm": 0.12777376174926758, "learning_rate": 3.050496856621375e-05, "loss": 0.1965, "step": 24705 }, { "epoch": 1.3530088156381757, "grad_norm": 0.11559569090604782, "learning_rate": 3.049989860068952e-05, "loss": 0.1927, "step": 24710 }, { "epoch": 1.353282593221267, "grad_norm": 0.1333783119916916, "learning_rate": 3.0494828635165285e-05, "loss": 0.2046, "step": 24715 }, { "epoch": 1.3535563708043585, "grad_norm": 0.1417113095521927, "learning_rate": 3.048975866964105e-05, "loss": 0.2033, "step": 24720 }, { "epoch": 1.35383014838745, "grad_norm": 0.12099289894104004, "learning_rate": 3.0484688704116815e-05, "loss": 0.196, "step": 24725 }, { "epoch": 1.3541039259705414, "grad_norm": 0.1319737434387207, "learning_rate": 3.047961873859258e-05, "loss": 0.2105, "step": 24730 }, { "epoch": 1.354377703553633, "grad_norm": 0.14281223714351654, "learning_rate": 3.0474548773068345e-05, "loss": 0.2084, "step": 24735 }, { "epoch": 1.3546514811367245, "grad_norm": 0.12342311441898346, "learning_rate": 3.0469478807544112e-05, "loss": 0.2146, "step": 24740 }, { "epoch": 1.354925258719816, "grad_norm": 0.11152788251638412, "learning_rate": 3.0464408842019875e-05, "loss": 0.1953, "step": 24745 }, { "epoch": 1.3551990363029076, "grad_norm": 0.11680401116609573, "learning_rate": 3.0459338876495642e-05, "loss": 0.204, "step": 24750 }, { "epoch": 1.355472813885999, "grad_norm": 0.14740286767482758, "learning_rate": 3.0454268910971405e-05, "loss": 0.205, "step": 24755 }, { "epoch": 1.3557465914690905, "grad_norm": 0.14138810336589813, "learning_rate": 3.0449198945447172e-05, "loss": 0.2123, "step": 24760 }, { "epoch": 1.356020369052182, "grad_norm": 0.12749189138412476, "learning_rate": 3.0444128979922935e-05, "loss": 0.2065, "step": 24765 }, { "epoch": 1.3562941466352734, "grad_norm": 0.13268576562404633, "learning_rate": 3.0439059014398702e-05, "loss": 0.2043, "step": 24770 }, { "epoch": 1.356567924218365, "grad_norm": 0.17309802770614624, "learning_rate": 3.043398904887447e-05, "loss": 0.2107, "step": 24775 }, { "epoch": 1.3568417018014565, "grad_norm": 0.12780828773975372, "learning_rate": 3.0428919083350232e-05, "loss": 0.198, "step": 24780 }, { "epoch": 1.357115479384548, "grad_norm": 0.1286337971687317, "learning_rate": 3.0423849117826e-05, "loss": 0.2097, "step": 24785 }, { "epoch": 1.3573892569676396, "grad_norm": 0.14358234405517578, "learning_rate": 3.041877915230177e-05, "loss": 0.214, "step": 24790 }, { "epoch": 1.357663034550731, "grad_norm": 0.15266892313957214, "learning_rate": 3.0413709186777532e-05, "loss": 0.2017, "step": 24795 }, { "epoch": 1.3579368121338224, "grad_norm": 0.14259493350982666, "learning_rate": 3.04086392212533e-05, "loss": 0.2021, "step": 24800 }, { "epoch": 1.358210589716914, "grad_norm": 0.1479731947183609, "learning_rate": 3.0403569255729066e-05, "loss": 0.2121, "step": 24805 }, { "epoch": 1.3584843673000055, "grad_norm": 0.14448325335979462, "learning_rate": 3.039849929020483e-05, "loss": 0.2126, "step": 24810 }, { "epoch": 1.358758144883097, "grad_norm": 0.14247407019138336, "learning_rate": 3.0393429324680596e-05, "loss": 0.2047, "step": 24815 }, { "epoch": 1.3590319224661884, "grad_norm": 0.13195179402828217, "learning_rate": 3.038835935915636e-05, "loss": 0.2101, "step": 24820 }, { "epoch": 1.3593057000492799, "grad_norm": 0.13745252788066864, "learning_rate": 3.0383289393632126e-05, "loss": 0.1969, "step": 24825 }, { "epoch": 1.3595794776323715, "grad_norm": 0.13217027485370636, "learning_rate": 3.037821942810789e-05, "loss": 0.2108, "step": 24830 }, { "epoch": 1.359853255215463, "grad_norm": 0.1489136666059494, "learning_rate": 3.0373149462583656e-05, "loss": 0.22, "step": 24835 }, { "epoch": 1.3601270327985544, "grad_norm": 0.1170007660984993, "learning_rate": 3.0368079497059422e-05, "loss": 0.2038, "step": 24840 }, { "epoch": 1.360400810381646, "grad_norm": 0.12212560325860977, "learning_rate": 3.0363009531535186e-05, "loss": 0.204, "step": 24845 }, { "epoch": 1.3606745879647375, "grad_norm": 0.12438423186540604, "learning_rate": 3.0357939566010952e-05, "loss": 0.2018, "step": 24850 }, { "epoch": 1.360948365547829, "grad_norm": 0.13493579626083374, "learning_rate": 3.0352869600486716e-05, "loss": 0.2049, "step": 24855 }, { "epoch": 1.3612221431309204, "grad_norm": 0.13578619062900543, "learning_rate": 3.0347799634962482e-05, "loss": 0.2117, "step": 24860 }, { "epoch": 1.3614959207140118, "grad_norm": 0.12530988454818726, "learning_rate": 3.0342729669438246e-05, "loss": 0.1977, "step": 24865 }, { "epoch": 1.3617696982971035, "grad_norm": 0.1405823677778244, "learning_rate": 3.033765970391402e-05, "loss": 0.2014, "step": 24870 }, { "epoch": 1.362043475880195, "grad_norm": 0.1251465529203415, "learning_rate": 3.0332589738389782e-05, "loss": 0.2044, "step": 24875 }, { "epoch": 1.3623172534632864, "grad_norm": 0.11631403118371964, "learning_rate": 3.032751977286555e-05, "loss": 0.1955, "step": 24880 }, { "epoch": 1.362591031046378, "grad_norm": 0.11740726977586746, "learning_rate": 3.0322449807341312e-05, "loss": 0.2077, "step": 24885 }, { "epoch": 1.3628648086294695, "grad_norm": 0.1543361395597458, "learning_rate": 3.031737984181708e-05, "loss": 0.2053, "step": 24890 }, { "epoch": 1.363138586212561, "grad_norm": 0.1350385546684265, "learning_rate": 3.0312309876292842e-05, "loss": 0.2062, "step": 24895 }, { "epoch": 1.3634123637956523, "grad_norm": 0.1312444508075714, "learning_rate": 3.030723991076861e-05, "loss": 0.2018, "step": 24900 }, { "epoch": 1.3636861413787438, "grad_norm": 0.1353958398103714, "learning_rate": 3.0302169945244376e-05, "loss": 0.2002, "step": 24905 }, { "epoch": 1.3639599189618354, "grad_norm": 0.13780546188354492, "learning_rate": 3.029709997972014e-05, "loss": 0.1984, "step": 24910 }, { "epoch": 1.3642336965449269, "grad_norm": 0.12454712390899658, "learning_rate": 3.0292030014195906e-05, "loss": 0.2042, "step": 24915 }, { "epoch": 1.3645074741280183, "grad_norm": 0.1325564831495285, "learning_rate": 3.028696004867167e-05, "loss": 0.1996, "step": 24920 }, { "epoch": 1.36478125171111, "grad_norm": 0.1388290375471115, "learning_rate": 3.0281890083147436e-05, "loss": 0.2066, "step": 24925 }, { "epoch": 1.3650550292942014, "grad_norm": 0.11468737572431564, "learning_rate": 3.02768201176232e-05, "loss": 0.2092, "step": 24930 }, { "epoch": 1.3653288068772929, "grad_norm": 0.12394914031028748, "learning_rate": 3.0271750152098966e-05, "loss": 0.219, "step": 24935 }, { "epoch": 1.3656025844603845, "grad_norm": 0.12135078758001328, "learning_rate": 3.0266680186574732e-05, "loss": 0.1981, "step": 24940 }, { "epoch": 1.365876362043476, "grad_norm": 0.13046494126319885, "learning_rate": 3.0261610221050496e-05, "loss": 0.2094, "step": 24945 }, { "epoch": 1.3661501396265674, "grad_norm": 0.11149874329566956, "learning_rate": 3.0256540255526262e-05, "loss": 0.2016, "step": 24950 }, { "epoch": 1.3664239172096588, "grad_norm": 0.12078851461410522, "learning_rate": 3.0251470290002032e-05, "loss": 0.2058, "step": 24955 }, { "epoch": 1.3666976947927503, "grad_norm": 0.1257631480693817, "learning_rate": 3.0246400324477796e-05, "loss": 0.1924, "step": 24960 }, { "epoch": 1.366971472375842, "grad_norm": 0.13341738283634186, "learning_rate": 3.0241330358953562e-05, "loss": 0.2004, "step": 24965 }, { "epoch": 1.3672452499589334, "grad_norm": 0.12443643063306808, "learning_rate": 3.023626039342933e-05, "loss": 0.2112, "step": 24970 }, { "epoch": 1.3675190275420248, "grad_norm": 0.12796595692634583, "learning_rate": 3.0231190427905092e-05, "loss": 0.2039, "step": 24975 }, { "epoch": 1.3677928051251165, "grad_norm": 0.12074897438287735, "learning_rate": 3.022612046238086e-05, "loss": 0.2125, "step": 24980 }, { "epoch": 1.368066582708208, "grad_norm": 0.13526029884815216, "learning_rate": 3.0221050496856622e-05, "loss": 0.2049, "step": 24985 }, { "epoch": 1.3683403602912994, "grad_norm": 0.13058960437774658, "learning_rate": 3.021598053133239e-05, "loss": 0.2088, "step": 24990 }, { "epoch": 1.3686141378743908, "grad_norm": 0.13185498118400574, "learning_rate": 3.0210910565808152e-05, "loss": 0.2042, "step": 24995 }, { "epoch": 1.3688879154574822, "grad_norm": 0.14075204730033875, "learning_rate": 3.020584060028392e-05, "loss": 0.2126, "step": 25000 }, { "epoch": 1.369161693040574, "grad_norm": 0.13191330432891846, "learning_rate": 3.0200770634759686e-05, "loss": 0.205, "step": 25005 }, { "epoch": 1.3694354706236653, "grad_norm": 0.13845157623291016, "learning_rate": 3.019570066923545e-05, "loss": 0.2006, "step": 25010 }, { "epoch": 1.3697092482067568, "grad_norm": 0.11068708449602127, "learning_rate": 3.0190630703711216e-05, "loss": 0.207, "step": 25015 }, { "epoch": 1.3699830257898484, "grad_norm": 0.1369282305240631, "learning_rate": 3.018556073818698e-05, "loss": 0.209, "step": 25020 }, { "epoch": 1.3702568033729399, "grad_norm": 0.132248193025589, "learning_rate": 3.0180490772662746e-05, "loss": 0.2101, "step": 25025 }, { "epoch": 1.3705305809560313, "grad_norm": 0.13763336837291718, "learning_rate": 3.017542080713851e-05, "loss": 0.203, "step": 25030 }, { "epoch": 1.3708043585391227, "grad_norm": 0.1388237029314041, "learning_rate": 3.0170350841614283e-05, "loss": 0.2145, "step": 25035 }, { "epoch": 1.3710781361222142, "grad_norm": 0.14134250581264496, "learning_rate": 3.0165280876090046e-05, "loss": 0.2129, "step": 25040 }, { "epoch": 1.3713519137053058, "grad_norm": 0.12076074630022049, "learning_rate": 3.0160210910565813e-05, "loss": 0.2108, "step": 25045 }, { "epoch": 1.3716256912883973, "grad_norm": 0.13246873021125793, "learning_rate": 3.0155140945041576e-05, "loss": 0.2055, "step": 25050 }, { "epoch": 1.3718994688714887, "grad_norm": 0.131540447473526, "learning_rate": 3.0150070979517343e-05, "loss": 0.2016, "step": 25055 }, { "epoch": 1.3721732464545804, "grad_norm": 0.11610293388366699, "learning_rate": 3.0145001013993106e-05, "loss": 0.2019, "step": 25060 }, { "epoch": 1.3724470240376718, "grad_norm": 0.11870328336954117, "learning_rate": 3.0139931048468873e-05, "loss": 0.2006, "step": 25065 }, { "epoch": 1.3727208016207633, "grad_norm": 0.13649463653564453, "learning_rate": 3.013486108294464e-05, "loss": 0.2145, "step": 25070 }, { "epoch": 1.372994579203855, "grad_norm": 0.11702309548854828, "learning_rate": 3.0129791117420403e-05, "loss": 0.2006, "step": 25075 }, { "epoch": 1.3732683567869464, "grad_norm": 0.12665016949176788, "learning_rate": 3.012472115189617e-05, "loss": 0.2033, "step": 25080 }, { "epoch": 1.3735421343700378, "grad_norm": 0.13106493651866913, "learning_rate": 3.0119651186371933e-05, "loss": 0.2042, "step": 25085 }, { "epoch": 1.3738159119531292, "grad_norm": 0.13245734572410583, "learning_rate": 3.01145812208477e-05, "loss": 0.2046, "step": 25090 }, { "epoch": 1.3740896895362207, "grad_norm": 0.1172124445438385, "learning_rate": 3.0109511255323463e-05, "loss": 0.1982, "step": 25095 }, { "epoch": 1.3743634671193123, "grad_norm": 0.1497499644756317, "learning_rate": 3.010444128979923e-05, "loss": 0.2085, "step": 25100 }, { "epoch": 1.3746372447024038, "grad_norm": 0.14785896241664886, "learning_rate": 3.0099371324274993e-05, "loss": 0.2109, "step": 25105 }, { "epoch": 1.3749110222854952, "grad_norm": 0.1471807062625885, "learning_rate": 3.009430135875076e-05, "loss": 0.199, "step": 25110 }, { "epoch": 1.3751847998685869, "grad_norm": 0.11452262848615646, "learning_rate": 3.008923139322653e-05, "loss": 0.2043, "step": 25115 }, { "epoch": 1.3754585774516783, "grad_norm": 0.11800739169120789, "learning_rate": 3.0084161427702296e-05, "loss": 0.2052, "step": 25120 }, { "epoch": 1.3757323550347698, "grad_norm": 0.12586970627307892, "learning_rate": 3.007909146217806e-05, "loss": 0.2019, "step": 25125 }, { "epoch": 1.3760061326178612, "grad_norm": 0.12593962252140045, "learning_rate": 3.0074021496653826e-05, "loss": 0.2037, "step": 25130 }, { "epoch": 1.3762799102009526, "grad_norm": 0.11979984492063522, "learning_rate": 3.0068951531129593e-05, "loss": 0.2056, "step": 25135 }, { "epoch": 1.3765536877840443, "grad_norm": 0.11012257635593414, "learning_rate": 3.0063881565605356e-05, "loss": 0.1974, "step": 25140 }, { "epoch": 1.3768274653671357, "grad_norm": 0.14838990569114685, "learning_rate": 3.0058811600081123e-05, "loss": 0.2141, "step": 25145 }, { "epoch": 1.3771012429502272, "grad_norm": 0.12625843286514282, "learning_rate": 3.0053741634556886e-05, "loss": 0.2092, "step": 25150 }, { "epoch": 1.3773750205333188, "grad_norm": 0.1277693659067154, "learning_rate": 3.0048671669032653e-05, "loss": 0.2035, "step": 25155 }, { "epoch": 1.3776487981164103, "grad_norm": 0.1300714761018753, "learning_rate": 3.0043601703508416e-05, "loss": 0.2003, "step": 25160 }, { "epoch": 1.3779225756995017, "grad_norm": 0.12054961174726486, "learning_rate": 3.0038531737984183e-05, "loss": 0.1905, "step": 25165 }, { "epoch": 1.3781963532825932, "grad_norm": 0.12613068521022797, "learning_rate": 3.0033461772459946e-05, "loss": 0.2013, "step": 25170 }, { "epoch": 1.3784701308656846, "grad_norm": 0.1310734897851944, "learning_rate": 3.0028391806935713e-05, "loss": 0.2024, "step": 25175 }, { "epoch": 1.3787439084487763, "grad_norm": 0.13922643661499023, "learning_rate": 3.002332184141148e-05, "loss": 0.1973, "step": 25180 }, { "epoch": 1.3790176860318677, "grad_norm": 0.12198494374752045, "learning_rate": 3.0018251875887243e-05, "loss": 0.2111, "step": 25185 }, { "epoch": 1.3792914636149591, "grad_norm": 0.17090654373168945, "learning_rate": 3.001318191036301e-05, "loss": 0.2025, "step": 25190 }, { "epoch": 1.3795652411980508, "grad_norm": 0.10620216280221939, "learning_rate": 3.000811194483878e-05, "loss": 0.2108, "step": 25195 }, { "epoch": 1.3798390187811422, "grad_norm": 0.12600992619991302, "learning_rate": 3.0003041979314546e-05, "loss": 0.2006, "step": 25200 }, { "epoch": 1.3801127963642337, "grad_norm": 0.14691062271595, "learning_rate": 2.999797201379031e-05, "loss": 0.2075, "step": 25205 }, { "epoch": 1.3803865739473251, "grad_norm": 0.12407390773296356, "learning_rate": 2.9992902048266076e-05, "loss": 0.2069, "step": 25210 }, { "epoch": 1.3806603515304166, "grad_norm": 0.1314932405948639, "learning_rate": 2.998783208274184e-05, "loss": 0.203, "step": 25215 }, { "epoch": 1.3809341291135082, "grad_norm": 0.12161843478679657, "learning_rate": 2.9982762117217606e-05, "loss": 0.2045, "step": 25220 }, { "epoch": 1.3812079066965997, "grad_norm": 0.13595521450042725, "learning_rate": 2.997769215169337e-05, "loss": 0.2024, "step": 25225 }, { "epoch": 1.381481684279691, "grad_norm": 0.13681553304195404, "learning_rate": 2.9972622186169136e-05, "loss": 0.2008, "step": 25230 }, { "epoch": 1.3817554618627828, "grad_norm": 0.14303508400917053, "learning_rate": 2.99675522206449e-05, "loss": 0.2001, "step": 25235 }, { "epoch": 1.3820292394458742, "grad_norm": 0.13016806542873383, "learning_rate": 2.9962482255120666e-05, "loss": 0.2138, "step": 25240 }, { "epoch": 1.3823030170289656, "grad_norm": 0.12026108801364899, "learning_rate": 2.9957412289596433e-05, "loss": 0.1991, "step": 25245 }, { "epoch": 1.3825767946120573, "grad_norm": 0.15504415333271027, "learning_rate": 2.9952342324072196e-05, "loss": 0.2085, "step": 25250 }, { "epoch": 1.3828505721951487, "grad_norm": 0.12987907230854034, "learning_rate": 2.9947272358547963e-05, "loss": 0.1944, "step": 25255 }, { "epoch": 1.3831243497782402, "grad_norm": 0.11564963310956955, "learning_rate": 2.9942202393023726e-05, "loss": 0.2056, "step": 25260 }, { "epoch": 1.3833981273613316, "grad_norm": 0.13216499984264374, "learning_rate": 2.9937132427499493e-05, "loss": 0.2022, "step": 25265 }, { "epoch": 1.383671904944423, "grad_norm": 0.14695492386817932, "learning_rate": 2.9932062461975256e-05, "loss": 0.2171, "step": 25270 }, { "epoch": 1.3839456825275147, "grad_norm": 0.16097936034202576, "learning_rate": 2.9926992496451023e-05, "loss": 0.2037, "step": 25275 }, { "epoch": 1.3842194601106061, "grad_norm": 0.12018870562314987, "learning_rate": 2.9921922530926793e-05, "loss": 0.1968, "step": 25280 }, { "epoch": 1.3844932376936976, "grad_norm": 0.13765151798725128, "learning_rate": 2.991685256540256e-05, "loss": 0.2122, "step": 25285 }, { "epoch": 1.3847670152767892, "grad_norm": 0.12821830809116364, "learning_rate": 2.9911782599878323e-05, "loss": 0.2083, "step": 25290 }, { "epoch": 1.3850407928598807, "grad_norm": 0.14863044023513794, "learning_rate": 2.990671263435409e-05, "loss": 0.1968, "step": 25295 }, { "epoch": 1.3853145704429721, "grad_norm": 0.15388759970664978, "learning_rate": 2.9901642668829853e-05, "loss": 0.2119, "step": 25300 }, { "epoch": 1.3855883480260636, "grad_norm": 0.13516873121261597, "learning_rate": 2.989657270330562e-05, "loss": 0.2029, "step": 25305 }, { "epoch": 1.385862125609155, "grad_norm": 0.1494317501783371, "learning_rate": 2.9891502737781386e-05, "loss": 0.2009, "step": 25310 }, { "epoch": 1.3861359031922467, "grad_norm": 0.14023426175117493, "learning_rate": 2.988643277225715e-05, "loss": 0.2031, "step": 25315 }, { "epoch": 1.386409680775338, "grad_norm": 0.18000274896621704, "learning_rate": 2.9881362806732916e-05, "loss": 0.21, "step": 25320 }, { "epoch": 1.3866834583584295, "grad_norm": 0.16689488291740417, "learning_rate": 2.987629284120868e-05, "loss": 0.1999, "step": 25325 }, { "epoch": 1.3869572359415212, "grad_norm": 0.19787445664405823, "learning_rate": 2.9871222875684446e-05, "loss": 0.2032, "step": 25330 }, { "epoch": 1.3872310135246126, "grad_norm": 0.15441083908081055, "learning_rate": 2.986615291016021e-05, "loss": 0.2057, "step": 25335 }, { "epoch": 1.387504791107704, "grad_norm": 0.14175905287265778, "learning_rate": 2.9861082944635976e-05, "loss": 0.2115, "step": 25340 }, { "epoch": 1.3877785686907955, "grad_norm": 0.16967856884002686, "learning_rate": 2.9856012979111743e-05, "loss": 0.1983, "step": 25345 }, { "epoch": 1.388052346273887, "grad_norm": 0.15879680216312408, "learning_rate": 2.9850943013587506e-05, "loss": 0.2121, "step": 25350 }, { "epoch": 1.3883261238569786, "grad_norm": 0.12458483129739761, "learning_rate": 2.9845873048063273e-05, "loss": 0.2047, "step": 25355 }, { "epoch": 1.38859990144007, "grad_norm": 0.14303120970726013, "learning_rate": 2.9840803082539043e-05, "loss": 0.2071, "step": 25360 }, { "epoch": 1.3888736790231615, "grad_norm": 0.1569707840681076, "learning_rate": 2.9835733117014806e-05, "loss": 0.1951, "step": 25365 }, { "epoch": 1.3891474566062532, "grad_norm": 0.11814964562654495, "learning_rate": 2.9830663151490573e-05, "loss": 0.2023, "step": 25370 }, { "epoch": 1.3894212341893446, "grad_norm": 0.12658633291721344, "learning_rate": 2.982559318596634e-05, "loss": 0.1979, "step": 25375 }, { "epoch": 1.389695011772436, "grad_norm": 0.15799875557422638, "learning_rate": 2.9820523220442103e-05, "loss": 0.2069, "step": 25380 }, { "epoch": 1.3899687893555277, "grad_norm": 0.1455484926700592, "learning_rate": 2.981545325491787e-05, "loss": 0.1927, "step": 25385 }, { "epoch": 1.3902425669386191, "grad_norm": 0.1306239515542984, "learning_rate": 2.9810383289393633e-05, "loss": 0.196, "step": 25390 }, { "epoch": 1.3905163445217106, "grad_norm": 0.2227073460817337, "learning_rate": 2.98053133238694e-05, "loss": 0.2037, "step": 25395 }, { "epoch": 1.390790122104802, "grad_norm": 0.15575578808784485, "learning_rate": 2.9800243358345163e-05, "loss": 0.1951, "step": 25400 }, { "epoch": 1.3910638996878935, "grad_norm": 0.11641238629817963, "learning_rate": 2.979517339282093e-05, "loss": 0.2087, "step": 25405 }, { "epoch": 1.3913376772709851, "grad_norm": 0.15129253268241882, "learning_rate": 2.9790103427296696e-05, "loss": 0.2044, "step": 25410 }, { "epoch": 1.3916114548540766, "grad_norm": 0.15721648931503296, "learning_rate": 2.978503346177246e-05, "loss": 0.2042, "step": 25415 }, { "epoch": 1.391885232437168, "grad_norm": 0.15004509687423706, "learning_rate": 2.9779963496248226e-05, "loss": 0.199, "step": 25420 }, { "epoch": 1.3921590100202597, "grad_norm": 0.20024126768112183, "learning_rate": 2.977489353072399e-05, "loss": 0.2126, "step": 25425 }, { "epoch": 1.392432787603351, "grad_norm": 0.14744123816490173, "learning_rate": 2.9769823565199756e-05, "loss": 0.2063, "step": 25430 }, { "epoch": 1.3927065651864425, "grad_norm": 0.1116407960653305, "learning_rate": 2.976475359967552e-05, "loss": 0.2004, "step": 25435 }, { "epoch": 1.392980342769534, "grad_norm": 0.16369374096393585, "learning_rate": 2.9759683634151293e-05, "loss": 0.2006, "step": 25440 }, { "epoch": 1.3932541203526254, "grad_norm": 0.1391550451517105, "learning_rate": 2.9754613668627057e-05, "loss": 0.2113, "step": 25445 }, { "epoch": 1.393527897935717, "grad_norm": 0.13911832869052887, "learning_rate": 2.9749543703102823e-05, "loss": 0.2111, "step": 25450 }, { "epoch": 1.3938016755188085, "grad_norm": 0.18355627357959747, "learning_rate": 2.9744473737578587e-05, "loss": 0.2054, "step": 25455 }, { "epoch": 1.3940754531019, "grad_norm": 0.11899752914905548, "learning_rate": 2.9739403772054353e-05, "loss": 0.2052, "step": 25460 }, { "epoch": 1.3943492306849916, "grad_norm": 0.1244754046201706, "learning_rate": 2.9734333806530117e-05, "loss": 0.2002, "step": 25465 }, { "epoch": 1.394623008268083, "grad_norm": 0.11930327117443085, "learning_rate": 2.9729263841005883e-05, "loss": 0.1987, "step": 25470 }, { "epoch": 1.3948967858511745, "grad_norm": 0.12323814630508423, "learning_rate": 2.972419387548165e-05, "loss": 0.2073, "step": 25475 }, { "epoch": 1.395170563434266, "grad_norm": 0.154885932803154, "learning_rate": 2.9719123909957413e-05, "loss": 0.2079, "step": 25480 }, { "epoch": 1.3954443410173574, "grad_norm": 0.13772115111351013, "learning_rate": 2.971405394443318e-05, "loss": 0.206, "step": 25485 }, { "epoch": 1.395718118600449, "grad_norm": 0.12329702824354172, "learning_rate": 2.9708983978908943e-05, "loss": 0.2025, "step": 25490 }, { "epoch": 1.3959918961835405, "grad_norm": 0.11945106089115143, "learning_rate": 2.970391401338471e-05, "loss": 0.2109, "step": 25495 }, { "epoch": 1.396265673766632, "grad_norm": 0.1400996744632721, "learning_rate": 2.9698844047860473e-05, "loss": 0.2132, "step": 25500 }, { "epoch": 1.3965394513497236, "grad_norm": 0.1545642763376236, "learning_rate": 2.969377408233624e-05, "loss": 0.2013, "step": 25505 }, { "epoch": 1.396813228932815, "grad_norm": 0.15397322177886963, "learning_rate": 2.9688704116812007e-05, "loss": 0.2059, "step": 25510 }, { "epoch": 1.3970870065159064, "grad_norm": 0.12603521347045898, "learning_rate": 2.968363415128777e-05, "loss": 0.2025, "step": 25515 }, { "epoch": 1.397360784098998, "grad_norm": 0.13895490765571594, "learning_rate": 2.9678564185763537e-05, "loss": 0.1919, "step": 25520 }, { "epoch": 1.3976345616820895, "grad_norm": 0.1872631162405014, "learning_rate": 2.9673494220239307e-05, "loss": 0.1989, "step": 25525 }, { "epoch": 1.397908339265181, "grad_norm": 0.1286713033914566, "learning_rate": 2.966842425471507e-05, "loss": 0.2043, "step": 25530 }, { "epoch": 1.3981821168482724, "grad_norm": 0.14039170742034912, "learning_rate": 2.9663354289190837e-05, "loss": 0.2063, "step": 25535 }, { "epoch": 1.3984558944313639, "grad_norm": 0.12639932334423065, "learning_rate": 2.9658284323666603e-05, "loss": 0.2016, "step": 25540 }, { "epoch": 1.3987296720144555, "grad_norm": 0.1390039175748825, "learning_rate": 2.9653214358142367e-05, "loss": 0.2133, "step": 25545 }, { "epoch": 1.399003449597547, "grad_norm": 0.1493031233549118, "learning_rate": 2.9648144392618133e-05, "loss": 0.2052, "step": 25550 }, { "epoch": 1.3992772271806384, "grad_norm": 0.14383935928344727, "learning_rate": 2.9643074427093897e-05, "loss": 0.1999, "step": 25555 }, { "epoch": 1.39955100476373, "grad_norm": 0.1375875473022461, "learning_rate": 2.9638004461569663e-05, "loss": 0.2003, "step": 25560 }, { "epoch": 1.3998247823468215, "grad_norm": 0.13108472526073456, "learning_rate": 2.9632934496045427e-05, "loss": 0.1982, "step": 25565 }, { "epoch": 1.400098559929913, "grad_norm": 0.12323460727930069, "learning_rate": 2.9627864530521193e-05, "loss": 0.201, "step": 25570 }, { "epoch": 1.4003723375130044, "grad_norm": 0.141290545463562, "learning_rate": 2.962279456499696e-05, "loss": 0.1932, "step": 25575 }, { "epoch": 1.4006461150960958, "grad_norm": 0.1489780843257904, "learning_rate": 2.9617724599472723e-05, "loss": 0.1981, "step": 25580 }, { "epoch": 1.4009198926791875, "grad_norm": 0.13994045555591583, "learning_rate": 2.961265463394849e-05, "loss": 0.1978, "step": 25585 }, { "epoch": 1.401193670262279, "grad_norm": 0.12499432265758514, "learning_rate": 2.9607584668424253e-05, "loss": 0.2061, "step": 25590 }, { "epoch": 1.4014674478453704, "grad_norm": 0.12428432703018188, "learning_rate": 2.960251470290002e-05, "loss": 0.2052, "step": 25595 }, { "epoch": 1.401741225428462, "grad_norm": 0.1219622865319252, "learning_rate": 2.9597444737375783e-05, "loss": 0.2043, "step": 25600 }, { "epoch": 1.4020150030115535, "grad_norm": 0.12682868540287018, "learning_rate": 2.9592374771851557e-05, "loss": 0.1971, "step": 25605 }, { "epoch": 1.402288780594645, "grad_norm": 0.15884515643119812, "learning_rate": 2.958730480632732e-05, "loss": 0.2148, "step": 25610 }, { "epoch": 1.4025625581777363, "grad_norm": 0.13492703437805176, "learning_rate": 2.9582234840803087e-05, "loss": 0.2029, "step": 25615 }, { "epoch": 1.4028363357608278, "grad_norm": 0.13682329654693604, "learning_rate": 2.957716487527885e-05, "loss": 0.2112, "step": 25620 }, { "epoch": 1.4031101133439194, "grad_norm": 0.11161684989929199, "learning_rate": 2.9572094909754617e-05, "loss": 0.201, "step": 25625 }, { "epoch": 1.4033838909270109, "grad_norm": 0.1304159164428711, "learning_rate": 2.956702494423038e-05, "loss": 0.2004, "step": 25630 }, { "epoch": 1.4036576685101023, "grad_norm": 0.14195862412452698, "learning_rate": 2.9561954978706147e-05, "loss": 0.2039, "step": 25635 }, { "epoch": 1.403931446093194, "grad_norm": 0.11030715703964233, "learning_rate": 2.9556885013181913e-05, "loss": 0.1981, "step": 25640 }, { "epoch": 1.4042052236762854, "grad_norm": 0.11445304751396179, "learning_rate": 2.9551815047657677e-05, "loss": 0.2072, "step": 25645 }, { "epoch": 1.4044790012593769, "grad_norm": 0.11001145094633102, "learning_rate": 2.9546745082133443e-05, "loss": 0.2042, "step": 25650 }, { "epoch": 1.4047527788424683, "grad_norm": 0.12209835648536682, "learning_rate": 2.9541675116609207e-05, "loss": 0.2088, "step": 25655 }, { "epoch": 1.40502655642556, "grad_norm": 0.13802796602249146, "learning_rate": 2.9536605151084973e-05, "loss": 0.2128, "step": 25660 }, { "epoch": 1.4053003340086514, "grad_norm": 0.12667864561080933, "learning_rate": 2.9531535185560737e-05, "loss": 0.2114, "step": 25665 }, { "epoch": 1.4055741115917428, "grad_norm": 0.1237429529428482, "learning_rate": 2.9526465220036503e-05, "loss": 0.2098, "step": 25670 }, { "epoch": 1.4058478891748343, "grad_norm": 0.12277670949697495, "learning_rate": 2.952139525451227e-05, "loss": 0.2055, "step": 25675 }, { "epoch": 1.406121666757926, "grad_norm": 0.13190986216068268, "learning_rate": 2.9516325288988033e-05, "loss": 0.2051, "step": 25680 }, { "epoch": 1.4063954443410174, "grad_norm": 0.15586437284946442, "learning_rate": 2.9511255323463804e-05, "loss": 0.2096, "step": 25685 }, { "epoch": 1.4066692219241088, "grad_norm": 0.13076981902122498, "learning_rate": 2.950618535793957e-05, "loss": 0.2049, "step": 25690 }, { "epoch": 1.4069429995072005, "grad_norm": 0.13308870792388916, "learning_rate": 2.9501115392415334e-05, "loss": 0.206, "step": 25695 }, { "epoch": 1.407216777090292, "grad_norm": 0.12470513582229614, "learning_rate": 2.94960454268911e-05, "loss": 0.2061, "step": 25700 }, { "epoch": 1.4074905546733834, "grad_norm": 0.1087895929813385, "learning_rate": 2.9490975461366867e-05, "loss": 0.1951, "step": 25705 }, { "epoch": 1.4077643322564748, "grad_norm": 0.12591120600700378, "learning_rate": 2.948590549584263e-05, "loss": 0.2055, "step": 25710 }, { "epoch": 1.4080381098395662, "grad_norm": 0.12547987699508667, "learning_rate": 2.9480835530318397e-05, "loss": 0.2058, "step": 25715 }, { "epoch": 1.408311887422658, "grad_norm": 0.13895978033542633, "learning_rate": 2.947576556479416e-05, "loss": 0.2151, "step": 25720 }, { "epoch": 1.4085856650057493, "grad_norm": 0.13266344368457794, "learning_rate": 2.9470695599269927e-05, "loss": 0.1941, "step": 25725 }, { "epoch": 1.4088594425888408, "grad_norm": 0.15842676162719727, "learning_rate": 2.946562563374569e-05, "loss": 0.1905, "step": 25730 }, { "epoch": 1.4091332201719324, "grad_norm": 0.12481179088354111, "learning_rate": 2.9460555668221457e-05, "loss": 0.2042, "step": 25735 }, { "epoch": 1.4094069977550239, "grad_norm": 0.14316757023334503, "learning_rate": 2.9455485702697224e-05, "loss": 0.2083, "step": 25740 }, { "epoch": 1.4096807753381153, "grad_norm": 0.1820957511663437, "learning_rate": 2.9450415737172987e-05, "loss": 0.2086, "step": 25745 }, { "epoch": 1.4099545529212067, "grad_norm": 0.13118240237236023, "learning_rate": 2.9445345771648754e-05, "loss": 0.2014, "step": 25750 }, { "epoch": 1.4102283305042982, "grad_norm": 0.11394783854484558, "learning_rate": 2.9440275806124517e-05, "loss": 0.2103, "step": 25755 }, { "epoch": 1.4105021080873898, "grad_norm": 0.139568030834198, "learning_rate": 2.9435205840600284e-05, "loss": 0.2074, "step": 25760 }, { "epoch": 1.4107758856704813, "grad_norm": 0.11594001203775406, "learning_rate": 2.9430135875076054e-05, "loss": 0.1988, "step": 25765 }, { "epoch": 1.4110496632535727, "grad_norm": 0.12266908586025238, "learning_rate": 2.942506590955182e-05, "loss": 0.2048, "step": 25770 }, { "epoch": 1.4113234408366644, "grad_norm": 0.13288936018943787, "learning_rate": 2.9419995944027584e-05, "loss": 0.1994, "step": 25775 }, { "epoch": 1.4115972184197558, "grad_norm": 0.13453204929828644, "learning_rate": 2.941492597850335e-05, "loss": 0.2062, "step": 25780 }, { "epoch": 1.4118709960028473, "grad_norm": 0.11210492998361588, "learning_rate": 2.9409856012979114e-05, "loss": 0.1986, "step": 25785 }, { "epoch": 1.4121447735859387, "grad_norm": 0.12260524183511734, "learning_rate": 2.940478604745488e-05, "loss": 0.2047, "step": 25790 }, { "epoch": 1.4124185511690301, "grad_norm": 0.1521809697151184, "learning_rate": 2.9399716081930644e-05, "loss": 0.2136, "step": 25795 }, { "epoch": 1.4126923287521218, "grad_norm": 0.11352450400590897, "learning_rate": 2.939464611640641e-05, "loss": 0.2054, "step": 25800 }, { "epoch": 1.4129661063352132, "grad_norm": 0.1129220649600029, "learning_rate": 2.9389576150882177e-05, "loss": 0.1945, "step": 25805 }, { "epoch": 1.4132398839183047, "grad_norm": 0.14703400433063507, "learning_rate": 2.938450618535794e-05, "loss": 0.2087, "step": 25810 }, { "epoch": 1.4135136615013963, "grad_norm": 0.1250615417957306, "learning_rate": 2.9379436219833707e-05, "loss": 0.2007, "step": 25815 }, { "epoch": 1.4137874390844878, "grad_norm": 0.12465154379606247, "learning_rate": 2.937436625430947e-05, "loss": 0.1994, "step": 25820 }, { "epoch": 1.4140612166675792, "grad_norm": 0.12064813077449799, "learning_rate": 2.9369296288785237e-05, "loss": 0.2009, "step": 25825 }, { "epoch": 1.4143349942506709, "grad_norm": 0.14659279584884644, "learning_rate": 2.9364226323261e-05, "loss": 0.1999, "step": 25830 }, { "epoch": 1.4146087718337623, "grad_norm": 0.15147559344768524, "learning_rate": 2.9359156357736767e-05, "loss": 0.2048, "step": 25835 }, { "epoch": 1.4148825494168538, "grad_norm": 0.1263914555311203, "learning_rate": 2.935408639221253e-05, "loss": 0.1997, "step": 25840 }, { "epoch": 1.4151563269999452, "grad_norm": 0.11908936500549316, "learning_rate": 2.9349016426688297e-05, "loss": 0.2035, "step": 25845 }, { "epoch": 1.4154301045830366, "grad_norm": 0.1213090717792511, "learning_rate": 2.9343946461164067e-05, "loss": 0.2075, "step": 25850 }, { "epoch": 1.4157038821661283, "grad_norm": 0.11247003078460693, "learning_rate": 2.9338876495639834e-05, "loss": 0.1996, "step": 25855 }, { "epoch": 1.4159776597492197, "grad_norm": 0.1326652616262436, "learning_rate": 2.9333806530115597e-05, "loss": 0.2084, "step": 25860 }, { "epoch": 1.4162514373323112, "grad_norm": 0.12248937785625458, "learning_rate": 2.9328736564591364e-05, "loss": 0.2114, "step": 25865 }, { "epoch": 1.4165252149154028, "grad_norm": 0.1218990609049797, "learning_rate": 2.932366659906713e-05, "loss": 0.2085, "step": 25870 }, { "epoch": 1.4167989924984943, "grad_norm": 0.13580599427223206, "learning_rate": 2.9318596633542894e-05, "loss": 0.1933, "step": 25875 }, { "epoch": 1.4170727700815857, "grad_norm": 0.1487404853105545, "learning_rate": 2.931352666801866e-05, "loss": 0.2067, "step": 25880 }, { "epoch": 1.4173465476646772, "grad_norm": 0.13908140361309052, "learning_rate": 2.9308456702494424e-05, "loss": 0.2077, "step": 25885 }, { "epoch": 1.4176203252477686, "grad_norm": 0.13529597222805023, "learning_rate": 2.930338673697019e-05, "loss": 0.2099, "step": 25890 }, { "epoch": 1.4178941028308603, "grad_norm": 0.12486271560192108, "learning_rate": 2.9298316771445954e-05, "loss": 0.2045, "step": 25895 }, { "epoch": 1.4181678804139517, "grad_norm": 0.1317077875137329, "learning_rate": 2.929324680592172e-05, "loss": 0.2131, "step": 25900 }, { "epoch": 1.4184416579970431, "grad_norm": 0.12114960700273514, "learning_rate": 2.9288176840397484e-05, "loss": 0.2022, "step": 25905 }, { "epoch": 1.4187154355801348, "grad_norm": 0.15380729734897614, "learning_rate": 2.928310687487325e-05, "loss": 0.207, "step": 25910 }, { "epoch": 1.4189892131632262, "grad_norm": 0.14677390456199646, "learning_rate": 2.9278036909349017e-05, "loss": 0.2047, "step": 25915 }, { "epoch": 1.4192629907463177, "grad_norm": 0.12409184873104095, "learning_rate": 2.927296694382478e-05, "loss": 0.2015, "step": 25920 }, { "epoch": 1.4195367683294091, "grad_norm": 0.14054487645626068, "learning_rate": 2.9267896978300547e-05, "loss": 0.2032, "step": 25925 }, { "epoch": 1.4198105459125006, "grad_norm": 0.1318846344947815, "learning_rate": 2.9262827012776317e-05, "loss": 0.2052, "step": 25930 }, { "epoch": 1.4200843234955922, "grad_norm": 0.11203411966562271, "learning_rate": 2.9257757047252084e-05, "loss": 0.202, "step": 25935 }, { "epoch": 1.4203581010786837, "grad_norm": 0.12823772430419922, "learning_rate": 2.9252687081727847e-05, "loss": 0.2186, "step": 25940 }, { "epoch": 1.420631878661775, "grad_norm": 0.13566239178180695, "learning_rate": 2.9247617116203614e-05, "loss": 0.1991, "step": 25945 }, { "epoch": 1.4209056562448668, "grad_norm": 0.11258527636528015, "learning_rate": 2.9242547150679377e-05, "loss": 0.2031, "step": 25950 }, { "epoch": 1.4211794338279582, "grad_norm": 0.13095718622207642, "learning_rate": 2.9237477185155144e-05, "loss": 0.2031, "step": 25955 }, { "epoch": 1.4214532114110496, "grad_norm": 0.12784218788146973, "learning_rate": 2.9232407219630907e-05, "loss": 0.2092, "step": 25960 }, { "epoch": 1.4217269889941413, "grad_norm": 0.12983402609825134, "learning_rate": 2.9227337254106674e-05, "loss": 0.2193, "step": 25965 }, { "epoch": 1.4220007665772327, "grad_norm": 0.12785542011260986, "learning_rate": 2.9222267288582437e-05, "loss": 0.1987, "step": 25970 }, { "epoch": 1.4222745441603242, "grad_norm": 0.11751774698495865, "learning_rate": 2.9217197323058204e-05, "loss": 0.2022, "step": 25975 }, { "epoch": 1.4225483217434156, "grad_norm": 0.14362795650959015, "learning_rate": 2.921212735753397e-05, "loss": 0.2025, "step": 25980 }, { "epoch": 1.422822099326507, "grad_norm": 0.11729224771261215, "learning_rate": 2.9207057392009734e-05, "loss": 0.2187, "step": 25985 }, { "epoch": 1.4230958769095987, "grad_norm": 0.15783211588859558, "learning_rate": 2.92019874264855e-05, "loss": 0.2043, "step": 25990 }, { "epoch": 1.4233696544926902, "grad_norm": 0.11692764610052109, "learning_rate": 2.9196917460961264e-05, "loss": 0.2069, "step": 25995 }, { "epoch": 1.4236434320757816, "grad_norm": 0.15331371128559113, "learning_rate": 2.919184749543703e-05, "loss": 0.1963, "step": 26000 }, { "epoch": 1.4239172096588733, "grad_norm": 0.12877152860164642, "learning_rate": 2.9186777529912794e-05, "loss": 0.2043, "step": 26005 }, { "epoch": 1.4241909872419647, "grad_norm": 0.1453077495098114, "learning_rate": 2.9181707564388567e-05, "loss": 0.2062, "step": 26010 }, { "epoch": 1.4244647648250561, "grad_norm": 0.1309603601694107, "learning_rate": 2.917663759886433e-05, "loss": 0.2113, "step": 26015 }, { "epoch": 1.4247385424081476, "grad_norm": 0.14539118111133575, "learning_rate": 2.9171567633340097e-05, "loss": 0.2086, "step": 26020 }, { "epoch": 1.425012319991239, "grad_norm": 0.15814049541950226, "learning_rate": 2.916649766781586e-05, "loss": 0.2094, "step": 26025 }, { "epoch": 1.4252860975743307, "grad_norm": 0.12692560255527496, "learning_rate": 2.9161427702291627e-05, "loss": 0.2035, "step": 26030 }, { "epoch": 1.425559875157422, "grad_norm": 0.11659980565309525, "learning_rate": 2.915635773676739e-05, "loss": 0.2047, "step": 26035 }, { "epoch": 1.4258336527405135, "grad_norm": 0.14569850265979767, "learning_rate": 2.9151287771243157e-05, "loss": 0.2016, "step": 26040 }, { "epoch": 1.4261074303236052, "grad_norm": 0.12335317581892014, "learning_rate": 2.9146217805718924e-05, "loss": 0.2049, "step": 26045 }, { "epoch": 1.4263812079066966, "grad_norm": 0.1123025119304657, "learning_rate": 2.9141147840194687e-05, "loss": 0.2107, "step": 26050 }, { "epoch": 1.426654985489788, "grad_norm": 0.12196483463048935, "learning_rate": 2.9136077874670454e-05, "loss": 0.21, "step": 26055 }, { "epoch": 1.4269287630728795, "grad_norm": 0.12809891998767853, "learning_rate": 2.9131007909146217e-05, "loss": 0.2025, "step": 26060 }, { "epoch": 1.427202540655971, "grad_norm": 0.13781768083572388, "learning_rate": 2.9125937943621984e-05, "loss": 0.1969, "step": 26065 }, { "epoch": 1.4274763182390626, "grad_norm": 0.12477219849824905, "learning_rate": 2.9120867978097747e-05, "loss": 0.2021, "step": 26070 }, { "epoch": 1.427750095822154, "grad_norm": 0.14699973165988922, "learning_rate": 2.9115798012573514e-05, "loss": 0.2131, "step": 26075 }, { "epoch": 1.4280238734052455, "grad_norm": 0.13828563690185547, "learning_rate": 2.911072804704928e-05, "loss": 0.1908, "step": 26080 }, { "epoch": 1.4282976509883372, "grad_norm": 0.12293241918087006, "learning_rate": 2.9105658081525044e-05, "loss": 0.2058, "step": 26085 }, { "epoch": 1.4285714285714286, "grad_norm": 0.12443812191486359, "learning_rate": 2.9100588116000814e-05, "loss": 0.2019, "step": 26090 }, { "epoch": 1.42884520615452, "grad_norm": 0.1343982070684433, "learning_rate": 2.909551815047658e-05, "loss": 0.2011, "step": 26095 }, { "epoch": 1.4291189837376115, "grad_norm": 0.1248355582356453, "learning_rate": 2.9090448184952344e-05, "loss": 0.2112, "step": 26100 }, { "epoch": 1.4293927613207031, "grad_norm": 0.15837320685386658, "learning_rate": 2.908537821942811e-05, "loss": 0.2167, "step": 26105 }, { "epoch": 1.4296665389037946, "grad_norm": 0.1403147280216217, "learning_rate": 2.9080308253903878e-05, "loss": 0.2031, "step": 26110 }, { "epoch": 1.429940316486886, "grad_norm": 0.12077415734529495, "learning_rate": 2.907523828837964e-05, "loss": 0.2086, "step": 26115 }, { "epoch": 1.4302140940699775, "grad_norm": 0.11008696258068085, "learning_rate": 2.9070168322855408e-05, "loss": 0.1997, "step": 26120 }, { "epoch": 1.4304878716530691, "grad_norm": 0.1242719292640686, "learning_rate": 2.906509835733117e-05, "loss": 0.2088, "step": 26125 }, { "epoch": 1.4307616492361606, "grad_norm": 0.124413400888443, "learning_rate": 2.9060028391806938e-05, "loss": 0.2141, "step": 26130 }, { "epoch": 1.431035426819252, "grad_norm": 0.146397665143013, "learning_rate": 2.90549584262827e-05, "loss": 0.2031, "step": 26135 }, { "epoch": 1.4313092044023437, "grad_norm": 0.1159931868314743, "learning_rate": 2.9049888460758468e-05, "loss": 0.202, "step": 26140 }, { "epoch": 1.431582981985435, "grad_norm": 0.15824241936206818, "learning_rate": 2.9044818495234234e-05, "loss": 0.206, "step": 26145 }, { "epoch": 1.4318567595685265, "grad_norm": 0.14247220754623413, "learning_rate": 2.9039748529709998e-05, "loss": 0.1953, "step": 26150 }, { "epoch": 1.432130537151618, "grad_norm": 0.1335340440273285, "learning_rate": 2.9034678564185764e-05, "loss": 0.2049, "step": 26155 }, { "epoch": 1.4324043147347094, "grad_norm": 0.1912185400724411, "learning_rate": 2.9029608598661528e-05, "loss": 0.2145, "step": 26160 }, { "epoch": 1.432678092317801, "grad_norm": 0.12028276920318604, "learning_rate": 2.9024538633137294e-05, "loss": 0.2016, "step": 26165 }, { "epoch": 1.4329518699008925, "grad_norm": 0.11977679282426834, "learning_rate": 2.9019468667613058e-05, "loss": 0.1949, "step": 26170 }, { "epoch": 1.433225647483984, "grad_norm": 0.12893211841583252, "learning_rate": 2.901439870208883e-05, "loss": 0.1977, "step": 26175 }, { "epoch": 1.4334994250670756, "grad_norm": 0.11523927003145218, "learning_rate": 2.9009328736564594e-05, "loss": 0.2047, "step": 26180 }, { "epoch": 1.433773202650167, "grad_norm": 0.11342253535985947, "learning_rate": 2.900425877104036e-05, "loss": 0.2137, "step": 26185 }, { "epoch": 1.4340469802332585, "grad_norm": 0.12242987006902695, "learning_rate": 2.8999188805516124e-05, "loss": 0.2055, "step": 26190 }, { "epoch": 1.43432075781635, "grad_norm": 0.13043908774852753, "learning_rate": 2.899411883999189e-05, "loss": 0.2078, "step": 26195 }, { "epoch": 1.4345945353994414, "grad_norm": 0.11938010901212692, "learning_rate": 2.8989048874467654e-05, "loss": 0.2109, "step": 26200 }, { "epoch": 1.434868312982533, "grad_norm": 0.13305693864822388, "learning_rate": 2.898397890894342e-05, "loss": 0.1984, "step": 26205 }, { "epoch": 1.4351420905656245, "grad_norm": 0.15310943126678467, "learning_rate": 2.8978908943419188e-05, "loss": 0.2018, "step": 26210 }, { "epoch": 1.435415868148716, "grad_norm": 0.17676135897636414, "learning_rate": 2.897383897789495e-05, "loss": 0.2052, "step": 26215 }, { "epoch": 1.4356896457318076, "grad_norm": 0.12447068095207214, "learning_rate": 2.8968769012370718e-05, "loss": 0.195, "step": 26220 }, { "epoch": 1.435963423314899, "grad_norm": 0.14100079238414764, "learning_rate": 2.896369904684648e-05, "loss": 0.1987, "step": 26225 }, { "epoch": 1.4362372008979905, "grad_norm": 0.1491498053073883, "learning_rate": 2.8958629081322248e-05, "loss": 0.2061, "step": 26230 }, { "epoch": 1.436510978481082, "grad_norm": 0.11797727644443512, "learning_rate": 2.895355911579801e-05, "loss": 0.2119, "step": 26235 }, { "epoch": 1.4367847560641733, "grad_norm": 0.1309620887041092, "learning_rate": 2.8948489150273778e-05, "loss": 0.1964, "step": 26240 }, { "epoch": 1.437058533647265, "grad_norm": 0.1096319705247879, "learning_rate": 2.8943419184749544e-05, "loss": 0.1968, "step": 26245 }, { "epoch": 1.4373323112303564, "grad_norm": 0.13607195019721985, "learning_rate": 2.8938349219225308e-05, "loss": 0.2061, "step": 26250 }, { "epoch": 1.4376060888134479, "grad_norm": 0.11414622515439987, "learning_rate": 2.8933279253701078e-05, "loss": 0.2087, "step": 26255 }, { "epoch": 1.4378798663965395, "grad_norm": 0.12653383612632751, "learning_rate": 2.8928209288176844e-05, "loss": 0.2041, "step": 26260 }, { "epoch": 1.438153643979631, "grad_norm": 0.12419568002223969, "learning_rate": 2.8923139322652608e-05, "loss": 0.202, "step": 26265 }, { "epoch": 1.4384274215627224, "grad_norm": 0.14253929257392883, "learning_rate": 2.8918069357128374e-05, "loss": 0.2133, "step": 26270 }, { "epoch": 1.438701199145814, "grad_norm": 0.10524313151836395, "learning_rate": 2.891299939160414e-05, "loss": 0.1999, "step": 26275 }, { "epoch": 1.4389749767289055, "grad_norm": 0.12929649651050568, "learning_rate": 2.8907929426079904e-05, "loss": 0.2016, "step": 26280 }, { "epoch": 1.439248754311997, "grad_norm": 0.12389294803142548, "learning_rate": 2.890285946055567e-05, "loss": 0.2011, "step": 26285 }, { "epoch": 1.4395225318950884, "grad_norm": 0.1348712146282196, "learning_rate": 2.8897789495031434e-05, "loss": 0.1994, "step": 26290 }, { "epoch": 1.4397963094781798, "grad_norm": 0.1219879686832428, "learning_rate": 2.88927195295072e-05, "loss": 0.2041, "step": 26295 }, { "epoch": 1.4400700870612715, "grad_norm": 0.13731303811073303, "learning_rate": 2.8887649563982964e-05, "loss": 0.208, "step": 26300 }, { "epoch": 1.440343864644363, "grad_norm": 0.12670111656188965, "learning_rate": 2.888257959845873e-05, "loss": 0.2108, "step": 26305 }, { "epoch": 1.4406176422274544, "grad_norm": 0.11321252584457397, "learning_rate": 2.8877509632934498e-05, "loss": 0.1982, "step": 26310 }, { "epoch": 1.440891419810546, "grad_norm": 0.12189070880413055, "learning_rate": 2.887243966741026e-05, "loss": 0.1967, "step": 26315 }, { "epoch": 1.4411651973936375, "grad_norm": 0.12802958488464355, "learning_rate": 2.8867369701886028e-05, "loss": 0.2157, "step": 26320 }, { "epoch": 1.441438974976729, "grad_norm": 0.124285988509655, "learning_rate": 2.886229973636179e-05, "loss": 0.2027, "step": 26325 }, { "epoch": 1.4417127525598203, "grad_norm": 0.12267082184553146, "learning_rate": 2.8857229770837558e-05, "loss": 0.2056, "step": 26330 }, { "epoch": 1.4419865301429118, "grad_norm": 0.14272083342075348, "learning_rate": 2.8852159805313328e-05, "loss": 0.2025, "step": 26335 }, { "epoch": 1.4422603077260034, "grad_norm": 0.11253374069929123, "learning_rate": 2.8847089839789095e-05, "loss": 0.1956, "step": 26340 }, { "epoch": 1.4425340853090949, "grad_norm": 0.12770803272724152, "learning_rate": 2.8842019874264858e-05, "loss": 0.2043, "step": 26345 }, { "epoch": 1.4428078628921863, "grad_norm": 0.12016786634922028, "learning_rate": 2.8836949908740625e-05, "loss": 0.2032, "step": 26350 }, { "epoch": 1.443081640475278, "grad_norm": 0.1638716161251068, "learning_rate": 2.8831879943216388e-05, "loss": 0.2037, "step": 26355 }, { "epoch": 1.4433554180583694, "grad_norm": 0.12387701123952866, "learning_rate": 2.8826809977692155e-05, "loss": 0.2025, "step": 26360 }, { "epoch": 1.4436291956414609, "grad_norm": 0.15651802718639374, "learning_rate": 2.8821740012167918e-05, "loss": 0.2062, "step": 26365 }, { "epoch": 1.4439029732245523, "grad_norm": 0.1291436105966568, "learning_rate": 2.8816670046643685e-05, "loss": 0.2061, "step": 26370 }, { "epoch": 1.4441767508076437, "grad_norm": 0.15067093074321747, "learning_rate": 2.881160008111945e-05, "loss": 0.2101, "step": 26375 }, { "epoch": 1.4444505283907354, "grad_norm": 0.12446349114179611, "learning_rate": 2.8806530115595215e-05, "loss": 0.2038, "step": 26380 }, { "epoch": 1.4447243059738268, "grad_norm": 0.12580956518650055, "learning_rate": 2.880146015007098e-05, "loss": 0.2057, "step": 26385 }, { "epoch": 1.4449980835569183, "grad_norm": 0.12657101452350616, "learning_rate": 2.8796390184546745e-05, "loss": 0.2108, "step": 26390 }, { "epoch": 1.44527186114001, "grad_norm": 0.13567961752414703, "learning_rate": 2.879132021902251e-05, "loss": 0.2001, "step": 26395 }, { "epoch": 1.4455456387231014, "grad_norm": 0.12028419971466064, "learning_rate": 2.8786250253498275e-05, "loss": 0.1945, "step": 26400 }, { "epoch": 1.4458194163061928, "grad_norm": 0.13935312628746033, "learning_rate": 2.878118028797404e-05, "loss": 0.2066, "step": 26405 }, { "epoch": 1.4460931938892845, "grad_norm": 0.11867563426494598, "learning_rate": 2.8776110322449808e-05, "loss": 0.2095, "step": 26410 }, { "epoch": 1.446366971472376, "grad_norm": 0.1331169754266739, "learning_rate": 2.877104035692557e-05, "loss": 0.1993, "step": 26415 }, { "epoch": 1.4466407490554674, "grad_norm": 0.12763388454914093, "learning_rate": 2.876597039140134e-05, "loss": 0.2053, "step": 26420 }, { "epoch": 1.4469145266385588, "grad_norm": 0.14417393505573273, "learning_rate": 2.8760900425877108e-05, "loss": 0.2081, "step": 26425 }, { "epoch": 1.4471883042216502, "grad_norm": 0.11193599551916122, "learning_rate": 2.875583046035287e-05, "loss": 0.2064, "step": 26430 }, { "epoch": 1.447462081804742, "grad_norm": 0.13907086849212646, "learning_rate": 2.8750760494828638e-05, "loss": 0.1984, "step": 26435 }, { "epoch": 1.4477358593878333, "grad_norm": 0.15732012689113617, "learning_rate": 2.8745690529304405e-05, "loss": 0.2129, "step": 26440 }, { "epoch": 1.4480096369709248, "grad_norm": 0.13576743006706238, "learning_rate": 2.8740620563780168e-05, "loss": 0.2023, "step": 26445 }, { "epoch": 1.4482834145540164, "grad_norm": 0.14545440673828125, "learning_rate": 2.8735550598255935e-05, "loss": 0.2052, "step": 26450 }, { "epoch": 1.4485571921371079, "grad_norm": 0.15340061485767365, "learning_rate": 2.8730480632731698e-05, "loss": 0.2126, "step": 26455 }, { "epoch": 1.4488309697201993, "grad_norm": 0.11882954090833664, "learning_rate": 2.8725410667207465e-05, "loss": 0.2077, "step": 26460 }, { "epoch": 1.4491047473032908, "grad_norm": 0.11443454027175903, "learning_rate": 2.8720340701683228e-05, "loss": 0.2045, "step": 26465 }, { "epoch": 1.4493785248863822, "grad_norm": 0.1273690164089203, "learning_rate": 2.8715270736158995e-05, "loss": 0.2116, "step": 26470 }, { "epoch": 1.4496523024694739, "grad_norm": 0.1250038743019104, "learning_rate": 2.871020077063476e-05, "loss": 0.1977, "step": 26475 }, { "epoch": 1.4499260800525653, "grad_norm": 0.11189151555299759, "learning_rate": 2.8705130805110525e-05, "loss": 0.2145, "step": 26480 }, { "epoch": 1.4501998576356567, "grad_norm": 0.1526959091424942, "learning_rate": 2.870006083958629e-05, "loss": 0.2126, "step": 26485 }, { "epoch": 1.4504736352187484, "grad_norm": 0.1332779973745346, "learning_rate": 2.8694990874062055e-05, "loss": 0.2072, "step": 26490 }, { "epoch": 1.4507474128018398, "grad_norm": 0.13757765293121338, "learning_rate": 2.868992090853782e-05, "loss": 0.2062, "step": 26495 }, { "epoch": 1.4510211903849313, "grad_norm": 0.12057121098041534, "learning_rate": 2.868485094301359e-05, "loss": 0.1964, "step": 26500 }, { "epoch": 1.4512949679680227, "grad_norm": 0.13873358070850372, "learning_rate": 2.8679780977489358e-05, "loss": 0.2056, "step": 26505 }, { "epoch": 1.4515687455511141, "grad_norm": 0.1143827959895134, "learning_rate": 2.867471101196512e-05, "loss": 0.1936, "step": 26510 }, { "epoch": 1.4518425231342058, "grad_norm": 0.12389327585697174, "learning_rate": 2.8669641046440888e-05, "loss": 0.2055, "step": 26515 }, { "epoch": 1.4521163007172972, "grad_norm": 0.13712100684642792, "learning_rate": 2.866457108091665e-05, "loss": 0.209, "step": 26520 }, { "epoch": 1.4523900783003887, "grad_norm": 0.14931118488311768, "learning_rate": 2.8659501115392418e-05, "loss": 0.1996, "step": 26525 }, { "epoch": 1.4526638558834803, "grad_norm": 0.12568429112434387, "learning_rate": 2.865443114986818e-05, "loss": 0.2044, "step": 26530 }, { "epoch": 1.4529376334665718, "grad_norm": 0.1352720558643341, "learning_rate": 2.8649361184343948e-05, "loss": 0.2076, "step": 26535 }, { "epoch": 1.4532114110496632, "grad_norm": 0.12297796458005905, "learning_rate": 2.8644291218819715e-05, "loss": 0.1975, "step": 26540 }, { "epoch": 1.4534851886327549, "grad_norm": 0.12326040863990784, "learning_rate": 2.8639221253295478e-05, "loss": 0.1946, "step": 26545 }, { "epoch": 1.4537589662158463, "grad_norm": 0.1267557144165039, "learning_rate": 2.8634151287771245e-05, "loss": 0.2026, "step": 26550 }, { "epoch": 1.4540327437989378, "grad_norm": 0.12272230535745621, "learning_rate": 2.8629081322247008e-05, "loss": 0.2085, "step": 26555 }, { "epoch": 1.4543065213820292, "grad_norm": 0.12339361011981964, "learning_rate": 2.8624011356722775e-05, "loss": 0.1939, "step": 26560 }, { "epoch": 1.4545802989651206, "grad_norm": 0.11693916469812393, "learning_rate": 2.8618941391198538e-05, "loss": 0.2011, "step": 26565 }, { "epoch": 1.4548540765482123, "grad_norm": 0.13269728422164917, "learning_rate": 2.8613871425674305e-05, "loss": 0.199, "step": 26570 }, { "epoch": 1.4551278541313037, "grad_norm": 0.1265670657157898, "learning_rate": 2.8608801460150068e-05, "loss": 0.1975, "step": 26575 }, { "epoch": 1.4554016317143952, "grad_norm": 0.12405417114496231, "learning_rate": 2.860373149462584e-05, "loss": 0.2003, "step": 26580 }, { "epoch": 1.4556754092974868, "grad_norm": 0.13218876719474792, "learning_rate": 2.8598661529101605e-05, "loss": 0.2049, "step": 26585 }, { "epoch": 1.4559491868805783, "grad_norm": 0.11787352710962296, "learning_rate": 2.859359156357737e-05, "loss": 0.1904, "step": 26590 }, { "epoch": 1.4562229644636697, "grad_norm": 0.1510845273733139, "learning_rate": 2.8588521598053135e-05, "loss": 0.2145, "step": 26595 }, { "epoch": 1.4564967420467612, "grad_norm": 0.13713853061199188, "learning_rate": 2.85834516325289e-05, "loss": 0.2063, "step": 26600 }, { "epoch": 1.4567705196298526, "grad_norm": 0.13387882709503174, "learning_rate": 2.857838166700467e-05, "loss": 0.209, "step": 26605 }, { "epoch": 1.4570442972129443, "grad_norm": 0.1413111835718155, "learning_rate": 2.857331170148043e-05, "loss": 0.1987, "step": 26610 }, { "epoch": 1.4573180747960357, "grad_norm": 0.13012611865997314, "learning_rate": 2.85682417359562e-05, "loss": 0.2004, "step": 26615 }, { "epoch": 1.4575918523791271, "grad_norm": 0.12187749147415161, "learning_rate": 2.856317177043196e-05, "loss": 0.1984, "step": 26620 }, { "epoch": 1.4578656299622188, "grad_norm": 0.1455419957637787, "learning_rate": 2.855810180490773e-05, "loss": 0.2, "step": 26625 }, { "epoch": 1.4581394075453102, "grad_norm": 0.12673206627368927, "learning_rate": 2.855303183938349e-05, "loss": 0.1997, "step": 26630 }, { "epoch": 1.4584131851284017, "grad_norm": 0.1209489107131958, "learning_rate": 2.854796187385926e-05, "loss": 0.1996, "step": 26635 }, { "epoch": 1.4586869627114931, "grad_norm": 0.1251036375761032, "learning_rate": 2.854289190833502e-05, "loss": 0.1923, "step": 26640 }, { "epoch": 1.4589607402945846, "grad_norm": 0.1224747821688652, "learning_rate": 2.853782194281079e-05, "loss": 0.196, "step": 26645 }, { "epoch": 1.4592345178776762, "grad_norm": 0.14550291001796722, "learning_rate": 2.8532751977286555e-05, "loss": 0.2068, "step": 26650 }, { "epoch": 1.4595082954607677, "grad_norm": 0.14389249682426453, "learning_rate": 2.852768201176232e-05, "loss": 0.2004, "step": 26655 }, { "epoch": 1.459782073043859, "grad_norm": 0.13823749125003815, "learning_rate": 2.852261204623809e-05, "loss": 0.2036, "step": 26660 }, { "epoch": 1.4600558506269508, "grad_norm": 0.12841661274433136, "learning_rate": 2.8517542080713855e-05, "loss": 0.2046, "step": 26665 }, { "epoch": 1.4603296282100422, "grad_norm": 0.1256100982427597, "learning_rate": 2.8512472115189622e-05, "loss": 0.2074, "step": 26670 }, { "epoch": 1.4606034057931336, "grad_norm": 0.13067084550857544, "learning_rate": 2.8507402149665385e-05, "loss": 0.2011, "step": 26675 }, { "epoch": 1.460877183376225, "grad_norm": 0.13372044265270233, "learning_rate": 2.8502332184141152e-05, "loss": 0.2066, "step": 26680 }, { "epoch": 1.4611509609593165, "grad_norm": 0.13154059648513794, "learning_rate": 2.8497262218616915e-05, "loss": 0.2019, "step": 26685 }, { "epoch": 1.4614247385424082, "grad_norm": 0.13561785221099854, "learning_rate": 2.8492192253092682e-05, "loss": 0.2109, "step": 26690 }, { "epoch": 1.4616985161254996, "grad_norm": 0.1624545305967331, "learning_rate": 2.8487122287568445e-05, "loss": 0.2008, "step": 26695 }, { "epoch": 1.461972293708591, "grad_norm": 0.12175451964139938, "learning_rate": 2.8482052322044212e-05, "loss": 0.1975, "step": 26700 }, { "epoch": 1.4622460712916827, "grad_norm": 0.1327366828918457, "learning_rate": 2.8476982356519975e-05, "loss": 0.2041, "step": 26705 }, { "epoch": 1.4625198488747742, "grad_norm": 0.15411345660686493, "learning_rate": 2.8471912390995742e-05, "loss": 0.2016, "step": 26710 }, { "epoch": 1.4627936264578656, "grad_norm": 0.11467863619327545, "learning_rate": 2.846684242547151e-05, "loss": 0.2055, "step": 26715 }, { "epoch": 1.4630674040409573, "grad_norm": 0.12899212539196014, "learning_rate": 2.8461772459947272e-05, "loss": 0.2015, "step": 26720 }, { "epoch": 1.4633411816240487, "grad_norm": 0.12015139311552048, "learning_rate": 2.845670249442304e-05, "loss": 0.196, "step": 26725 }, { "epoch": 1.4636149592071401, "grad_norm": 0.12675981223583221, "learning_rate": 2.8451632528898802e-05, "loss": 0.1989, "step": 26730 }, { "epoch": 1.4638887367902316, "grad_norm": 0.12326288223266602, "learning_rate": 2.844656256337457e-05, "loss": 0.1914, "step": 26735 }, { "epoch": 1.464162514373323, "grad_norm": 0.1482122540473938, "learning_rate": 2.8441492597850332e-05, "loss": 0.2023, "step": 26740 }, { "epoch": 1.4644362919564147, "grad_norm": 0.1362069994211197, "learning_rate": 2.8436422632326105e-05, "loss": 0.2038, "step": 26745 }, { "epoch": 1.464710069539506, "grad_norm": 0.10989547520875931, "learning_rate": 2.843135266680187e-05, "loss": 0.2055, "step": 26750 }, { "epoch": 1.4649838471225975, "grad_norm": 0.12291960418224335, "learning_rate": 2.8426282701277635e-05, "loss": 0.1974, "step": 26755 }, { "epoch": 1.4652576247056892, "grad_norm": 0.12075453251600266, "learning_rate": 2.84212127357534e-05, "loss": 0.1968, "step": 26760 }, { "epoch": 1.4655314022887806, "grad_norm": 0.13029956817626953, "learning_rate": 2.8416142770229165e-05, "loss": 0.2186, "step": 26765 }, { "epoch": 1.465805179871872, "grad_norm": 0.13835559785366058, "learning_rate": 2.841107280470493e-05, "loss": 0.2, "step": 26770 }, { "epoch": 1.4660789574549635, "grad_norm": 0.10613202303647995, "learning_rate": 2.8406002839180695e-05, "loss": 0.1998, "step": 26775 }, { "epoch": 1.466352735038055, "grad_norm": 0.11518415808677673, "learning_rate": 2.8400932873656462e-05, "loss": 0.2101, "step": 26780 }, { "epoch": 1.4666265126211466, "grad_norm": 0.11946889013051987, "learning_rate": 2.8395862908132225e-05, "loss": 0.2009, "step": 26785 }, { "epoch": 1.466900290204238, "grad_norm": 0.11209649592638016, "learning_rate": 2.8390792942607992e-05, "loss": 0.2019, "step": 26790 }, { "epoch": 1.4671740677873295, "grad_norm": 0.12510426342487335, "learning_rate": 2.8385722977083755e-05, "loss": 0.1985, "step": 26795 }, { "epoch": 1.4674478453704212, "grad_norm": 0.14610794186592102, "learning_rate": 2.8380653011559522e-05, "loss": 0.2118, "step": 26800 }, { "epoch": 1.4677216229535126, "grad_norm": 0.11678546667098999, "learning_rate": 2.8375583046035285e-05, "loss": 0.2011, "step": 26805 }, { "epoch": 1.467995400536604, "grad_norm": 0.10702988505363464, "learning_rate": 2.8370513080511052e-05, "loss": 0.2024, "step": 26810 }, { "epoch": 1.4682691781196955, "grad_norm": 0.14875558018684387, "learning_rate": 2.836544311498682e-05, "loss": 0.2027, "step": 26815 }, { "epoch": 1.468542955702787, "grad_norm": 0.1090674102306366, "learning_rate": 2.8360373149462582e-05, "loss": 0.1989, "step": 26820 }, { "epoch": 1.4688167332858786, "grad_norm": 0.12312186509370804, "learning_rate": 2.8355303183938352e-05, "loss": 0.2082, "step": 26825 }, { "epoch": 1.46909051086897, "grad_norm": 0.13438044488430023, "learning_rate": 2.835023321841412e-05, "loss": 0.2117, "step": 26830 }, { "epoch": 1.4693642884520615, "grad_norm": 0.1240023821592331, "learning_rate": 2.8345163252889882e-05, "loss": 0.2041, "step": 26835 }, { "epoch": 1.4696380660351531, "grad_norm": 0.12379539012908936, "learning_rate": 2.834009328736565e-05, "loss": 0.2002, "step": 26840 }, { "epoch": 1.4699118436182446, "grad_norm": 0.14967043697834015, "learning_rate": 2.8335023321841415e-05, "loss": 0.2161, "step": 26845 }, { "epoch": 1.470185621201336, "grad_norm": 0.1235990896821022, "learning_rate": 2.832995335631718e-05, "loss": 0.2001, "step": 26850 }, { "epoch": 1.4704593987844277, "grad_norm": 0.11852820217609406, "learning_rate": 2.8324883390792945e-05, "loss": 0.1948, "step": 26855 }, { "epoch": 1.470733176367519, "grad_norm": 0.1226632371544838, "learning_rate": 2.831981342526871e-05, "loss": 0.2, "step": 26860 }, { "epoch": 1.4710069539506105, "grad_norm": 0.10086452215909958, "learning_rate": 2.8314743459744475e-05, "loss": 0.207, "step": 26865 }, { "epoch": 1.471280731533702, "grad_norm": 0.1107228547334671, "learning_rate": 2.830967349422024e-05, "loss": 0.2032, "step": 26870 }, { "epoch": 1.4715545091167934, "grad_norm": 0.137582927942276, "learning_rate": 2.8304603528696005e-05, "loss": 0.2075, "step": 26875 }, { "epoch": 1.471828286699885, "grad_norm": 0.1498463749885559, "learning_rate": 2.8299533563171772e-05, "loss": 0.2074, "step": 26880 }, { "epoch": 1.4721020642829765, "grad_norm": 0.11767137795686722, "learning_rate": 2.8294463597647535e-05, "loss": 0.1994, "step": 26885 }, { "epoch": 1.472375841866068, "grad_norm": 0.12430847436189651, "learning_rate": 2.8289393632123302e-05, "loss": 0.2051, "step": 26890 }, { "epoch": 1.4726496194491596, "grad_norm": 0.12281929701566696, "learning_rate": 2.8284323666599065e-05, "loss": 0.2101, "step": 26895 }, { "epoch": 1.472923397032251, "grad_norm": 0.13899104297161102, "learning_rate": 2.8279253701074832e-05, "loss": 0.201, "step": 26900 }, { "epoch": 1.4731971746153425, "grad_norm": 0.13746246695518494, "learning_rate": 2.8274183735550602e-05, "loss": 0.2002, "step": 26905 }, { "epoch": 1.473470952198434, "grad_norm": 0.1268922984600067, "learning_rate": 2.826911377002637e-05, "loss": 0.2014, "step": 26910 }, { "epoch": 1.4737447297815254, "grad_norm": 0.13014328479766846, "learning_rate": 2.8264043804502132e-05, "loss": 0.2069, "step": 26915 }, { "epoch": 1.474018507364617, "grad_norm": 0.1094914898276329, "learning_rate": 2.82589738389779e-05, "loss": 0.1939, "step": 26920 }, { "epoch": 1.4742922849477085, "grad_norm": 0.11271936446428299, "learning_rate": 2.8253903873453662e-05, "loss": 0.195, "step": 26925 }, { "epoch": 1.4745660625308, "grad_norm": 0.1349925547838211, "learning_rate": 2.824883390792943e-05, "loss": 0.2057, "step": 26930 }, { "epoch": 1.4748398401138916, "grad_norm": 0.12671612203121185, "learning_rate": 2.8243763942405192e-05, "loss": 0.2169, "step": 26935 }, { "epoch": 1.475113617696983, "grad_norm": 0.12463783472776413, "learning_rate": 2.823869397688096e-05, "loss": 0.202, "step": 26940 }, { "epoch": 1.4753873952800745, "grad_norm": 0.1316390037536621, "learning_rate": 2.8233624011356726e-05, "loss": 0.1994, "step": 26945 }, { "epoch": 1.475661172863166, "grad_norm": 0.11654425412416458, "learning_rate": 2.822855404583249e-05, "loss": 0.2052, "step": 26950 }, { "epoch": 1.4759349504462573, "grad_norm": 0.11912316083908081, "learning_rate": 2.8223484080308256e-05, "loss": 0.2081, "step": 26955 }, { "epoch": 1.476208728029349, "grad_norm": 0.11640976369380951, "learning_rate": 2.821841411478402e-05, "loss": 0.1986, "step": 26960 }, { "epoch": 1.4764825056124404, "grad_norm": 0.12610402703285217, "learning_rate": 2.8213344149259786e-05, "loss": 0.2166, "step": 26965 }, { "epoch": 1.4767562831955319, "grad_norm": 0.1265062838792801, "learning_rate": 2.820827418373555e-05, "loss": 0.2037, "step": 26970 }, { "epoch": 1.4770300607786235, "grad_norm": 0.11639413237571716, "learning_rate": 2.8203204218211316e-05, "loss": 0.211, "step": 26975 }, { "epoch": 1.477303838361715, "grad_norm": 0.15316566824913025, "learning_rate": 2.8198134252687082e-05, "loss": 0.2095, "step": 26980 }, { "epoch": 1.4775776159448064, "grad_norm": 0.11460255831480026, "learning_rate": 2.8193064287162852e-05, "loss": 0.2016, "step": 26985 }, { "epoch": 1.477851393527898, "grad_norm": 0.15114232897758484, "learning_rate": 2.8187994321638616e-05, "loss": 0.2, "step": 26990 }, { "epoch": 1.4781251711109895, "grad_norm": 0.12425804138183594, "learning_rate": 2.8182924356114382e-05, "loss": 0.1974, "step": 26995 }, { "epoch": 1.478398948694081, "grad_norm": 0.14152491092681885, "learning_rate": 2.8177854390590146e-05, "loss": 0.1994, "step": 27000 }, { "epoch": 1.4786727262771724, "grad_norm": 0.13401885330677032, "learning_rate": 2.8172784425065912e-05, "loss": 0.2159, "step": 27005 }, { "epoch": 1.4789465038602638, "grad_norm": 0.11079591512680054, "learning_rate": 2.816771445954168e-05, "loss": 0.2098, "step": 27010 }, { "epoch": 1.4792202814433555, "grad_norm": 0.11648602783679962, "learning_rate": 2.8162644494017442e-05, "loss": 0.1997, "step": 27015 }, { "epoch": 1.479494059026447, "grad_norm": 0.1198493093252182, "learning_rate": 2.815757452849321e-05, "loss": 0.1978, "step": 27020 }, { "epoch": 1.4797678366095384, "grad_norm": 0.11672970652580261, "learning_rate": 2.8152504562968972e-05, "loss": 0.191, "step": 27025 }, { "epoch": 1.48004161419263, "grad_norm": 0.12551890313625336, "learning_rate": 2.814743459744474e-05, "loss": 0.2009, "step": 27030 }, { "epoch": 1.4803153917757215, "grad_norm": 0.1406085342168808, "learning_rate": 2.8142364631920502e-05, "loss": 0.2005, "step": 27035 }, { "epoch": 1.480589169358813, "grad_norm": 0.13095445930957794, "learning_rate": 2.813729466639627e-05, "loss": 0.2148, "step": 27040 }, { "epoch": 1.4808629469419043, "grad_norm": 0.12884390354156494, "learning_rate": 2.8132224700872036e-05, "loss": 0.2025, "step": 27045 }, { "epoch": 1.4811367245249958, "grad_norm": 0.13750405609607697, "learning_rate": 2.81271547353478e-05, "loss": 0.2146, "step": 27050 }, { "epoch": 1.4814105021080874, "grad_norm": 0.12530578672885895, "learning_rate": 2.8122084769823566e-05, "loss": 0.2052, "step": 27055 }, { "epoch": 1.4816842796911789, "grad_norm": 0.11530674993991852, "learning_rate": 2.811701480429933e-05, "loss": 0.2034, "step": 27060 }, { "epoch": 1.4819580572742703, "grad_norm": 0.11536180227994919, "learning_rate": 2.8111944838775096e-05, "loss": 0.1958, "step": 27065 }, { "epoch": 1.482231834857362, "grad_norm": 0.1233273446559906, "learning_rate": 2.8106874873250866e-05, "loss": 0.1989, "step": 27070 }, { "epoch": 1.4825056124404534, "grad_norm": 0.1299779862165451, "learning_rate": 2.8101804907726632e-05, "loss": 0.2033, "step": 27075 }, { "epoch": 1.4827793900235449, "grad_norm": 0.12614767253398895, "learning_rate": 2.8096734942202396e-05, "loss": 0.2102, "step": 27080 }, { "epoch": 1.4830531676066363, "grad_norm": 0.1464887261390686, "learning_rate": 2.8091664976678162e-05, "loss": 0.2076, "step": 27085 }, { "epoch": 1.4833269451897277, "grad_norm": 0.13697080314159393, "learning_rate": 2.8086595011153926e-05, "loss": 0.1998, "step": 27090 }, { "epoch": 1.4836007227728194, "grad_norm": 0.12076269835233688, "learning_rate": 2.8081525045629692e-05, "loss": 0.205, "step": 27095 }, { "epoch": 1.4838745003559108, "grad_norm": 0.1117979884147644, "learning_rate": 2.8076455080105456e-05, "loss": 0.1981, "step": 27100 }, { "epoch": 1.4841482779390023, "grad_norm": 0.14598779380321503, "learning_rate": 2.8071385114581222e-05, "loss": 0.205, "step": 27105 }, { "epoch": 1.484422055522094, "grad_norm": 0.13441386818885803, "learning_rate": 2.806631514905699e-05, "loss": 0.1993, "step": 27110 }, { "epoch": 1.4846958331051854, "grad_norm": 0.1088603287935257, "learning_rate": 2.8061245183532752e-05, "loss": 0.1991, "step": 27115 }, { "epoch": 1.4849696106882768, "grad_norm": 0.13303765654563904, "learning_rate": 2.805617521800852e-05, "loss": 0.2165, "step": 27120 }, { "epoch": 1.4852433882713683, "grad_norm": 0.12158569693565369, "learning_rate": 2.8051105252484282e-05, "loss": 0.2016, "step": 27125 }, { "epoch": 1.48551716585446, "grad_norm": 0.10756786912679672, "learning_rate": 2.804603528696005e-05, "loss": 0.1978, "step": 27130 }, { "epoch": 1.4857909434375514, "grad_norm": 0.15068888664245605, "learning_rate": 2.8040965321435812e-05, "loss": 0.2024, "step": 27135 }, { "epoch": 1.4860647210206428, "grad_norm": 0.12453047186136246, "learning_rate": 2.803589535591158e-05, "loss": 0.2013, "step": 27140 }, { "epoch": 1.4863384986037342, "grad_norm": 0.12534239888191223, "learning_rate": 2.8030825390387346e-05, "loss": 0.2094, "step": 27145 }, { "epoch": 1.486612276186826, "grad_norm": 0.12919053435325623, "learning_rate": 2.8025755424863116e-05, "loss": 0.198, "step": 27150 }, { "epoch": 1.4868860537699173, "grad_norm": 0.131448894739151, "learning_rate": 2.802068545933888e-05, "loss": 0.2013, "step": 27155 }, { "epoch": 1.4871598313530088, "grad_norm": 0.12495896965265274, "learning_rate": 2.8015615493814646e-05, "loss": 0.197, "step": 27160 }, { "epoch": 1.4874336089361004, "grad_norm": 0.11564404517412186, "learning_rate": 2.801054552829041e-05, "loss": 0.199, "step": 27165 }, { "epoch": 1.4877073865191919, "grad_norm": 0.15086640417575836, "learning_rate": 2.8005475562766176e-05, "loss": 0.2042, "step": 27170 }, { "epoch": 1.4879811641022833, "grad_norm": 0.11081043630838394, "learning_rate": 2.8000405597241943e-05, "loss": 0.1978, "step": 27175 }, { "epoch": 1.4882549416853748, "grad_norm": 0.10859241336584091, "learning_rate": 2.7995335631717706e-05, "loss": 0.1923, "step": 27180 }, { "epoch": 1.4885287192684662, "grad_norm": 0.11000212281942368, "learning_rate": 2.7990265666193473e-05, "loss": 0.2093, "step": 27185 }, { "epoch": 1.4888024968515579, "grad_norm": 0.11053723096847534, "learning_rate": 2.7985195700669236e-05, "loss": 0.1938, "step": 27190 }, { "epoch": 1.4890762744346493, "grad_norm": 0.12977616488933563, "learning_rate": 2.7980125735145003e-05, "loss": 0.2105, "step": 27195 }, { "epoch": 1.4893500520177407, "grad_norm": 0.13888730108737946, "learning_rate": 2.7975055769620766e-05, "loss": 0.2165, "step": 27200 }, { "epoch": 1.4896238296008324, "grad_norm": 0.11268830299377441, "learning_rate": 2.7969985804096533e-05, "loss": 0.1979, "step": 27205 }, { "epoch": 1.4898976071839238, "grad_norm": 0.12923267483711243, "learning_rate": 2.79649158385723e-05, "loss": 0.2031, "step": 27210 }, { "epoch": 1.4901713847670153, "grad_norm": 0.1339496225118637, "learning_rate": 2.7959845873048063e-05, "loss": 0.2073, "step": 27215 }, { "epoch": 1.4904451623501067, "grad_norm": 0.1169474869966507, "learning_rate": 2.795477590752383e-05, "loss": 0.2009, "step": 27220 }, { "epoch": 1.4907189399331982, "grad_norm": 0.11680677533149719, "learning_rate": 2.7949705941999593e-05, "loss": 0.2107, "step": 27225 }, { "epoch": 1.4909927175162898, "grad_norm": 0.14264671504497528, "learning_rate": 2.7944635976475363e-05, "loss": 0.2054, "step": 27230 }, { "epoch": 1.4912664950993813, "grad_norm": 0.11655678600072861, "learning_rate": 2.793956601095113e-05, "loss": 0.1999, "step": 27235 }, { "epoch": 1.4915402726824727, "grad_norm": 0.12039065361022949, "learning_rate": 2.7934496045426896e-05, "loss": 0.2107, "step": 27240 }, { "epoch": 1.4918140502655644, "grad_norm": 0.13160185515880585, "learning_rate": 2.792942607990266e-05, "loss": 0.1941, "step": 27245 }, { "epoch": 1.4920878278486558, "grad_norm": 0.13246628642082214, "learning_rate": 2.7924356114378426e-05, "loss": 0.2075, "step": 27250 }, { "epoch": 1.4923616054317472, "grad_norm": 0.13283902406692505, "learning_rate": 2.791928614885419e-05, "loss": 0.1961, "step": 27255 }, { "epoch": 1.4926353830148387, "grad_norm": 0.11007584631443024, "learning_rate": 2.7914216183329956e-05, "loss": 0.1968, "step": 27260 }, { "epoch": 1.49290916059793, "grad_norm": 0.12239349633455276, "learning_rate": 2.790914621780572e-05, "loss": 0.2104, "step": 27265 }, { "epoch": 1.4931829381810218, "grad_norm": 0.14221015572547913, "learning_rate": 2.7904076252281486e-05, "loss": 0.201, "step": 27270 }, { "epoch": 1.4934567157641132, "grad_norm": 0.11574264615774155, "learning_rate": 2.7899006286757253e-05, "loss": 0.1926, "step": 27275 }, { "epoch": 1.4937304933472046, "grad_norm": 0.14624595642089844, "learning_rate": 2.7893936321233016e-05, "loss": 0.213, "step": 27280 }, { "epoch": 1.4940042709302963, "grad_norm": 0.13034842908382416, "learning_rate": 2.7888866355708783e-05, "loss": 0.208, "step": 27285 }, { "epoch": 1.4942780485133877, "grad_norm": 0.12909917533397675, "learning_rate": 2.7883796390184546e-05, "loss": 0.1973, "step": 27290 }, { "epoch": 1.4945518260964792, "grad_norm": 0.11621188372373581, "learning_rate": 2.7878726424660313e-05, "loss": 0.2072, "step": 27295 }, { "epoch": 1.4948256036795708, "grad_norm": 0.1262647658586502, "learning_rate": 2.7873656459136076e-05, "loss": 0.2101, "step": 27300 }, { "epoch": 1.4950993812626623, "grad_norm": 0.1332654356956482, "learning_rate": 2.7868586493611843e-05, "loss": 0.204, "step": 27305 }, { "epoch": 1.4953731588457537, "grad_norm": 0.1084536612033844, "learning_rate": 2.7863516528087606e-05, "loss": 0.1977, "step": 27310 }, { "epoch": 1.4956469364288452, "grad_norm": 0.12735146284103394, "learning_rate": 2.785844656256338e-05, "loss": 0.2063, "step": 27315 }, { "epoch": 1.4959207140119366, "grad_norm": 0.1371576189994812, "learning_rate": 2.7853376597039143e-05, "loss": 0.1993, "step": 27320 }, { "epoch": 1.4961944915950283, "grad_norm": 0.14039209485054016, "learning_rate": 2.784830663151491e-05, "loss": 0.2033, "step": 27325 }, { "epoch": 1.4964682691781197, "grad_norm": 0.11497431993484497, "learning_rate": 2.7843236665990673e-05, "loss": 0.1958, "step": 27330 }, { "epoch": 1.4967420467612111, "grad_norm": 0.10660242289304733, "learning_rate": 2.783816670046644e-05, "loss": 0.2025, "step": 27335 }, { "epoch": 1.4970158243443028, "grad_norm": 0.11901205778121948, "learning_rate": 2.7833096734942206e-05, "loss": 0.1955, "step": 27340 }, { "epoch": 1.4972896019273942, "grad_norm": 0.11857552081346512, "learning_rate": 2.782802676941797e-05, "loss": 0.2031, "step": 27345 }, { "epoch": 1.4975633795104857, "grad_norm": 0.12144239991903305, "learning_rate": 2.7822956803893736e-05, "loss": 0.2054, "step": 27350 }, { "epoch": 1.4978371570935771, "grad_norm": 0.12281043082475662, "learning_rate": 2.78178868383695e-05, "loss": 0.2042, "step": 27355 }, { "epoch": 1.4981109346766686, "grad_norm": 0.1404409110546112, "learning_rate": 2.7812816872845266e-05, "loss": 0.2031, "step": 27360 }, { "epoch": 1.4983847122597602, "grad_norm": 0.12903635203838348, "learning_rate": 2.780774690732103e-05, "loss": 0.2039, "step": 27365 }, { "epoch": 1.4986584898428517, "grad_norm": 0.13565491139888763, "learning_rate": 2.7802676941796796e-05, "loss": 0.2046, "step": 27370 }, { "epoch": 1.498932267425943, "grad_norm": 0.1351700723171234, "learning_rate": 2.779760697627256e-05, "loss": 0.2027, "step": 27375 }, { "epoch": 1.4992060450090348, "grad_norm": 0.11804047226905823, "learning_rate": 2.7792537010748326e-05, "loss": 0.1974, "step": 27380 }, { "epoch": 1.4994798225921262, "grad_norm": 0.13799168169498444, "learning_rate": 2.7787467045224093e-05, "loss": 0.2073, "step": 27385 }, { "epoch": 1.4997536001752176, "grad_norm": 0.11188524961471558, "learning_rate": 2.7782397079699856e-05, "loss": 0.1913, "step": 27390 }, { "epoch": 1.5000273777583093, "grad_norm": 0.14439722895622253, "learning_rate": 2.7777327114175626e-05, "loss": 0.2081, "step": 27395 }, { "epoch": 1.5003011553414005, "grad_norm": 0.12741577625274658, "learning_rate": 2.7772257148651393e-05, "loss": 0.208, "step": 27400 }, { "epoch": 1.5005749329244922, "grad_norm": 0.13647235929965973, "learning_rate": 2.776718718312716e-05, "loss": 0.2028, "step": 27405 }, { "epoch": 1.5008487105075836, "grad_norm": 0.13848039507865906, "learning_rate": 2.7762117217602923e-05, "loss": 0.2071, "step": 27410 }, { "epoch": 1.501122488090675, "grad_norm": 0.16431908309459686, "learning_rate": 2.775704725207869e-05, "loss": 0.209, "step": 27415 }, { "epoch": 1.5013962656737667, "grad_norm": 0.13501207530498505, "learning_rate": 2.7751977286554453e-05, "loss": 0.209, "step": 27420 }, { "epoch": 1.5016700432568582, "grad_norm": 0.14893348515033722, "learning_rate": 2.774690732103022e-05, "loss": 0.2097, "step": 27425 }, { "epoch": 1.5019438208399496, "grad_norm": 0.14199639856815338, "learning_rate": 2.7741837355505983e-05, "loss": 0.2106, "step": 27430 }, { "epoch": 1.5022175984230413, "grad_norm": 0.14438869059085846, "learning_rate": 2.773676738998175e-05, "loss": 0.2101, "step": 27435 }, { "epoch": 1.5024913760061325, "grad_norm": 0.13601729273796082, "learning_rate": 2.7731697424457513e-05, "loss": 0.2058, "step": 27440 }, { "epoch": 1.5027651535892241, "grad_norm": 0.12931431829929352, "learning_rate": 2.772662745893328e-05, "loss": 0.2029, "step": 27445 }, { "epoch": 1.5030389311723156, "grad_norm": 0.1453094482421875, "learning_rate": 2.7721557493409046e-05, "loss": 0.2125, "step": 27450 }, { "epoch": 1.503312708755407, "grad_norm": 0.1176081970334053, "learning_rate": 2.771648752788481e-05, "loss": 0.1977, "step": 27455 }, { "epoch": 1.5035864863384987, "grad_norm": 0.12620733678340912, "learning_rate": 2.7711417562360576e-05, "loss": 0.2011, "step": 27460 }, { "epoch": 1.5038602639215901, "grad_norm": 0.1183231770992279, "learning_rate": 2.770634759683634e-05, "loss": 0.1978, "step": 27465 }, { "epoch": 1.5041340415046816, "grad_norm": 0.13252760469913483, "learning_rate": 2.7701277631312106e-05, "loss": 0.2074, "step": 27470 }, { "epoch": 1.5044078190877732, "grad_norm": 0.1267506629228592, "learning_rate": 2.7696207665787876e-05, "loss": 0.2122, "step": 27475 }, { "epoch": 1.5046815966708644, "grad_norm": 0.12936925888061523, "learning_rate": 2.7691137700263643e-05, "loss": 0.204, "step": 27480 }, { "epoch": 1.504955374253956, "grad_norm": 0.12773805856704712, "learning_rate": 2.7686067734739406e-05, "loss": 0.2039, "step": 27485 }, { "epoch": 1.5052291518370475, "grad_norm": 0.1213703528046608, "learning_rate": 2.7680997769215173e-05, "loss": 0.2028, "step": 27490 }, { "epoch": 1.505502929420139, "grad_norm": 0.11484389007091522, "learning_rate": 2.7675927803690936e-05, "loss": 0.193, "step": 27495 }, { "epoch": 1.5057767070032306, "grad_norm": 0.12684006989002228, "learning_rate": 2.7670857838166703e-05, "loss": 0.1982, "step": 27500 }, { "epoch": 1.506050484586322, "grad_norm": 0.12836481630802155, "learning_rate": 2.7665787872642466e-05, "loss": 0.2017, "step": 27505 }, { "epoch": 1.5063242621694135, "grad_norm": 0.13717186450958252, "learning_rate": 2.7660717907118233e-05, "loss": 0.1985, "step": 27510 }, { "epoch": 1.5065980397525052, "grad_norm": 0.139656201004982, "learning_rate": 2.7655647941594e-05, "loss": 0.2003, "step": 27515 }, { "epoch": 1.5068718173355966, "grad_norm": 0.12634173035621643, "learning_rate": 2.7650577976069763e-05, "loss": 0.1997, "step": 27520 }, { "epoch": 1.507145594918688, "grad_norm": 0.13223564624786377, "learning_rate": 2.764550801054553e-05, "loss": 0.2121, "step": 27525 }, { "epoch": 1.5074193725017797, "grad_norm": 0.12262260168790817, "learning_rate": 2.7640438045021293e-05, "loss": 0.2043, "step": 27530 }, { "epoch": 1.507693150084871, "grad_norm": 0.1075427383184433, "learning_rate": 2.763536807949706e-05, "loss": 0.1991, "step": 27535 }, { "epoch": 1.5079669276679626, "grad_norm": 0.12935128808021545, "learning_rate": 2.7630298113972823e-05, "loss": 0.2028, "step": 27540 }, { "epoch": 1.508240705251054, "grad_norm": 0.11956032365560532, "learning_rate": 2.762522814844859e-05, "loss": 0.2048, "step": 27545 }, { "epoch": 1.5085144828341455, "grad_norm": 0.1429830640554428, "learning_rate": 2.7620158182924356e-05, "loss": 0.2074, "step": 27550 }, { "epoch": 1.5087882604172371, "grad_norm": 0.12948885560035706, "learning_rate": 2.7615088217400127e-05, "loss": 0.1963, "step": 27555 }, { "epoch": 1.5090620380003286, "grad_norm": 0.14512529969215393, "learning_rate": 2.761001825187589e-05, "loss": 0.1967, "step": 27560 }, { "epoch": 1.50933581558342, "grad_norm": 0.11291255056858063, "learning_rate": 2.7604948286351657e-05, "loss": 0.2066, "step": 27565 }, { "epoch": 1.5096095931665117, "grad_norm": 0.13697078824043274, "learning_rate": 2.759987832082742e-05, "loss": 0.2161, "step": 27570 }, { "epoch": 1.5098833707496029, "grad_norm": 0.1552678644657135, "learning_rate": 2.7594808355303187e-05, "loss": 0.2091, "step": 27575 }, { "epoch": 1.5101571483326945, "grad_norm": 0.12682528793811798, "learning_rate": 2.7589738389778953e-05, "loss": 0.2036, "step": 27580 }, { "epoch": 1.510430925915786, "grad_norm": 0.11274135857820511, "learning_rate": 2.7584668424254717e-05, "loss": 0.1984, "step": 27585 }, { "epoch": 1.5107047034988774, "grad_norm": 0.12640810012817383, "learning_rate": 2.7579598458730483e-05, "loss": 0.2065, "step": 27590 }, { "epoch": 1.510978481081969, "grad_norm": 0.12161728739738464, "learning_rate": 2.7574528493206247e-05, "loss": 0.2019, "step": 27595 }, { "epoch": 1.5112522586650605, "grad_norm": 0.11461564898490906, "learning_rate": 2.7569458527682013e-05, "loss": 0.2023, "step": 27600 }, { "epoch": 1.511526036248152, "grad_norm": 0.13479454815387726, "learning_rate": 2.7564388562157777e-05, "loss": 0.1987, "step": 27605 }, { "epoch": 1.5117998138312436, "grad_norm": 0.129834845662117, "learning_rate": 2.7559318596633543e-05, "loss": 0.2009, "step": 27610 }, { "epoch": 1.5120735914143348, "grad_norm": 0.12094981223344803, "learning_rate": 2.755424863110931e-05, "loss": 0.2062, "step": 27615 }, { "epoch": 1.5123473689974265, "grad_norm": 0.1324750930070877, "learning_rate": 2.7549178665585073e-05, "loss": 0.208, "step": 27620 }, { "epoch": 1.512621146580518, "grad_norm": 0.1171601191163063, "learning_rate": 2.754410870006084e-05, "loss": 0.1967, "step": 27625 }, { "epoch": 1.5128949241636094, "grad_norm": 0.1417178511619568, "learning_rate": 2.7539038734536603e-05, "loss": 0.1986, "step": 27630 }, { "epoch": 1.513168701746701, "grad_norm": 0.14402726292610168, "learning_rate": 2.753396876901237e-05, "loss": 0.2065, "step": 27635 }, { "epoch": 1.5134424793297925, "grad_norm": 0.13601797819137573, "learning_rate": 2.752889880348814e-05, "loss": 0.2025, "step": 27640 }, { "epoch": 1.513716256912884, "grad_norm": 0.14076413214206696, "learning_rate": 2.7523828837963907e-05, "loss": 0.2019, "step": 27645 }, { "epoch": 1.5139900344959756, "grad_norm": 0.11943147331476212, "learning_rate": 2.751875887243967e-05, "loss": 0.2105, "step": 27650 }, { "epoch": 1.514263812079067, "grad_norm": 0.11186113953590393, "learning_rate": 2.7513688906915437e-05, "loss": 0.2071, "step": 27655 }, { "epoch": 1.5145375896621585, "grad_norm": 0.1120511069893837, "learning_rate": 2.75086189413912e-05, "loss": 0.2059, "step": 27660 }, { "epoch": 1.5148113672452501, "grad_norm": 0.12781095504760742, "learning_rate": 2.7503548975866967e-05, "loss": 0.2019, "step": 27665 }, { "epoch": 1.5150851448283413, "grad_norm": 0.11099603027105331, "learning_rate": 2.749847901034273e-05, "loss": 0.2114, "step": 27670 }, { "epoch": 1.515358922411433, "grad_norm": 0.1353714019060135, "learning_rate": 2.7493409044818497e-05, "loss": 0.2172, "step": 27675 }, { "epoch": 1.5156326999945244, "grad_norm": 0.12110942602157593, "learning_rate": 2.7488339079294263e-05, "loss": 0.197, "step": 27680 }, { "epoch": 1.5159064775776159, "grad_norm": 0.10773999243974686, "learning_rate": 2.7483269113770027e-05, "loss": 0.2051, "step": 27685 }, { "epoch": 1.5161802551607075, "grad_norm": 0.1336909979581833, "learning_rate": 2.7478199148245793e-05, "loss": 0.1963, "step": 27690 }, { "epoch": 1.516454032743799, "grad_norm": 0.13189582526683807, "learning_rate": 2.7473129182721557e-05, "loss": 0.2003, "step": 27695 }, { "epoch": 1.5167278103268904, "grad_norm": 0.11951714009046555, "learning_rate": 2.7468059217197323e-05, "loss": 0.2068, "step": 27700 }, { "epoch": 1.517001587909982, "grad_norm": 0.12900233268737793, "learning_rate": 2.7462989251673087e-05, "loss": 0.2078, "step": 27705 }, { "epoch": 1.5172753654930733, "grad_norm": 0.13668620586395264, "learning_rate": 2.7457919286148853e-05, "loss": 0.2038, "step": 27710 }, { "epoch": 1.517549143076165, "grad_norm": 0.1215183213353157, "learning_rate": 2.745284932062462e-05, "loss": 0.2043, "step": 27715 }, { "epoch": 1.5178229206592564, "grad_norm": 0.11773446202278137, "learning_rate": 2.744777935510039e-05, "loss": 0.2115, "step": 27720 }, { "epoch": 1.5180966982423478, "grad_norm": 0.10927470028400421, "learning_rate": 2.7442709389576153e-05, "loss": 0.1998, "step": 27725 }, { "epoch": 1.5183704758254395, "grad_norm": 0.12339528650045395, "learning_rate": 2.743763942405192e-05, "loss": 0.1986, "step": 27730 }, { "epoch": 1.518644253408531, "grad_norm": 0.12687063217163086, "learning_rate": 2.7432569458527683e-05, "loss": 0.2009, "step": 27735 }, { "epoch": 1.5189180309916224, "grad_norm": 0.1165299266576767, "learning_rate": 2.742749949300345e-05, "loss": 0.2094, "step": 27740 }, { "epoch": 1.519191808574714, "grad_norm": 0.11607792228460312, "learning_rate": 2.7422429527479217e-05, "loss": 0.1882, "step": 27745 }, { "epoch": 1.5194655861578052, "grad_norm": 0.10703251510858536, "learning_rate": 2.741735956195498e-05, "loss": 0.2033, "step": 27750 }, { "epoch": 1.519739363740897, "grad_norm": 0.1330576092004776, "learning_rate": 2.7412289596430747e-05, "loss": 0.2052, "step": 27755 }, { "epoch": 1.5200131413239883, "grad_norm": 0.11892309784889221, "learning_rate": 2.740721963090651e-05, "loss": 0.1957, "step": 27760 }, { "epoch": 1.5202869189070798, "grad_norm": 0.1248881071805954, "learning_rate": 2.7402149665382277e-05, "loss": 0.1986, "step": 27765 }, { "epoch": 1.5205606964901714, "grad_norm": 0.11560092866420746, "learning_rate": 2.739707969985804e-05, "loss": 0.2067, "step": 27770 }, { "epoch": 1.5208344740732629, "grad_norm": 0.14749674499034882, "learning_rate": 2.7392009734333807e-05, "loss": 0.1977, "step": 27775 }, { "epoch": 1.5211082516563543, "grad_norm": 0.14858457446098328, "learning_rate": 2.7386939768809573e-05, "loss": 0.2003, "step": 27780 }, { "epoch": 1.521382029239446, "grad_norm": 0.15315864980220795, "learning_rate": 2.7381869803285337e-05, "loss": 0.2044, "step": 27785 }, { "epoch": 1.5216558068225372, "grad_norm": 0.16162405908107758, "learning_rate": 2.7376799837761103e-05, "loss": 0.2072, "step": 27790 }, { "epoch": 1.5219295844056289, "grad_norm": 0.14167863130569458, "learning_rate": 2.7371729872236867e-05, "loss": 0.1977, "step": 27795 }, { "epoch": 1.5222033619887205, "grad_norm": 0.12891988456249237, "learning_rate": 2.7366659906712637e-05, "loss": 0.2013, "step": 27800 }, { "epoch": 1.5224771395718117, "grad_norm": 0.11975474655628204, "learning_rate": 2.7361589941188404e-05, "loss": 0.2008, "step": 27805 }, { "epoch": 1.5227509171549034, "grad_norm": 0.11713037639856339, "learning_rate": 2.735651997566417e-05, "loss": 0.2078, "step": 27810 }, { "epoch": 1.5230246947379948, "grad_norm": 0.11123387515544891, "learning_rate": 2.7351450010139934e-05, "loss": 0.1922, "step": 27815 }, { "epoch": 1.5232984723210863, "grad_norm": 0.1286381185054779, "learning_rate": 2.73463800446157e-05, "loss": 0.2088, "step": 27820 }, { "epoch": 1.523572249904178, "grad_norm": 0.10992564260959625, "learning_rate": 2.7341310079091464e-05, "loss": 0.2013, "step": 27825 }, { "epoch": 1.5238460274872694, "grad_norm": 0.1068979874253273, "learning_rate": 2.733624011356723e-05, "loss": 0.1944, "step": 27830 }, { "epoch": 1.5241198050703608, "grad_norm": 0.12150554358959198, "learning_rate": 2.7331170148042994e-05, "loss": 0.2072, "step": 27835 }, { "epoch": 1.5243935826534525, "grad_norm": 0.11734871566295624, "learning_rate": 2.732610018251876e-05, "loss": 0.2031, "step": 27840 }, { "epoch": 1.5246673602365437, "grad_norm": 0.11219346523284912, "learning_rate": 2.7321030216994527e-05, "loss": 0.2005, "step": 27845 }, { "epoch": 1.5249411378196354, "grad_norm": 0.1224292740225792, "learning_rate": 2.731596025147029e-05, "loss": 0.2086, "step": 27850 }, { "epoch": 1.5252149154027268, "grad_norm": 0.12496708333492279, "learning_rate": 2.7310890285946057e-05, "loss": 0.2099, "step": 27855 }, { "epoch": 1.5254886929858182, "grad_norm": 0.13949641585350037, "learning_rate": 2.730582032042182e-05, "loss": 0.1997, "step": 27860 }, { "epoch": 1.52576247056891, "grad_norm": 0.12220539897680283, "learning_rate": 2.7300750354897587e-05, "loss": 0.2042, "step": 27865 }, { "epoch": 1.5260362481520013, "grad_norm": 0.11826268583536148, "learning_rate": 2.729568038937335e-05, "loss": 0.195, "step": 27870 }, { "epoch": 1.5263100257350928, "grad_norm": 0.12385201454162598, "learning_rate": 2.7290610423849117e-05, "loss": 0.2015, "step": 27875 }, { "epoch": 1.5265838033181844, "grad_norm": 0.1221662387251854, "learning_rate": 2.728554045832488e-05, "loss": 0.2049, "step": 27880 }, { "epoch": 1.5268575809012757, "grad_norm": 0.14962679147720337, "learning_rate": 2.7280470492800654e-05, "loss": 0.2003, "step": 27885 }, { "epoch": 1.5271313584843673, "grad_norm": 0.1014549508690834, "learning_rate": 2.7275400527276417e-05, "loss": 0.2028, "step": 27890 }, { "epoch": 1.5274051360674588, "grad_norm": 0.13385285437107086, "learning_rate": 2.7270330561752184e-05, "loss": 0.2065, "step": 27895 }, { "epoch": 1.5276789136505502, "grad_norm": 0.14848360419273376, "learning_rate": 2.7265260596227947e-05, "loss": 0.2002, "step": 27900 }, { "epoch": 1.5279526912336419, "grad_norm": 0.1171117052435875, "learning_rate": 2.7260190630703714e-05, "loss": 0.195, "step": 27905 }, { "epoch": 1.5282264688167333, "grad_norm": 0.11186744272708893, "learning_rate": 2.725512066517948e-05, "loss": 0.2048, "step": 27910 }, { "epoch": 1.5285002463998247, "grad_norm": 0.13633324205875397, "learning_rate": 2.7250050699655244e-05, "loss": 0.206, "step": 27915 }, { "epoch": 1.5287740239829164, "grad_norm": 0.11859816312789917, "learning_rate": 2.724498073413101e-05, "loss": 0.2023, "step": 27920 }, { "epoch": 1.5290478015660076, "grad_norm": 0.12121239304542542, "learning_rate": 2.7239910768606774e-05, "loss": 0.2099, "step": 27925 }, { "epoch": 1.5293215791490993, "grad_norm": 0.12511102855205536, "learning_rate": 2.723484080308254e-05, "loss": 0.2099, "step": 27930 }, { "epoch": 1.5295953567321907, "grad_norm": 0.13571032881736755, "learning_rate": 2.7229770837558304e-05, "loss": 0.1974, "step": 27935 }, { "epoch": 1.5298691343152822, "grad_norm": 0.1493314653635025, "learning_rate": 2.722470087203407e-05, "loss": 0.2023, "step": 27940 }, { "epoch": 1.5301429118983738, "grad_norm": 0.13073518872261047, "learning_rate": 2.7219630906509834e-05, "loss": 0.1975, "step": 27945 }, { "epoch": 1.5304166894814653, "grad_norm": 0.1279115229845047, "learning_rate": 2.72145609409856e-05, "loss": 0.2079, "step": 27950 }, { "epoch": 1.5306904670645567, "grad_norm": 0.1157677173614502, "learning_rate": 2.7209490975461367e-05, "loss": 0.2075, "step": 27955 }, { "epoch": 1.5309642446476484, "grad_norm": 0.11374475061893463, "learning_rate": 2.720442100993713e-05, "loss": 0.199, "step": 27960 }, { "epoch": 1.5312380222307398, "grad_norm": 0.11072325706481934, "learning_rate": 2.71993510444129e-05, "loss": 0.2032, "step": 27965 }, { "epoch": 1.5315117998138312, "grad_norm": 0.11866999417543411, "learning_rate": 2.7194281078888667e-05, "loss": 0.2, "step": 27970 }, { "epoch": 1.531785577396923, "grad_norm": 0.11741872131824493, "learning_rate": 2.7189211113364434e-05, "loss": 0.2051, "step": 27975 }, { "epoch": 1.532059354980014, "grad_norm": 0.12935523688793182, "learning_rate": 2.7184141147840197e-05, "loss": 0.2034, "step": 27980 }, { "epoch": 1.5323331325631058, "grad_norm": 0.13342411816120148, "learning_rate": 2.7179071182315964e-05, "loss": 0.2039, "step": 27985 }, { "epoch": 1.5326069101461972, "grad_norm": 0.11240498721599579, "learning_rate": 2.7174001216791727e-05, "loss": 0.1978, "step": 27990 }, { "epoch": 1.5328806877292886, "grad_norm": 0.11614249646663666, "learning_rate": 2.7168931251267494e-05, "loss": 0.2029, "step": 27995 }, { "epoch": 1.5331544653123803, "grad_norm": 0.13441044092178345, "learning_rate": 2.7163861285743257e-05, "loss": 0.2023, "step": 28000 }, { "epoch": 1.5334282428954717, "grad_norm": 0.13379092514514923, "learning_rate": 2.7158791320219024e-05, "loss": 0.2121, "step": 28005 }, { "epoch": 1.5337020204785632, "grad_norm": 0.1345641314983368, "learning_rate": 2.7153721354694787e-05, "loss": 0.2064, "step": 28010 }, { "epoch": 1.5339757980616548, "grad_norm": 0.14134511351585388, "learning_rate": 2.7148651389170554e-05, "loss": 0.2045, "step": 28015 }, { "epoch": 1.534249575644746, "grad_norm": 0.12444156408309937, "learning_rate": 2.714358142364632e-05, "loss": 0.2183, "step": 28020 }, { "epoch": 1.5345233532278377, "grad_norm": 0.13743236660957336, "learning_rate": 2.7138511458122084e-05, "loss": 0.2057, "step": 28025 }, { "epoch": 1.5347971308109292, "grad_norm": 0.13107715547084808, "learning_rate": 2.713344149259785e-05, "loss": 0.2072, "step": 28030 }, { "epoch": 1.5350709083940206, "grad_norm": 0.14449499547481537, "learning_rate": 2.7128371527073614e-05, "loss": 0.2067, "step": 28035 }, { "epoch": 1.5353446859771123, "grad_norm": 0.14547418057918549, "learning_rate": 2.712330156154938e-05, "loss": 0.2075, "step": 28040 }, { "epoch": 1.5356184635602037, "grad_norm": 0.15175437927246094, "learning_rate": 2.711823159602515e-05, "loss": 0.1998, "step": 28045 }, { "epoch": 1.5358922411432951, "grad_norm": 0.15029333531856537, "learning_rate": 2.7113161630500917e-05, "loss": 0.2049, "step": 28050 }, { "epoch": 1.5361660187263868, "grad_norm": 0.11858697235584259, "learning_rate": 2.710809166497668e-05, "loss": 0.2041, "step": 28055 }, { "epoch": 1.536439796309478, "grad_norm": 0.1333753913640976, "learning_rate": 2.7103021699452447e-05, "loss": 0.2009, "step": 28060 }, { "epoch": 1.5367135738925697, "grad_norm": 0.14709413051605225, "learning_rate": 2.709795173392821e-05, "loss": 0.2001, "step": 28065 }, { "epoch": 1.5369873514756611, "grad_norm": 0.1401974856853485, "learning_rate": 2.7092881768403977e-05, "loss": 0.2105, "step": 28070 }, { "epoch": 1.5372611290587526, "grad_norm": 0.13489802181720734, "learning_rate": 2.708781180287974e-05, "loss": 0.2086, "step": 28075 }, { "epoch": 1.5375349066418442, "grad_norm": 0.14142611622810364, "learning_rate": 2.7082741837355507e-05, "loss": 0.2091, "step": 28080 }, { "epoch": 1.5378086842249357, "grad_norm": 0.12645362317562103, "learning_rate": 2.7077671871831274e-05, "loss": 0.2034, "step": 28085 }, { "epoch": 1.538082461808027, "grad_norm": 0.14074303209781647, "learning_rate": 2.7072601906307037e-05, "loss": 0.206, "step": 28090 }, { "epoch": 1.5383562393911188, "grad_norm": 0.16648328304290771, "learning_rate": 2.7067531940782804e-05, "loss": 0.2076, "step": 28095 }, { "epoch": 1.5386300169742102, "grad_norm": 0.11409246921539307, "learning_rate": 2.7062461975258567e-05, "loss": 0.193, "step": 28100 }, { "epoch": 1.5389037945573016, "grad_norm": 0.12700100243091583, "learning_rate": 2.7057392009734334e-05, "loss": 0.1994, "step": 28105 }, { "epoch": 1.5391775721403933, "grad_norm": 0.12351479381322861, "learning_rate": 2.7052322044210097e-05, "loss": 0.1928, "step": 28110 }, { "epoch": 1.5394513497234845, "grad_norm": 0.10621009767055511, "learning_rate": 2.7047252078685864e-05, "loss": 0.2019, "step": 28115 }, { "epoch": 1.5397251273065762, "grad_norm": 0.1278925985097885, "learning_rate": 2.704218211316163e-05, "loss": 0.202, "step": 28120 }, { "epoch": 1.5399989048896676, "grad_norm": 0.129651740193367, "learning_rate": 2.70371121476374e-05, "loss": 0.2062, "step": 28125 }, { "epoch": 1.540272682472759, "grad_norm": 0.14439333975315094, "learning_rate": 2.7032042182113164e-05, "loss": 0.1954, "step": 28130 }, { "epoch": 1.5405464600558507, "grad_norm": 0.13990865647792816, "learning_rate": 2.702697221658893e-05, "loss": 0.201, "step": 28135 }, { "epoch": 1.5408202376389422, "grad_norm": 0.11771204322576523, "learning_rate": 2.7021902251064694e-05, "loss": 0.2091, "step": 28140 }, { "epoch": 1.5410940152220336, "grad_norm": 0.11014660447835922, "learning_rate": 2.701683228554046e-05, "loss": 0.1927, "step": 28145 }, { "epoch": 1.5413677928051253, "grad_norm": 0.12705974280834198, "learning_rate": 2.7011762320016227e-05, "loss": 0.1971, "step": 28150 }, { "epoch": 1.5416415703882165, "grad_norm": 0.13523463904857635, "learning_rate": 2.700669235449199e-05, "loss": 0.2059, "step": 28155 }, { "epoch": 1.5419153479713081, "grad_norm": 0.18872308731079102, "learning_rate": 2.7001622388967757e-05, "loss": 0.207, "step": 28160 }, { "epoch": 1.5421891255543996, "grad_norm": 0.1702509969472885, "learning_rate": 2.699655242344352e-05, "loss": 0.2101, "step": 28165 }, { "epoch": 1.542462903137491, "grad_norm": 0.12764661014080048, "learning_rate": 2.6991482457919287e-05, "loss": 0.2002, "step": 28170 }, { "epoch": 1.5427366807205827, "grad_norm": 0.13913874328136444, "learning_rate": 2.698641249239505e-05, "loss": 0.2024, "step": 28175 }, { "epoch": 1.5430104583036741, "grad_norm": 0.12755931913852692, "learning_rate": 2.6981342526870817e-05, "loss": 0.2018, "step": 28180 }, { "epoch": 1.5432842358867656, "grad_norm": 0.16299556195735931, "learning_rate": 2.6976272561346584e-05, "loss": 0.2074, "step": 28185 }, { "epoch": 1.5435580134698572, "grad_norm": 0.1370321363210678, "learning_rate": 2.6971202595822347e-05, "loss": 0.1956, "step": 28190 }, { "epoch": 1.5438317910529484, "grad_norm": 0.14254991710186005, "learning_rate": 2.6966132630298114e-05, "loss": 0.201, "step": 28195 }, { "epoch": 1.54410556863604, "grad_norm": 0.1120515838265419, "learning_rate": 2.6961062664773877e-05, "loss": 0.2007, "step": 28200 }, { "epoch": 1.5443793462191315, "grad_norm": 0.10800252109766006, "learning_rate": 2.6955992699249644e-05, "loss": 0.2011, "step": 28205 }, { "epoch": 1.544653123802223, "grad_norm": 0.11850161105394363, "learning_rate": 2.6950922733725414e-05, "loss": 0.1976, "step": 28210 }, { "epoch": 1.5449269013853146, "grad_norm": 0.12028968334197998, "learning_rate": 2.694585276820118e-05, "loss": 0.2091, "step": 28215 }, { "epoch": 1.545200678968406, "grad_norm": 0.11238904297351837, "learning_rate": 2.6940782802676944e-05, "loss": 0.2062, "step": 28220 }, { "epoch": 1.5454744565514975, "grad_norm": 0.11186893284320831, "learning_rate": 2.693571283715271e-05, "loss": 0.1963, "step": 28225 }, { "epoch": 1.5457482341345892, "grad_norm": 0.12838754057884216, "learning_rate": 2.6930642871628474e-05, "loss": 0.1968, "step": 28230 }, { "epoch": 1.5460220117176806, "grad_norm": 0.14198635518550873, "learning_rate": 2.692557290610424e-05, "loss": 0.203, "step": 28235 }, { "epoch": 1.546295789300772, "grad_norm": 0.156980499625206, "learning_rate": 2.6920502940580004e-05, "loss": 0.2112, "step": 28240 }, { "epoch": 1.5465695668838637, "grad_norm": 0.12736451625823975, "learning_rate": 2.691543297505577e-05, "loss": 0.1987, "step": 28245 }, { "epoch": 1.546843344466955, "grad_norm": 0.1331537514925003, "learning_rate": 2.6910363009531538e-05, "loss": 0.208, "step": 28250 }, { "epoch": 1.5471171220500466, "grad_norm": 0.11807723343372345, "learning_rate": 2.69052930440073e-05, "loss": 0.1985, "step": 28255 }, { "epoch": 1.547390899633138, "grad_norm": 0.13393297791481018, "learning_rate": 2.6900223078483068e-05, "loss": 0.2043, "step": 28260 }, { "epoch": 1.5476646772162295, "grad_norm": 0.1308920830488205, "learning_rate": 2.689515311295883e-05, "loss": 0.2039, "step": 28265 }, { "epoch": 1.5479384547993211, "grad_norm": 0.16371572017669678, "learning_rate": 2.6890083147434598e-05, "loss": 0.2045, "step": 28270 }, { "epoch": 1.5482122323824126, "grad_norm": 0.126969113945961, "learning_rate": 2.688501318191036e-05, "loss": 0.2111, "step": 28275 }, { "epoch": 1.548486009965504, "grad_norm": 0.15192198753356934, "learning_rate": 2.6879943216386128e-05, "loss": 0.206, "step": 28280 }, { "epoch": 1.5487597875485957, "grad_norm": 0.13925743103027344, "learning_rate": 2.6874873250861894e-05, "loss": 0.2093, "step": 28285 }, { "epoch": 1.5490335651316869, "grad_norm": 0.11915737390518188, "learning_rate": 2.6869803285337664e-05, "loss": 0.195, "step": 28290 }, { "epoch": 1.5493073427147785, "grad_norm": 0.12577877938747406, "learning_rate": 2.6864733319813428e-05, "loss": 0.195, "step": 28295 }, { "epoch": 1.54958112029787, "grad_norm": 0.1542365550994873, "learning_rate": 2.6859663354289194e-05, "loss": 0.1976, "step": 28300 }, { "epoch": 1.5498548978809614, "grad_norm": 0.13127923011779785, "learning_rate": 2.6854593388764958e-05, "loss": 0.2082, "step": 28305 }, { "epoch": 1.550128675464053, "grad_norm": 0.11700288951396942, "learning_rate": 2.6849523423240724e-05, "loss": 0.1934, "step": 28310 }, { "epoch": 1.5504024530471445, "grad_norm": 0.12258696556091309, "learning_rate": 2.684445345771649e-05, "loss": 0.2025, "step": 28315 }, { "epoch": 1.550676230630236, "grad_norm": 0.13286817073822021, "learning_rate": 2.6839383492192254e-05, "loss": 0.1992, "step": 28320 }, { "epoch": 1.5509500082133276, "grad_norm": 0.12363585829734802, "learning_rate": 2.683431352666802e-05, "loss": 0.2004, "step": 28325 }, { "epoch": 1.5512237857964188, "grad_norm": 0.1556745022535324, "learning_rate": 2.6829243561143784e-05, "loss": 0.2053, "step": 28330 }, { "epoch": 1.5514975633795105, "grad_norm": 0.11657492816448212, "learning_rate": 2.682417359561955e-05, "loss": 0.2049, "step": 28335 }, { "epoch": 1.551771340962602, "grad_norm": 0.12474113702774048, "learning_rate": 2.6819103630095314e-05, "loss": 0.2123, "step": 28340 }, { "epoch": 1.5520451185456934, "grad_norm": 0.11382238566875458, "learning_rate": 2.681403366457108e-05, "loss": 0.2061, "step": 28345 }, { "epoch": 1.552318896128785, "grad_norm": 0.11410623788833618, "learning_rate": 2.6808963699046848e-05, "loss": 0.2009, "step": 28350 }, { "epoch": 1.5525926737118765, "grad_norm": 0.12350527197122574, "learning_rate": 2.680389373352261e-05, "loss": 0.2019, "step": 28355 }, { "epoch": 1.552866451294968, "grad_norm": 0.12392079830169678, "learning_rate": 2.6798823767998378e-05, "loss": 0.2081, "step": 28360 }, { "epoch": 1.5531402288780596, "grad_norm": 0.12288577854633331, "learning_rate": 2.679375380247414e-05, "loss": 0.1973, "step": 28365 }, { "epoch": 1.5534140064611508, "grad_norm": 0.11127223819494247, "learning_rate": 2.678868383694991e-05, "loss": 0.1971, "step": 28370 }, { "epoch": 1.5536877840442425, "grad_norm": 0.13189896941184998, "learning_rate": 2.6783613871425678e-05, "loss": 0.2033, "step": 28375 }, { "epoch": 1.5539615616273341, "grad_norm": 0.12444491684436798, "learning_rate": 2.6778543905901444e-05, "loss": 0.2036, "step": 28380 }, { "epoch": 1.5542353392104253, "grad_norm": 0.14508411288261414, "learning_rate": 2.6773473940377208e-05, "loss": 0.2061, "step": 28385 }, { "epoch": 1.554509116793517, "grad_norm": 0.11727740615606308, "learning_rate": 2.6768403974852974e-05, "loss": 0.2105, "step": 28390 }, { "epoch": 1.5547828943766084, "grad_norm": 0.11203032732009888, "learning_rate": 2.6763334009328738e-05, "loss": 0.2116, "step": 28395 }, { "epoch": 1.5550566719596999, "grad_norm": 0.1373997926712036, "learning_rate": 2.6758264043804504e-05, "loss": 0.2043, "step": 28400 }, { "epoch": 1.5553304495427915, "grad_norm": 0.13174787163734436, "learning_rate": 2.6753194078280268e-05, "loss": 0.2025, "step": 28405 }, { "epoch": 1.555604227125883, "grad_norm": 0.14437608420848846, "learning_rate": 2.6748124112756034e-05, "loss": 0.1947, "step": 28410 }, { "epoch": 1.5558780047089744, "grad_norm": 0.12886270880699158, "learning_rate": 2.67430541472318e-05, "loss": 0.2028, "step": 28415 }, { "epoch": 1.556151782292066, "grad_norm": 0.14328348636627197, "learning_rate": 2.6737984181707564e-05, "loss": 0.2105, "step": 28420 }, { "epoch": 1.5564255598751573, "grad_norm": 0.1658942997455597, "learning_rate": 2.673291421618333e-05, "loss": 0.2111, "step": 28425 }, { "epoch": 1.556699337458249, "grad_norm": 0.11864323168992996, "learning_rate": 2.6727844250659094e-05, "loss": 0.1986, "step": 28430 }, { "epoch": 1.5569731150413404, "grad_norm": 0.11153461039066315, "learning_rate": 2.672277428513486e-05, "loss": 0.2036, "step": 28435 }, { "epoch": 1.5572468926244318, "grad_norm": 0.11495324969291687, "learning_rate": 2.6717704319610624e-05, "loss": 0.2024, "step": 28440 }, { "epoch": 1.5575206702075235, "grad_norm": 0.12641221284866333, "learning_rate": 2.671263435408639e-05, "loss": 0.1973, "step": 28445 }, { "epoch": 1.557794447790615, "grad_norm": 0.15868312120437622, "learning_rate": 2.670756438856216e-05, "loss": 0.21, "step": 28450 }, { "epoch": 1.5580682253737064, "grad_norm": 0.1271672546863556, "learning_rate": 2.6702494423037928e-05, "loss": 0.1976, "step": 28455 }, { "epoch": 1.558342002956798, "grad_norm": 0.116233691573143, "learning_rate": 2.669742445751369e-05, "loss": 0.205, "step": 28460 }, { "epoch": 1.5586157805398893, "grad_norm": 0.11742140352725983, "learning_rate": 2.6692354491989458e-05, "loss": 0.2049, "step": 28465 }, { "epoch": 1.558889558122981, "grad_norm": 0.11530118435621262, "learning_rate": 2.668728452646522e-05, "loss": 0.1964, "step": 28470 }, { "epoch": 1.5591633357060724, "grad_norm": 0.13150198757648468, "learning_rate": 2.6682214560940988e-05, "loss": 0.2062, "step": 28475 }, { "epoch": 1.5594371132891638, "grad_norm": 0.12373516708612442, "learning_rate": 2.6677144595416755e-05, "loss": 0.2099, "step": 28480 }, { "epoch": 1.5597108908722555, "grad_norm": 0.12283418327569962, "learning_rate": 2.6672074629892518e-05, "loss": 0.2027, "step": 28485 }, { "epoch": 1.559984668455347, "grad_norm": 0.12758179008960724, "learning_rate": 2.6667004664368285e-05, "loss": 0.1957, "step": 28490 }, { "epoch": 1.5602584460384383, "grad_norm": 0.14121559262275696, "learning_rate": 2.6661934698844048e-05, "loss": 0.2118, "step": 28495 }, { "epoch": 1.56053222362153, "grad_norm": 0.12527069449424744, "learning_rate": 2.6656864733319815e-05, "loss": 0.203, "step": 28500 }, { "epoch": 1.5608060012046212, "grad_norm": 0.12741266191005707, "learning_rate": 2.6651794767795578e-05, "loss": 0.2003, "step": 28505 }, { "epoch": 1.5610797787877129, "grad_norm": 0.1292857527732849, "learning_rate": 2.6646724802271345e-05, "loss": 0.2073, "step": 28510 }, { "epoch": 1.5613535563708043, "grad_norm": 0.1315898597240448, "learning_rate": 2.664165483674711e-05, "loss": 0.1992, "step": 28515 }, { "epoch": 1.5616273339538957, "grad_norm": 0.11638636142015457, "learning_rate": 2.6636584871222875e-05, "loss": 0.2055, "step": 28520 }, { "epoch": 1.5619011115369874, "grad_norm": 0.11020632088184357, "learning_rate": 2.663151490569864e-05, "loss": 0.2027, "step": 28525 }, { "epoch": 1.5621748891200788, "grad_norm": 0.12589029967784882, "learning_rate": 2.6626444940174405e-05, "loss": 0.2035, "step": 28530 }, { "epoch": 1.5624486667031703, "grad_norm": 0.138814315199852, "learning_rate": 2.6621374974650175e-05, "loss": 0.2054, "step": 28535 }, { "epoch": 1.562722444286262, "grad_norm": 0.12400680035352707, "learning_rate": 2.661630500912594e-05, "loss": 0.1983, "step": 28540 }, { "epoch": 1.5629962218693534, "grad_norm": 0.13501271605491638, "learning_rate": 2.6611235043601708e-05, "loss": 0.1933, "step": 28545 }, { "epoch": 1.5632699994524448, "grad_norm": 0.11515259742736816, "learning_rate": 2.660616507807747e-05, "loss": 0.1954, "step": 28550 }, { "epoch": 1.5635437770355365, "grad_norm": 0.12135180085897446, "learning_rate": 2.6601095112553238e-05, "loss": 0.2074, "step": 28555 }, { "epoch": 1.5638175546186277, "grad_norm": 0.11429665982723236, "learning_rate": 2.6596025147029e-05, "loss": 0.2027, "step": 28560 }, { "epoch": 1.5640913322017194, "grad_norm": 0.13044820725917816, "learning_rate": 2.6590955181504768e-05, "loss": 0.1938, "step": 28565 }, { "epoch": 1.5643651097848108, "grad_norm": 0.1163635104894638, "learning_rate": 2.658588521598053e-05, "loss": 0.2055, "step": 28570 }, { "epoch": 1.5646388873679022, "grad_norm": 0.12384253740310669, "learning_rate": 2.6580815250456298e-05, "loss": 0.2035, "step": 28575 }, { "epoch": 1.564912664950994, "grad_norm": 0.11276665329933167, "learning_rate": 2.6575745284932065e-05, "loss": 0.2087, "step": 28580 }, { "epoch": 1.5651864425340853, "grad_norm": 0.14205105602741241, "learning_rate": 2.6570675319407828e-05, "loss": 0.2075, "step": 28585 }, { "epoch": 1.5654602201171768, "grad_norm": 0.11828579008579254, "learning_rate": 2.6565605353883595e-05, "loss": 0.2192, "step": 28590 }, { "epoch": 1.5657339977002684, "grad_norm": 0.13305875658988953, "learning_rate": 2.6560535388359358e-05, "loss": 0.2106, "step": 28595 }, { "epoch": 1.5660077752833597, "grad_norm": 0.12647590041160583, "learning_rate": 2.6555465422835125e-05, "loss": 0.1989, "step": 28600 }, { "epoch": 1.5662815528664513, "grad_norm": 0.13439951837062836, "learning_rate": 2.6550395457310888e-05, "loss": 0.2067, "step": 28605 }, { "epoch": 1.5665553304495428, "grad_norm": 0.11322183161973953, "learning_rate": 2.6545325491786655e-05, "loss": 0.2017, "step": 28610 }, { "epoch": 1.5668291080326342, "grad_norm": 0.137697234749794, "learning_rate": 2.6540255526262425e-05, "loss": 0.2109, "step": 28615 }, { "epoch": 1.5671028856157259, "grad_norm": 0.13699769973754883, "learning_rate": 2.653518556073819e-05, "loss": 0.2003, "step": 28620 }, { "epoch": 1.5673766631988173, "grad_norm": 0.13846105337142944, "learning_rate": 2.6530115595213955e-05, "loss": 0.2108, "step": 28625 }, { "epoch": 1.5676504407819087, "grad_norm": 0.12993304431438446, "learning_rate": 2.652504562968972e-05, "loss": 0.1941, "step": 28630 }, { "epoch": 1.5679242183650004, "grad_norm": 0.12516942620277405, "learning_rate": 2.6519975664165485e-05, "loss": 0.1942, "step": 28635 }, { "epoch": 1.5681979959480916, "grad_norm": 0.13912323117256165, "learning_rate": 2.651490569864125e-05, "loss": 0.2092, "step": 28640 }, { "epoch": 1.5684717735311833, "grad_norm": 0.10452965646982193, "learning_rate": 2.6509835733117018e-05, "loss": 0.1978, "step": 28645 }, { "epoch": 1.5687455511142747, "grad_norm": 0.14676545560359955, "learning_rate": 2.650476576759278e-05, "loss": 0.2037, "step": 28650 }, { "epoch": 1.5690193286973662, "grad_norm": 0.12942110002040863, "learning_rate": 2.6499695802068548e-05, "loss": 0.2092, "step": 28655 }, { "epoch": 1.5692931062804578, "grad_norm": 0.11558451503515244, "learning_rate": 2.649462583654431e-05, "loss": 0.2009, "step": 28660 }, { "epoch": 1.5695668838635493, "grad_norm": 0.11688831448554993, "learning_rate": 2.6489555871020078e-05, "loss": 0.2123, "step": 28665 }, { "epoch": 1.5698406614466407, "grad_norm": 0.10948808491230011, "learning_rate": 2.648448590549584e-05, "loss": 0.2005, "step": 28670 }, { "epoch": 1.5701144390297324, "grad_norm": 0.12440789490938187, "learning_rate": 2.6479415939971608e-05, "loss": 0.2109, "step": 28675 }, { "epoch": 1.5703882166128238, "grad_norm": 0.12553851306438446, "learning_rate": 2.647434597444737e-05, "loss": 0.2112, "step": 28680 }, { "epoch": 1.5706619941959152, "grad_norm": 0.11464394629001617, "learning_rate": 2.6469276008923138e-05, "loss": 0.2053, "step": 28685 }, { "epoch": 1.570935771779007, "grad_norm": 0.12297840416431427, "learning_rate": 2.6464206043398905e-05, "loss": 0.2087, "step": 28690 }, { "epoch": 1.5712095493620981, "grad_norm": 0.11951407790184021, "learning_rate": 2.6459136077874675e-05, "loss": 0.202, "step": 28695 }, { "epoch": 1.5714833269451898, "grad_norm": 0.15404370427131653, "learning_rate": 2.6454066112350438e-05, "loss": 0.2096, "step": 28700 }, { "epoch": 1.5717571045282812, "grad_norm": 0.11132373660802841, "learning_rate": 2.6448996146826205e-05, "loss": 0.2009, "step": 28705 }, { "epoch": 1.5720308821113727, "grad_norm": 0.11734496802091599, "learning_rate": 2.644392618130197e-05, "loss": 0.202, "step": 28710 }, { "epoch": 1.5723046596944643, "grad_norm": 0.12313490360975266, "learning_rate": 2.6438856215777735e-05, "loss": 0.2066, "step": 28715 }, { "epoch": 1.5725784372775558, "grad_norm": 0.1129048764705658, "learning_rate": 2.64337862502535e-05, "loss": 0.1941, "step": 28720 }, { "epoch": 1.5728522148606472, "grad_norm": 0.115526482462883, "learning_rate": 2.6428716284729265e-05, "loss": 0.2074, "step": 28725 }, { "epoch": 1.5731259924437389, "grad_norm": 0.10988865792751312, "learning_rate": 2.642364631920503e-05, "loss": 0.1976, "step": 28730 }, { "epoch": 1.57339977002683, "grad_norm": 0.12264932692050934, "learning_rate": 2.6418576353680795e-05, "loss": 0.2052, "step": 28735 }, { "epoch": 1.5736735476099217, "grad_norm": 0.11569761484861374, "learning_rate": 2.641350638815656e-05, "loss": 0.2067, "step": 28740 }, { "epoch": 1.5739473251930132, "grad_norm": 0.12204580008983612, "learning_rate": 2.6408436422632325e-05, "loss": 0.2097, "step": 28745 }, { "epoch": 1.5742211027761046, "grad_norm": 0.11230684071779251, "learning_rate": 2.640336645710809e-05, "loss": 0.1962, "step": 28750 }, { "epoch": 1.5744948803591963, "grad_norm": 0.12552513182163239, "learning_rate": 2.639829649158386e-05, "loss": 0.2016, "step": 28755 }, { "epoch": 1.5747686579422877, "grad_norm": 0.12920361757278442, "learning_rate": 2.639322652605962e-05, "loss": 0.2074, "step": 28760 }, { "epoch": 1.5750424355253791, "grad_norm": 0.12677916884422302, "learning_rate": 2.638815656053539e-05, "loss": 0.1977, "step": 28765 }, { "epoch": 1.5753162131084708, "grad_norm": 0.12472964823246002, "learning_rate": 2.638308659501115e-05, "loss": 0.2033, "step": 28770 }, { "epoch": 1.575589990691562, "grad_norm": 0.13128705322742462, "learning_rate": 2.637801662948692e-05, "loss": 0.2117, "step": 28775 }, { "epoch": 1.5758637682746537, "grad_norm": 0.13578097522258759, "learning_rate": 2.637294666396269e-05, "loss": 0.2076, "step": 28780 }, { "epoch": 1.5761375458577451, "grad_norm": 0.12288129329681396, "learning_rate": 2.6367876698438455e-05, "loss": 0.2036, "step": 28785 }, { "epoch": 1.5764113234408366, "grad_norm": 0.12036088854074478, "learning_rate": 2.636280673291422e-05, "loss": 0.2107, "step": 28790 }, { "epoch": 1.5766851010239282, "grad_norm": 0.12592783570289612, "learning_rate": 2.6357736767389985e-05, "loss": 0.203, "step": 28795 }, { "epoch": 1.5769588786070197, "grad_norm": 0.11193270981311798, "learning_rate": 2.635266680186575e-05, "loss": 0.2015, "step": 28800 }, { "epoch": 1.577232656190111, "grad_norm": 0.11045022308826447, "learning_rate": 2.6347596836341515e-05, "loss": 0.2021, "step": 28805 }, { "epoch": 1.5775064337732028, "grad_norm": 0.13286398351192474, "learning_rate": 2.634252687081728e-05, "loss": 0.2072, "step": 28810 }, { "epoch": 1.577780211356294, "grad_norm": 0.1368749439716339, "learning_rate": 2.6337456905293045e-05, "loss": 0.1994, "step": 28815 }, { "epoch": 1.5780539889393856, "grad_norm": 0.13934271037578583, "learning_rate": 2.6332386939768812e-05, "loss": 0.2014, "step": 28820 }, { "epoch": 1.5783277665224773, "grad_norm": 0.10605074465274811, "learning_rate": 2.6327316974244575e-05, "loss": 0.2037, "step": 28825 }, { "epoch": 1.5786015441055685, "grad_norm": 0.12550295889377594, "learning_rate": 2.6322247008720342e-05, "loss": 0.2035, "step": 28830 }, { "epoch": 1.5788753216886602, "grad_norm": 0.12236464023590088, "learning_rate": 2.6317177043196105e-05, "loss": 0.2057, "step": 28835 }, { "epoch": 1.5791490992717516, "grad_norm": 0.11744323372840881, "learning_rate": 2.6312107077671872e-05, "loss": 0.2082, "step": 28840 }, { "epoch": 1.579422876854843, "grad_norm": 0.1369747370481491, "learning_rate": 2.6307037112147635e-05, "loss": 0.211, "step": 28845 }, { "epoch": 1.5796966544379347, "grad_norm": 0.11377023160457611, "learning_rate": 2.6301967146623402e-05, "loss": 0.1991, "step": 28850 }, { "epoch": 1.5799704320210262, "grad_norm": 0.12739096581935883, "learning_rate": 2.629689718109917e-05, "loss": 0.1987, "step": 28855 }, { "epoch": 1.5802442096041176, "grad_norm": 0.1174621656537056, "learning_rate": 2.629182721557494e-05, "loss": 0.2038, "step": 28860 }, { "epoch": 1.5805179871872093, "grad_norm": 0.11727944761514664, "learning_rate": 2.6286757250050702e-05, "loss": 0.1971, "step": 28865 }, { "epoch": 1.5807917647703005, "grad_norm": 0.15855523943901062, "learning_rate": 2.628168728452647e-05, "loss": 0.206, "step": 28870 }, { "epoch": 1.5810655423533921, "grad_norm": 0.15128400921821594, "learning_rate": 2.6276617319002232e-05, "loss": 0.2092, "step": 28875 }, { "epoch": 1.5813393199364836, "grad_norm": 0.1462433785200119, "learning_rate": 2.6271547353478e-05, "loss": 0.2076, "step": 28880 }, { "epoch": 1.581613097519575, "grad_norm": 0.14598549902439117, "learning_rate": 2.6266477387953765e-05, "loss": 0.1975, "step": 28885 }, { "epoch": 1.5818868751026667, "grad_norm": 0.11731383949518204, "learning_rate": 2.626140742242953e-05, "loss": 0.2016, "step": 28890 }, { "epoch": 1.5821606526857581, "grad_norm": 0.11691121757030487, "learning_rate": 2.6256337456905295e-05, "loss": 0.2009, "step": 28895 }, { "epoch": 1.5824344302688496, "grad_norm": 0.1142384260892868, "learning_rate": 2.625126749138106e-05, "loss": 0.2045, "step": 28900 }, { "epoch": 1.5827082078519412, "grad_norm": 0.1507006138563156, "learning_rate": 2.6246197525856825e-05, "loss": 0.2098, "step": 28905 }, { "epoch": 1.5829819854350324, "grad_norm": 0.1214873194694519, "learning_rate": 2.624112756033259e-05, "loss": 0.205, "step": 28910 }, { "epoch": 1.583255763018124, "grad_norm": 0.12654422223567963, "learning_rate": 2.6236057594808355e-05, "loss": 0.2039, "step": 28915 }, { "epoch": 1.5835295406012155, "grad_norm": 0.11711233109235764, "learning_rate": 2.6230987629284122e-05, "loss": 0.2101, "step": 28920 }, { "epoch": 1.583803318184307, "grad_norm": 0.1248982846736908, "learning_rate": 2.6225917663759885e-05, "loss": 0.2027, "step": 28925 }, { "epoch": 1.5840770957673986, "grad_norm": 0.12390182167291641, "learning_rate": 2.6220847698235652e-05, "loss": 0.2019, "step": 28930 }, { "epoch": 1.58435087335049, "grad_norm": 0.13161295652389526, "learning_rate": 2.6215777732711415e-05, "loss": 0.2069, "step": 28935 }, { "epoch": 1.5846246509335815, "grad_norm": 0.14372122287750244, "learning_rate": 2.6210707767187185e-05, "loss": 0.2099, "step": 28940 }, { "epoch": 1.5848984285166732, "grad_norm": 0.12089399993419647, "learning_rate": 2.6205637801662952e-05, "loss": 0.2037, "step": 28945 }, { "epoch": 1.5851722060997644, "grad_norm": 0.12203926593065262, "learning_rate": 2.620056783613872e-05, "loss": 0.2116, "step": 28950 }, { "epoch": 1.585445983682856, "grad_norm": 0.13007785379886627, "learning_rate": 2.6195497870614482e-05, "loss": 0.1989, "step": 28955 }, { "epoch": 1.5857197612659475, "grad_norm": 0.12822993099689484, "learning_rate": 2.619042790509025e-05, "loss": 0.1992, "step": 28960 }, { "epoch": 1.585993538849039, "grad_norm": 0.12762658298015594, "learning_rate": 2.6185357939566012e-05, "loss": 0.1941, "step": 28965 }, { "epoch": 1.5862673164321306, "grad_norm": 0.11113512516021729, "learning_rate": 2.618028797404178e-05, "loss": 0.2134, "step": 28970 }, { "epoch": 1.586541094015222, "grad_norm": 0.09990650415420532, "learning_rate": 2.6175218008517542e-05, "loss": 0.1919, "step": 28975 }, { "epoch": 1.5868148715983135, "grad_norm": 0.14432977139949799, "learning_rate": 2.617014804299331e-05, "loss": 0.2049, "step": 28980 }, { "epoch": 1.5870886491814051, "grad_norm": 0.123028464615345, "learning_rate": 2.6165078077469075e-05, "loss": 0.2084, "step": 28985 }, { "epoch": 1.5873624267644966, "grad_norm": 0.12577703595161438, "learning_rate": 2.616000811194484e-05, "loss": 0.1936, "step": 28990 }, { "epoch": 1.587636204347588, "grad_norm": 0.15022563934326172, "learning_rate": 2.6154938146420605e-05, "loss": 0.2067, "step": 28995 }, { "epoch": 1.5879099819306797, "grad_norm": 0.11302533745765686, "learning_rate": 2.614986818089637e-05, "loss": 0.1981, "step": 29000 }, { "epoch": 1.5881837595137709, "grad_norm": 0.14059647917747498, "learning_rate": 2.6144798215372135e-05, "loss": 0.2096, "step": 29005 }, { "epoch": 1.5884575370968625, "grad_norm": 0.14346766471862793, "learning_rate": 2.61397282498479e-05, "loss": 0.2089, "step": 29010 }, { "epoch": 1.588731314679954, "grad_norm": 0.1201031282544136, "learning_rate": 2.6134658284323665e-05, "loss": 0.2151, "step": 29015 }, { "epoch": 1.5890050922630454, "grad_norm": 0.1308470070362091, "learning_rate": 2.6129588318799435e-05, "loss": 0.2091, "step": 29020 }, { "epoch": 1.589278869846137, "grad_norm": 0.1265486776828766, "learning_rate": 2.6124518353275202e-05, "loss": 0.2021, "step": 29025 }, { "epoch": 1.5895526474292285, "grad_norm": 0.1214933767914772, "learning_rate": 2.6119448387750965e-05, "loss": 0.1955, "step": 29030 }, { "epoch": 1.58982642501232, "grad_norm": 0.1159641370177269, "learning_rate": 2.6114378422226732e-05, "loss": 0.2084, "step": 29035 }, { "epoch": 1.5901002025954116, "grad_norm": 0.12701387703418732, "learning_rate": 2.6109308456702495e-05, "loss": 0.2025, "step": 29040 }, { "epoch": 1.5903739801785028, "grad_norm": 0.10702298581600189, "learning_rate": 2.6104238491178262e-05, "loss": 0.2086, "step": 29045 }, { "epoch": 1.5906477577615945, "grad_norm": 0.1234036237001419, "learning_rate": 2.609916852565403e-05, "loss": 0.1912, "step": 29050 }, { "epoch": 1.590921535344686, "grad_norm": 0.12361554056406021, "learning_rate": 2.6094098560129792e-05, "loss": 0.2048, "step": 29055 }, { "epoch": 1.5911953129277774, "grad_norm": 0.12306571751832962, "learning_rate": 2.608902859460556e-05, "loss": 0.2065, "step": 29060 }, { "epoch": 1.591469090510869, "grad_norm": 0.12969449162483215, "learning_rate": 2.6083958629081322e-05, "loss": 0.2016, "step": 29065 }, { "epoch": 1.5917428680939605, "grad_norm": 0.11466347426176071, "learning_rate": 2.607888866355709e-05, "loss": 0.2027, "step": 29070 }, { "epoch": 1.592016645677052, "grad_norm": 0.1346864402294159, "learning_rate": 2.6073818698032852e-05, "loss": 0.2007, "step": 29075 }, { "epoch": 1.5922904232601436, "grad_norm": 0.13190501928329468, "learning_rate": 2.606874873250862e-05, "loss": 0.2101, "step": 29080 }, { "epoch": 1.5925642008432348, "grad_norm": 0.1474357694387436, "learning_rate": 2.6063678766984386e-05, "loss": 0.2058, "step": 29085 }, { "epoch": 1.5928379784263265, "grad_norm": 0.1318625658750534, "learning_rate": 2.605860880146015e-05, "loss": 0.2059, "step": 29090 }, { "epoch": 1.593111756009418, "grad_norm": 0.1385670155286789, "learning_rate": 2.6053538835935916e-05, "loss": 0.2005, "step": 29095 }, { "epoch": 1.5933855335925093, "grad_norm": 0.11155466735363007, "learning_rate": 2.604846887041168e-05, "loss": 0.2055, "step": 29100 }, { "epoch": 1.593659311175601, "grad_norm": 0.13939902186393738, "learning_rate": 2.604339890488745e-05, "loss": 0.2081, "step": 29105 }, { "epoch": 1.5939330887586924, "grad_norm": 0.1248268112540245, "learning_rate": 2.6038328939363216e-05, "loss": 0.1962, "step": 29110 }, { "epoch": 1.5942068663417839, "grad_norm": 0.1314091682434082, "learning_rate": 2.6033258973838982e-05, "loss": 0.1982, "step": 29115 }, { "epoch": 1.5944806439248755, "grad_norm": 0.12623222172260284, "learning_rate": 2.6028189008314746e-05, "loss": 0.2077, "step": 29120 }, { "epoch": 1.594754421507967, "grad_norm": 0.1169525608420372, "learning_rate": 2.6023119042790512e-05, "loss": 0.1955, "step": 29125 }, { "epoch": 1.5950281990910584, "grad_norm": 0.12995503842830658, "learning_rate": 2.6018049077266276e-05, "loss": 0.2, "step": 29130 }, { "epoch": 1.59530197667415, "grad_norm": 0.12611691653728485, "learning_rate": 2.6012979111742042e-05, "loss": 0.2093, "step": 29135 }, { "epoch": 1.5955757542572413, "grad_norm": 0.13958631455898285, "learning_rate": 2.6007909146217806e-05, "loss": 0.1951, "step": 29140 }, { "epoch": 1.595849531840333, "grad_norm": 0.11901888996362686, "learning_rate": 2.6002839180693572e-05, "loss": 0.1971, "step": 29145 }, { "epoch": 1.5961233094234244, "grad_norm": 0.11817920953035355, "learning_rate": 2.599776921516934e-05, "loss": 0.2065, "step": 29150 }, { "epoch": 1.5963970870065158, "grad_norm": 0.14015847444534302, "learning_rate": 2.5992699249645102e-05, "loss": 0.2069, "step": 29155 }, { "epoch": 1.5966708645896075, "grad_norm": 0.11623433977365494, "learning_rate": 2.598762928412087e-05, "loss": 0.2072, "step": 29160 }, { "epoch": 1.596944642172699, "grad_norm": 0.11946442723274231, "learning_rate": 2.5982559318596632e-05, "loss": 0.2024, "step": 29165 }, { "epoch": 1.5972184197557904, "grad_norm": 0.11953273415565491, "learning_rate": 2.59774893530724e-05, "loss": 0.2007, "step": 29170 }, { "epoch": 1.597492197338882, "grad_norm": 0.11698860675096512, "learning_rate": 2.5972419387548162e-05, "loss": 0.1968, "step": 29175 }, { "epoch": 1.5977659749219733, "grad_norm": 0.11893469095230103, "learning_rate": 2.596734942202393e-05, "loss": 0.1975, "step": 29180 }, { "epoch": 1.598039752505065, "grad_norm": 0.10906344652175903, "learning_rate": 2.59622794564997e-05, "loss": 0.2012, "step": 29185 }, { "epoch": 1.5983135300881564, "grad_norm": 0.1260557323694229, "learning_rate": 2.5957209490975466e-05, "loss": 0.2046, "step": 29190 }, { "epoch": 1.5985873076712478, "grad_norm": 0.1346581131219864, "learning_rate": 2.595213952545123e-05, "loss": 0.1999, "step": 29195 }, { "epoch": 1.5988610852543395, "grad_norm": 0.1727609932422638, "learning_rate": 2.5947069559926996e-05, "loss": 0.2033, "step": 29200 }, { "epoch": 1.599134862837431, "grad_norm": 0.16110540926456451, "learning_rate": 2.594199959440276e-05, "loss": 0.2109, "step": 29205 }, { "epoch": 1.5994086404205223, "grad_norm": 0.1276383101940155, "learning_rate": 2.5936929628878526e-05, "loss": 0.1901, "step": 29210 }, { "epoch": 1.599682418003614, "grad_norm": 0.1312071532011032, "learning_rate": 2.5931859663354292e-05, "loss": 0.208, "step": 29215 }, { "epoch": 1.5999561955867052, "grad_norm": 0.14379145205020905, "learning_rate": 2.5926789697830056e-05, "loss": 0.2068, "step": 29220 }, { "epoch": 1.6002299731697969, "grad_norm": 0.1107383519411087, "learning_rate": 2.5921719732305822e-05, "loss": 0.2079, "step": 29225 }, { "epoch": 1.6005037507528883, "grad_norm": 0.11365082859992981, "learning_rate": 2.5916649766781586e-05, "loss": 0.1975, "step": 29230 }, { "epoch": 1.6007775283359797, "grad_norm": 0.11917124688625336, "learning_rate": 2.5911579801257352e-05, "loss": 0.2019, "step": 29235 }, { "epoch": 1.6010513059190714, "grad_norm": 0.12500295042991638, "learning_rate": 2.5906509835733116e-05, "loss": 0.2117, "step": 29240 }, { "epoch": 1.6013250835021628, "grad_norm": 0.13011567294597626, "learning_rate": 2.5901439870208882e-05, "loss": 0.2008, "step": 29245 }, { "epoch": 1.6015988610852543, "grad_norm": 0.12559150159358978, "learning_rate": 2.589636990468465e-05, "loss": 0.1996, "step": 29250 }, { "epoch": 1.601872638668346, "grad_norm": 0.13204248249530792, "learning_rate": 2.5891299939160412e-05, "loss": 0.2038, "step": 29255 }, { "epoch": 1.6021464162514372, "grad_norm": 0.13753020763397217, "learning_rate": 2.588622997363618e-05, "loss": 0.195, "step": 29260 }, { "epoch": 1.6024201938345288, "grad_norm": 0.11821579188108444, "learning_rate": 2.588116000811195e-05, "loss": 0.2097, "step": 29265 }, { "epoch": 1.6026939714176205, "grad_norm": 0.15449118614196777, "learning_rate": 2.5876090042587713e-05, "loss": 0.2048, "step": 29270 }, { "epoch": 1.6029677490007117, "grad_norm": 0.11357513815164566, "learning_rate": 2.587102007706348e-05, "loss": 0.1949, "step": 29275 }, { "epoch": 1.6032415265838034, "grad_norm": 0.11762606352567673, "learning_rate": 2.5865950111539246e-05, "loss": 0.1978, "step": 29280 }, { "epoch": 1.6035153041668948, "grad_norm": 0.11917363852262497, "learning_rate": 2.586088014601501e-05, "loss": 0.2085, "step": 29285 }, { "epoch": 1.6037890817499862, "grad_norm": 0.1287434995174408, "learning_rate": 2.5855810180490776e-05, "loss": 0.1947, "step": 29290 }, { "epoch": 1.604062859333078, "grad_norm": 0.11472935974597931, "learning_rate": 2.585074021496654e-05, "loss": 0.2027, "step": 29295 }, { "epoch": 1.6043366369161693, "grad_norm": 0.1329006552696228, "learning_rate": 2.5845670249442306e-05, "loss": 0.1983, "step": 29300 }, { "epoch": 1.6046104144992608, "grad_norm": 0.12146413326263428, "learning_rate": 2.584060028391807e-05, "loss": 0.1988, "step": 29305 }, { "epoch": 1.6048841920823524, "grad_norm": 0.11633674800395966, "learning_rate": 2.5835530318393836e-05, "loss": 0.2001, "step": 29310 }, { "epoch": 1.6051579696654437, "grad_norm": 0.1108885109424591, "learning_rate": 2.5830460352869603e-05, "loss": 0.2034, "step": 29315 }, { "epoch": 1.6054317472485353, "grad_norm": 0.11948979645967484, "learning_rate": 2.5825390387345366e-05, "loss": 0.1911, "step": 29320 }, { "epoch": 1.6057055248316268, "grad_norm": 0.14554725587368011, "learning_rate": 2.5820320421821133e-05, "loss": 0.2129, "step": 29325 }, { "epoch": 1.6059793024147182, "grad_norm": 0.13386718928813934, "learning_rate": 2.5815250456296896e-05, "loss": 0.2069, "step": 29330 }, { "epoch": 1.6062530799978099, "grad_norm": 0.12221649289131165, "learning_rate": 2.5810180490772663e-05, "loss": 0.2104, "step": 29335 }, { "epoch": 1.6065268575809013, "grad_norm": 0.15225951373577118, "learning_rate": 2.5805110525248426e-05, "loss": 0.2029, "step": 29340 }, { "epoch": 1.6068006351639927, "grad_norm": 0.11152441054582596, "learning_rate": 2.58000405597242e-05, "loss": 0.195, "step": 29345 }, { "epoch": 1.6070744127470844, "grad_norm": 0.12391716986894608, "learning_rate": 2.5794970594199963e-05, "loss": 0.2043, "step": 29350 }, { "epoch": 1.6073481903301756, "grad_norm": 0.1383650004863739, "learning_rate": 2.578990062867573e-05, "loss": 0.2014, "step": 29355 }, { "epoch": 1.6076219679132673, "grad_norm": 0.14471381902694702, "learning_rate": 2.5784830663151493e-05, "loss": 0.215, "step": 29360 }, { "epoch": 1.6078957454963587, "grad_norm": 0.11795841157436371, "learning_rate": 2.577976069762726e-05, "loss": 0.1963, "step": 29365 }, { "epoch": 1.6081695230794502, "grad_norm": 0.11890797317028046, "learning_rate": 2.5774690732103023e-05, "loss": 0.196, "step": 29370 }, { "epoch": 1.6084433006625418, "grad_norm": 0.11994341015815735, "learning_rate": 2.576962076657879e-05, "loss": 0.2095, "step": 29375 }, { "epoch": 1.6087170782456333, "grad_norm": 0.1423962414264679, "learning_rate": 2.5764550801054556e-05, "loss": 0.2107, "step": 29380 }, { "epoch": 1.6089908558287247, "grad_norm": 0.12666131556034088, "learning_rate": 2.575948083553032e-05, "loss": 0.2028, "step": 29385 }, { "epoch": 1.6092646334118164, "grad_norm": 0.1111801415681839, "learning_rate": 2.5754410870006086e-05, "loss": 0.2067, "step": 29390 }, { "epoch": 1.6095384109949076, "grad_norm": 0.11975301057100296, "learning_rate": 2.574934090448185e-05, "loss": 0.1987, "step": 29395 }, { "epoch": 1.6098121885779992, "grad_norm": 0.11130042374134064, "learning_rate": 2.5744270938957616e-05, "loss": 0.2023, "step": 29400 }, { "epoch": 1.6100859661610907, "grad_norm": 0.11148837208747864, "learning_rate": 2.573920097343338e-05, "loss": 0.2123, "step": 29405 }, { "epoch": 1.6103597437441821, "grad_norm": 0.11927720904350281, "learning_rate": 2.5734131007909146e-05, "loss": 0.2008, "step": 29410 }, { "epoch": 1.6106335213272738, "grad_norm": 0.11578609049320221, "learning_rate": 2.572906104238491e-05, "loss": 0.1944, "step": 29415 }, { "epoch": 1.6109072989103652, "grad_norm": 0.11748117953538895, "learning_rate": 2.5723991076860676e-05, "loss": 0.197, "step": 29420 }, { "epoch": 1.6111810764934567, "grad_norm": 0.11097155511379242, "learning_rate": 2.5718921111336443e-05, "loss": 0.2051, "step": 29425 }, { "epoch": 1.6114548540765483, "grad_norm": 0.11254400759935379, "learning_rate": 2.5713851145812213e-05, "loss": 0.1999, "step": 29430 }, { "epoch": 1.6117286316596398, "grad_norm": 0.11710719764232635, "learning_rate": 2.5708781180287976e-05, "loss": 0.1999, "step": 29435 }, { "epoch": 1.6120024092427312, "grad_norm": 0.12483903020620346, "learning_rate": 2.5703711214763743e-05, "loss": 0.198, "step": 29440 }, { "epoch": 1.6122761868258229, "grad_norm": 0.11160100996494293, "learning_rate": 2.569864124923951e-05, "loss": 0.2, "step": 29445 }, { "epoch": 1.612549964408914, "grad_norm": 0.12007429450750351, "learning_rate": 2.5693571283715273e-05, "loss": 0.2036, "step": 29450 }, { "epoch": 1.6128237419920057, "grad_norm": 0.12834212183952332, "learning_rate": 2.568850131819104e-05, "loss": 0.2019, "step": 29455 }, { "epoch": 1.6130975195750972, "grad_norm": 0.11919274181127548, "learning_rate": 2.5683431352666803e-05, "loss": 0.1975, "step": 29460 }, { "epoch": 1.6133712971581886, "grad_norm": 0.12774257361888885, "learning_rate": 2.567836138714257e-05, "loss": 0.1981, "step": 29465 }, { "epoch": 1.6136450747412803, "grad_norm": 0.13099433481693268, "learning_rate": 2.5673291421618333e-05, "loss": 0.2047, "step": 29470 }, { "epoch": 1.6139188523243717, "grad_norm": 0.12196052819490433, "learning_rate": 2.56682214560941e-05, "loss": 0.199, "step": 29475 }, { "epoch": 1.6141926299074632, "grad_norm": 0.11865831911563873, "learning_rate": 2.5663151490569863e-05, "loss": 0.196, "step": 29480 }, { "epoch": 1.6144664074905548, "grad_norm": 0.12946400046348572, "learning_rate": 2.565808152504563e-05, "loss": 0.1981, "step": 29485 }, { "epoch": 1.614740185073646, "grad_norm": 0.11342600733041763, "learning_rate": 2.5653011559521396e-05, "loss": 0.2061, "step": 29490 }, { "epoch": 1.6150139626567377, "grad_norm": 0.12706358730793, "learning_rate": 2.564794159399716e-05, "loss": 0.1989, "step": 29495 }, { "epoch": 1.6152877402398291, "grad_norm": 0.11243338137865067, "learning_rate": 2.5642871628472926e-05, "loss": 0.2054, "step": 29500 }, { "epoch": 1.6155615178229206, "grad_norm": 0.1388944685459137, "learning_rate": 2.563780166294869e-05, "loss": 0.1928, "step": 29505 }, { "epoch": 1.6158352954060122, "grad_norm": 0.14648351073265076, "learning_rate": 2.5632731697424463e-05, "loss": 0.1951, "step": 29510 }, { "epoch": 1.6161090729891037, "grad_norm": 0.11282608658075333, "learning_rate": 2.5627661731900226e-05, "loss": 0.2018, "step": 29515 }, { "epoch": 1.616382850572195, "grad_norm": 0.11663763970136642, "learning_rate": 2.5622591766375993e-05, "loss": 0.1983, "step": 29520 }, { "epoch": 1.6166566281552868, "grad_norm": 0.1256130039691925, "learning_rate": 2.5617521800851756e-05, "loss": 0.1997, "step": 29525 }, { "epoch": 1.616930405738378, "grad_norm": 0.11772053688764572, "learning_rate": 2.5612451835327523e-05, "loss": 0.2058, "step": 29530 }, { "epoch": 1.6172041833214696, "grad_norm": 0.12766820192337036, "learning_rate": 2.5607381869803286e-05, "loss": 0.2091, "step": 29535 }, { "epoch": 1.617477960904561, "grad_norm": 0.1344507336616516, "learning_rate": 2.5602311904279053e-05, "loss": 0.2058, "step": 29540 }, { "epoch": 1.6177517384876525, "grad_norm": 0.11302148550748825, "learning_rate": 2.5597241938754816e-05, "loss": 0.1971, "step": 29545 }, { "epoch": 1.6180255160707442, "grad_norm": 0.13198576867580414, "learning_rate": 2.5592171973230583e-05, "loss": 0.2048, "step": 29550 }, { "epoch": 1.6182992936538356, "grad_norm": 0.12556912004947662, "learning_rate": 2.558710200770635e-05, "loss": 0.1948, "step": 29555 }, { "epoch": 1.618573071236927, "grad_norm": 0.11974451690912247, "learning_rate": 2.5582032042182113e-05, "loss": 0.2018, "step": 29560 }, { "epoch": 1.6188468488200187, "grad_norm": 0.12161435931921005, "learning_rate": 2.557696207665788e-05, "loss": 0.2083, "step": 29565 }, { "epoch": 1.6191206264031102, "grad_norm": 0.11396455019712448, "learning_rate": 2.5571892111133643e-05, "loss": 0.1968, "step": 29570 }, { "epoch": 1.6193944039862016, "grad_norm": 0.1383390724658966, "learning_rate": 2.556682214560941e-05, "loss": 0.2013, "step": 29575 }, { "epoch": 1.6196681815692933, "grad_norm": 0.12269481271505356, "learning_rate": 2.5561752180085173e-05, "loss": 0.203, "step": 29580 }, { "epoch": 1.6199419591523845, "grad_norm": 0.11889971047639847, "learning_rate": 2.555668221456094e-05, "loss": 0.2016, "step": 29585 }, { "epoch": 1.6202157367354761, "grad_norm": 0.12523670494556427, "learning_rate": 2.555161224903671e-05, "loss": 0.195, "step": 29590 }, { "epoch": 1.6204895143185676, "grad_norm": 0.12758220732212067, "learning_rate": 2.5546542283512476e-05, "loss": 0.2103, "step": 29595 }, { "epoch": 1.620763291901659, "grad_norm": 0.11219397187232971, "learning_rate": 2.554147231798824e-05, "loss": 0.1991, "step": 29600 }, { "epoch": 1.6210370694847507, "grad_norm": 0.13537254929542542, "learning_rate": 2.5536402352464006e-05, "loss": 0.2058, "step": 29605 }, { "epoch": 1.6213108470678421, "grad_norm": 0.12930716574192047, "learning_rate": 2.553133238693977e-05, "loss": 0.2049, "step": 29610 }, { "epoch": 1.6215846246509336, "grad_norm": 0.1365649700164795, "learning_rate": 2.5526262421415536e-05, "loss": 0.2087, "step": 29615 }, { "epoch": 1.6218584022340252, "grad_norm": 0.11559883505105972, "learning_rate": 2.5521192455891303e-05, "loss": 0.2027, "step": 29620 }, { "epoch": 1.6221321798171164, "grad_norm": 0.11631831526756287, "learning_rate": 2.5516122490367066e-05, "loss": 0.1979, "step": 29625 }, { "epoch": 1.622405957400208, "grad_norm": 0.11388019472360611, "learning_rate": 2.5511052524842833e-05, "loss": 0.2089, "step": 29630 }, { "epoch": 1.6226797349832995, "grad_norm": 0.14747245609760284, "learning_rate": 2.5505982559318596e-05, "loss": 0.2132, "step": 29635 }, { "epoch": 1.622953512566391, "grad_norm": 0.10881482064723969, "learning_rate": 2.5500912593794363e-05, "loss": 0.2084, "step": 29640 }, { "epoch": 1.6232272901494826, "grad_norm": 0.11099739372730255, "learning_rate": 2.5495842628270126e-05, "loss": 0.1972, "step": 29645 }, { "epoch": 1.623501067732574, "grad_norm": 0.13105908036231995, "learning_rate": 2.5490772662745893e-05, "loss": 0.1998, "step": 29650 }, { "epoch": 1.6237748453156655, "grad_norm": 0.12601348757743835, "learning_rate": 2.548570269722166e-05, "loss": 0.2001, "step": 29655 }, { "epoch": 1.6240486228987572, "grad_norm": 0.1208721473813057, "learning_rate": 2.5480632731697423e-05, "loss": 0.1992, "step": 29660 }, { "epoch": 1.6243224004818484, "grad_norm": 0.11623634397983551, "learning_rate": 2.547556276617319e-05, "loss": 0.1956, "step": 29665 }, { "epoch": 1.62459617806494, "grad_norm": 0.13074152171611786, "learning_rate": 2.5470492800648953e-05, "loss": 0.2072, "step": 29670 }, { "epoch": 1.6248699556480315, "grad_norm": 0.12835481762886047, "learning_rate": 2.5465422835124723e-05, "loss": 0.2027, "step": 29675 }, { "epoch": 1.625143733231123, "grad_norm": 0.1452922821044922, "learning_rate": 2.546035286960049e-05, "loss": 0.1991, "step": 29680 }, { "epoch": 1.6254175108142146, "grad_norm": 0.1146480068564415, "learning_rate": 2.5455282904076257e-05, "loss": 0.1892, "step": 29685 }, { "epoch": 1.625691288397306, "grad_norm": 0.13282807171344757, "learning_rate": 2.545021293855202e-05, "loss": 0.2053, "step": 29690 }, { "epoch": 1.6259650659803975, "grad_norm": 0.13250494003295898, "learning_rate": 2.5445142973027787e-05, "loss": 0.2008, "step": 29695 }, { "epoch": 1.6262388435634891, "grad_norm": 0.1494683027267456, "learning_rate": 2.544007300750355e-05, "loss": 0.2037, "step": 29700 }, { "epoch": 1.6265126211465806, "grad_norm": 0.11539959162473679, "learning_rate": 2.5435003041979317e-05, "loss": 0.1992, "step": 29705 }, { "epoch": 1.626786398729672, "grad_norm": 0.11852724105119705, "learning_rate": 2.542993307645508e-05, "loss": 0.2073, "step": 29710 }, { "epoch": 1.6270601763127637, "grad_norm": 0.1201196014881134, "learning_rate": 2.5424863110930847e-05, "loss": 0.197, "step": 29715 }, { "epoch": 1.627333953895855, "grad_norm": 0.12284013628959656, "learning_rate": 2.5419793145406613e-05, "loss": 0.2005, "step": 29720 }, { "epoch": 1.6276077314789466, "grad_norm": 0.10273899137973785, "learning_rate": 2.5414723179882377e-05, "loss": 0.199, "step": 29725 }, { "epoch": 1.627881509062038, "grad_norm": 0.13619567453861237, "learning_rate": 2.5409653214358143e-05, "loss": 0.2056, "step": 29730 }, { "epoch": 1.6281552866451294, "grad_norm": 0.12456243485212326, "learning_rate": 2.5404583248833907e-05, "loss": 0.2002, "step": 29735 }, { "epoch": 1.628429064228221, "grad_norm": 0.14610441029071808, "learning_rate": 2.5399513283309673e-05, "loss": 0.2045, "step": 29740 }, { "epoch": 1.6287028418113125, "grad_norm": 0.1195518895983696, "learning_rate": 2.5394443317785437e-05, "loss": 0.1972, "step": 29745 }, { "epoch": 1.628976619394404, "grad_norm": 0.11873737722635269, "learning_rate": 2.5389373352261203e-05, "loss": 0.1948, "step": 29750 }, { "epoch": 1.6292503969774956, "grad_norm": 0.1280512511730194, "learning_rate": 2.5384303386736973e-05, "loss": 0.2034, "step": 29755 }, { "epoch": 1.6295241745605868, "grad_norm": 0.10949084907770157, "learning_rate": 2.537923342121274e-05, "loss": 0.2, "step": 29760 }, { "epoch": 1.6297979521436785, "grad_norm": 0.12086904048919678, "learning_rate": 2.5374163455688503e-05, "loss": 0.1984, "step": 29765 }, { "epoch": 1.63007172972677, "grad_norm": 0.12840384244918823, "learning_rate": 2.536909349016427e-05, "loss": 0.2088, "step": 29770 }, { "epoch": 1.6303455073098614, "grad_norm": 0.12498064339160919, "learning_rate": 2.5364023524640033e-05, "loss": 0.2019, "step": 29775 }, { "epoch": 1.630619284892953, "grad_norm": 0.12830683588981628, "learning_rate": 2.53589535591158e-05, "loss": 0.2018, "step": 29780 }, { "epoch": 1.6308930624760445, "grad_norm": 0.12876658141613007, "learning_rate": 2.5353883593591567e-05, "loss": 0.2003, "step": 29785 }, { "epoch": 1.631166840059136, "grad_norm": 0.14924457669258118, "learning_rate": 2.534881362806733e-05, "loss": 0.206, "step": 29790 }, { "epoch": 1.6314406176422276, "grad_norm": 0.11908241361379623, "learning_rate": 2.5343743662543097e-05, "loss": 0.2121, "step": 29795 }, { "epoch": 1.6317143952253188, "grad_norm": 0.14889656007289886, "learning_rate": 2.533867369701886e-05, "loss": 0.2086, "step": 29800 }, { "epoch": 1.6319881728084105, "grad_norm": 0.12408046424388885, "learning_rate": 2.5333603731494627e-05, "loss": 0.1936, "step": 29805 }, { "epoch": 1.632261950391502, "grad_norm": 0.12947794795036316, "learning_rate": 2.532853376597039e-05, "loss": 0.2051, "step": 29810 }, { "epoch": 1.6325357279745933, "grad_norm": 0.11829427629709244, "learning_rate": 2.5323463800446157e-05, "loss": 0.2011, "step": 29815 }, { "epoch": 1.632809505557685, "grad_norm": 0.14011338353157043, "learning_rate": 2.5318393834921923e-05, "loss": 0.2084, "step": 29820 }, { "epoch": 1.6330832831407764, "grad_norm": 0.11530911177396774, "learning_rate": 2.5313323869397687e-05, "loss": 0.1993, "step": 29825 }, { "epoch": 1.6333570607238679, "grad_norm": 0.11357110738754272, "learning_rate": 2.5308253903873453e-05, "loss": 0.2032, "step": 29830 }, { "epoch": 1.6336308383069595, "grad_norm": 0.12323155999183655, "learning_rate": 2.5303183938349223e-05, "loss": 0.2132, "step": 29835 }, { "epoch": 1.6339046158900508, "grad_norm": 0.10565587133169174, "learning_rate": 2.5298113972824987e-05, "loss": 0.2022, "step": 29840 }, { "epoch": 1.6341783934731424, "grad_norm": 0.12304652482271194, "learning_rate": 2.5293044007300753e-05, "loss": 0.195, "step": 29845 }, { "epoch": 1.6344521710562339, "grad_norm": 0.11720792949199677, "learning_rate": 2.528797404177652e-05, "loss": 0.198, "step": 29850 }, { "epoch": 1.6347259486393253, "grad_norm": 0.1090419813990593, "learning_rate": 2.5282904076252283e-05, "loss": 0.1992, "step": 29855 }, { "epoch": 1.634999726222417, "grad_norm": 0.14349280297756195, "learning_rate": 2.527783411072805e-05, "loss": 0.2069, "step": 29860 }, { "epoch": 1.6352735038055084, "grad_norm": 0.11456779390573502, "learning_rate": 2.5272764145203813e-05, "loss": 0.201, "step": 29865 }, { "epoch": 1.6355472813885998, "grad_norm": 0.12147238850593567, "learning_rate": 2.526769417967958e-05, "loss": 0.2003, "step": 29870 }, { "epoch": 1.6358210589716915, "grad_norm": 0.13531118631362915, "learning_rate": 2.5262624214155343e-05, "loss": 0.2087, "step": 29875 }, { "epoch": 1.636094836554783, "grad_norm": 0.11464204639196396, "learning_rate": 2.525755424863111e-05, "loss": 0.2119, "step": 29880 }, { "epoch": 1.6363686141378744, "grad_norm": 0.13112999498844147, "learning_rate": 2.5252484283106877e-05, "loss": 0.1974, "step": 29885 }, { "epoch": 1.636642391720966, "grad_norm": 0.12183082848787308, "learning_rate": 2.524741431758264e-05, "loss": 0.2066, "step": 29890 }, { "epoch": 1.6369161693040573, "grad_norm": 0.1208491399884224, "learning_rate": 2.5242344352058407e-05, "loss": 0.1949, "step": 29895 }, { "epoch": 1.637189946887149, "grad_norm": 0.11406205594539642, "learning_rate": 2.523727438653417e-05, "loss": 0.2017, "step": 29900 }, { "epoch": 1.6374637244702404, "grad_norm": 0.12455243617296219, "learning_rate": 2.5232204421009937e-05, "loss": 0.2019, "step": 29905 }, { "epoch": 1.6377375020533318, "grad_norm": 0.10873401165008545, "learning_rate": 2.52271344554857e-05, "loss": 0.2047, "step": 29910 }, { "epoch": 1.6380112796364235, "grad_norm": 0.1387764811515808, "learning_rate": 2.5222064489961474e-05, "loss": 0.2108, "step": 29915 }, { "epoch": 1.638285057219515, "grad_norm": 0.12301935255527496, "learning_rate": 2.5216994524437237e-05, "loss": 0.2022, "step": 29920 }, { "epoch": 1.6385588348026063, "grad_norm": 0.12383924424648285, "learning_rate": 2.5211924558913004e-05, "loss": 0.1995, "step": 29925 }, { "epoch": 1.638832612385698, "grad_norm": 0.11061706393957138, "learning_rate": 2.5206854593388767e-05, "loss": 0.1914, "step": 29930 }, { "epoch": 1.6391063899687892, "grad_norm": 0.12507180869579315, "learning_rate": 2.5201784627864534e-05, "loss": 0.211, "step": 29935 }, { "epoch": 1.6393801675518809, "grad_norm": 0.15146271884441376, "learning_rate": 2.5196714662340297e-05, "loss": 0.2034, "step": 29940 }, { "epoch": 1.6396539451349723, "grad_norm": 0.11252517253160477, "learning_rate": 2.5191644696816064e-05, "loss": 0.1974, "step": 29945 }, { "epoch": 1.6399277227180638, "grad_norm": 0.1323241889476776, "learning_rate": 2.518657473129183e-05, "loss": 0.2018, "step": 29950 }, { "epoch": 1.6402015003011554, "grad_norm": 0.10974282026290894, "learning_rate": 2.5181504765767594e-05, "loss": 0.2104, "step": 29955 }, { "epoch": 1.6404752778842469, "grad_norm": 0.10801789909601212, "learning_rate": 2.517643480024336e-05, "loss": 0.1981, "step": 29960 }, { "epoch": 1.6407490554673383, "grad_norm": 0.11916490644216537, "learning_rate": 2.5171364834719124e-05, "loss": 0.2049, "step": 29965 }, { "epoch": 1.64102283305043, "grad_norm": 0.13264815509319305, "learning_rate": 2.516629486919489e-05, "loss": 0.2002, "step": 29970 }, { "epoch": 1.6412966106335212, "grad_norm": 0.11823397874832153, "learning_rate": 2.5161224903670654e-05, "loss": 0.1997, "step": 29975 }, { "epoch": 1.6415703882166128, "grad_norm": 0.11239060014486313, "learning_rate": 2.515615493814642e-05, "loss": 0.198, "step": 29980 }, { "epoch": 1.6418441657997043, "grad_norm": 0.12025441229343414, "learning_rate": 2.5151084972622187e-05, "loss": 0.1983, "step": 29985 }, { "epoch": 1.6421179433827957, "grad_norm": 0.12711144983768463, "learning_rate": 2.514601500709795e-05, "loss": 0.1985, "step": 29990 }, { "epoch": 1.6423917209658874, "grad_norm": 0.11323661357164383, "learning_rate": 2.5140945041573717e-05, "loss": 0.2007, "step": 29995 }, { "epoch": 1.6426654985489788, "grad_norm": 0.11635926365852356, "learning_rate": 2.5135875076049487e-05, "loss": 0.1973, "step": 30000 }, { "epoch": 1.6429392761320702, "grad_norm": 0.14465506374835968, "learning_rate": 2.513080511052525e-05, "loss": 0.2087, "step": 30005 }, { "epoch": 1.643213053715162, "grad_norm": 0.10850229859352112, "learning_rate": 2.5125735145001017e-05, "loss": 0.2109, "step": 30010 }, { "epoch": 1.6434868312982533, "grad_norm": 0.1163269579410553, "learning_rate": 2.5120665179476784e-05, "loss": 0.1996, "step": 30015 }, { "epoch": 1.6437606088813448, "grad_norm": 0.1169450506567955, "learning_rate": 2.5115595213952547e-05, "loss": 0.1972, "step": 30020 }, { "epoch": 1.6440343864644364, "grad_norm": 0.12620700895786285, "learning_rate": 2.5110525248428314e-05, "loss": 0.2034, "step": 30025 }, { "epoch": 1.6443081640475277, "grad_norm": 0.11126328259706497, "learning_rate": 2.5105455282904077e-05, "loss": 0.1962, "step": 30030 }, { "epoch": 1.6445819416306193, "grad_norm": 0.13269272446632385, "learning_rate": 2.5100385317379844e-05, "loss": 0.2137, "step": 30035 }, { "epoch": 1.6448557192137108, "grad_norm": 0.11717239767313004, "learning_rate": 2.5095315351855607e-05, "loss": 0.2059, "step": 30040 }, { "epoch": 1.6451294967968022, "grad_norm": 0.12242245674133301, "learning_rate": 2.5090245386331374e-05, "loss": 0.2013, "step": 30045 }, { "epoch": 1.6454032743798939, "grad_norm": 0.13065730035305023, "learning_rate": 2.508517542080714e-05, "loss": 0.2027, "step": 30050 }, { "epoch": 1.6456770519629853, "grad_norm": 0.1329009234905243, "learning_rate": 2.5080105455282904e-05, "loss": 0.2072, "step": 30055 }, { "epoch": 1.6459508295460767, "grad_norm": 0.12138880044221878, "learning_rate": 2.507503548975867e-05, "loss": 0.1973, "step": 30060 }, { "epoch": 1.6462246071291684, "grad_norm": 0.12876340746879578, "learning_rate": 2.5069965524234434e-05, "loss": 0.1999, "step": 30065 }, { "epoch": 1.6464983847122596, "grad_norm": 0.10713759064674377, "learning_rate": 2.50648955587102e-05, "loss": 0.2072, "step": 30070 }, { "epoch": 1.6467721622953513, "grad_norm": 0.11480559408664703, "learning_rate": 2.5059825593185964e-05, "loss": 0.1982, "step": 30075 }, { "epoch": 1.6470459398784427, "grad_norm": 0.12363792210817337, "learning_rate": 2.5054755627661737e-05, "loss": 0.2172, "step": 30080 }, { "epoch": 1.6473197174615342, "grad_norm": 0.11678621172904968, "learning_rate": 2.50496856621375e-05, "loss": 0.1984, "step": 30085 }, { "epoch": 1.6475934950446258, "grad_norm": 0.12026617676019669, "learning_rate": 2.5044615696613267e-05, "loss": 0.2093, "step": 30090 }, { "epoch": 1.6478672726277173, "grad_norm": 0.11939563602209091, "learning_rate": 2.503954573108903e-05, "loss": 0.1984, "step": 30095 }, { "epoch": 1.6481410502108087, "grad_norm": 0.17417126893997192, "learning_rate": 2.5034475765564797e-05, "loss": 0.1984, "step": 30100 }, { "epoch": 1.6484148277939004, "grad_norm": 0.12450703233480453, "learning_rate": 2.502940580004056e-05, "loss": 0.204, "step": 30105 }, { "epoch": 1.6486886053769916, "grad_norm": 0.13888385891914368, "learning_rate": 2.5024335834516327e-05, "loss": 0.2031, "step": 30110 }, { "epoch": 1.6489623829600832, "grad_norm": 0.1314629763364792, "learning_rate": 2.5019265868992094e-05, "loss": 0.2016, "step": 30115 }, { "epoch": 1.6492361605431747, "grad_norm": 0.11683863401412964, "learning_rate": 2.5014195903467857e-05, "loss": 0.2008, "step": 30120 }, { "epoch": 1.6495099381262661, "grad_norm": 0.140318363904953, "learning_rate": 2.5009125937943624e-05, "loss": 0.205, "step": 30125 }, { "epoch": 1.6497837157093578, "grad_norm": 0.1446918100118637, "learning_rate": 2.5004055972419387e-05, "loss": 0.2013, "step": 30130 }, { "epoch": 1.6500574932924492, "grad_norm": 0.14432820677757263, "learning_rate": 2.4998986006895154e-05, "loss": 0.2127, "step": 30135 }, { "epoch": 1.6503312708755407, "grad_norm": 0.12483776360750198, "learning_rate": 2.499391604137092e-05, "loss": 0.1986, "step": 30140 }, { "epoch": 1.6506050484586323, "grad_norm": 0.11790399253368378, "learning_rate": 2.4988846075846687e-05, "loss": 0.2114, "step": 30145 }, { "epoch": 1.6508788260417238, "grad_norm": 0.10968056321144104, "learning_rate": 2.498377611032245e-05, "loss": 0.2038, "step": 30150 }, { "epoch": 1.6511526036248152, "grad_norm": 0.18222030997276306, "learning_rate": 2.4978706144798217e-05, "loss": 0.2097, "step": 30155 }, { "epoch": 1.6514263812079069, "grad_norm": 0.13335348665714264, "learning_rate": 2.497363617927398e-05, "loss": 0.1997, "step": 30160 }, { "epoch": 1.651700158790998, "grad_norm": 0.13469401001930237, "learning_rate": 2.4968566213749747e-05, "loss": 0.2049, "step": 30165 }, { "epoch": 1.6519739363740897, "grad_norm": 0.1415322870016098, "learning_rate": 2.4963496248225514e-05, "loss": 0.2033, "step": 30170 }, { "epoch": 1.6522477139571812, "grad_norm": 0.1196814700961113, "learning_rate": 2.4958426282701277e-05, "loss": 0.1977, "step": 30175 }, { "epoch": 1.6525214915402726, "grad_norm": 0.13208283483982086, "learning_rate": 2.4953356317177047e-05, "loss": 0.2019, "step": 30180 }, { "epoch": 1.6527952691233643, "grad_norm": 0.12740063667297363, "learning_rate": 2.494828635165281e-05, "loss": 0.2039, "step": 30185 }, { "epoch": 1.6530690467064557, "grad_norm": 0.12304133176803589, "learning_rate": 2.4943216386128577e-05, "loss": 0.2047, "step": 30190 }, { "epoch": 1.6533428242895472, "grad_norm": 0.12103205174207687, "learning_rate": 2.493814642060434e-05, "loss": 0.1935, "step": 30195 }, { "epoch": 1.6536166018726388, "grad_norm": 0.10637153685092926, "learning_rate": 2.4933076455080107e-05, "loss": 0.207, "step": 30200 }, { "epoch": 1.65389037945573, "grad_norm": 0.11963412165641785, "learning_rate": 2.492800648955587e-05, "loss": 0.2021, "step": 30205 }, { "epoch": 1.6541641570388217, "grad_norm": 0.11917214840650558, "learning_rate": 2.4922936524031637e-05, "loss": 0.2057, "step": 30210 }, { "epoch": 1.6544379346219131, "grad_norm": 0.10935503244400024, "learning_rate": 2.49178665585074e-05, "loss": 0.2068, "step": 30215 }, { "epoch": 1.6547117122050046, "grad_norm": 0.12100554257631302, "learning_rate": 2.491279659298317e-05, "loss": 0.203, "step": 30220 }, { "epoch": 1.6549854897880962, "grad_norm": 0.10518752783536911, "learning_rate": 2.4907726627458934e-05, "loss": 0.1956, "step": 30225 }, { "epoch": 1.6552592673711877, "grad_norm": 0.11947951465845108, "learning_rate": 2.49026566619347e-05, "loss": 0.2023, "step": 30230 }, { "epoch": 1.655533044954279, "grad_norm": 0.12782514095306396, "learning_rate": 2.4897586696410467e-05, "loss": 0.1997, "step": 30235 }, { "epoch": 1.6558068225373708, "grad_norm": 0.1179499551653862, "learning_rate": 2.489251673088623e-05, "loss": 0.1958, "step": 30240 }, { "epoch": 1.656080600120462, "grad_norm": 0.11674528568983078, "learning_rate": 2.4887446765361997e-05, "loss": 0.1959, "step": 30245 }, { "epoch": 1.6563543777035536, "grad_norm": 0.10357487946748734, "learning_rate": 2.488237679983776e-05, "loss": 0.1954, "step": 30250 }, { "epoch": 1.656628155286645, "grad_norm": 0.13782694935798645, "learning_rate": 2.4877306834313527e-05, "loss": 0.2158, "step": 30255 }, { "epoch": 1.6569019328697365, "grad_norm": 0.11247294396162033, "learning_rate": 2.487223686878929e-05, "loss": 0.2059, "step": 30260 }, { "epoch": 1.6571757104528282, "grad_norm": 0.12876145541667938, "learning_rate": 2.486716690326506e-05, "loss": 0.2017, "step": 30265 }, { "epoch": 1.6574494880359196, "grad_norm": 0.13127678632736206, "learning_rate": 2.4862096937740824e-05, "loss": 0.2077, "step": 30270 }, { "epoch": 1.657723265619011, "grad_norm": 0.12004011869430542, "learning_rate": 2.485702697221659e-05, "loss": 0.2112, "step": 30275 }, { "epoch": 1.6579970432021027, "grad_norm": 0.11019788682460785, "learning_rate": 2.4851957006692354e-05, "loss": 0.1997, "step": 30280 }, { "epoch": 1.658270820785194, "grad_norm": 0.11512897908687592, "learning_rate": 2.484688704116812e-05, "loss": 0.199, "step": 30285 }, { "epoch": 1.6585445983682856, "grad_norm": 0.12463849037885666, "learning_rate": 2.4841817075643887e-05, "loss": 0.2157, "step": 30290 }, { "epoch": 1.6588183759513773, "grad_norm": 0.11207406222820282, "learning_rate": 2.483674711011965e-05, "loss": 0.2001, "step": 30295 }, { "epoch": 1.6590921535344685, "grad_norm": 0.11645565927028656, "learning_rate": 2.4831677144595417e-05, "loss": 0.2125, "step": 30300 }, { "epoch": 1.6593659311175601, "grad_norm": 0.1417003720998764, "learning_rate": 2.4826607179071184e-05, "loss": 0.2031, "step": 30305 }, { "epoch": 1.6596397087006516, "grad_norm": 0.10083785653114319, "learning_rate": 2.482153721354695e-05, "loss": 0.2011, "step": 30310 }, { "epoch": 1.659913486283743, "grad_norm": 0.11249946802854538, "learning_rate": 2.4816467248022714e-05, "loss": 0.1957, "step": 30315 }, { "epoch": 1.6601872638668347, "grad_norm": 0.1030871719121933, "learning_rate": 2.481139728249848e-05, "loss": 0.1981, "step": 30320 }, { "epoch": 1.6604610414499261, "grad_norm": 0.1096392348408699, "learning_rate": 2.4806327316974244e-05, "loss": 0.1963, "step": 30325 }, { "epoch": 1.6607348190330176, "grad_norm": 0.1183709055185318, "learning_rate": 2.480125735145001e-05, "loss": 0.202, "step": 30330 }, { "epoch": 1.6610085966161092, "grad_norm": 0.1327972114086151, "learning_rate": 2.4796187385925778e-05, "loss": 0.2074, "step": 30335 }, { "epoch": 1.6612823741992004, "grad_norm": 0.16848768293857574, "learning_rate": 2.479111742040154e-05, "loss": 0.1973, "step": 30340 }, { "epoch": 1.661556151782292, "grad_norm": 0.12553831934928894, "learning_rate": 2.4786047454877308e-05, "loss": 0.1992, "step": 30345 }, { "epoch": 1.6618299293653835, "grad_norm": 0.12627890706062317, "learning_rate": 2.4780977489353074e-05, "loss": 0.2003, "step": 30350 }, { "epoch": 1.662103706948475, "grad_norm": 0.14396190643310547, "learning_rate": 2.477590752382884e-05, "loss": 0.1977, "step": 30355 }, { "epoch": 1.6623774845315666, "grad_norm": 0.13109196722507477, "learning_rate": 2.4770837558304604e-05, "loss": 0.2013, "step": 30360 }, { "epoch": 1.662651262114658, "grad_norm": 0.11366637796163559, "learning_rate": 2.476576759278037e-05, "loss": 0.1943, "step": 30365 }, { "epoch": 1.6629250396977495, "grad_norm": 0.1454150527715683, "learning_rate": 2.4760697627256134e-05, "loss": 0.1984, "step": 30370 }, { "epoch": 1.6631988172808412, "grad_norm": 0.12497404962778091, "learning_rate": 2.47556276617319e-05, "loss": 0.206, "step": 30375 }, { "epoch": 1.6634725948639324, "grad_norm": 0.13758598268032074, "learning_rate": 2.4750557696207664e-05, "loss": 0.2083, "step": 30380 }, { "epoch": 1.663746372447024, "grad_norm": 0.12436880171298981, "learning_rate": 2.4745487730683434e-05, "loss": 0.1994, "step": 30385 }, { "epoch": 1.6640201500301155, "grad_norm": 0.10041621327400208, "learning_rate": 2.4740417765159198e-05, "loss": 0.1929, "step": 30390 }, { "epoch": 1.664293927613207, "grad_norm": 0.12571540474891663, "learning_rate": 2.4735347799634964e-05, "loss": 0.2013, "step": 30395 }, { "epoch": 1.6645677051962986, "grad_norm": 0.1572003960609436, "learning_rate": 2.473027783411073e-05, "loss": 0.2032, "step": 30400 }, { "epoch": 1.66484148277939, "grad_norm": 0.13059130311012268, "learning_rate": 2.4725207868586494e-05, "loss": 0.2038, "step": 30405 }, { "epoch": 1.6651152603624815, "grad_norm": 0.13495756685733795, "learning_rate": 2.472013790306226e-05, "loss": 0.2003, "step": 30410 }, { "epoch": 1.6653890379455731, "grad_norm": 0.12066479027271271, "learning_rate": 2.4715067937538024e-05, "loss": 0.2091, "step": 30415 }, { "epoch": 1.6656628155286644, "grad_norm": 0.12758108973503113, "learning_rate": 2.470999797201379e-05, "loss": 0.1956, "step": 30420 }, { "epoch": 1.665936593111756, "grad_norm": 0.1304822862148285, "learning_rate": 2.4704928006489558e-05, "loss": 0.2013, "step": 30425 }, { "epoch": 1.6662103706948475, "grad_norm": 0.1101362556219101, "learning_rate": 2.4699858040965324e-05, "loss": 0.2062, "step": 30430 }, { "epoch": 1.666484148277939, "grad_norm": 0.12190388143062592, "learning_rate": 2.4694788075441088e-05, "loss": 0.2084, "step": 30435 }, { "epoch": 1.6667579258610306, "grad_norm": 0.10532696545124054, "learning_rate": 2.4689718109916854e-05, "loss": 0.1956, "step": 30440 }, { "epoch": 1.667031703444122, "grad_norm": 0.1119498685002327, "learning_rate": 2.4684648144392618e-05, "loss": 0.2051, "step": 30445 }, { "epoch": 1.6673054810272134, "grad_norm": 0.1159578338265419, "learning_rate": 2.4679578178868384e-05, "loss": 0.1966, "step": 30450 }, { "epoch": 1.667579258610305, "grad_norm": 0.14820492267608643, "learning_rate": 2.467450821334415e-05, "loss": 0.2005, "step": 30455 }, { "epoch": 1.6678530361933965, "grad_norm": 0.10963907837867737, "learning_rate": 2.4669438247819914e-05, "loss": 0.2038, "step": 30460 }, { "epoch": 1.668126813776488, "grad_norm": 0.11287447810173035, "learning_rate": 2.4664368282295684e-05, "loss": 0.2036, "step": 30465 }, { "epoch": 1.6684005913595796, "grad_norm": 0.11840681731700897, "learning_rate": 2.4659298316771448e-05, "loss": 0.1999, "step": 30470 }, { "epoch": 1.6686743689426708, "grad_norm": 0.13449633121490479, "learning_rate": 2.4654228351247214e-05, "loss": 0.2048, "step": 30475 }, { "epoch": 1.6689481465257625, "grad_norm": 0.119540736079216, "learning_rate": 2.4649158385722978e-05, "loss": 0.2072, "step": 30480 }, { "epoch": 1.669221924108854, "grad_norm": 0.12093401700258255, "learning_rate": 2.4644088420198744e-05, "loss": 0.2106, "step": 30485 }, { "epoch": 1.6694957016919454, "grad_norm": 0.130036860704422, "learning_rate": 2.4639018454674508e-05, "loss": 0.2038, "step": 30490 }, { "epoch": 1.669769479275037, "grad_norm": 0.13023103773593903, "learning_rate": 2.4633948489150274e-05, "loss": 0.2062, "step": 30495 }, { "epoch": 1.6700432568581285, "grad_norm": 0.12379647046327591, "learning_rate": 2.462887852362604e-05, "loss": 0.2083, "step": 30500 }, { "epoch": 1.67031703444122, "grad_norm": 0.14212222397327423, "learning_rate": 2.4623808558101808e-05, "loss": 0.1974, "step": 30505 }, { "epoch": 1.6705908120243116, "grad_norm": 0.12330333143472672, "learning_rate": 2.461873859257757e-05, "loss": 0.2039, "step": 30510 }, { "epoch": 1.6708645896074028, "grad_norm": 0.1296589970588684, "learning_rate": 2.4613668627053338e-05, "loss": 0.2022, "step": 30515 }, { "epoch": 1.6711383671904945, "grad_norm": 0.116512231528759, "learning_rate": 2.4608598661529104e-05, "loss": 0.2012, "step": 30520 }, { "epoch": 1.671412144773586, "grad_norm": 0.11503800004720688, "learning_rate": 2.4603528696004868e-05, "loss": 0.2, "step": 30525 }, { "epoch": 1.6716859223566773, "grad_norm": 0.11315735429525375, "learning_rate": 2.4598458730480634e-05, "loss": 0.2025, "step": 30530 }, { "epoch": 1.671959699939769, "grad_norm": 0.13979150354862213, "learning_rate": 2.4593388764956398e-05, "loss": 0.2079, "step": 30535 }, { "epoch": 1.6722334775228604, "grad_norm": 0.12164657562971115, "learning_rate": 2.4588318799432164e-05, "loss": 0.2068, "step": 30540 }, { "epoch": 1.6725072551059519, "grad_norm": 0.1124982237815857, "learning_rate": 2.458324883390793e-05, "loss": 0.1956, "step": 30545 }, { "epoch": 1.6727810326890435, "grad_norm": 0.1172167956829071, "learning_rate": 2.4578178868383698e-05, "loss": 0.2015, "step": 30550 }, { "epoch": 1.6730548102721348, "grad_norm": 0.12524177134037018, "learning_rate": 2.457310890285946e-05, "loss": 0.2011, "step": 30555 }, { "epoch": 1.6733285878552264, "grad_norm": 0.12969104945659637, "learning_rate": 2.4568038937335228e-05, "loss": 0.2037, "step": 30560 }, { "epoch": 1.6736023654383179, "grad_norm": 0.13903038203716278, "learning_rate": 2.4562968971810995e-05, "loss": 0.1932, "step": 30565 }, { "epoch": 1.6738761430214093, "grad_norm": 0.12163153290748596, "learning_rate": 2.4557899006286758e-05, "loss": 0.204, "step": 30570 }, { "epoch": 1.674149920604501, "grad_norm": 0.19008757174015045, "learning_rate": 2.4552829040762525e-05, "loss": 0.2077, "step": 30575 }, { "epoch": 1.6744236981875924, "grad_norm": 0.11653389781713486, "learning_rate": 2.4547759075238288e-05, "loss": 0.1977, "step": 30580 }, { "epoch": 1.6746974757706838, "grad_norm": 0.11942056566476822, "learning_rate": 2.4542689109714055e-05, "loss": 0.2054, "step": 30585 }, { "epoch": 1.6749712533537755, "grad_norm": 0.12221858650445938, "learning_rate": 2.453761914418982e-05, "loss": 0.1915, "step": 30590 }, { "epoch": 1.675245030936867, "grad_norm": 0.13230670988559723, "learning_rate": 2.4532549178665588e-05, "loss": 0.2073, "step": 30595 }, { "epoch": 1.6755188085199584, "grad_norm": 0.1335025578737259, "learning_rate": 2.452747921314135e-05, "loss": 0.1975, "step": 30600 }, { "epoch": 1.67579258610305, "grad_norm": 0.11965865641832352, "learning_rate": 2.4522409247617118e-05, "loss": 0.2017, "step": 30605 }, { "epoch": 1.6760663636861413, "grad_norm": 0.13199010491371155, "learning_rate": 2.451733928209288e-05, "loss": 0.2073, "step": 30610 }, { "epoch": 1.676340141269233, "grad_norm": 0.14553257822990417, "learning_rate": 2.4512269316568648e-05, "loss": 0.1973, "step": 30615 }, { "epoch": 1.6766139188523244, "grad_norm": 0.15455560386180878, "learning_rate": 2.4507199351044415e-05, "loss": 0.2004, "step": 30620 }, { "epoch": 1.6768876964354158, "grad_norm": 0.10289754718542099, "learning_rate": 2.4502129385520178e-05, "loss": 0.1956, "step": 30625 }, { "epoch": 1.6771614740185075, "grad_norm": 0.12312506139278412, "learning_rate": 2.4497059419995948e-05, "loss": 0.1981, "step": 30630 }, { "epoch": 1.677435251601599, "grad_norm": 0.1330714225769043, "learning_rate": 2.449198945447171e-05, "loss": 0.1969, "step": 30635 }, { "epoch": 1.6777090291846903, "grad_norm": 0.1438293755054474, "learning_rate": 2.4486919488947478e-05, "loss": 0.2065, "step": 30640 }, { "epoch": 1.677982806767782, "grad_norm": 0.11626315861940384, "learning_rate": 2.448184952342324e-05, "loss": 0.2011, "step": 30645 }, { "epoch": 1.6782565843508732, "grad_norm": 0.12817241251468658, "learning_rate": 2.4476779557899008e-05, "loss": 0.2047, "step": 30650 }, { "epoch": 1.6785303619339649, "grad_norm": 0.11949831992387772, "learning_rate": 2.447170959237477e-05, "loss": 0.2034, "step": 30655 }, { "epoch": 1.6788041395170563, "grad_norm": 0.11986108124256134, "learning_rate": 2.4466639626850538e-05, "loss": 0.2015, "step": 30660 }, { "epoch": 1.6790779171001478, "grad_norm": 0.12543447315692902, "learning_rate": 2.44615696613263e-05, "loss": 0.2004, "step": 30665 }, { "epoch": 1.6793516946832394, "grad_norm": 0.11052849888801575, "learning_rate": 2.445649969580207e-05, "loss": 0.1992, "step": 30670 }, { "epoch": 1.6796254722663309, "grad_norm": 0.12582746148109436, "learning_rate": 2.4451429730277835e-05, "loss": 0.2039, "step": 30675 }, { "epoch": 1.6798992498494223, "grad_norm": 0.12667647004127502, "learning_rate": 2.44463597647536e-05, "loss": 0.2055, "step": 30680 }, { "epoch": 1.680173027432514, "grad_norm": 0.11623106896877289, "learning_rate": 2.4441289799229368e-05, "loss": 0.2025, "step": 30685 }, { "epoch": 1.6804468050156052, "grad_norm": 0.12374383211135864, "learning_rate": 2.443621983370513e-05, "loss": 0.2021, "step": 30690 }, { "epoch": 1.6807205825986968, "grad_norm": 0.13524103164672852, "learning_rate": 2.4431149868180898e-05, "loss": 0.1947, "step": 30695 }, { "epoch": 1.6809943601817883, "grad_norm": 0.1229667067527771, "learning_rate": 2.442607990265666e-05, "loss": 0.2024, "step": 30700 }, { "epoch": 1.6812681377648797, "grad_norm": 0.11629556864500046, "learning_rate": 2.4421009937132428e-05, "loss": 0.1996, "step": 30705 }, { "epoch": 1.6815419153479714, "grad_norm": 0.12004853785037994, "learning_rate": 2.4415939971608195e-05, "loss": 0.2007, "step": 30710 }, { "epoch": 1.6818156929310628, "grad_norm": 0.11791527271270752, "learning_rate": 2.441087000608396e-05, "loss": 0.208, "step": 30715 }, { "epoch": 1.6820894705141543, "grad_norm": 0.11550679057836533, "learning_rate": 2.4405800040559725e-05, "loss": 0.197, "step": 30720 }, { "epoch": 1.682363248097246, "grad_norm": 0.14264217019081116, "learning_rate": 2.440073007503549e-05, "loss": 0.1965, "step": 30725 }, { "epoch": 1.6826370256803371, "grad_norm": 0.12492939829826355, "learning_rate": 2.4395660109511255e-05, "loss": 0.1937, "step": 30730 }, { "epoch": 1.6829108032634288, "grad_norm": 0.12530823051929474, "learning_rate": 2.439059014398702e-05, "loss": 0.2133, "step": 30735 }, { "epoch": 1.6831845808465205, "grad_norm": 0.12017481029033661, "learning_rate": 2.4385520178462788e-05, "loss": 0.2043, "step": 30740 }, { "epoch": 1.6834583584296117, "grad_norm": 0.14538753032684326, "learning_rate": 2.438045021293855e-05, "loss": 0.2104, "step": 30745 }, { "epoch": 1.6837321360127033, "grad_norm": 0.12833258509635925, "learning_rate": 2.437538024741432e-05, "loss": 0.2018, "step": 30750 }, { "epoch": 1.6840059135957948, "grad_norm": 0.1333002746105194, "learning_rate": 2.4370310281890085e-05, "loss": 0.2034, "step": 30755 }, { "epoch": 1.6842796911788862, "grad_norm": 0.12812627851963043, "learning_rate": 2.436524031636585e-05, "loss": 0.203, "step": 30760 }, { "epoch": 1.6845534687619779, "grad_norm": 0.14121606945991516, "learning_rate": 2.4360170350841615e-05, "loss": 0.206, "step": 30765 }, { "epoch": 1.6848272463450693, "grad_norm": 0.12586037814617157, "learning_rate": 2.435510038531738e-05, "loss": 0.2067, "step": 30770 }, { "epoch": 1.6851010239281607, "grad_norm": 0.12990041077136993, "learning_rate": 2.4350030419793145e-05, "loss": 0.1981, "step": 30775 }, { "epoch": 1.6853748015112524, "grad_norm": 0.13309982419013977, "learning_rate": 2.434496045426891e-05, "loss": 0.2054, "step": 30780 }, { "epoch": 1.6856485790943436, "grad_norm": 0.12043721973896027, "learning_rate": 2.4339890488744678e-05, "loss": 0.2007, "step": 30785 }, { "epoch": 1.6859223566774353, "grad_norm": 0.11861957609653473, "learning_rate": 2.4334820523220445e-05, "loss": 0.2032, "step": 30790 }, { "epoch": 1.6861961342605267, "grad_norm": 0.13697195053100586, "learning_rate": 2.4329750557696208e-05, "loss": 0.1955, "step": 30795 }, { "epoch": 1.6864699118436182, "grad_norm": 0.10888580977916718, "learning_rate": 2.4324680592171975e-05, "loss": 0.2036, "step": 30800 }, { "epoch": 1.6867436894267098, "grad_norm": 0.1269962638616562, "learning_rate": 2.431961062664774e-05, "loss": 0.2029, "step": 30805 }, { "epoch": 1.6870174670098013, "grad_norm": 0.12475956976413727, "learning_rate": 2.4314540661123505e-05, "loss": 0.2038, "step": 30810 }, { "epoch": 1.6872912445928927, "grad_norm": 0.11521429568529129, "learning_rate": 2.430947069559927e-05, "loss": 0.2025, "step": 30815 }, { "epoch": 1.6875650221759844, "grad_norm": 0.12846611440181732, "learning_rate": 2.4304400730075035e-05, "loss": 0.202, "step": 30820 }, { "epoch": 1.6878387997590756, "grad_norm": 0.1208549439907074, "learning_rate": 2.42993307645508e-05, "loss": 0.1984, "step": 30825 }, { "epoch": 1.6881125773421672, "grad_norm": 0.12819507718086243, "learning_rate": 2.4294260799026568e-05, "loss": 0.208, "step": 30830 }, { "epoch": 1.6883863549252587, "grad_norm": 0.12572363018989563, "learning_rate": 2.4289190833502335e-05, "loss": 0.2036, "step": 30835 }, { "epoch": 1.6886601325083501, "grad_norm": 0.12267163395881653, "learning_rate": 2.4284120867978098e-05, "loss": 0.2044, "step": 30840 }, { "epoch": 1.6889339100914418, "grad_norm": 0.12256330251693726, "learning_rate": 2.4279050902453865e-05, "loss": 0.2063, "step": 30845 }, { "epoch": 1.6892076876745332, "grad_norm": 0.13228961825370789, "learning_rate": 2.427398093692963e-05, "loss": 0.1951, "step": 30850 }, { "epoch": 1.6894814652576247, "grad_norm": 0.1280847042798996, "learning_rate": 2.4268910971405395e-05, "loss": 0.1982, "step": 30855 }, { "epoch": 1.6897552428407163, "grad_norm": 0.1228720173239708, "learning_rate": 2.426384100588116e-05, "loss": 0.2046, "step": 30860 }, { "epoch": 1.6900290204238075, "grad_norm": 0.11929801851511002, "learning_rate": 2.4258771040356925e-05, "loss": 0.2026, "step": 30865 }, { "epoch": 1.6903027980068992, "grad_norm": 0.1169402003288269, "learning_rate": 2.425370107483269e-05, "loss": 0.1991, "step": 30870 }, { "epoch": 1.6905765755899906, "grad_norm": 0.11747238785028458, "learning_rate": 2.424863110930846e-05, "loss": 0.2026, "step": 30875 }, { "epoch": 1.690850353173082, "grad_norm": 0.11346086114645004, "learning_rate": 2.4243561143784225e-05, "loss": 0.2019, "step": 30880 }, { "epoch": 1.6911241307561737, "grad_norm": 0.12917481362819672, "learning_rate": 2.423849117825999e-05, "loss": 0.2074, "step": 30885 }, { "epoch": 1.6913979083392652, "grad_norm": 0.1237235888838768, "learning_rate": 2.4233421212735755e-05, "loss": 0.2036, "step": 30890 }, { "epoch": 1.6916716859223566, "grad_norm": 0.12861092388629913, "learning_rate": 2.422835124721152e-05, "loss": 0.207, "step": 30895 }, { "epoch": 1.6919454635054483, "grad_norm": 0.11431829631328583, "learning_rate": 2.4223281281687285e-05, "loss": 0.2055, "step": 30900 }, { "epoch": 1.6922192410885397, "grad_norm": 0.1237022802233696, "learning_rate": 2.4218211316163052e-05, "loss": 0.1992, "step": 30905 }, { "epoch": 1.6924930186716312, "grad_norm": 0.1099385991692543, "learning_rate": 2.4213141350638815e-05, "loss": 0.2073, "step": 30910 }, { "epoch": 1.6927667962547228, "grad_norm": 0.14683139324188232, "learning_rate": 2.4208071385114585e-05, "loss": 0.2027, "step": 30915 }, { "epoch": 1.693040573837814, "grad_norm": 0.12425879389047623, "learning_rate": 2.420300141959035e-05, "loss": 0.202, "step": 30920 }, { "epoch": 1.6933143514209057, "grad_norm": 0.12180006504058838, "learning_rate": 2.4197931454066115e-05, "loss": 0.2027, "step": 30925 }, { "epoch": 1.6935881290039971, "grad_norm": 0.11965371668338776, "learning_rate": 2.419286148854188e-05, "loss": 0.1993, "step": 30930 }, { "epoch": 1.6938619065870886, "grad_norm": 0.12442474067211151, "learning_rate": 2.4187791523017645e-05, "loss": 0.2042, "step": 30935 }, { "epoch": 1.6941356841701802, "grad_norm": 0.12992310523986816, "learning_rate": 2.418272155749341e-05, "loss": 0.2046, "step": 30940 }, { "epoch": 1.6944094617532717, "grad_norm": 0.12594863772392273, "learning_rate": 2.4177651591969175e-05, "loss": 0.1995, "step": 30945 }, { "epoch": 1.6946832393363631, "grad_norm": 0.1142423152923584, "learning_rate": 2.417258162644494e-05, "loss": 0.2078, "step": 30950 }, { "epoch": 1.6949570169194548, "grad_norm": 0.12227358669042587, "learning_rate": 2.416751166092071e-05, "loss": 0.1951, "step": 30955 }, { "epoch": 1.695230794502546, "grad_norm": 0.11728879064321518, "learning_rate": 2.4162441695396472e-05, "loss": 0.2122, "step": 30960 }, { "epoch": 1.6955045720856377, "grad_norm": 0.1435941755771637, "learning_rate": 2.415737172987224e-05, "loss": 0.2008, "step": 30965 }, { "epoch": 1.695778349668729, "grad_norm": 0.136163592338562, "learning_rate": 2.4152301764348005e-05, "loss": 0.2052, "step": 30970 }, { "epoch": 1.6960521272518205, "grad_norm": 0.10655861347913742, "learning_rate": 2.414723179882377e-05, "loss": 0.1993, "step": 30975 }, { "epoch": 1.6963259048349122, "grad_norm": 0.11349985748529434, "learning_rate": 2.4142161833299535e-05, "loss": 0.2023, "step": 30980 }, { "epoch": 1.6965996824180036, "grad_norm": 0.12091973423957825, "learning_rate": 2.41370918677753e-05, "loss": 0.201, "step": 30985 }, { "epoch": 1.696873460001095, "grad_norm": 0.13544687628746033, "learning_rate": 2.4132021902251065e-05, "loss": 0.1968, "step": 30990 }, { "epoch": 1.6971472375841867, "grad_norm": 0.12172035872936249, "learning_rate": 2.4126951936726832e-05, "loss": 0.2082, "step": 30995 }, { "epoch": 1.697421015167278, "grad_norm": 0.12822751700878143, "learning_rate": 2.41218819712026e-05, "loss": 0.2073, "step": 31000 }, { "epoch": 1.6976947927503696, "grad_norm": 0.13130758702754974, "learning_rate": 2.4116812005678362e-05, "loss": 0.2037, "step": 31005 }, { "epoch": 1.697968570333461, "grad_norm": 0.13903623819351196, "learning_rate": 2.411174204015413e-05, "loss": 0.2022, "step": 31010 }, { "epoch": 1.6982423479165525, "grad_norm": 0.10906360298395157, "learning_rate": 2.4106672074629892e-05, "loss": 0.1959, "step": 31015 }, { "epoch": 1.6985161254996441, "grad_norm": 0.12010005116462708, "learning_rate": 2.410160210910566e-05, "loss": 0.2052, "step": 31020 }, { "epoch": 1.6987899030827356, "grad_norm": 0.1292281448841095, "learning_rate": 2.4096532143581425e-05, "loss": 0.1982, "step": 31025 }, { "epoch": 1.699063680665827, "grad_norm": 0.116584911942482, "learning_rate": 2.409146217805719e-05, "loss": 0.1929, "step": 31030 }, { "epoch": 1.6993374582489187, "grad_norm": 0.1281173974275589, "learning_rate": 2.408639221253296e-05, "loss": 0.1949, "step": 31035 }, { "epoch": 1.6996112358320101, "grad_norm": 0.10759389400482178, "learning_rate": 2.4081322247008722e-05, "loss": 0.1934, "step": 31040 }, { "epoch": 1.6998850134151016, "grad_norm": 0.1213483139872551, "learning_rate": 2.407625228148449e-05, "loss": 0.2043, "step": 31045 }, { "epoch": 1.7001587909981932, "grad_norm": 0.11007728427648544, "learning_rate": 2.4071182315960252e-05, "loss": 0.1921, "step": 31050 }, { "epoch": 1.7004325685812844, "grad_norm": 0.1233634278178215, "learning_rate": 2.406611235043602e-05, "loss": 0.2042, "step": 31055 }, { "epoch": 1.700706346164376, "grad_norm": 0.11226756125688553, "learning_rate": 2.4061042384911782e-05, "loss": 0.1999, "step": 31060 }, { "epoch": 1.7009801237474675, "grad_norm": 0.10883785784244537, "learning_rate": 2.405597241938755e-05, "loss": 0.195, "step": 31065 }, { "epoch": 1.701253901330559, "grad_norm": 0.1141270250082016, "learning_rate": 2.4050902453863315e-05, "loss": 0.2043, "step": 31070 }, { "epoch": 1.7015276789136506, "grad_norm": 0.12526044249534607, "learning_rate": 2.4045832488339082e-05, "loss": 0.1988, "step": 31075 }, { "epoch": 1.701801456496742, "grad_norm": 0.1426815688610077, "learning_rate": 2.4040762522814845e-05, "loss": 0.2014, "step": 31080 }, { "epoch": 1.7020752340798335, "grad_norm": 0.10676448047161102, "learning_rate": 2.4035692557290612e-05, "loss": 0.2046, "step": 31085 }, { "epoch": 1.7023490116629252, "grad_norm": 0.11112970858812332, "learning_rate": 2.403062259176638e-05, "loss": 0.1955, "step": 31090 }, { "epoch": 1.7026227892460164, "grad_norm": 0.11972459405660629, "learning_rate": 2.4025552626242142e-05, "loss": 0.196, "step": 31095 }, { "epoch": 1.702896566829108, "grad_norm": 0.12258277088403702, "learning_rate": 2.402048266071791e-05, "loss": 0.1914, "step": 31100 }, { "epoch": 1.7031703444121995, "grad_norm": 0.12281712889671326, "learning_rate": 2.4015412695193672e-05, "loss": 0.1945, "step": 31105 }, { "epoch": 1.703444121995291, "grad_norm": 0.13192395865917206, "learning_rate": 2.401034272966944e-05, "loss": 0.1992, "step": 31110 }, { "epoch": 1.7037178995783826, "grad_norm": 0.14288629591464996, "learning_rate": 2.4005272764145205e-05, "loss": 0.1979, "step": 31115 }, { "epoch": 1.703991677161474, "grad_norm": 0.12234672904014587, "learning_rate": 2.4000202798620972e-05, "loss": 0.1977, "step": 31120 }, { "epoch": 1.7042654547445655, "grad_norm": 0.11812088638544083, "learning_rate": 2.3995132833096735e-05, "loss": 0.2032, "step": 31125 }, { "epoch": 1.7045392323276571, "grad_norm": 0.11533822864294052, "learning_rate": 2.3990062867572502e-05, "loss": 0.198, "step": 31130 }, { "epoch": 1.7048130099107484, "grad_norm": 0.1263800710439682, "learning_rate": 2.398499290204827e-05, "loss": 0.2065, "step": 31135 }, { "epoch": 1.70508678749384, "grad_norm": 0.12578751146793365, "learning_rate": 2.3979922936524032e-05, "loss": 0.2071, "step": 31140 }, { "epoch": 1.7053605650769315, "grad_norm": 0.10623335093259811, "learning_rate": 2.39748529709998e-05, "loss": 0.2132, "step": 31145 }, { "epoch": 1.705634342660023, "grad_norm": 0.13205614686012268, "learning_rate": 2.3969783005475562e-05, "loss": 0.1953, "step": 31150 }, { "epoch": 1.7059081202431146, "grad_norm": 0.12312353402376175, "learning_rate": 2.396471303995133e-05, "loss": 0.2057, "step": 31155 }, { "epoch": 1.706181897826206, "grad_norm": 0.11011122167110443, "learning_rate": 2.3959643074427095e-05, "loss": 0.1949, "step": 31160 }, { "epoch": 1.7064556754092974, "grad_norm": 0.11110662668943405, "learning_rate": 2.3954573108902862e-05, "loss": 0.1973, "step": 31165 }, { "epoch": 1.706729452992389, "grad_norm": 0.12233985960483551, "learning_rate": 2.3949503143378625e-05, "loss": 0.205, "step": 31170 }, { "epoch": 1.7070032305754805, "grad_norm": 0.11205592006444931, "learning_rate": 2.3944433177854392e-05, "loss": 0.2095, "step": 31175 }, { "epoch": 1.707277008158572, "grad_norm": 0.12286053597927094, "learning_rate": 2.3939363212330155e-05, "loss": 0.2065, "step": 31180 }, { "epoch": 1.7075507857416636, "grad_norm": 0.13932855427265167, "learning_rate": 2.3934293246805922e-05, "loss": 0.2032, "step": 31185 }, { "epoch": 1.7078245633247549, "grad_norm": 0.11848028004169464, "learning_rate": 2.392922328128169e-05, "loss": 0.2188, "step": 31190 }, { "epoch": 1.7080983409078465, "grad_norm": 0.11795206367969513, "learning_rate": 2.3924153315757452e-05, "loss": 0.206, "step": 31195 }, { "epoch": 1.708372118490938, "grad_norm": 0.11332638561725616, "learning_rate": 2.3919083350233222e-05, "loss": 0.2042, "step": 31200 }, { "epoch": 1.7086458960740294, "grad_norm": 0.12672929465770721, "learning_rate": 2.3914013384708986e-05, "loss": 0.1976, "step": 31205 }, { "epoch": 1.708919673657121, "grad_norm": 0.12150925397872925, "learning_rate": 2.3908943419184752e-05, "loss": 0.2003, "step": 31210 }, { "epoch": 1.7091934512402125, "grad_norm": 0.1264602392911911, "learning_rate": 2.3903873453660516e-05, "loss": 0.2022, "step": 31215 }, { "epoch": 1.709467228823304, "grad_norm": 0.13206028938293457, "learning_rate": 2.3898803488136282e-05, "loss": 0.1986, "step": 31220 }, { "epoch": 1.7097410064063956, "grad_norm": 0.12824100255966187, "learning_rate": 2.3893733522612046e-05, "loss": 0.2082, "step": 31225 }, { "epoch": 1.7100147839894868, "grad_norm": 0.11940079182386398, "learning_rate": 2.3888663557087812e-05, "loss": 0.1979, "step": 31230 }, { "epoch": 1.7102885615725785, "grad_norm": 0.1172834262251854, "learning_rate": 2.3883593591563576e-05, "loss": 0.2024, "step": 31235 }, { "epoch": 1.71056233915567, "grad_norm": 0.11693263053894043, "learning_rate": 2.3878523626039346e-05, "loss": 0.2004, "step": 31240 }, { "epoch": 1.7108361167387613, "grad_norm": 0.11722342669963837, "learning_rate": 2.387345366051511e-05, "loss": 0.2012, "step": 31245 }, { "epoch": 1.711109894321853, "grad_norm": 0.13953223824501038, "learning_rate": 2.3868383694990876e-05, "loss": 0.202, "step": 31250 }, { "epoch": 1.7113836719049444, "grad_norm": 0.14755287766456604, "learning_rate": 2.3863313729466642e-05, "loss": 0.2007, "step": 31255 }, { "epoch": 1.7116574494880359, "grad_norm": 0.13309048116207123, "learning_rate": 2.3858243763942406e-05, "loss": 0.2047, "step": 31260 }, { "epoch": 1.7119312270711275, "grad_norm": 0.1049404889345169, "learning_rate": 2.3853173798418172e-05, "loss": 0.2018, "step": 31265 }, { "epoch": 1.7122050046542188, "grad_norm": 0.11197520047426224, "learning_rate": 2.3848103832893936e-05, "loss": 0.2012, "step": 31270 }, { "epoch": 1.7124787822373104, "grad_norm": 0.12153616547584534, "learning_rate": 2.3843033867369702e-05, "loss": 0.1973, "step": 31275 }, { "epoch": 1.7127525598204019, "grad_norm": 0.11653094738721848, "learning_rate": 2.383796390184547e-05, "loss": 0.2055, "step": 31280 }, { "epoch": 1.7130263374034933, "grad_norm": 0.11103343963623047, "learning_rate": 2.3832893936321236e-05, "loss": 0.2044, "step": 31285 }, { "epoch": 1.713300114986585, "grad_norm": 0.10952840745449066, "learning_rate": 2.3827823970797e-05, "loss": 0.1983, "step": 31290 }, { "epoch": 1.7135738925696764, "grad_norm": 0.12647266685962677, "learning_rate": 2.3822754005272766e-05, "loss": 0.2036, "step": 31295 }, { "epoch": 1.7138476701527678, "grad_norm": 0.12242867052555084, "learning_rate": 2.381768403974853e-05, "loss": 0.2132, "step": 31300 }, { "epoch": 1.7141214477358595, "grad_norm": 0.1670655906200409, "learning_rate": 2.3812614074224296e-05, "loss": 0.2042, "step": 31305 }, { "epoch": 1.7143952253189507, "grad_norm": 0.14691725373268127, "learning_rate": 2.3807544108700062e-05, "loss": 0.208, "step": 31310 }, { "epoch": 1.7146690029020424, "grad_norm": 0.11275670677423477, "learning_rate": 2.3802474143175826e-05, "loss": 0.1991, "step": 31315 }, { "epoch": 1.7149427804851338, "grad_norm": 0.11486668884754181, "learning_rate": 2.3797404177651596e-05, "loss": 0.1968, "step": 31320 }, { "epoch": 1.7152165580682253, "grad_norm": 0.11846967041492462, "learning_rate": 2.379233421212736e-05, "loss": 0.2006, "step": 31325 }, { "epoch": 1.715490335651317, "grad_norm": 0.18985579907894135, "learning_rate": 2.3787264246603126e-05, "loss": 0.2159, "step": 31330 }, { "epoch": 1.7157641132344084, "grad_norm": 0.16409431397914886, "learning_rate": 2.378219428107889e-05, "loss": 0.2115, "step": 31335 }, { "epoch": 1.7160378908174998, "grad_norm": 0.11634977906942368, "learning_rate": 2.3777124315554656e-05, "loss": 0.1986, "step": 31340 }, { "epoch": 1.7163116684005915, "grad_norm": 0.10958248376846313, "learning_rate": 2.377205435003042e-05, "loss": 0.1959, "step": 31345 }, { "epoch": 1.716585445983683, "grad_norm": 0.15354102849960327, "learning_rate": 2.3766984384506186e-05, "loss": 0.1979, "step": 31350 }, { "epoch": 1.7168592235667743, "grad_norm": 0.11552434414625168, "learning_rate": 2.3761914418981952e-05, "loss": 0.1936, "step": 31355 }, { "epoch": 1.717133001149866, "grad_norm": 0.11181466281414032, "learning_rate": 2.375684445345772e-05, "loss": 0.2081, "step": 31360 }, { "epoch": 1.7174067787329572, "grad_norm": 0.1288347840309143, "learning_rate": 2.3751774487933482e-05, "loss": 0.2043, "step": 31365 }, { "epoch": 1.7176805563160489, "grad_norm": 0.11851821094751358, "learning_rate": 2.374670452240925e-05, "loss": 0.2044, "step": 31370 }, { "epoch": 1.7179543338991403, "grad_norm": 0.14132246375083923, "learning_rate": 2.3741634556885016e-05, "loss": 0.2019, "step": 31375 }, { "epoch": 1.7182281114822318, "grad_norm": 0.11287005245685577, "learning_rate": 2.373656459136078e-05, "loss": 0.1946, "step": 31380 }, { "epoch": 1.7185018890653234, "grad_norm": 0.14036013185977936, "learning_rate": 2.3731494625836546e-05, "loss": 0.2053, "step": 31385 }, { "epoch": 1.7187756666484149, "grad_norm": 0.17176596820354462, "learning_rate": 2.372642466031231e-05, "loss": 0.2039, "step": 31390 }, { "epoch": 1.7190494442315063, "grad_norm": 0.12042661011219025, "learning_rate": 2.3721354694788076e-05, "loss": 0.1859, "step": 31395 }, { "epoch": 1.719323221814598, "grad_norm": 0.12360003590583801, "learning_rate": 2.3716284729263843e-05, "loss": 0.1922, "step": 31400 }, { "epoch": 1.7195969993976892, "grad_norm": 0.13864558935165405, "learning_rate": 2.371121476373961e-05, "loss": 0.2022, "step": 31405 }, { "epoch": 1.7198707769807808, "grad_norm": 0.1285923272371292, "learning_rate": 2.3706144798215373e-05, "loss": 0.2014, "step": 31410 }, { "epoch": 1.7201445545638723, "grad_norm": 0.13295044004917145, "learning_rate": 2.370107483269114e-05, "loss": 0.2105, "step": 31415 }, { "epoch": 1.7204183321469637, "grad_norm": 0.11256369948387146, "learning_rate": 2.3696004867166906e-05, "loss": 0.1952, "step": 31420 }, { "epoch": 1.7206921097300554, "grad_norm": 0.1718769520521164, "learning_rate": 2.369093490164267e-05, "loss": 0.2059, "step": 31425 }, { "epoch": 1.7209658873131468, "grad_norm": 0.15549513697624207, "learning_rate": 2.3685864936118436e-05, "loss": 0.1956, "step": 31430 }, { "epoch": 1.7212396648962383, "grad_norm": 0.15474660694599152, "learning_rate": 2.36807949705942e-05, "loss": 0.2036, "step": 31435 }, { "epoch": 1.72151344247933, "grad_norm": 0.13971327245235443, "learning_rate": 2.367572500506997e-05, "loss": 0.202, "step": 31440 }, { "epoch": 1.7217872200624211, "grad_norm": 0.12565822899341583, "learning_rate": 2.3670655039545733e-05, "loss": 0.2012, "step": 31445 }, { "epoch": 1.7220609976455128, "grad_norm": 0.12483217567205429, "learning_rate": 2.36655850740215e-05, "loss": 0.2035, "step": 31450 }, { "epoch": 1.7223347752286042, "grad_norm": 0.10303717106580734, "learning_rate": 2.3660515108497263e-05, "loss": 0.1961, "step": 31455 }, { "epoch": 1.7226085528116957, "grad_norm": 0.14642342925071716, "learning_rate": 2.365544514297303e-05, "loss": 0.2108, "step": 31460 }, { "epoch": 1.7228823303947873, "grad_norm": 0.1578643023967743, "learning_rate": 2.3650375177448793e-05, "loss": 0.2051, "step": 31465 }, { "epoch": 1.7231561079778788, "grad_norm": 0.1638338416814804, "learning_rate": 2.364530521192456e-05, "loss": 0.2007, "step": 31470 }, { "epoch": 1.7234298855609702, "grad_norm": 0.16569751501083374, "learning_rate": 2.3640235246400326e-05, "loss": 0.1989, "step": 31475 }, { "epoch": 1.7237036631440619, "grad_norm": 0.13495410978794098, "learning_rate": 2.363516528087609e-05, "loss": 0.1981, "step": 31480 }, { "epoch": 1.7239774407271533, "grad_norm": 0.1706429272890091, "learning_rate": 2.363009531535186e-05, "loss": 0.2004, "step": 31485 }, { "epoch": 1.7242512183102447, "grad_norm": 0.12413641810417175, "learning_rate": 2.3625025349827623e-05, "loss": 0.2098, "step": 31490 }, { "epoch": 1.7245249958933364, "grad_norm": 0.12736333906650543, "learning_rate": 2.361995538430339e-05, "loss": 0.2033, "step": 31495 }, { "epoch": 1.7247987734764276, "grad_norm": 0.12442031502723694, "learning_rate": 2.3614885418779153e-05, "loss": 0.2059, "step": 31500 }, { "epoch": 1.7250725510595193, "grad_norm": 0.11568226665258408, "learning_rate": 2.360981545325492e-05, "loss": 0.2007, "step": 31505 }, { "epoch": 1.7253463286426107, "grad_norm": 0.11266893893480301, "learning_rate": 2.3604745487730683e-05, "loss": 0.1997, "step": 31510 }, { "epoch": 1.7256201062257022, "grad_norm": 0.11006473004817963, "learning_rate": 2.359967552220645e-05, "loss": 0.1999, "step": 31515 }, { "epoch": 1.7258938838087938, "grad_norm": 0.14842645823955536, "learning_rate": 2.3594605556682216e-05, "loss": 0.2074, "step": 31520 }, { "epoch": 1.7261676613918853, "grad_norm": 0.13902291655540466, "learning_rate": 2.3589535591157983e-05, "loss": 0.2033, "step": 31525 }, { "epoch": 1.7264414389749767, "grad_norm": 0.11276953667402267, "learning_rate": 2.3584465625633746e-05, "loss": 0.1996, "step": 31530 }, { "epoch": 1.7267152165580684, "grad_norm": 0.10192083567380905, "learning_rate": 2.3579395660109513e-05, "loss": 0.2004, "step": 31535 }, { "epoch": 1.7269889941411596, "grad_norm": 0.1307014524936676, "learning_rate": 2.357432569458528e-05, "loss": 0.1987, "step": 31540 }, { "epoch": 1.7272627717242512, "grad_norm": 0.12829908728599548, "learning_rate": 2.3569255729061043e-05, "loss": 0.1978, "step": 31545 }, { "epoch": 1.7275365493073427, "grad_norm": 0.10823149234056473, "learning_rate": 2.356418576353681e-05, "loss": 0.1953, "step": 31550 }, { "epoch": 1.7278103268904341, "grad_norm": 0.1102232113480568, "learning_rate": 2.3559115798012573e-05, "loss": 0.2012, "step": 31555 }, { "epoch": 1.7280841044735258, "grad_norm": 0.11845073103904724, "learning_rate": 2.355404583248834e-05, "loss": 0.2063, "step": 31560 }, { "epoch": 1.7283578820566172, "grad_norm": 0.11632895469665527, "learning_rate": 2.3548975866964106e-05, "loss": 0.2019, "step": 31565 }, { "epoch": 1.7286316596397087, "grad_norm": 0.11929570883512497, "learning_rate": 2.3543905901439873e-05, "loss": 0.2029, "step": 31570 }, { "epoch": 1.7289054372228003, "grad_norm": 0.12294696271419525, "learning_rate": 2.3538835935915636e-05, "loss": 0.2047, "step": 31575 }, { "epoch": 1.7291792148058915, "grad_norm": 0.12733441591262817, "learning_rate": 2.3533765970391403e-05, "loss": 0.2039, "step": 31580 }, { "epoch": 1.7294529923889832, "grad_norm": 0.10650545358657837, "learning_rate": 2.352869600486717e-05, "loss": 0.2079, "step": 31585 }, { "epoch": 1.7297267699720746, "grad_norm": 0.12134295701980591, "learning_rate": 2.3523626039342933e-05, "loss": 0.1982, "step": 31590 }, { "epoch": 1.730000547555166, "grad_norm": 0.11012349277734756, "learning_rate": 2.35185560738187e-05, "loss": 0.1993, "step": 31595 }, { "epoch": 1.7302743251382577, "grad_norm": 0.10680784285068512, "learning_rate": 2.3513486108294463e-05, "loss": 0.1998, "step": 31600 }, { "epoch": 1.7305481027213492, "grad_norm": 0.13655655086040497, "learning_rate": 2.3508416142770233e-05, "loss": 0.1972, "step": 31605 }, { "epoch": 1.7308218803044406, "grad_norm": 0.12429088354110718, "learning_rate": 2.3503346177245996e-05, "loss": 0.1947, "step": 31610 }, { "epoch": 1.7310956578875323, "grad_norm": 0.12964655458927155, "learning_rate": 2.3498276211721763e-05, "loss": 0.2044, "step": 31615 }, { "epoch": 1.7313694354706237, "grad_norm": 0.14288842678070068, "learning_rate": 2.3493206246197526e-05, "loss": 0.2029, "step": 31620 }, { "epoch": 1.7316432130537152, "grad_norm": 0.16619567573070526, "learning_rate": 2.3488136280673293e-05, "loss": 0.2006, "step": 31625 }, { "epoch": 1.7319169906368068, "grad_norm": 0.15040920674800873, "learning_rate": 2.3483066315149056e-05, "loss": 0.2034, "step": 31630 }, { "epoch": 1.732190768219898, "grad_norm": 0.10610753297805786, "learning_rate": 2.3477996349624823e-05, "loss": 0.2023, "step": 31635 }, { "epoch": 1.7324645458029897, "grad_norm": 0.14594735205173492, "learning_rate": 2.347292638410059e-05, "loss": 0.2143, "step": 31640 }, { "epoch": 1.7327383233860811, "grad_norm": 0.13237254321575165, "learning_rate": 2.3467856418576356e-05, "loss": 0.2026, "step": 31645 }, { "epoch": 1.7330121009691726, "grad_norm": 0.1270524561405182, "learning_rate": 2.3462786453052123e-05, "loss": 0.195, "step": 31650 }, { "epoch": 1.7332858785522642, "grad_norm": 0.1200714185833931, "learning_rate": 2.3457716487527886e-05, "loss": 0.1936, "step": 31655 }, { "epoch": 1.7335596561353557, "grad_norm": 0.13096612691879272, "learning_rate": 2.3452646522003653e-05, "loss": 0.2032, "step": 31660 }, { "epoch": 1.7338334337184471, "grad_norm": 0.11037791520357132, "learning_rate": 2.3447576556479416e-05, "loss": 0.1982, "step": 31665 }, { "epoch": 1.7341072113015388, "grad_norm": 0.14793531596660614, "learning_rate": 2.3442506590955183e-05, "loss": 0.1969, "step": 31670 }, { "epoch": 1.73438098888463, "grad_norm": 0.15043072402477264, "learning_rate": 2.3437436625430946e-05, "loss": 0.2104, "step": 31675 }, { "epoch": 1.7346547664677217, "grad_norm": 0.11220438778400421, "learning_rate": 2.3432366659906713e-05, "loss": 0.1918, "step": 31680 }, { "epoch": 1.734928544050813, "grad_norm": 0.11621104925870895, "learning_rate": 2.342729669438248e-05, "loss": 0.1956, "step": 31685 }, { "epoch": 1.7352023216339045, "grad_norm": 0.11301743984222412, "learning_rate": 2.3422226728858246e-05, "loss": 0.2031, "step": 31690 }, { "epoch": 1.7354760992169962, "grad_norm": 0.11894842982292175, "learning_rate": 2.341715676333401e-05, "loss": 0.2003, "step": 31695 }, { "epoch": 1.7357498768000876, "grad_norm": 0.12562723457813263, "learning_rate": 2.3412086797809776e-05, "loss": 0.2034, "step": 31700 }, { "epoch": 1.736023654383179, "grad_norm": 0.12785862386226654, "learning_rate": 2.3407016832285543e-05, "loss": 0.2021, "step": 31705 }, { "epoch": 1.7362974319662707, "grad_norm": 0.12703633308410645, "learning_rate": 2.3401946866761306e-05, "loss": 0.1997, "step": 31710 }, { "epoch": 1.736571209549362, "grad_norm": 0.12804898619651794, "learning_rate": 2.3396876901237073e-05, "loss": 0.2078, "step": 31715 }, { "epoch": 1.7368449871324536, "grad_norm": 0.15911875665187836, "learning_rate": 2.3391806935712836e-05, "loss": 0.2038, "step": 31720 }, { "epoch": 1.737118764715545, "grad_norm": 0.11751481890678406, "learning_rate": 2.3386736970188606e-05, "loss": 0.2092, "step": 31725 }, { "epoch": 1.7373925422986365, "grad_norm": 0.10593105107545853, "learning_rate": 2.338166700466437e-05, "loss": 0.2082, "step": 31730 }, { "epoch": 1.7376663198817281, "grad_norm": 0.11632005125284195, "learning_rate": 2.3376597039140136e-05, "loss": 0.1937, "step": 31735 }, { "epoch": 1.7379400974648196, "grad_norm": 0.12615536153316498, "learning_rate": 2.33715270736159e-05, "loss": 0.2048, "step": 31740 }, { "epoch": 1.738213875047911, "grad_norm": 0.11224543303251266, "learning_rate": 2.3366457108091666e-05, "loss": 0.2007, "step": 31745 }, { "epoch": 1.7384876526310027, "grad_norm": 0.13718393445014954, "learning_rate": 2.336138714256743e-05, "loss": 0.2016, "step": 31750 }, { "epoch": 1.738761430214094, "grad_norm": 0.1176488921046257, "learning_rate": 2.3356317177043196e-05, "loss": 0.2004, "step": 31755 }, { "epoch": 1.7390352077971856, "grad_norm": 0.12278978526592255, "learning_rate": 2.3351247211518963e-05, "loss": 0.2006, "step": 31760 }, { "epoch": 1.7393089853802772, "grad_norm": 0.1607063263654709, "learning_rate": 2.3346177245994726e-05, "loss": 0.2027, "step": 31765 }, { "epoch": 1.7395827629633684, "grad_norm": 0.15198872983455658, "learning_rate": 2.3341107280470496e-05, "loss": 0.2049, "step": 31770 }, { "epoch": 1.73985654054646, "grad_norm": 0.13405923545360565, "learning_rate": 2.333603731494626e-05, "loss": 0.2049, "step": 31775 }, { "epoch": 1.7401303181295515, "grad_norm": 0.13016735017299652, "learning_rate": 2.3330967349422026e-05, "loss": 0.2012, "step": 31780 }, { "epoch": 1.740404095712643, "grad_norm": 0.13273173570632935, "learning_rate": 2.332589738389779e-05, "loss": 0.2044, "step": 31785 }, { "epoch": 1.7406778732957346, "grad_norm": 0.11054529994726181, "learning_rate": 2.3320827418373556e-05, "loss": 0.2051, "step": 31790 }, { "epoch": 1.740951650878826, "grad_norm": 0.11705703288316727, "learning_rate": 2.331575745284932e-05, "loss": 0.2053, "step": 31795 }, { "epoch": 1.7412254284619175, "grad_norm": 0.12738099694252014, "learning_rate": 2.3310687487325086e-05, "loss": 0.2055, "step": 31800 }, { "epoch": 1.7414992060450092, "grad_norm": 0.12517720460891724, "learning_rate": 2.3305617521800853e-05, "loss": 0.1996, "step": 31805 }, { "epoch": 1.7417729836281004, "grad_norm": 0.12889468669891357, "learning_rate": 2.330054755627662e-05, "loss": 0.2052, "step": 31810 }, { "epoch": 1.742046761211192, "grad_norm": 0.13381464779376984, "learning_rate": 2.3295477590752383e-05, "loss": 0.2003, "step": 31815 }, { "epoch": 1.7423205387942835, "grad_norm": 0.12523306906223297, "learning_rate": 2.329040762522815e-05, "loss": 0.2018, "step": 31820 }, { "epoch": 1.742594316377375, "grad_norm": 0.12606924772262573, "learning_rate": 2.3285337659703917e-05, "loss": 0.204, "step": 31825 }, { "epoch": 1.7428680939604666, "grad_norm": 0.13021159172058105, "learning_rate": 2.328026769417968e-05, "loss": 0.2024, "step": 31830 }, { "epoch": 1.743141871543558, "grad_norm": 0.12695929408073425, "learning_rate": 2.3275197728655447e-05, "loss": 0.2049, "step": 31835 }, { "epoch": 1.7434156491266495, "grad_norm": 0.11659903824329376, "learning_rate": 2.327012776313121e-05, "loss": 0.2024, "step": 31840 }, { "epoch": 1.7436894267097411, "grad_norm": 0.14163459837436676, "learning_rate": 2.3265057797606977e-05, "loss": 0.2032, "step": 31845 }, { "epoch": 1.7439632042928324, "grad_norm": 0.1333906203508377, "learning_rate": 2.3259987832082743e-05, "loss": 0.1971, "step": 31850 }, { "epoch": 1.744236981875924, "grad_norm": 0.11433672159910202, "learning_rate": 2.325491786655851e-05, "loss": 0.2008, "step": 31855 }, { "epoch": 1.7445107594590155, "grad_norm": 0.131016805768013, "learning_rate": 2.3249847901034273e-05, "loss": 0.1987, "step": 31860 }, { "epoch": 1.744784537042107, "grad_norm": 0.108544260263443, "learning_rate": 2.324477793551004e-05, "loss": 0.2024, "step": 31865 }, { "epoch": 1.7450583146251986, "grad_norm": 0.11747010052204132, "learning_rate": 2.3239707969985807e-05, "loss": 0.1977, "step": 31870 }, { "epoch": 1.74533209220829, "grad_norm": 0.14298298954963684, "learning_rate": 2.323463800446157e-05, "loss": 0.2028, "step": 31875 }, { "epoch": 1.7456058697913814, "grad_norm": 0.13680912554264069, "learning_rate": 2.3229568038937337e-05, "loss": 0.2107, "step": 31880 }, { "epoch": 1.745879647374473, "grad_norm": 0.11821537464857101, "learning_rate": 2.32244980734131e-05, "loss": 0.1975, "step": 31885 }, { "epoch": 1.7461534249575643, "grad_norm": 0.10707857459783554, "learning_rate": 2.321942810788887e-05, "loss": 0.1923, "step": 31890 }, { "epoch": 1.746427202540656, "grad_norm": 0.14585135877132416, "learning_rate": 2.3214358142364633e-05, "loss": 0.2074, "step": 31895 }, { "epoch": 1.7467009801237474, "grad_norm": 0.16190677881240845, "learning_rate": 2.32092881768404e-05, "loss": 0.2061, "step": 31900 }, { "epoch": 1.7469747577068389, "grad_norm": 0.1226995438337326, "learning_rate": 2.3204218211316163e-05, "loss": 0.1999, "step": 31905 }, { "epoch": 1.7472485352899305, "grad_norm": 0.11979003250598907, "learning_rate": 2.319914824579193e-05, "loss": 0.1974, "step": 31910 }, { "epoch": 1.747522312873022, "grad_norm": 0.10958272218704224, "learning_rate": 2.3194078280267693e-05, "loss": 0.1998, "step": 31915 }, { "epoch": 1.7477960904561134, "grad_norm": 0.12581151723861694, "learning_rate": 2.318900831474346e-05, "loss": 0.1965, "step": 31920 }, { "epoch": 1.748069868039205, "grad_norm": 0.12888772785663605, "learning_rate": 2.3183938349219227e-05, "loss": 0.1994, "step": 31925 }, { "epoch": 1.7483436456222965, "grad_norm": 0.13680404424667358, "learning_rate": 2.3178868383694993e-05, "loss": 0.1994, "step": 31930 }, { "epoch": 1.748617423205388, "grad_norm": 0.12789903581142426, "learning_rate": 2.317379841817076e-05, "loss": 0.207, "step": 31935 }, { "epoch": 1.7488912007884796, "grad_norm": 0.10839606076478958, "learning_rate": 2.3168728452646523e-05, "loss": 0.2053, "step": 31940 }, { "epoch": 1.7491649783715708, "grad_norm": 0.1174214705824852, "learning_rate": 2.316365848712229e-05, "loss": 0.1985, "step": 31945 }, { "epoch": 1.7494387559546625, "grad_norm": 0.10501977801322937, "learning_rate": 2.3158588521598053e-05, "loss": 0.2035, "step": 31950 }, { "epoch": 1.749712533537754, "grad_norm": 0.11974260956048965, "learning_rate": 2.315351855607382e-05, "loss": 0.2031, "step": 31955 }, { "epoch": 1.7499863111208454, "grad_norm": 0.10244396328926086, "learning_rate": 2.3148448590549583e-05, "loss": 0.1973, "step": 31960 }, { "epoch": 1.750260088703937, "grad_norm": 0.13386675715446472, "learning_rate": 2.314337862502535e-05, "loss": 0.2035, "step": 31965 }, { "epoch": 1.7505338662870285, "grad_norm": 0.13361448049545288, "learning_rate": 2.3138308659501117e-05, "loss": 0.2048, "step": 31970 }, { "epoch": 1.75080764387012, "grad_norm": 0.14914166927337646, "learning_rate": 2.3133238693976883e-05, "loss": 0.2013, "step": 31975 }, { "epoch": 1.7510814214532116, "grad_norm": 0.12924297153949738, "learning_rate": 2.3128168728452647e-05, "loss": 0.2047, "step": 31980 }, { "epoch": 1.7513551990363028, "grad_norm": 0.11677846312522888, "learning_rate": 2.3123098762928413e-05, "loss": 0.1969, "step": 31985 }, { "epoch": 1.7516289766193944, "grad_norm": 0.12375063449144363, "learning_rate": 2.311802879740418e-05, "loss": 0.2064, "step": 31990 }, { "epoch": 1.7519027542024859, "grad_norm": 0.10613910108804703, "learning_rate": 2.3112958831879943e-05, "loss": 0.1966, "step": 31995 }, { "epoch": 1.7521765317855773, "grad_norm": 0.12012283504009247, "learning_rate": 2.310788886635571e-05, "loss": 0.2045, "step": 32000 }, { "epoch": 1.752450309368669, "grad_norm": 0.12129170447587967, "learning_rate": 2.3102818900831473e-05, "loss": 0.1973, "step": 32005 }, { "epoch": 1.7527240869517604, "grad_norm": 0.15431703627109528, "learning_rate": 2.3097748935307244e-05, "loss": 0.2069, "step": 32010 }, { "epoch": 1.7529978645348518, "grad_norm": 0.12700900435447693, "learning_rate": 2.3092678969783007e-05, "loss": 0.1969, "step": 32015 }, { "epoch": 1.7532716421179435, "grad_norm": 0.14643414318561554, "learning_rate": 2.3087609004258774e-05, "loss": 0.2023, "step": 32020 }, { "epoch": 1.7535454197010347, "grad_norm": 0.11656702309846878, "learning_rate": 2.3082539038734537e-05, "loss": 0.2037, "step": 32025 }, { "epoch": 1.7538191972841264, "grad_norm": 0.1298430860042572, "learning_rate": 2.3077469073210304e-05, "loss": 0.206, "step": 32030 }, { "epoch": 1.7540929748672178, "grad_norm": 0.12103034555912018, "learning_rate": 2.3072399107686067e-05, "loss": 0.2033, "step": 32035 }, { "epoch": 1.7543667524503093, "grad_norm": 0.10637018084526062, "learning_rate": 2.3067329142161834e-05, "loss": 0.2024, "step": 32040 }, { "epoch": 1.754640530033401, "grad_norm": 0.11146461963653564, "learning_rate": 2.30622591766376e-05, "loss": 0.2018, "step": 32045 }, { "epoch": 1.7549143076164924, "grad_norm": 0.11208029091358185, "learning_rate": 2.3057189211113364e-05, "loss": 0.2139, "step": 32050 }, { "epoch": 1.7551880851995838, "grad_norm": 0.11135444045066833, "learning_rate": 2.3052119245589134e-05, "loss": 0.197, "step": 32055 }, { "epoch": 1.7554618627826755, "grad_norm": 0.11490966379642487, "learning_rate": 2.3047049280064897e-05, "loss": 0.2002, "step": 32060 }, { "epoch": 1.755735640365767, "grad_norm": 0.14187420904636383, "learning_rate": 2.3041979314540664e-05, "loss": 0.1968, "step": 32065 }, { "epoch": 1.7560094179488583, "grad_norm": 0.112614206969738, "learning_rate": 2.3036909349016427e-05, "loss": 0.2055, "step": 32070 }, { "epoch": 1.75628319553195, "grad_norm": 0.11156858503818512, "learning_rate": 2.3031839383492194e-05, "loss": 0.1968, "step": 32075 }, { "epoch": 1.7565569731150412, "grad_norm": 0.1216898187994957, "learning_rate": 2.3026769417967957e-05, "loss": 0.1984, "step": 32080 }, { "epoch": 1.7568307506981329, "grad_norm": 0.13055294752120972, "learning_rate": 2.3021699452443724e-05, "loss": 0.2026, "step": 32085 }, { "epoch": 1.7571045282812243, "grad_norm": 0.11828099936246872, "learning_rate": 2.301662948691949e-05, "loss": 0.21, "step": 32090 }, { "epoch": 1.7573783058643158, "grad_norm": 0.11647534370422363, "learning_rate": 2.3011559521395257e-05, "loss": 0.2011, "step": 32095 }, { "epoch": 1.7576520834474074, "grad_norm": 0.13456889986991882, "learning_rate": 2.300648955587102e-05, "loss": 0.2066, "step": 32100 }, { "epoch": 1.7579258610304989, "grad_norm": 0.1117020696401596, "learning_rate": 2.3001419590346787e-05, "loss": 0.1967, "step": 32105 }, { "epoch": 1.7581996386135903, "grad_norm": 0.11073722690343857, "learning_rate": 2.2996349624822554e-05, "loss": 0.2031, "step": 32110 }, { "epoch": 1.758473416196682, "grad_norm": 0.1581917405128479, "learning_rate": 2.2991279659298317e-05, "loss": 0.2049, "step": 32115 }, { "epoch": 1.7587471937797732, "grad_norm": 0.10868711024522781, "learning_rate": 2.2986209693774084e-05, "loss": 0.2003, "step": 32120 }, { "epoch": 1.7590209713628648, "grad_norm": 0.10880212485790253, "learning_rate": 2.2981139728249847e-05, "loss": 0.1959, "step": 32125 }, { "epoch": 1.7592947489459563, "grad_norm": 0.10577774792909622, "learning_rate": 2.2976069762725614e-05, "loss": 0.1951, "step": 32130 }, { "epoch": 1.7595685265290477, "grad_norm": 0.13528072834014893, "learning_rate": 2.297099979720138e-05, "loss": 0.1959, "step": 32135 }, { "epoch": 1.7598423041121394, "grad_norm": 0.12061059474945068, "learning_rate": 2.2965929831677147e-05, "loss": 0.2016, "step": 32140 }, { "epoch": 1.7601160816952308, "grad_norm": 0.12173718959093094, "learning_rate": 2.296085986615291e-05, "loss": 0.2028, "step": 32145 }, { "epoch": 1.7603898592783223, "grad_norm": 0.11101876199245453, "learning_rate": 2.2955789900628677e-05, "loss": 0.2046, "step": 32150 }, { "epoch": 1.760663636861414, "grad_norm": 0.11993268132209778, "learning_rate": 2.2950719935104444e-05, "loss": 0.2067, "step": 32155 }, { "epoch": 1.7609374144445051, "grad_norm": 0.14910845458507538, "learning_rate": 2.2945649969580207e-05, "loss": 0.2024, "step": 32160 }, { "epoch": 1.7612111920275968, "grad_norm": 0.11396792531013489, "learning_rate": 2.2940580004055974e-05, "loss": 0.2021, "step": 32165 }, { "epoch": 1.7614849696106882, "grad_norm": 0.11901967227458954, "learning_rate": 2.2935510038531737e-05, "loss": 0.1952, "step": 32170 }, { "epoch": 1.7617587471937797, "grad_norm": 0.11906585097312927, "learning_rate": 2.2930440073007507e-05, "loss": 0.1955, "step": 32175 }, { "epoch": 1.7620325247768713, "grad_norm": 0.13813793659210205, "learning_rate": 2.292537010748327e-05, "loss": 0.2015, "step": 32180 }, { "epoch": 1.7623063023599628, "grad_norm": 0.107219398021698, "learning_rate": 2.2920300141959037e-05, "loss": 0.1947, "step": 32185 }, { "epoch": 1.7625800799430542, "grad_norm": 0.11549302935600281, "learning_rate": 2.29152301764348e-05, "loss": 0.2019, "step": 32190 }, { "epoch": 1.7628538575261459, "grad_norm": 0.12627655267715454, "learning_rate": 2.2910160210910567e-05, "loss": 0.2024, "step": 32195 }, { "epoch": 1.763127635109237, "grad_norm": 0.14012958109378815, "learning_rate": 2.290509024538633e-05, "loss": 0.1947, "step": 32200 }, { "epoch": 1.7634014126923288, "grad_norm": 0.11989053338766098, "learning_rate": 2.2900020279862097e-05, "loss": 0.1947, "step": 32205 }, { "epoch": 1.7636751902754204, "grad_norm": 0.11082832515239716, "learning_rate": 2.2894950314337864e-05, "loss": 0.2001, "step": 32210 }, { "epoch": 1.7639489678585116, "grad_norm": 0.12457893788814545, "learning_rate": 2.288988034881363e-05, "loss": 0.1963, "step": 32215 }, { "epoch": 1.7642227454416033, "grad_norm": 0.12371812015771866, "learning_rate": 2.2884810383289397e-05, "loss": 0.1981, "step": 32220 }, { "epoch": 1.7644965230246947, "grad_norm": 0.11019383370876312, "learning_rate": 2.287974041776516e-05, "loss": 0.2021, "step": 32225 }, { "epoch": 1.7647703006077862, "grad_norm": 0.13066259026527405, "learning_rate": 2.2874670452240927e-05, "loss": 0.2067, "step": 32230 }, { "epoch": 1.7650440781908778, "grad_norm": 0.119631826877594, "learning_rate": 2.286960048671669e-05, "loss": 0.2013, "step": 32235 }, { "epoch": 1.7653178557739693, "grad_norm": 0.1352846622467041, "learning_rate": 2.2864530521192457e-05, "loss": 0.2112, "step": 32240 }, { "epoch": 1.7655916333570607, "grad_norm": 0.12491133064031601, "learning_rate": 2.285946055566822e-05, "loss": 0.1951, "step": 32245 }, { "epoch": 1.7658654109401524, "grad_norm": 0.11156974732875824, "learning_rate": 2.2854390590143987e-05, "loss": 0.1962, "step": 32250 }, { "epoch": 1.7661391885232436, "grad_norm": 0.12660366296768188, "learning_rate": 2.2849320624619754e-05, "loss": 0.2059, "step": 32255 }, { "epoch": 1.7664129661063352, "grad_norm": 0.11118703335523605, "learning_rate": 2.284425065909552e-05, "loss": 0.207, "step": 32260 }, { "epoch": 1.7666867436894267, "grad_norm": 0.11664043366909027, "learning_rate": 2.2839180693571284e-05, "loss": 0.2118, "step": 32265 }, { "epoch": 1.7669605212725181, "grad_norm": 0.12003359943628311, "learning_rate": 2.283411072804705e-05, "loss": 0.2022, "step": 32270 }, { "epoch": 1.7672342988556098, "grad_norm": 0.12104073166847229, "learning_rate": 2.2829040762522817e-05, "loss": 0.2011, "step": 32275 }, { "epoch": 1.7675080764387012, "grad_norm": 0.112828828394413, "learning_rate": 2.282397079699858e-05, "loss": 0.206, "step": 32280 }, { "epoch": 1.7677818540217927, "grad_norm": 0.11737081408500671, "learning_rate": 2.2818900831474347e-05, "loss": 0.2008, "step": 32285 }, { "epoch": 1.7680556316048843, "grad_norm": 0.11429925262928009, "learning_rate": 2.281383086595011e-05, "loss": 0.2014, "step": 32290 }, { "epoch": 1.7683294091879755, "grad_norm": 0.12511342763900757, "learning_rate": 2.280876090042588e-05, "loss": 0.2033, "step": 32295 }, { "epoch": 1.7686031867710672, "grad_norm": 0.1375877410173416, "learning_rate": 2.2803690934901644e-05, "loss": 0.1984, "step": 32300 }, { "epoch": 1.7688769643541586, "grad_norm": 0.10913744568824768, "learning_rate": 2.279862096937741e-05, "loss": 0.1939, "step": 32305 }, { "epoch": 1.76915074193725, "grad_norm": 0.10507459193468094, "learning_rate": 2.2793551003853174e-05, "loss": 0.2004, "step": 32310 }, { "epoch": 1.7694245195203417, "grad_norm": 0.11164980381727219, "learning_rate": 2.278848103832894e-05, "loss": 0.2089, "step": 32315 }, { "epoch": 1.7696982971034332, "grad_norm": 0.15053053200244904, "learning_rate": 2.2783411072804707e-05, "loss": 0.2074, "step": 32320 }, { "epoch": 1.7699720746865246, "grad_norm": 0.1274721771478653, "learning_rate": 2.277834110728047e-05, "loss": 0.2084, "step": 32325 }, { "epoch": 1.7702458522696163, "grad_norm": 0.11296039074659348, "learning_rate": 2.2773271141756237e-05, "loss": 0.2015, "step": 32330 }, { "epoch": 1.7705196298527075, "grad_norm": 0.15040338039398193, "learning_rate": 2.2768201176232e-05, "loss": 0.2107, "step": 32335 }, { "epoch": 1.7707934074357992, "grad_norm": 0.14670822024345398, "learning_rate": 2.276313121070777e-05, "loss": 0.1956, "step": 32340 }, { "epoch": 1.7710671850188906, "grad_norm": 0.11554018408060074, "learning_rate": 2.2758061245183534e-05, "loss": 0.2, "step": 32345 }, { "epoch": 1.771340962601982, "grad_norm": 0.10691195726394653, "learning_rate": 2.27529912796593e-05, "loss": 0.1964, "step": 32350 }, { "epoch": 1.7716147401850737, "grad_norm": 0.1095898449420929, "learning_rate": 2.2747921314135064e-05, "loss": 0.1949, "step": 32355 }, { "epoch": 1.7718885177681651, "grad_norm": 0.1246255412697792, "learning_rate": 2.274285134861083e-05, "loss": 0.2053, "step": 32360 }, { "epoch": 1.7721622953512566, "grad_norm": 0.12709835171699524, "learning_rate": 2.2737781383086594e-05, "loss": 0.2018, "step": 32365 }, { "epoch": 1.7724360729343482, "grad_norm": 0.13337811827659607, "learning_rate": 2.273271141756236e-05, "loss": 0.2071, "step": 32370 }, { "epoch": 1.7727098505174397, "grad_norm": 0.15437661111354828, "learning_rate": 2.2727641452038127e-05, "loss": 0.2004, "step": 32375 }, { "epoch": 1.7729836281005311, "grad_norm": 0.11034933477640152, "learning_rate": 2.2722571486513894e-05, "loss": 0.2083, "step": 32380 }, { "epoch": 1.7732574056836228, "grad_norm": 0.13501858711242676, "learning_rate": 2.271750152098966e-05, "loss": 0.1977, "step": 32385 }, { "epoch": 1.773531183266714, "grad_norm": 0.1282951384782791, "learning_rate": 2.2712431555465424e-05, "loss": 0.1973, "step": 32390 }, { "epoch": 1.7738049608498057, "grad_norm": 0.13154177367687225, "learning_rate": 2.270736158994119e-05, "loss": 0.2046, "step": 32395 }, { "epoch": 1.774078738432897, "grad_norm": 0.1266564279794693, "learning_rate": 2.2702291624416954e-05, "loss": 0.2092, "step": 32400 }, { "epoch": 1.7743525160159885, "grad_norm": 0.12735480070114136, "learning_rate": 2.269722165889272e-05, "loss": 0.1952, "step": 32405 }, { "epoch": 1.7746262935990802, "grad_norm": 0.1262667328119278, "learning_rate": 2.2692151693368484e-05, "loss": 0.2048, "step": 32410 }, { "epoch": 1.7749000711821716, "grad_norm": 0.1211717426776886, "learning_rate": 2.268708172784425e-05, "loss": 0.1957, "step": 32415 }, { "epoch": 1.775173848765263, "grad_norm": 0.1172395870089531, "learning_rate": 2.2682011762320017e-05, "loss": 0.2021, "step": 32420 }, { "epoch": 1.7754476263483547, "grad_norm": 0.12171515077352524, "learning_rate": 2.2676941796795784e-05, "loss": 0.1994, "step": 32425 }, { "epoch": 1.775721403931446, "grad_norm": 0.12742404639720917, "learning_rate": 2.2671871831271547e-05, "loss": 0.2014, "step": 32430 }, { "epoch": 1.7759951815145376, "grad_norm": 0.13062748312950134, "learning_rate": 2.2666801865747314e-05, "loss": 0.1958, "step": 32435 }, { "epoch": 1.776268959097629, "grad_norm": 0.14272227883338928, "learning_rate": 2.266173190022308e-05, "loss": 0.1961, "step": 32440 }, { "epoch": 1.7765427366807205, "grad_norm": 0.11627557128667831, "learning_rate": 2.2656661934698844e-05, "loss": 0.1963, "step": 32445 }, { "epoch": 1.7768165142638122, "grad_norm": 0.13791486620903015, "learning_rate": 2.265159196917461e-05, "loss": 0.2042, "step": 32450 }, { "epoch": 1.7770902918469036, "grad_norm": 0.1124412789940834, "learning_rate": 2.2646522003650374e-05, "loss": 0.2009, "step": 32455 }, { "epoch": 1.777364069429995, "grad_norm": 0.1250070482492447, "learning_rate": 2.2641452038126144e-05, "loss": 0.2103, "step": 32460 }, { "epoch": 1.7776378470130867, "grad_norm": 0.14368025958538055, "learning_rate": 2.2636382072601908e-05, "loss": 0.2017, "step": 32465 }, { "epoch": 1.777911624596178, "grad_norm": 0.14160259068012238, "learning_rate": 2.2631312107077674e-05, "loss": 0.2044, "step": 32470 }, { "epoch": 1.7781854021792696, "grad_norm": 0.12050865590572357, "learning_rate": 2.2626242141553438e-05, "loss": 0.2126, "step": 32475 }, { "epoch": 1.778459179762361, "grad_norm": 0.12221486121416092, "learning_rate": 2.2621172176029204e-05, "loss": 0.1942, "step": 32480 }, { "epoch": 1.7787329573454524, "grad_norm": 0.14283454418182373, "learning_rate": 2.2616102210504968e-05, "loss": 0.2093, "step": 32485 }, { "epoch": 1.779006734928544, "grad_norm": 0.12006863206624985, "learning_rate": 2.2611032244980734e-05, "loss": 0.1936, "step": 32490 }, { "epoch": 1.7792805125116355, "grad_norm": 0.12604324519634247, "learning_rate": 2.26059622794565e-05, "loss": 0.2046, "step": 32495 }, { "epoch": 1.779554290094727, "grad_norm": 0.12048667669296265, "learning_rate": 2.2600892313932268e-05, "loss": 0.1974, "step": 32500 }, { "epoch": 1.7798280676778186, "grad_norm": 0.10148011893033981, "learning_rate": 2.2595822348408034e-05, "loss": 0.2002, "step": 32505 }, { "epoch": 1.78010184526091, "grad_norm": 0.12680009007453918, "learning_rate": 2.2590752382883798e-05, "loss": 0.2023, "step": 32510 }, { "epoch": 1.7803756228440015, "grad_norm": 0.1123206615447998, "learning_rate": 2.2585682417359564e-05, "loss": 0.2022, "step": 32515 }, { "epoch": 1.7806494004270932, "grad_norm": 0.11825510114431381, "learning_rate": 2.2580612451835328e-05, "loss": 0.1969, "step": 32520 }, { "epoch": 1.7809231780101844, "grad_norm": 0.10713184624910355, "learning_rate": 2.2575542486311094e-05, "loss": 0.1945, "step": 32525 }, { "epoch": 1.781196955593276, "grad_norm": 0.1142108216881752, "learning_rate": 2.2570472520786858e-05, "loss": 0.1942, "step": 32530 }, { "epoch": 1.7814707331763675, "grad_norm": 0.1147855669260025, "learning_rate": 2.2565402555262624e-05, "loss": 0.196, "step": 32535 }, { "epoch": 1.781744510759459, "grad_norm": 0.15848584473133087, "learning_rate": 2.256033258973839e-05, "loss": 0.1971, "step": 32540 }, { "epoch": 1.7820182883425506, "grad_norm": 0.13103972375392914, "learning_rate": 2.2555262624214158e-05, "loss": 0.1913, "step": 32545 }, { "epoch": 1.782292065925642, "grad_norm": 0.12534360587596893, "learning_rate": 2.255019265868992e-05, "loss": 0.2035, "step": 32550 }, { "epoch": 1.7825658435087335, "grad_norm": 0.14305907487869263, "learning_rate": 2.2545122693165688e-05, "loss": 0.207, "step": 32555 }, { "epoch": 1.7828396210918251, "grad_norm": 0.11795849353075027, "learning_rate": 2.2540052727641454e-05, "loss": 0.1953, "step": 32560 }, { "epoch": 1.7831133986749164, "grad_norm": 0.1097889095544815, "learning_rate": 2.2534982762117218e-05, "loss": 0.2022, "step": 32565 }, { "epoch": 1.783387176258008, "grad_norm": 0.10694356262683868, "learning_rate": 2.2529912796592984e-05, "loss": 0.1938, "step": 32570 }, { "epoch": 1.7836609538410995, "grad_norm": 0.14676831662654877, "learning_rate": 2.2524842831068748e-05, "loss": 0.1942, "step": 32575 }, { "epoch": 1.783934731424191, "grad_norm": 0.11330258846282959, "learning_rate": 2.2519772865544518e-05, "loss": 0.1977, "step": 32580 }, { "epoch": 1.7842085090072826, "grad_norm": 0.1094248816370964, "learning_rate": 2.251470290002028e-05, "loss": 0.1971, "step": 32585 }, { "epoch": 1.784482286590374, "grad_norm": 0.14072659611701965, "learning_rate": 2.2509632934496048e-05, "loss": 0.1996, "step": 32590 }, { "epoch": 1.7847560641734654, "grad_norm": 0.12062890827655792, "learning_rate": 2.250456296897181e-05, "loss": 0.209, "step": 32595 }, { "epoch": 1.785029841756557, "grad_norm": 0.11228038370609283, "learning_rate": 2.2499493003447578e-05, "loss": 0.2063, "step": 32600 }, { "epoch": 1.7853036193396483, "grad_norm": 0.134358748793602, "learning_rate": 2.2494423037923344e-05, "loss": 0.1993, "step": 32605 }, { "epoch": 1.78557739692274, "grad_norm": 0.1418728530406952, "learning_rate": 2.2489353072399108e-05, "loss": 0.1989, "step": 32610 }, { "epoch": 1.7858511745058314, "grad_norm": 0.11868695169687271, "learning_rate": 2.2484283106874874e-05, "loss": 0.1975, "step": 32615 }, { "epoch": 1.7861249520889229, "grad_norm": 0.13678322732448578, "learning_rate": 2.247921314135064e-05, "loss": 0.2107, "step": 32620 }, { "epoch": 1.7863987296720145, "grad_norm": 0.11284966766834259, "learning_rate": 2.2474143175826408e-05, "loss": 0.1971, "step": 32625 }, { "epoch": 1.786672507255106, "grad_norm": 0.12107985466718674, "learning_rate": 2.246907321030217e-05, "loss": 0.2068, "step": 32630 }, { "epoch": 1.7869462848381974, "grad_norm": 0.14520137012004852, "learning_rate": 2.2464003244777938e-05, "loss": 0.2066, "step": 32635 }, { "epoch": 1.787220062421289, "grad_norm": 0.13085806369781494, "learning_rate": 2.24589332792537e-05, "loss": 0.2051, "step": 32640 }, { "epoch": 1.7874938400043805, "grad_norm": 0.10947894304990768, "learning_rate": 2.2453863313729468e-05, "loss": 0.2084, "step": 32645 }, { "epoch": 1.787767617587472, "grad_norm": 0.12902282178401947, "learning_rate": 2.244879334820523e-05, "loss": 0.2006, "step": 32650 }, { "epoch": 1.7880413951705636, "grad_norm": 0.12890464067459106, "learning_rate": 2.2443723382680998e-05, "loss": 0.2038, "step": 32655 }, { "epoch": 1.7883151727536548, "grad_norm": 0.13598786294460297, "learning_rate": 2.2438653417156765e-05, "loss": 0.1974, "step": 32660 }, { "epoch": 1.7885889503367465, "grad_norm": 0.12792427837848663, "learning_rate": 2.243358345163253e-05, "loss": 0.1933, "step": 32665 }, { "epoch": 1.788862727919838, "grad_norm": 0.11857441067695618, "learning_rate": 2.2428513486108298e-05, "loss": 0.2036, "step": 32670 }, { "epoch": 1.7891365055029294, "grad_norm": 0.12368114292621613, "learning_rate": 2.242344352058406e-05, "loss": 0.2053, "step": 32675 }, { "epoch": 1.789410283086021, "grad_norm": 0.11420268565416336, "learning_rate": 2.2418373555059828e-05, "loss": 0.2049, "step": 32680 }, { "epoch": 1.7896840606691125, "grad_norm": 0.11496621370315552, "learning_rate": 2.241330358953559e-05, "loss": 0.1963, "step": 32685 }, { "epoch": 1.789957838252204, "grad_norm": 0.11706764250993729, "learning_rate": 2.2408233624011358e-05, "loss": 0.2064, "step": 32690 }, { "epoch": 1.7902316158352956, "grad_norm": 0.10568401962518692, "learning_rate": 2.240316365848712e-05, "loss": 0.1925, "step": 32695 }, { "epoch": 1.7905053934183868, "grad_norm": 0.11568376421928406, "learning_rate": 2.2398093692962888e-05, "loss": 0.195, "step": 32700 }, { "epoch": 1.7907791710014784, "grad_norm": 0.10675917565822601, "learning_rate": 2.2393023727438655e-05, "loss": 0.1985, "step": 32705 }, { "epoch": 1.7910529485845699, "grad_norm": 0.11117235571146011, "learning_rate": 2.238795376191442e-05, "loss": 0.1914, "step": 32710 }, { "epoch": 1.7913267261676613, "grad_norm": 0.11773569136857986, "learning_rate": 2.2382883796390185e-05, "loss": 0.2036, "step": 32715 }, { "epoch": 1.791600503750753, "grad_norm": 0.12721307575702667, "learning_rate": 2.237781383086595e-05, "loss": 0.2024, "step": 32720 }, { "epoch": 1.7918742813338444, "grad_norm": 0.11009059101343155, "learning_rate": 2.2372743865341718e-05, "loss": 0.2054, "step": 32725 }, { "epoch": 1.7921480589169358, "grad_norm": 0.12449080497026443, "learning_rate": 2.236767389981748e-05, "loss": 0.1896, "step": 32730 }, { "epoch": 1.7924218365000275, "grad_norm": 0.11431048810482025, "learning_rate": 2.2362603934293248e-05, "loss": 0.1978, "step": 32735 }, { "epoch": 1.7926956140831187, "grad_norm": 0.11886098235845566, "learning_rate": 2.235753396876901e-05, "loss": 0.2, "step": 32740 }, { "epoch": 1.7929693916662104, "grad_norm": 0.1317242532968521, "learning_rate": 2.235246400324478e-05, "loss": 0.2072, "step": 32745 }, { "epoch": 1.7932431692493018, "grad_norm": 0.11246499419212341, "learning_rate": 2.2347394037720545e-05, "loss": 0.2041, "step": 32750 }, { "epoch": 1.7935169468323933, "grad_norm": 0.1378512978553772, "learning_rate": 2.234232407219631e-05, "loss": 0.1944, "step": 32755 }, { "epoch": 1.793790724415485, "grad_norm": 0.14954149723052979, "learning_rate": 2.2337254106672075e-05, "loss": 0.2074, "step": 32760 }, { "epoch": 1.7940645019985764, "grad_norm": 0.13094764947891235, "learning_rate": 2.233218414114784e-05, "loss": 0.1963, "step": 32765 }, { "epoch": 1.7943382795816678, "grad_norm": 0.11218399554491043, "learning_rate": 2.2327114175623605e-05, "loss": 0.2012, "step": 32770 }, { "epoch": 1.7946120571647595, "grad_norm": 0.15380632877349854, "learning_rate": 2.232204421009937e-05, "loss": 0.2161, "step": 32775 }, { "epoch": 1.7948858347478507, "grad_norm": 0.13458387553691864, "learning_rate": 2.2316974244575138e-05, "loss": 0.2036, "step": 32780 }, { "epoch": 1.7951596123309423, "grad_norm": 0.11030180752277374, "learning_rate": 2.2311904279050905e-05, "loss": 0.1957, "step": 32785 }, { "epoch": 1.7954333899140338, "grad_norm": 0.11061108857393265, "learning_rate": 2.230683431352667e-05, "loss": 0.2003, "step": 32790 }, { "epoch": 1.7957071674971252, "grad_norm": 0.12286912649869919, "learning_rate": 2.2301764348002435e-05, "loss": 0.2168, "step": 32795 }, { "epoch": 1.7959809450802169, "grad_norm": 0.11177864670753479, "learning_rate": 2.22966943824782e-05, "loss": 0.1985, "step": 32800 }, { "epoch": 1.7962547226633083, "grad_norm": 0.14953552186489105, "learning_rate": 2.2291624416953965e-05, "loss": 0.2078, "step": 32805 }, { "epoch": 1.7965285002463998, "grad_norm": 0.12387030571699142, "learning_rate": 2.228655445142973e-05, "loss": 0.2003, "step": 32810 }, { "epoch": 1.7968022778294914, "grad_norm": 0.11777009814977646, "learning_rate": 2.2281484485905495e-05, "loss": 0.2054, "step": 32815 }, { "epoch": 1.7970760554125829, "grad_norm": 0.12894506752490997, "learning_rate": 2.227641452038126e-05, "loss": 0.1945, "step": 32820 }, { "epoch": 1.7973498329956743, "grad_norm": 0.13095320761203766, "learning_rate": 2.2271344554857028e-05, "loss": 0.2026, "step": 32825 }, { "epoch": 1.797623610578766, "grad_norm": 0.12050284445285797, "learning_rate": 2.2266274589332795e-05, "loss": 0.2001, "step": 32830 }, { "epoch": 1.7978973881618572, "grad_norm": 0.14280012249946594, "learning_rate": 2.2261204623808558e-05, "loss": 0.1986, "step": 32835 }, { "epoch": 1.7981711657449488, "grad_norm": 0.12449426203966141, "learning_rate": 2.2256134658284325e-05, "loss": 0.2041, "step": 32840 }, { "epoch": 1.7984449433280403, "grad_norm": 0.12880642712116241, "learning_rate": 2.225106469276009e-05, "loss": 0.1975, "step": 32845 }, { "epoch": 1.7987187209111317, "grad_norm": 0.1796945035457611, "learning_rate": 2.2245994727235855e-05, "loss": 0.199, "step": 32850 }, { "epoch": 1.7989924984942234, "grad_norm": 0.11193700134754181, "learning_rate": 2.224092476171162e-05, "loss": 0.1986, "step": 32855 }, { "epoch": 1.7992662760773148, "grad_norm": 0.11146838217973709, "learning_rate": 2.2235854796187385e-05, "loss": 0.1918, "step": 32860 }, { "epoch": 1.7995400536604063, "grad_norm": 0.11642026156187057, "learning_rate": 2.2230784830663155e-05, "loss": 0.2028, "step": 32865 }, { "epoch": 1.799813831243498, "grad_norm": 0.12405826151371002, "learning_rate": 2.2225714865138918e-05, "loss": 0.2018, "step": 32870 }, { "epoch": 1.8000876088265891, "grad_norm": 0.11274117231369019, "learning_rate": 2.2220644899614685e-05, "loss": 0.2028, "step": 32875 }, { "epoch": 1.8003613864096808, "grad_norm": 0.12060759961605072, "learning_rate": 2.2215574934090448e-05, "loss": 0.2078, "step": 32880 }, { "epoch": 1.8006351639927722, "grad_norm": 0.13178984820842743, "learning_rate": 2.2210504968566215e-05, "loss": 0.2046, "step": 32885 }, { "epoch": 1.8009089415758637, "grad_norm": 0.1234603077173233, "learning_rate": 2.220543500304198e-05, "loss": 0.2057, "step": 32890 }, { "epoch": 1.8011827191589553, "grad_norm": 0.10633989423513412, "learning_rate": 2.2200365037517745e-05, "loss": 0.1975, "step": 32895 }, { "epoch": 1.8014564967420468, "grad_norm": 0.11730607599020004, "learning_rate": 2.219529507199351e-05, "loss": 0.211, "step": 32900 }, { "epoch": 1.8017302743251382, "grad_norm": 0.12129252403974533, "learning_rate": 2.2190225106469278e-05, "loss": 0.1969, "step": 32905 }, { "epoch": 1.8020040519082299, "grad_norm": 0.10796985030174255, "learning_rate": 2.2185155140945045e-05, "loss": 0.2009, "step": 32910 }, { "epoch": 1.802277829491321, "grad_norm": 0.12111477553844452, "learning_rate": 2.2180085175420808e-05, "loss": 0.1969, "step": 32915 }, { "epoch": 1.8025516070744128, "grad_norm": 0.1251123696565628, "learning_rate": 2.2175015209896575e-05, "loss": 0.1961, "step": 32920 }, { "epoch": 1.8028253846575042, "grad_norm": 0.13571558892726898, "learning_rate": 2.2169945244372338e-05, "loss": 0.1919, "step": 32925 }, { "epoch": 1.8030991622405956, "grad_norm": 0.11519311368465424, "learning_rate": 2.2164875278848105e-05, "loss": 0.1973, "step": 32930 }, { "epoch": 1.8033729398236873, "grad_norm": 0.1433580219745636, "learning_rate": 2.2159805313323868e-05, "loss": 0.2032, "step": 32935 }, { "epoch": 1.8036467174067787, "grad_norm": 0.12016012519598007, "learning_rate": 2.2154735347799635e-05, "loss": 0.1993, "step": 32940 }, { "epoch": 1.8039204949898702, "grad_norm": 0.11835575848817825, "learning_rate": 2.21496653822754e-05, "loss": 0.1945, "step": 32945 }, { "epoch": 1.8041942725729618, "grad_norm": 0.10019023716449738, "learning_rate": 2.214459541675117e-05, "loss": 0.1927, "step": 32950 }, { "epoch": 1.8044680501560533, "grad_norm": 0.1423104703426361, "learning_rate": 2.2139525451226935e-05, "loss": 0.2023, "step": 32955 }, { "epoch": 1.8047418277391447, "grad_norm": 0.10456093400716782, "learning_rate": 2.21344554857027e-05, "loss": 0.2103, "step": 32960 }, { "epoch": 1.8050156053222364, "grad_norm": 0.11459831148386002, "learning_rate": 2.2129385520178465e-05, "loss": 0.1882, "step": 32965 }, { "epoch": 1.8052893829053276, "grad_norm": 0.11058877408504486, "learning_rate": 2.212431555465423e-05, "loss": 0.2028, "step": 32970 }, { "epoch": 1.8055631604884192, "grad_norm": 0.11422193050384521, "learning_rate": 2.2119245589129995e-05, "loss": 0.1944, "step": 32975 }, { "epoch": 1.8058369380715107, "grad_norm": 0.12022452056407928, "learning_rate": 2.211417562360576e-05, "loss": 0.206, "step": 32980 }, { "epoch": 1.8061107156546021, "grad_norm": 0.12174511700868607, "learning_rate": 2.2109105658081525e-05, "loss": 0.1938, "step": 32985 }, { "epoch": 1.8063844932376938, "grad_norm": 0.13593080639839172, "learning_rate": 2.210403569255729e-05, "loss": 0.1948, "step": 32990 }, { "epoch": 1.8066582708207852, "grad_norm": 0.12595242261886597, "learning_rate": 2.209896572703306e-05, "loss": 0.2146, "step": 32995 }, { "epoch": 1.8069320484038767, "grad_norm": 0.11754786223173141, "learning_rate": 2.209389576150882e-05, "loss": 0.2116, "step": 33000 }, { "epoch": 1.8072058259869683, "grad_norm": 0.09862277656793594, "learning_rate": 2.208882579598459e-05, "loss": 0.2003, "step": 33005 }, { "epoch": 1.8074796035700595, "grad_norm": 0.11103232204914093, "learning_rate": 2.2083755830460355e-05, "loss": 0.1959, "step": 33010 }, { "epoch": 1.8077533811531512, "grad_norm": 0.13871757686138153, "learning_rate": 2.207868586493612e-05, "loss": 0.2052, "step": 33015 }, { "epoch": 1.8080271587362426, "grad_norm": 0.10973524302244186, "learning_rate": 2.2073615899411885e-05, "loss": 0.2011, "step": 33020 }, { "epoch": 1.808300936319334, "grad_norm": 0.10652972012758255, "learning_rate": 2.206854593388765e-05, "loss": 0.1994, "step": 33025 }, { "epoch": 1.8085747139024257, "grad_norm": 0.10540200769901276, "learning_rate": 2.206347596836342e-05, "loss": 0.1951, "step": 33030 }, { "epoch": 1.8088484914855172, "grad_norm": 0.10320822149515152, "learning_rate": 2.2058406002839182e-05, "loss": 0.1911, "step": 33035 }, { "epoch": 1.8091222690686086, "grad_norm": 0.11364419013261795, "learning_rate": 2.205333603731495e-05, "loss": 0.1937, "step": 33040 }, { "epoch": 1.8093960466517003, "grad_norm": 0.1259901076555252, "learning_rate": 2.2048266071790712e-05, "loss": 0.1921, "step": 33045 }, { "epoch": 1.8096698242347915, "grad_norm": 0.11785919219255447, "learning_rate": 2.204319610626648e-05, "loss": 0.2037, "step": 33050 }, { "epoch": 1.8099436018178832, "grad_norm": 0.13897764682769775, "learning_rate": 2.2038126140742245e-05, "loss": 0.1946, "step": 33055 }, { "epoch": 1.8102173794009746, "grad_norm": 0.11541623622179031, "learning_rate": 2.203305617521801e-05, "loss": 0.2004, "step": 33060 }, { "epoch": 1.810491156984066, "grad_norm": 0.14172159135341644, "learning_rate": 2.2027986209693775e-05, "loss": 0.2067, "step": 33065 }, { "epoch": 1.8107649345671577, "grad_norm": 0.14792169630527496, "learning_rate": 2.2022916244169542e-05, "loss": 0.2013, "step": 33070 }, { "epoch": 1.8110387121502491, "grad_norm": 0.1510806828737259, "learning_rate": 2.201784627864531e-05, "loss": 0.202, "step": 33075 }, { "epoch": 1.8113124897333406, "grad_norm": 0.12449967861175537, "learning_rate": 2.2012776313121072e-05, "loss": 0.2001, "step": 33080 }, { "epoch": 1.8115862673164322, "grad_norm": 0.13092361390590668, "learning_rate": 2.200770634759684e-05, "loss": 0.1913, "step": 33085 }, { "epoch": 1.8118600448995237, "grad_norm": 0.11572759598493576, "learning_rate": 2.2002636382072602e-05, "loss": 0.2011, "step": 33090 }, { "epoch": 1.8121338224826151, "grad_norm": 0.12222190201282501, "learning_rate": 2.199756641654837e-05, "loss": 0.1986, "step": 33095 }, { "epoch": 1.8124076000657068, "grad_norm": 0.12475567311048508, "learning_rate": 2.1992496451024132e-05, "loss": 0.2, "step": 33100 }, { "epoch": 1.812681377648798, "grad_norm": 0.12066380679607391, "learning_rate": 2.19874264854999e-05, "loss": 0.2109, "step": 33105 }, { "epoch": 1.8129551552318897, "grad_norm": 0.11968192458152771, "learning_rate": 2.1982356519975665e-05, "loss": 0.2025, "step": 33110 }, { "epoch": 1.813228932814981, "grad_norm": 0.12042807787656784, "learning_rate": 2.1977286554451432e-05, "loss": 0.1904, "step": 33115 }, { "epoch": 1.8135027103980725, "grad_norm": 0.10723575204610825, "learning_rate": 2.19722165889272e-05, "loss": 0.1965, "step": 33120 }, { "epoch": 1.8137764879811642, "grad_norm": 0.10185207426548004, "learning_rate": 2.1967146623402962e-05, "loss": 0.1931, "step": 33125 }, { "epoch": 1.8140502655642556, "grad_norm": 0.12066283822059631, "learning_rate": 2.196207665787873e-05, "loss": 0.2004, "step": 33130 }, { "epoch": 1.814324043147347, "grad_norm": 0.12676744163036346, "learning_rate": 2.1957006692354492e-05, "loss": 0.204, "step": 33135 }, { "epoch": 1.8145978207304387, "grad_norm": 0.11236120015382767, "learning_rate": 2.195193672683026e-05, "loss": 0.1919, "step": 33140 }, { "epoch": 1.81487159831353, "grad_norm": 0.10112203657627106, "learning_rate": 2.1946866761306022e-05, "loss": 0.1964, "step": 33145 }, { "epoch": 1.8151453758966216, "grad_norm": 0.11900962144136429, "learning_rate": 2.1941796795781792e-05, "loss": 0.2043, "step": 33150 }, { "epoch": 1.815419153479713, "grad_norm": 0.10855615884065628, "learning_rate": 2.1936726830257555e-05, "loss": 0.2039, "step": 33155 }, { "epoch": 1.8156929310628045, "grad_norm": 0.11056850105524063, "learning_rate": 2.1931656864733322e-05, "loss": 0.1981, "step": 33160 }, { "epoch": 1.8159667086458962, "grad_norm": 0.12029948085546494, "learning_rate": 2.1926586899209085e-05, "loss": 0.2087, "step": 33165 }, { "epoch": 1.8162404862289876, "grad_norm": 0.11747217923402786, "learning_rate": 2.1921516933684852e-05, "loss": 0.2002, "step": 33170 }, { "epoch": 1.816514263812079, "grad_norm": 0.15391255915164948, "learning_rate": 2.191644696816062e-05, "loss": 0.2042, "step": 33175 }, { "epoch": 1.8167880413951707, "grad_norm": 0.13270708918571472, "learning_rate": 2.1911377002636382e-05, "loss": 0.1975, "step": 33180 }, { "epoch": 1.817061818978262, "grad_norm": 0.11221539229154587, "learning_rate": 2.190630703711215e-05, "loss": 0.1973, "step": 33185 }, { "epoch": 1.8173355965613536, "grad_norm": 0.11101441830396652, "learning_rate": 2.1901237071587915e-05, "loss": 0.2025, "step": 33190 }, { "epoch": 1.817609374144445, "grad_norm": 0.11589226871728897, "learning_rate": 2.1896167106063682e-05, "loss": 0.2011, "step": 33195 }, { "epoch": 1.8178831517275365, "grad_norm": 0.11775880306959152, "learning_rate": 2.1891097140539445e-05, "loss": 0.2025, "step": 33200 }, { "epoch": 1.8181569293106281, "grad_norm": 0.13049925863742828, "learning_rate": 2.1886027175015212e-05, "loss": 0.2007, "step": 33205 }, { "epoch": 1.8184307068937196, "grad_norm": 0.10921322554349899, "learning_rate": 2.1880957209490975e-05, "loss": 0.1933, "step": 33210 }, { "epoch": 1.818704484476811, "grad_norm": 0.10764483362436295, "learning_rate": 2.1875887243966742e-05, "loss": 0.1923, "step": 33215 }, { "epoch": 1.8189782620599027, "grad_norm": 0.11257178336381912, "learning_rate": 2.1870817278442505e-05, "loss": 0.1979, "step": 33220 }, { "epoch": 1.8192520396429939, "grad_norm": 0.1232302114367485, "learning_rate": 2.1865747312918272e-05, "loss": 0.2049, "step": 33225 }, { "epoch": 1.8195258172260855, "grad_norm": 0.12651510536670685, "learning_rate": 2.186067734739404e-05, "loss": 0.2034, "step": 33230 }, { "epoch": 1.8197995948091772, "grad_norm": 0.11193115264177322, "learning_rate": 2.1855607381869805e-05, "loss": 0.1996, "step": 33235 }, { "epoch": 1.8200733723922684, "grad_norm": 0.11456233263015747, "learning_rate": 2.1850537416345572e-05, "loss": 0.196, "step": 33240 }, { "epoch": 1.82034714997536, "grad_norm": 0.12274297326803207, "learning_rate": 2.1845467450821335e-05, "loss": 0.1997, "step": 33245 }, { "epoch": 1.8206209275584515, "grad_norm": 0.12349732965230942, "learning_rate": 2.1840397485297102e-05, "loss": 0.1996, "step": 33250 }, { "epoch": 1.820894705141543, "grad_norm": 0.11659226566553116, "learning_rate": 2.1835327519772865e-05, "loss": 0.2066, "step": 33255 }, { "epoch": 1.8211684827246346, "grad_norm": 0.10959985852241516, "learning_rate": 2.1830257554248632e-05, "loss": 0.2032, "step": 33260 }, { "epoch": 1.821442260307726, "grad_norm": 0.12160619348287582, "learning_rate": 2.1825187588724395e-05, "loss": 0.2013, "step": 33265 }, { "epoch": 1.8217160378908175, "grad_norm": 0.11251724511384964, "learning_rate": 2.1820117623200162e-05, "loss": 0.1959, "step": 33270 }, { "epoch": 1.8219898154739091, "grad_norm": 0.13194063305854797, "learning_rate": 2.181504765767593e-05, "loss": 0.2024, "step": 33275 }, { "epoch": 1.8222635930570004, "grad_norm": 0.12320002913475037, "learning_rate": 2.1809977692151696e-05, "loss": 0.1906, "step": 33280 }, { "epoch": 1.822537370640092, "grad_norm": 0.11908911913633347, "learning_rate": 2.180490772662746e-05, "loss": 0.1983, "step": 33285 }, { "epoch": 1.8228111482231835, "grad_norm": 0.12274731695652008, "learning_rate": 2.1799837761103225e-05, "loss": 0.1948, "step": 33290 }, { "epoch": 1.823084925806275, "grad_norm": 0.1095125824213028, "learning_rate": 2.1794767795578992e-05, "loss": 0.1967, "step": 33295 }, { "epoch": 1.8233587033893666, "grad_norm": 0.12234331667423248, "learning_rate": 2.1789697830054755e-05, "loss": 0.2087, "step": 33300 }, { "epoch": 1.823632480972458, "grad_norm": 0.11950350552797318, "learning_rate": 2.1784627864530522e-05, "loss": 0.1977, "step": 33305 }, { "epoch": 1.8239062585555494, "grad_norm": 0.14231395721435547, "learning_rate": 2.1779557899006285e-05, "loss": 0.2039, "step": 33310 }, { "epoch": 1.824180036138641, "grad_norm": 0.10738018155097961, "learning_rate": 2.1774487933482056e-05, "loss": 0.1975, "step": 33315 }, { "epoch": 1.8244538137217323, "grad_norm": 0.127175971865654, "learning_rate": 2.176941796795782e-05, "loss": 0.208, "step": 33320 }, { "epoch": 1.824727591304824, "grad_norm": 0.11944303661584854, "learning_rate": 2.1764348002433586e-05, "loss": 0.2026, "step": 33325 }, { "epoch": 1.8250013688879154, "grad_norm": 0.12083374708890915, "learning_rate": 2.175927803690935e-05, "loss": 0.2033, "step": 33330 }, { "epoch": 1.8252751464710069, "grad_norm": 0.13126643002033234, "learning_rate": 2.1754208071385116e-05, "loss": 0.2002, "step": 33335 }, { "epoch": 1.8255489240540985, "grad_norm": 0.11737839132547379, "learning_rate": 2.1749138105860882e-05, "loss": 0.2008, "step": 33340 }, { "epoch": 1.82582270163719, "grad_norm": 0.12453878670930862, "learning_rate": 2.1744068140336646e-05, "loss": 0.2014, "step": 33345 }, { "epoch": 1.8260964792202814, "grad_norm": 0.13716866075992584, "learning_rate": 2.1738998174812412e-05, "loss": 0.2054, "step": 33350 }, { "epoch": 1.826370256803373, "grad_norm": 0.12883011996746063, "learning_rate": 2.173392820928818e-05, "loss": 0.1973, "step": 33355 }, { "epoch": 1.8266440343864643, "grad_norm": 0.12313535064458847, "learning_rate": 2.1728858243763946e-05, "loss": 0.204, "step": 33360 }, { "epoch": 1.826917811969556, "grad_norm": 0.11397675424814224, "learning_rate": 2.172378827823971e-05, "loss": 0.1987, "step": 33365 }, { "epoch": 1.8271915895526474, "grad_norm": 0.11105111986398697, "learning_rate": 2.1718718312715476e-05, "loss": 0.1931, "step": 33370 }, { "epoch": 1.8274653671357388, "grad_norm": 0.12185043096542358, "learning_rate": 2.171364834719124e-05, "loss": 0.1979, "step": 33375 }, { "epoch": 1.8277391447188305, "grad_norm": 0.12850746512413025, "learning_rate": 2.1708578381667006e-05, "loss": 0.1973, "step": 33380 }, { "epoch": 1.828012922301922, "grad_norm": 0.1083001121878624, "learning_rate": 2.170350841614277e-05, "loss": 0.1976, "step": 33385 }, { "epoch": 1.8282866998850134, "grad_norm": 0.11213298887014389, "learning_rate": 2.1698438450618536e-05, "loss": 0.2011, "step": 33390 }, { "epoch": 1.828560477468105, "grad_norm": 0.1228611171245575, "learning_rate": 2.1693368485094302e-05, "loss": 0.2043, "step": 33395 }, { "epoch": 1.8288342550511965, "grad_norm": 0.14332202076911926, "learning_rate": 2.168829851957007e-05, "loss": 0.1995, "step": 33400 }, { "epoch": 1.829108032634288, "grad_norm": 0.11254194378852844, "learning_rate": 2.1683228554045836e-05, "loss": 0.1995, "step": 33405 }, { "epoch": 1.8293818102173796, "grad_norm": 0.12946508824825287, "learning_rate": 2.16781585885216e-05, "loss": 0.204, "step": 33410 }, { "epoch": 1.8296555878004708, "grad_norm": 0.10428696125745773, "learning_rate": 2.1673088622997366e-05, "loss": 0.2031, "step": 33415 }, { "epoch": 1.8299293653835624, "grad_norm": 0.11803123354911804, "learning_rate": 2.166801865747313e-05, "loss": 0.1903, "step": 33420 }, { "epoch": 1.8302031429666539, "grad_norm": 0.11437022686004639, "learning_rate": 2.1662948691948896e-05, "loss": 0.2025, "step": 33425 }, { "epoch": 1.8304769205497453, "grad_norm": 0.11765769124031067, "learning_rate": 2.165787872642466e-05, "loss": 0.1987, "step": 33430 }, { "epoch": 1.830750698132837, "grad_norm": 0.10314898192882538, "learning_rate": 2.165280876090043e-05, "loss": 0.1905, "step": 33435 }, { "epoch": 1.8310244757159284, "grad_norm": 0.11530479788780212, "learning_rate": 2.1647738795376192e-05, "loss": 0.1996, "step": 33440 }, { "epoch": 1.8312982532990199, "grad_norm": 0.1302480846643448, "learning_rate": 2.164266882985196e-05, "loss": 0.2102, "step": 33445 }, { "epoch": 1.8315720308821115, "grad_norm": 0.11808254569768906, "learning_rate": 2.1637598864327722e-05, "loss": 0.1946, "step": 33450 }, { "epoch": 1.8318458084652027, "grad_norm": 0.11054763197898865, "learning_rate": 2.163252889880349e-05, "loss": 0.2056, "step": 33455 }, { "epoch": 1.8321195860482944, "grad_norm": 0.14058344066143036, "learning_rate": 2.1627458933279256e-05, "loss": 0.2017, "step": 33460 }, { "epoch": 1.8323933636313858, "grad_norm": 0.13019369542598724, "learning_rate": 2.162238896775502e-05, "loss": 0.2003, "step": 33465 }, { "epoch": 1.8326671412144773, "grad_norm": 0.1170351579785347, "learning_rate": 2.1617319002230786e-05, "loss": 0.2023, "step": 33470 }, { "epoch": 1.832940918797569, "grad_norm": 0.13131871819496155, "learning_rate": 2.1612249036706552e-05, "loss": 0.2014, "step": 33475 }, { "epoch": 1.8332146963806604, "grad_norm": 0.10107412934303284, "learning_rate": 2.160717907118232e-05, "loss": 0.2013, "step": 33480 }, { "epoch": 1.8334884739637518, "grad_norm": 0.11625600606203079, "learning_rate": 2.1602109105658082e-05, "loss": 0.2032, "step": 33485 }, { "epoch": 1.8337622515468435, "grad_norm": 0.1061805710196495, "learning_rate": 2.159703914013385e-05, "loss": 0.2041, "step": 33490 }, { "epoch": 1.8340360291299347, "grad_norm": 0.11413567513227463, "learning_rate": 2.1591969174609612e-05, "loss": 0.1966, "step": 33495 }, { "epoch": 1.8343098067130263, "grad_norm": 0.1366979032754898, "learning_rate": 2.158689920908538e-05, "loss": 0.2072, "step": 33500 }, { "epoch": 1.8345835842961178, "grad_norm": 0.1229783371090889, "learning_rate": 2.1581829243561142e-05, "loss": 0.2117, "step": 33505 }, { "epoch": 1.8348573618792092, "grad_norm": 0.12414026260375977, "learning_rate": 2.157675927803691e-05, "loss": 0.2129, "step": 33510 }, { "epoch": 1.8351311394623009, "grad_norm": 0.1385430097579956, "learning_rate": 2.1571689312512676e-05, "loss": 0.1976, "step": 33515 }, { "epoch": 1.8354049170453923, "grad_norm": 0.11759444326162338, "learning_rate": 2.1566619346988443e-05, "loss": 0.2015, "step": 33520 }, { "epoch": 1.8356786946284838, "grad_norm": 0.12233404815196991, "learning_rate": 2.156154938146421e-05, "loss": 0.1983, "step": 33525 }, { "epoch": 1.8359524722115754, "grad_norm": 0.1100861057639122, "learning_rate": 2.1556479415939973e-05, "loss": 0.1953, "step": 33530 }, { "epoch": 1.8362262497946669, "grad_norm": 0.10626519471406937, "learning_rate": 2.155140945041574e-05, "loss": 0.1921, "step": 33535 }, { "epoch": 1.8365000273777583, "grad_norm": 0.09906193614006042, "learning_rate": 2.1546339484891503e-05, "loss": 0.1947, "step": 33540 }, { "epoch": 1.83677380496085, "grad_norm": 0.11240232735872269, "learning_rate": 2.154126951936727e-05, "loss": 0.2017, "step": 33545 }, { "epoch": 1.8370475825439412, "grad_norm": 0.12329424917697906, "learning_rate": 2.1536199553843033e-05, "loss": 0.2003, "step": 33550 }, { "epoch": 1.8373213601270328, "grad_norm": 0.1270771473646164, "learning_rate": 2.15311295883188e-05, "loss": 0.2026, "step": 33555 }, { "epoch": 1.8375951377101243, "grad_norm": 0.12330891937017441, "learning_rate": 2.1526059622794566e-05, "loss": 0.2037, "step": 33560 }, { "epoch": 1.8378689152932157, "grad_norm": 0.1328703612089157, "learning_rate": 2.1520989657270333e-05, "loss": 0.2063, "step": 33565 }, { "epoch": 1.8381426928763074, "grad_norm": 0.11874266713857651, "learning_rate": 2.1515919691746096e-05, "loss": 0.2015, "step": 33570 }, { "epoch": 1.8384164704593988, "grad_norm": 0.12662775814533234, "learning_rate": 2.1510849726221863e-05, "loss": 0.2012, "step": 33575 }, { "epoch": 1.8386902480424903, "grad_norm": 0.12124388664960861, "learning_rate": 2.150577976069763e-05, "loss": 0.1918, "step": 33580 }, { "epoch": 1.838964025625582, "grad_norm": 0.12026581913232803, "learning_rate": 2.1500709795173393e-05, "loss": 0.1937, "step": 33585 }, { "epoch": 1.8392378032086731, "grad_norm": 0.11147283017635345, "learning_rate": 2.149563982964916e-05, "loss": 0.195, "step": 33590 }, { "epoch": 1.8395115807917648, "grad_norm": 0.1365542858839035, "learning_rate": 2.1490569864124923e-05, "loss": 0.1988, "step": 33595 }, { "epoch": 1.8397853583748562, "grad_norm": 0.10421416163444519, "learning_rate": 2.1485499898600693e-05, "loss": 0.1971, "step": 33600 }, { "epoch": 1.8400591359579477, "grad_norm": 0.12216243147850037, "learning_rate": 2.1480429933076456e-05, "loss": 0.2084, "step": 33605 }, { "epoch": 1.8403329135410393, "grad_norm": 0.1126723513007164, "learning_rate": 2.1475359967552223e-05, "loss": 0.1966, "step": 33610 }, { "epoch": 1.8406066911241308, "grad_norm": 0.11475957185029984, "learning_rate": 2.1470290002027986e-05, "loss": 0.2061, "step": 33615 }, { "epoch": 1.8408804687072222, "grad_norm": 0.11500602215528488, "learning_rate": 2.1465220036503753e-05, "loss": 0.2026, "step": 33620 }, { "epoch": 1.8411542462903139, "grad_norm": 0.12316787987947464, "learning_rate": 2.146015007097952e-05, "loss": 0.2073, "step": 33625 }, { "epoch": 1.841428023873405, "grad_norm": 0.11274367570877075, "learning_rate": 2.1455080105455283e-05, "loss": 0.2043, "step": 33630 }, { "epoch": 1.8417018014564968, "grad_norm": 0.11418585479259491, "learning_rate": 2.145001013993105e-05, "loss": 0.211, "step": 33635 }, { "epoch": 1.8419755790395882, "grad_norm": 0.12472067028284073, "learning_rate": 2.1444940174406816e-05, "loss": 0.2059, "step": 33640 }, { "epoch": 1.8422493566226796, "grad_norm": 0.15210330486297607, "learning_rate": 2.1439870208882583e-05, "loss": 0.206, "step": 33645 }, { "epoch": 1.8425231342057713, "grad_norm": 0.11722112447023392, "learning_rate": 2.1434800243358346e-05, "loss": 0.1929, "step": 33650 }, { "epoch": 1.8427969117888627, "grad_norm": 0.11743392795324326, "learning_rate": 2.1429730277834113e-05, "loss": 0.1981, "step": 33655 }, { "epoch": 1.8430706893719542, "grad_norm": 0.1250438094139099, "learning_rate": 2.1424660312309876e-05, "loss": 0.1968, "step": 33660 }, { "epoch": 1.8433444669550458, "grad_norm": 0.1339370161294937, "learning_rate": 2.1419590346785643e-05, "loss": 0.1906, "step": 33665 }, { "epoch": 1.843618244538137, "grad_norm": 0.12706497311592102, "learning_rate": 2.1414520381261406e-05, "loss": 0.2035, "step": 33670 }, { "epoch": 1.8438920221212287, "grad_norm": 0.1197180226445198, "learning_rate": 2.1409450415737173e-05, "loss": 0.1982, "step": 33675 }, { "epoch": 1.8441657997043204, "grad_norm": 0.13085265457630157, "learning_rate": 2.140438045021294e-05, "loss": 0.2065, "step": 33680 }, { "epoch": 1.8444395772874116, "grad_norm": 0.13028614223003387, "learning_rate": 2.1399310484688706e-05, "loss": 0.1999, "step": 33685 }, { "epoch": 1.8447133548705033, "grad_norm": 0.13076433539390564, "learning_rate": 2.1394240519164473e-05, "loss": 0.2088, "step": 33690 }, { "epoch": 1.8449871324535947, "grad_norm": 0.12255417555570602, "learning_rate": 2.1389170553640236e-05, "loss": 0.2034, "step": 33695 }, { "epoch": 1.8452609100366861, "grad_norm": 0.10990981012582779, "learning_rate": 2.1384100588116003e-05, "loss": 0.1995, "step": 33700 }, { "epoch": 1.8455346876197778, "grad_norm": 0.11527310311794281, "learning_rate": 2.1379030622591766e-05, "loss": 0.1923, "step": 33705 }, { "epoch": 1.8458084652028692, "grad_norm": 0.11444005370140076, "learning_rate": 2.1373960657067533e-05, "loss": 0.2034, "step": 33710 }, { "epoch": 1.8460822427859607, "grad_norm": 0.10437353700399399, "learning_rate": 2.1368890691543296e-05, "loss": 0.1901, "step": 33715 }, { "epoch": 1.8463560203690523, "grad_norm": 0.11955393105745316, "learning_rate": 2.1363820726019066e-05, "loss": 0.2042, "step": 33720 }, { "epoch": 1.8466297979521435, "grad_norm": 0.1325261890888214, "learning_rate": 2.135875076049483e-05, "loss": 0.2089, "step": 33725 }, { "epoch": 1.8469035755352352, "grad_norm": 0.13629314303398132, "learning_rate": 2.1353680794970596e-05, "loss": 0.2113, "step": 33730 }, { "epoch": 1.8471773531183266, "grad_norm": 0.1113002672791481, "learning_rate": 2.134861082944636e-05, "loss": 0.1963, "step": 33735 }, { "epoch": 1.847451130701418, "grad_norm": 0.1306009143590927, "learning_rate": 2.1343540863922126e-05, "loss": 0.1961, "step": 33740 }, { "epoch": 1.8477249082845097, "grad_norm": 0.12746000289916992, "learning_rate": 2.1338470898397893e-05, "loss": 0.2078, "step": 33745 }, { "epoch": 1.8479986858676012, "grad_norm": 0.10521456599235535, "learning_rate": 2.1333400932873656e-05, "loss": 0.1949, "step": 33750 }, { "epoch": 1.8482724634506926, "grad_norm": 0.12080404907464981, "learning_rate": 2.1328330967349423e-05, "loss": 0.203, "step": 33755 }, { "epoch": 1.8485462410337843, "grad_norm": 0.1196846291422844, "learning_rate": 2.132326100182519e-05, "loss": 0.2001, "step": 33760 }, { "epoch": 1.8488200186168755, "grad_norm": 0.10310515016317368, "learning_rate": 2.1318191036300956e-05, "loss": 0.1995, "step": 33765 }, { "epoch": 1.8490937961999672, "grad_norm": 0.11440684646368027, "learning_rate": 2.131312107077672e-05, "loss": 0.1955, "step": 33770 }, { "epoch": 1.8493675737830586, "grad_norm": 0.12411879003047943, "learning_rate": 2.1308051105252486e-05, "loss": 0.2036, "step": 33775 }, { "epoch": 1.84964135136615, "grad_norm": 0.11521293967962265, "learning_rate": 2.130298113972825e-05, "loss": 0.203, "step": 33780 }, { "epoch": 1.8499151289492417, "grad_norm": 0.12936419248580933, "learning_rate": 2.1297911174204016e-05, "loss": 0.207, "step": 33785 }, { "epoch": 1.8501889065323331, "grad_norm": 0.11144011467695236, "learning_rate": 2.129284120867978e-05, "loss": 0.1965, "step": 33790 }, { "epoch": 1.8504626841154246, "grad_norm": 0.14054150879383087, "learning_rate": 2.1287771243155546e-05, "loss": 0.2078, "step": 33795 }, { "epoch": 1.8507364616985162, "grad_norm": 0.12386246025562286, "learning_rate": 2.1282701277631313e-05, "loss": 0.1952, "step": 33800 }, { "epoch": 1.8510102392816075, "grad_norm": 0.12129449844360352, "learning_rate": 2.127763131210708e-05, "loss": 0.2034, "step": 33805 }, { "epoch": 1.8512840168646991, "grad_norm": 0.14616718888282776, "learning_rate": 2.1272561346582846e-05, "loss": 0.1938, "step": 33810 }, { "epoch": 1.8515577944477906, "grad_norm": 0.13531909883022308, "learning_rate": 2.126749138105861e-05, "loss": 0.2095, "step": 33815 }, { "epoch": 1.851831572030882, "grad_norm": 0.1210315153002739, "learning_rate": 2.1262421415534376e-05, "loss": 0.2001, "step": 33820 }, { "epoch": 1.8521053496139737, "grad_norm": 0.11876390874385834, "learning_rate": 2.125735145001014e-05, "loss": 0.2042, "step": 33825 }, { "epoch": 1.852379127197065, "grad_norm": 0.13386911153793335, "learning_rate": 2.1252281484485906e-05, "loss": 0.1977, "step": 33830 }, { "epoch": 1.8526529047801565, "grad_norm": 0.11675062030553818, "learning_rate": 2.124721151896167e-05, "loss": 0.1899, "step": 33835 }, { "epoch": 1.8529266823632482, "grad_norm": 0.10612107068300247, "learning_rate": 2.1242141553437436e-05, "loss": 0.1927, "step": 33840 }, { "epoch": 1.8532004599463396, "grad_norm": 0.12050322443246841, "learning_rate": 2.1237071587913203e-05, "loss": 0.1976, "step": 33845 }, { "epoch": 1.853474237529431, "grad_norm": 0.11080314964056015, "learning_rate": 2.123200162238897e-05, "loss": 0.1972, "step": 33850 }, { "epoch": 1.8537480151125227, "grad_norm": 0.12591727077960968, "learning_rate": 2.1226931656864736e-05, "loss": 0.2031, "step": 33855 }, { "epoch": 1.854021792695614, "grad_norm": 0.12926432490348816, "learning_rate": 2.12218616913405e-05, "loss": 0.2029, "step": 33860 }, { "epoch": 1.8542955702787056, "grad_norm": 0.1182507798075676, "learning_rate": 2.1216791725816266e-05, "loss": 0.2076, "step": 33865 }, { "epoch": 1.854569347861797, "grad_norm": 0.1209491491317749, "learning_rate": 2.121172176029203e-05, "loss": 0.2059, "step": 33870 }, { "epoch": 1.8548431254448885, "grad_norm": 0.13114133477210999, "learning_rate": 2.1206651794767796e-05, "loss": 0.2046, "step": 33875 }, { "epoch": 1.8551169030279802, "grad_norm": 0.10392335802316666, "learning_rate": 2.120158182924356e-05, "loss": 0.1933, "step": 33880 }, { "epoch": 1.8553906806110716, "grad_norm": 0.12499808520078659, "learning_rate": 2.119651186371933e-05, "loss": 0.1986, "step": 33885 }, { "epoch": 1.855664458194163, "grad_norm": 0.11674756556749344, "learning_rate": 2.1191441898195093e-05, "loss": 0.1984, "step": 33890 }, { "epoch": 1.8559382357772547, "grad_norm": 0.11119252443313599, "learning_rate": 2.118637193267086e-05, "loss": 0.1945, "step": 33895 }, { "epoch": 1.856212013360346, "grad_norm": 0.12448236346244812, "learning_rate": 2.1181301967146623e-05, "loss": 0.2041, "step": 33900 }, { "epoch": 1.8564857909434376, "grad_norm": 0.11560377478599548, "learning_rate": 2.117623200162239e-05, "loss": 0.1995, "step": 33905 }, { "epoch": 1.856759568526529, "grad_norm": 0.12775471806526184, "learning_rate": 2.1171162036098156e-05, "loss": 0.1919, "step": 33910 }, { "epoch": 1.8570333461096205, "grad_norm": 0.13238507509231567, "learning_rate": 2.116609207057392e-05, "loss": 0.2123, "step": 33915 }, { "epoch": 1.8573071236927121, "grad_norm": 0.1352362036705017, "learning_rate": 2.1161022105049686e-05, "loss": 0.2009, "step": 33920 }, { "epoch": 1.8575809012758036, "grad_norm": 0.15908126533031464, "learning_rate": 2.1155952139525453e-05, "loss": 0.1953, "step": 33925 }, { "epoch": 1.857854678858895, "grad_norm": 0.11289733648300171, "learning_rate": 2.115088217400122e-05, "loss": 0.2023, "step": 33930 }, { "epoch": 1.8581284564419867, "grad_norm": 0.12501730024814606, "learning_rate": 2.1145812208476983e-05, "loss": 0.196, "step": 33935 }, { "epoch": 1.8584022340250779, "grad_norm": 0.1584809124469757, "learning_rate": 2.114074224295275e-05, "loss": 0.1973, "step": 33940 }, { "epoch": 1.8586760116081695, "grad_norm": 0.12661567330360413, "learning_rate": 2.1135672277428513e-05, "loss": 0.1973, "step": 33945 }, { "epoch": 1.858949789191261, "grad_norm": 0.11524798721075058, "learning_rate": 2.113060231190428e-05, "loss": 0.2068, "step": 33950 }, { "epoch": 1.8592235667743524, "grad_norm": 0.13122083246707916, "learning_rate": 2.1125532346380043e-05, "loss": 0.2057, "step": 33955 }, { "epoch": 1.859497344357444, "grad_norm": 0.12630218267440796, "learning_rate": 2.112046238085581e-05, "loss": 0.1954, "step": 33960 }, { "epoch": 1.8597711219405355, "grad_norm": 0.11052099615335464, "learning_rate": 2.1115392415331577e-05, "loss": 0.2006, "step": 33965 }, { "epoch": 1.860044899523627, "grad_norm": 0.10157103836536407, "learning_rate": 2.1110322449807343e-05, "loss": 0.1994, "step": 33970 }, { "epoch": 1.8603186771067186, "grad_norm": 0.11159870028495789, "learning_rate": 2.110525248428311e-05, "loss": 0.1927, "step": 33975 }, { "epoch": 1.86059245468981, "grad_norm": 0.11610930413007736, "learning_rate": 2.1100182518758873e-05, "loss": 0.2042, "step": 33980 }, { "epoch": 1.8608662322729015, "grad_norm": 0.10114062577486038, "learning_rate": 2.109511255323464e-05, "loss": 0.1972, "step": 33985 }, { "epoch": 1.8611400098559931, "grad_norm": 0.10066059231758118, "learning_rate": 2.1090042587710403e-05, "loss": 0.1967, "step": 33990 }, { "epoch": 1.8614137874390844, "grad_norm": 0.10929710417985916, "learning_rate": 2.108497262218617e-05, "loss": 0.1968, "step": 33995 }, { "epoch": 1.861687565022176, "grad_norm": 0.11774514615535736, "learning_rate": 2.1079902656661933e-05, "loss": 0.1987, "step": 34000 }, { "epoch": 1.8619613426052675, "grad_norm": 0.1120859906077385, "learning_rate": 2.1074832691137703e-05, "loss": 0.2019, "step": 34005 }, { "epoch": 1.862235120188359, "grad_norm": 0.10559573769569397, "learning_rate": 2.1069762725613467e-05, "loss": 0.1905, "step": 34010 }, { "epoch": 1.8625088977714506, "grad_norm": 0.1433372050523758, "learning_rate": 2.1064692760089233e-05, "loss": 0.2078, "step": 34015 }, { "epoch": 1.862782675354542, "grad_norm": 0.10841958224773407, "learning_rate": 2.1059622794564997e-05, "loss": 0.1904, "step": 34020 }, { "epoch": 1.8630564529376334, "grad_norm": 0.1413997858762741, "learning_rate": 2.1054552829040763e-05, "loss": 0.2051, "step": 34025 }, { "epoch": 1.863330230520725, "grad_norm": 0.10930555313825607, "learning_rate": 2.104948286351653e-05, "loss": 0.1971, "step": 34030 }, { "epoch": 1.8636040081038163, "grad_norm": 0.11138387024402618, "learning_rate": 2.1044412897992293e-05, "loss": 0.1985, "step": 34035 }, { "epoch": 1.863877785686908, "grad_norm": 0.11302424222230911, "learning_rate": 2.103934293246806e-05, "loss": 0.1979, "step": 34040 }, { "epoch": 1.8641515632699994, "grad_norm": 0.10910050570964813, "learning_rate": 2.1034272966943827e-05, "loss": 0.1983, "step": 34045 }, { "epoch": 1.8644253408530909, "grad_norm": 0.1156112477183342, "learning_rate": 2.1029203001419593e-05, "loss": 0.1948, "step": 34050 }, { "epoch": 1.8646991184361825, "grad_norm": 0.11938638985157013, "learning_rate": 2.1024133035895357e-05, "loss": 0.1959, "step": 34055 }, { "epoch": 1.864972896019274, "grad_norm": 0.10815286636352539, "learning_rate": 2.1019063070371123e-05, "loss": 0.2033, "step": 34060 }, { "epoch": 1.8652466736023654, "grad_norm": 0.12209278345108032, "learning_rate": 2.1013993104846887e-05, "loss": 0.2089, "step": 34065 }, { "epoch": 1.865520451185457, "grad_norm": 0.12542401254177094, "learning_rate": 2.1008923139322653e-05, "loss": 0.2024, "step": 34070 }, { "epoch": 1.8657942287685483, "grad_norm": 0.1131778433918953, "learning_rate": 2.100385317379842e-05, "loss": 0.1937, "step": 34075 }, { "epoch": 1.86606800635164, "grad_norm": 0.1270321160554886, "learning_rate": 2.0998783208274183e-05, "loss": 0.2049, "step": 34080 }, { "epoch": 1.8663417839347314, "grad_norm": 0.10554152727127075, "learning_rate": 2.099371324274995e-05, "loss": 0.1989, "step": 34085 }, { "epoch": 1.8666155615178228, "grad_norm": 0.11295914649963379, "learning_rate": 2.0988643277225717e-05, "loss": 0.1983, "step": 34090 }, { "epoch": 1.8668893391009145, "grad_norm": 0.10428421944379807, "learning_rate": 2.0983573311701483e-05, "loss": 0.1963, "step": 34095 }, { "epoch": 1.867163116684006, "grad_norm": 0.11168952286243439, "learning_rate": 2.0978503346177247e-05, "loss": 0.1995, "step": 34100 }, { "epoch": 1.8674368942670974, "grad_norm": 0.11000283062458038, "learning_rate": 2.0973433380653013e-05, "loss": 0.1924, "step": 34105 }, { "epoch": 1.867710671850189, "grad_norm": 0.12681201100349426, "learning_rate": 2.0968363415128777e-05, "loss": 0.2039, "step": 34110 }, { "epoch": 1.8679844494332805, "grad_norm": 0.10708180069923401, "learning_rate": 2.0963293449604543e-05, "loss": 0.2033, "step": 34115 }, { "epoch": 1.868258227016372, "grad_norm": 0.12602591514587402, "learning_rate": 2.0958223484080307e-05, "loss": 0.1996, "step": 34120 }, { "epoch": 1.8685320045994636, "grad_norm": 0.11260852962732315, "learning_rate": 2.0953153518556073e-05, "loss": 0.1965, "step": 34125 }, { "epoch": 1.8688057821825548, "grad_norm": 0.1098320484161377, "learning_rate": 2.094808355303184e-05, "loss": 0.2025, "step": 34130 }, { "epoch": 1.8690795597656464, "grad_norm": 0.11701597273349762, "learning_rate": 2.0943013587507607e-05, "loss": 0.2074, "step": 34135 }, { "epoch": 1.8693533373487379, "grad_norm": 0.1284744143486023, "learning_rate": 2.0937943621983374e-05, "loss": 0.2085, "step": 34140 }, { "epoch": 1.8696271149318293, "grad_norm": 0.1125420555472374, "learning_rate": 2.0932873656459137e-05, "loss": 0.2007, "step": 34145 }, { "epoch": 1.869900892514921, "grad_norm": 0.18050232529640198, "learning_rate": 2.0927803690934904e-05, "loss": 0.1986, "step": 34150 }, { "epoch": 1.8701746700980124, "grad_norm": 0.12428084760904312, "learning_rate": 2.0922733725410667e-05, "loss": 0.2017, "step": 34155 }, { "epoch": 1.8704484476811039, "grad_norm": 0.11872745305299759, "learning_rate": 2.0917663759886434e-05, "loss": 0.198, "step": 34160 }, { "epoch": 1.8707222252641955, "grad_norm": 0.11954833567142487, "learning_rate": 2.0912593794362197e-05, "loss": 0.1916, "step": 34165 }, { "epoch": 1.8709960028472867, "grad_norm": 0.11608783155679703, "learning_rate": 2.0907523828837967e-05, "loss": 0.2057, "step": 34170 }, { "epoch": 1.8712697804303784, "grad_norm": 0.11651258170604706, "learning_rate": 2.090245386331373e-05, "loss": 0.2066, "step": 34175 }, { "epoch": 1.8715435580134698, "grad_norm": 0.11003784090280533, "learning_rate": 2.0897383897789497e-05, "loss": 0.1953, "step": 34180 }, { "epoch": 1.8718173355965613, "grad_norm": 0.13525746762752533, "learning_rate": 2.089231393226526e-05, "loss": 0.2078, "step": 34185 }, { "epoch": 1.872091113179653, "grad_norm": 0.11201205849647522, "learning_rate": 2.0887243966741027e-05, "loss": 0.2007, "step": 34190 }, { "epoch": 1.8723648907627444, "grad_norm": 0.10529831051826477, "learning_rate": 2.0882174001216794e-05, "loss": 0.2078, "step": 34195 }, { "epoch": 1.8726386683458358, "grad_norm": 0.10016636550426483, "learning_rate": 2.0877104035692557e-05, "loss": 0.1959, "step": 34200 }, { "epoch": 1.8729124459289275, "grad_norm": 0.11854811012744904, "learning_rate": 2.0872034070168324e-05, "loss": 0.2022, "step": 34205 }, { "epoch": 1.8731862235120187, "grad_norm": 0.11144081503152847, "learning_rate": 2.086696410464409e-05, "loss": 0.2094, "step": 34210 }, { "epoch": 1.8734600010951103, "grad_norm": 0.11233587563037872, "learning_rate": 2.0861894139119857e-05, "loss": 0.1994, "step": 34215 }, { "epoch": 1.8737337786782018, "grad_norm": 0.11728724092245102, "learning_rate": 2.085682417359562e-05, "loss": 0.1997, "step": 34220 }, { "epoch": 1.8740075562612932, "grad_norm": 0.12158312648534775, "learning_rate": 2.0851754208071387e-05, "loss": 0.2056, "step": 34225 }, { "epoch": 1.874281333844385, "grad_norm": 0.1159537211060524, "learning_rate": 2.084668424254715e-05, "loss": 0.1959, "step": 34230 }, { "epoch": 1.8745551114274763, "grad_norm": 0.11763972043991089, "learning_rate": 2.0841614277022917e-05, "loss": 0.2028, "step": 34235 }, { "epoch": 1.8748288890105678, "grad_norm": 0.13081052899360657, "learning_rate": 2.083654431149868e-05, "loss": 0.2011, "step": 34240 }, { "epoch": 1.8751026665936594, "grad_norm": 0.1155523806810379, "learning_rate": 2.0831474345974447e-05, "loss": 0.1927, "step": 34245 }, { "epoch": 1.8753764441767506, "grad_norm": 0.12816168367862701, "learning_rate": 2.0826404380450214e-05, "loss": 0.1944, "step": 34250 }, { "epoch": 1.8756502217598423, "grad_norm": 0.11106289178133011, "learning_rate": 2.082133441492598e-05, "loss": 0.2055, "step": 34255 }, { "epoch": 1.8759239993429337, "grad_norm": 0.11137468367815018, "learning_rate": 2.0816264449401747e-05, "loss": 0.1924, "step": 34260 }, { "epoch": 1.8761977769260252, "grad_norm": 0.11122077703475952, "learning_rate": 2.081119448387751e-05, "loss": 0.2018, "step": 34265 }, { "epoch": 1.8764715545091168, "grad_norm": 0.1194000095129013, "learning_rate": 2.0806124518353277e-05, "loss": 0.1987, "step": 34270 }, { "epoch": 1.8767453320922083, "grad_norm": 0.1242581158876419, "learning_rate": 2.080105455282904e-05, "loss": 0.1996, "step": 34275 }, { "epoch": 1.8770191096752997, "grad_norm": 0.10992729663848877, "learning_rate": 2.0795984587304807e-05, "loss": 0.2069, "step": 34280 }, { "epoch": 1.8772928872583914, "grad_norm": 0.13364861905574799, "learning_rate": 2.079091462178057e-05, "loss": 0.2007, "step": 34285 }, { "epoch": 1.8775666648414828, "grad_norm": 0.11568015068769455, "learning_rate": 2.078584465625634e-05, "loss": 0.2006, "step": 34290 }, { "epoch": 1.8778404424245743, "grad_norm": 0.10329356789588928, "learning_rate": 2.0780774690732104e-05, "loss": 0.1983, "step": 34295 }, { "epoch": 1.878114220007666, "grad_norm": 0.11548503488302231, "learning_rate": 2.077570472520787e-05, "loss": 0.192, "step": 34300 }, { "epoch": 1.8783879975907571, "grad_norm": 0.09970741719007492, "learning_rate": 2.0770634759683634e-05, "loss": 0.1959, "step": 34305 }, { "epoch": 1.8786617751738488, "grad_norm": 0.13214702904224396, "learning_rate": 2.07655647941594e-05, "loss": 0.2011, "step": 34310 }, { "epoch": 1.8789355527569402, "grad_norm": 0.13102345168590546, "learning_rate": 2.0760494828635167e-05, "loss": 0.1907, "step": 34315 }, { "epoch": 1.8792093303400317, "grad_norm": 0.12313541024923325, "learning_rate": 2.075542486311093e-05, "loss": 0.2026, "step": 34320 }, { "epoch": 1.8794831079231233, "grad_norm": 0.10733439028263092, "learning_rate": 2.0750354897586697e-05, "loss": 0.2019, "step": 34325 }, { "epoch": 1.8797568855062148, "grad_norm": 0.11142939329147339, "learning_rate": 2.0745284932062464e-05, "loss": 0.203, "step": 34330 }, { "epoch": 1.8800306630893062, "grad_norm": 0.10528159886598587, "learning_rate": 2.074021496653823e-05, "loss": 0.1937, "step": 34335 }, { "epoch": 1.8803044406723979, "grad_norm": 0.11603746563196182, "learning_rate": 2.0735145001013994e-05, "loss": 0.1961, "step": 34340 }, { "epoch": 1.880578218255489, "grad_norm": 0.11205945163965225, "learning_rate": 2.073007503548976e-05, "loss": 0.2021, "step": 34345 }, { "epoch": 1.8808519958385808, "grad_norm": 0.10402835160493851, "learning_rate": 2.0725005069965524e-05, "loss": 0.1969, "step": 34350 }, { "epoch": 1.8811257734216722, "grad_norm": 0.10455922782421112, "learning_rate": 2.071993510444129e-05, "loss": 0.2014, "step": 34355 }, { "epoch": 1.8813995510047636, "grad_norm": 0.11098132282495499, "learning_rate": 2.0714865138917057e-05, "loss": 0.1988, "step": 34360 }, { "epoch": 1.8816733285878553, "grad_norm": 0.12573175132274628, "learning_rate": 2.070979517339282e-05, "loss": 0.1937, "step": 34365 }, { "epoch": 1.8819471061709467, "grad_norm": 0.11134106665849686, "learning_rate": 2.0704725207868587e-05, "loss": 0.1963, "step": 34370 }, { "epoch": 1.8822208837540382, "grad_norm": 0.09882201254367828, "learning_rate": 2.0699655242344354e-05, "loss": 0.1987, "step": 34375 }, { "epoch": 1.8824946613371298, "grad_norm": 0.11029057949781418, "learning_rate": 2.069458527682012e-05, "loss": 0.1951, "step": 34380 }, { "epoch": 1.882768438920221, "grad_norm": 0.11650790274143219, "learning_rate": 2.0689515311295884e-05, "loss": 0.2057, "step": 34385 }, { "epoch": 1.8830422165033127, "grad_norm": 0.11664305627346039, "learning_rate": 2.068444534577165e-05, "loss": 0.1962, "step": 34390 }, { "epoch": 1.8833159940864042, "grad_norm": 0.11056286841630936, "learning_rate": 2.0679375380247414e-05, "loss": 0.2003, "step": 34395 }, { "epoch": 1.8835897716694956, "grad_norm": 0.11373815685510635, "learning_rate": 2.067430541472318e-05, "loss": 0.2035, "step": 34400 }, { "epoch": 1.8838635492525873, "grad_norm": 0.11811237782239914, "learning_rate": 2.0669235449198944e-05, "loss": 0.2041, "step": 34405 }, { "epoch": 1.8841373268356787, "grad_norm": 0.12435802817344666, "learning_rate": 2.0664165483674714e-05, "loss": 0.203, "step": 34410 }, { "epoch": 1.8844111044187701, "grad_norm": 0.10788717865943909, "learning_rate": 2.0659095518150477e-05, "loss": 0.2014, "step": 34415 }, { "epoch": 1.8846848820018618, "grad_norm": 0.110253244638443, "learning_rate": 2.0654025552626244e-05, "loss": 0.1958, "step": 34420 }, { "epoch": 1.8849586595849532, "grad_norm": 0.1334262490272522, "learning_rate": 2.064895558710201e-05, "loss": 0.2046, "step": 34425 }, { "epoch": 1.8852324371680447, "grad_norm": 0.1168193519115448, "learning_rate": 2.0643885621577774e-05, "loss": 0.196, "step": 34430 }, { "epoch": 1.8855062147511363, "grad_norm": 0.12298060953617096, "learning_rate": 2.063881565605354e-05, "loss": 0.1963, "step": 34435 }, { "epoch": 1.8857799923342276, "grad_norm": 0.11294838786125183, "learning_rate": 2.0633745690529304e-05, "loss": 0.2029, "step": 34440 }, { "epoch": 1.8860537699173192, "grad_norm": 0.10875155031681061, "learning_rate": 2.062867572500507e-05, "loss": 0.1906, "step": 34445 }, { "epoch": 1.8863275475004107, "grad_norm": 0.11896884441375732, "learning_rate": 2.0623605759480834e-05, "loss": 0.2061, "step": 34450 }, { "epoch": 1.886601325083502, "grad_norm": 0.13155889511108398, "learning_rate": 2.0618535793956604e-05, "loss": 0.2002, "step": 34455 }, { "epoch": 1.8868751026665938, "grad_norm": 0.11286131292581558, "learning_rate": 2.0613465828432367e-05, "loss": 0.1966, "step": 34460 }, { "epoch": 1.8871488802496852, "grad_norm": 0.11658115684986115, "learning_rate": 2.0608395862908134e-05, "loss": 0.196, "step": 34465 }, { "epoch": 1.8874226578327766, "grad_norm": 0.12628735601902008, "learning_rate": 2.0603325897383897e-05, "loss": 0.2068, "step": 34470 }, { "epoch": 1.8876964354158683, "grad_norm": 0.11462435871362686, "learning_rate": 2.0598255931859664e-05, "loss": 0.2016, "step": 34475 }, { "epoch": 1.8879702129989595, "grad_norm": 0.13016602396965027, "learning_rate": 2.059318596633543e-05, "loss": 0.2032, "step": 34480 }, { "epoch": 1.8882439905820512, "grad_norm": 0.1373186558485031, "learning_rate": 2.0588116000811194e-05, "loss": 0.1944, "step": 34485 }, { "epoch": 1.8885177681651426, "grad_norm": 0.11851218342781067, "learning_rate": 2.058304603528696e-05, "loss": 0.1945, "step": 34490 }, { "epoch": 1.888791545748234, "grad_norm": 0.10542276501655579, "learning_rate": 2.0577976069762727e-05, "loss": 0.2008, "step": 34495 }, { "epoch": 1.8890653233313257, "grad_norm": 0.11790076643228531, "learning_rate": 2.0572906104238494e-05, "loss": 0.2031, "step": 34500 }, { "epoch": 1.8893391009144171, "grad_norm": 0.1147570088505745, "learning_rate": 2.0567836138714257e-05, "loss": 0.2037, "step": 34505 }, { "epoch": 1.8896128784975086, "grad_norm": 0.11601780354976654, "learning_rate": 2.0562766173190024e-05, "loss": 0.195, "step": 34510 }, { "epoch": 1.8898866560806002, "grad_norm": 0.11485123634338379, "learning_rate": 2.0557696207665787e-05, "loss": 0.203, "step": 34515 }, { "epoch": 1.8901604336636915, "grad_norm": 0.11964985728263855, "learning_rate": 2.0552626242141554e-05, "loss": 0.2048, "step": 34520 }, { "epoch": 1.8904342112467831, "grad_norm": 0.10211194306612015, "learning_rate": 2.0547556276617317e-05, "loss": 0.2, "step": 34525 }, { "epoch": 1.8907079888298746, "grad_norm": 0.11829770356416702, "learning_rate": 2.0542486311093084e-05, "loss": 0.1962, "step": 34530 }, { "epoch": 1.890981766412966, "grad_norm": 0.1033901646733284, "learning_rate": 2.053741634556885e-05, "loss": 0.2038, "step": 34535 }, { "epoch": 1.8912555439960577, "grad_norm": 0.13358373939990997, "learning_rate": 2.0532346380044617e-05, "loss": 0.1933, "step": 34540 }, { "epoch": 1.891529321579149, "grad_norm": 0.13093820214271545, "learning_rate": 2.0527276414520384e-05, "loss": 0.1978, "step": 34545 }, { "epoch": 1.8918030991622405, "grad_norm": 0.11437336355447769, "learning_rate": 2.0522206448996147e-05, "loss": 0.2009, "step": 34550 }, { "epoch": 1.8920768767453322, "grad_norm": 0.09926985204219818, "learning_rate": 2.0517136483471914e-05, "loss": 0.2071, "step": 34555 }, { "epoch": 1.8923506543284236, "grad_norm": 0.12560664117336273, "learning_rate": 2.0512066517947677e-05, "loss": 0.2006, "step": 34560 }, { "epoch": 1.892624431911515, "grad_norm": 0.11186370253562927, "learning_rate": 2.0506996552423444e-05, "loss": 0.1948, "step": 34565 }, { "epoch": 1.8928982094946067, "grad_norm": 0.11141493171453476, "learning_rate": 2.0501926586899207e-05, "loss": 0.2046, "step": 34570 }, { "epoch": 1.893171987077698, "grad_norm": 0.11882146447896957, "learning_rate": 2.0496856621374978e-05, "loss": 0.1999, "step": 34575 }, { "epoch": 1.8934457646607896, "grad_norm": 0.10954241454601288, "learning_rate": 2.049178665585074e-05, "loss": 0.1966, "step": 34580 }, { "epoch": 1.893719542243881, "grad_norm": 0.11110087484121323, "learning_rate": 2.0486716690326508e-05, "loss": 0.2041, "step": 34585 }, { "epoch": 1.8939933198269725, "grad_norm": 0.10523821413516998, "learning_rate": 2.048164672480227e-05, "loss": 0.1963, "step": 34590 }, { "epoch": 1.8942670974100642, "grad_norm": 0.10386832803487778, "learning_rate": 2.0476576759278038e-05, "loss": 0.1975, "step": 34595 }, { "epoch": 1.8945408749931556, "grad_norm": 0.11278632283210754, "learning_rate": 2.0471506793753804e-05, "loss": 0.1949, "step": 34600 }, { "epoch": 1.894814652576247, "grad_norm": 0.12491878122091293, "learning_rate": 2.0466436828229568e-05, "loss": 0.204, "step": 34605 }, { "epoch": 1.8950884301593387, "grad_norm": 0.11276987940073013, "learning_rate": 2.0461366862705334e-05, "loss": 0.191, "step": 34610 }, { "epoch": 1.89536220774243, "grad_norm": 0.11162425577640533, "learning_rate": 2.04562968971811e-05, "loss": 0.1882, "step": 34615 }, { "epoch": 1.8956359853255216, "grad_norm": 0.12585189938545227, "learning_rate": 2.0451226931656868e-05, "loss": 0.1985, "step": 34620 }, { "epoch": 1.895909762908613, "grad_norm": 0.11674290150403976, "learning_rate": 2.044615696613263e-05, "loss": 0.199, "step": 34625 }, { "epoch": 1.8961835404917045, "grad_norm": 0.10863678902387619, "learning_rate": 2.0441087000608398e-05, "loss": 0.1999, "step": 34630 }, { "epoch": 1.8964573180747961, "grad_norm": 0.10855698585510254, "learning_rate": 2.043601703508416e-05, "loss": 0.1984, "step": 34635 }, { "epoch": 1.8967310956578876, "grad_norm": 0.10905267298221588, "learning_rate": 2.0430947069559928e-05, "loss": 0.1876, "step": 34640 }, { "epoch": 1.897004873240979, "grad_norm": 0.1117197796702385, "learning_rate": 2.0425877104035694e-05, "loss": 0.1983, "step": 34645 }, { "epoch": 1.8972786508240707, "grad_norm": 0.12381478399038315, "learning_rate": 2.0420807138511458e-05, "loss": 0.2, "step": 34650 }, { "epoch": 1.8975524284071619, "grad_norm": 0.11077094823122025, "learning_rate": 2.0415737172987224e-05, "loss": 0.1982, "step": 34655 }, { "epoch": 1.8978262059902535, "grad_norm": 0.1521865427494049, "learning_rate": 2.041066720746299e-05, "loss": 0.2105, "step": 34660 }, { "epoch": 1.898099983573345, "grad_norm": 0.13081024587154388, "learning_rate": 2.0405597241938758e-05, "loss": 0.206, "step": 34665 }, { "epoch": 1.8983737611564364, "grad_norm": 0.13539190590381622, "learning_rate": 2.040052727641452e-05, "loss": 0.2006, "step": 34670 }, { "epoch": 1.898647538739528, "grad_norm": 0.14060157537460327, "learning_rate": 2.0395457310890288e-05, "loss": 0.1976, "step": 34675 }, { "epoch": 1.8989213163226195, "grad_norm": 0.1123872920870781, "learning_rate": 2.039038734536605e-05, "loss": 0.1905, "step": 34680 }, { "epoch": 1.899195093905711, "grad_norm": 0.11376192420721054, "learning_rate": 2.0385317379841818e-05, "loss": 0.208, "step": 34685 }, { "epoch": 1.8994688714888026, "grad_norm": 0.12239325046539307, "learning_rate": 2.038024741431758e-05, "loss": 0.199, "step": 34690 }, { "epoch": 1.8997426490718938, "grad_norm": 0.11956613510847092, "learning_rate": 2.037517744879335e-05, "loss": 0.2007, "step": 34695 }, { "epoch": 1.9000164266549855, "grad_norm": 0.130174919962883, "learning_rate": 2.0370107483269114e-05, "loss": 0.2026, "step": 34700 }, { "epoch": 1.9002902042380772, "grad_norm": 0.09680657088756561, "learning_rate": 2.036503751774488e-05, "loss": 0.1948, "step": 34705 }, { "epoch": 1.9005639818211684, "grad_norm": 0.11357084661722183, "learning_rate": 2.0359967552220648e-05, "loss": 0.2039, "step": 34710 }, { "epoch": 1.90083775940426, "grad_norm": 0.10723096132278442, "learning_rate": 2.035489758669641e-05, "loss": 0.1939, "step": 34715 }, { "epoch": 1.9011115369873515, "grad_norm": 0.12682995200157166, "learning_rate": 2.0349827621172178e-05, "loss": 0.1966, "step": 34720 }, { "epoch": 1.901385314570443, "grad_norm": 0.11752855032682419, "learning_rate": 2.034475765564794e-05, "loss": 0.188, "step": 34725 }, { "epoch": 1.9016590921535346, "grad_norm": 0.1365310698747635, "learning_rate": 2.0339687690123708e-05, "loss": 0.2001, "step": 34730 }, { "epoch": 1.901932869736626, "grad_norm": 0.1490255445241928, "learning_rate": 2.033461772459947e-05, "loss": 0.1973, "step": 34735 }, { "epoch": 1.9022066473197174, "grad_norm": 0.11312970519065857, "learning_rate": 2.032954775907524e-05, "loss": 0.1982, "step": 34740 }, { "epoch": 1.902480424902809, "grad_norm": 0.11298773437738419, "learning_rate": 2.0324477793551004e-05, "loss": 0.2014, "step": 34745 }, { "epoch": 1.9027542024859003, "grad_norm": 0.13831549882888794, "learning_rate": 2.031940782802677e-05, "loss": 0.2051, "step": 34750 }, { "epoch": 1.903027980068992, "grad_norm": 0.1122017428278923, "learning_rate": 2.0314337862502534e-05, "loss": 0.2019, "step": 34755 }, { "epoch": 1.9033017576520834, "grad_norm": 0.1266695261001587, "learning_rate": 2.03092678969783e-05, "loss": 0.1977, "step": 34760 }, { "epoch": 1.9035755352351749, "grad_norm": 0.10716529935598373, "learning_rate": 2.0304197931454068e-05, "loss": 0.1909, "step": 34765 }, { "epoch": 1.9038493128182665, "grad_norm": 0.11322619020938873, "learning_rate": 2.029912796592983e-05, "loss": 0.2069, "step": 34770 }, { "epoch": 1.904123090401358, "grad_norm": 0.13583792746067047, "learning_rate": 2.0294058000405598e-05, "loss": 0.1959, "step": 34775 }, { "epoch": 1.9043968679844494, "grad_norm": 0.11776324361562729, "learning_rate": 2.0288988034881365e-05, "loss": 0.2011, "step": 34780 }, { "epoch": 1.904670645567541, "grad_norm": 0.10504014045000076, "learning_rate": 2.028391806935713e-05, "loss": 0.1998, "step": 34785 }, { "epoch": 1.9049444231506323, "grad_norm": 0.13680781424045563, "learning_rate": 2.0278848103832895e-05, "loss": 0.2053, "step": 34790 }, { "epoch": 1.905218200733724, "grad_norm": 0.1181676909327507, "learning_rate": 2.027377813830866e-05, "loss": 0.1995, "step": 34795 }, { "epoch": 1.9054919783168154, "grad_norm": 0.11238887906074524, "learning_rate": 2.0268708172784425e-05, "loss": 0.1904, "step": 34800 }, { "epoch": 1.9057657558999068, "grad_norm": 0.11824862658977509, "learning_rate": 2.026363820726019e-05, "loss": 0.2067, "step": 34805 }, { "epoch": 1.9060395334829985, "grad_norm": 0.11073736846446991, "learning_rate": 2.0258568241735958e-05, "loss": 0.2068, "step": 34810 }, { "epoch": 1.90631331106609, "grad_norm": 0.11684705317020416, "learning_rate": 2.025349827621172e-05, "loss": 0.198, "step": 34815 }, { "epoch": 1.9065870886491814, "grad_norm": 0.1357523798942566, "learning_rate": 2.0248428310687488e-05, "loss": 0.2039, "step": 34820 }, { "epoch": 1.906860866232273, "grad_norm": 0.1423453390598297, "learning_rate": 2.0243358345163255e-05, "loss": 0.1987, "step": 34825 }, { "epoch": 1.9071346438153642, "grad_norm": 0.1219908818602562, "learning_rate": 2.023828837963902e-05, "loss": 0.1958, "step": 34830 }, { "epoch": 1.907408421398456, "grad_norm": 0.1107311025261879, "learning_rate": 2.0233218414114785e-05, "loss": 0.1925, "step": 34835 }, { "epoch": 1.9076821989815473, "grad_norm": 0.11161206662654877, "learning_rate": 2.022814844859055e-05, "loss": 0.2094, "step": 34840 }, { "epoch": 1.9079559765646388, "grad_norm": 0.10990630090236664, "learning_rate": 2.0223078483066315e-05, "loss": 0.2016, "step": 34845 }, { "epoch": 1.9082297541477304, "grad_norm": 0.1084267720580101, "learning_rate": 2.021800851754208e-05, "loss": 0.1899, "step": 34850 }, { "epoch": 1.9085035317308219, "grad_norm": 0.12952077388763428, "learning_rate": 2.0212938552017845e-05, "loss": 0.2027, "step": 34855 }, { "epoch": 1.9087773093139133, "grad_norm": 0.1149887815117836, "learning_rate": 2.0207868586493615e-05, "loss": 0.199, "step": 34860 }, { "epoch": 1.909051086897005, "grad_norm": 0.12384498119354248, "learning_rate": 2.0202798620969378e-05, "loss": 0.2107, "step": 34865 }, { "epoch": 1.9093248644800964, "grad_norm": 0.1216137707233429, "learning_rate": 2.0197728655445145e-05, "loss": 0.1994, "step": 34870 }, { "epoch": 1.9095986420631879, "grad_norm": 0.11681238561868668, "learning_rate": 2.019265868992091e-05, "loss": 0.1976, "step": 34875 }, { "epoch": 1.9098724196462795, "grad_norm": 0.11586841195821762, "learning_rate": 2.0187588724396675e-05, "loss": 0.2017, "step": 34880 }, { "epoch": 1.9101461972293707, "grad_norm": 0.11372943222522736, "learning_rate": 2.018251875887244e-05, "loss": 0.2009, "step": 34885 }, { "epoch": 1.9104199748124624, "grad_norm": 0.12224221229553223, "learning_rate": 2.0177448793348205e-05, "loss": 0.2002, "step": 34890 }, { "epoch": 1.9106937523955538, "grad_norm": 0.10427676886320114, "learning_rate": 2.017237882782397e-05, "loss": 0.2026, "step": 34895 }, { "epoch": 1.9109675299786453, "grad_norm": 0.10774306952953339, "learning_rate": 2.0167308862299738e-05, "loss": 0.1898, "step": 34900 }, { "epoch": 1.911241307561737, "grad_norm": 0.11900707334280014, "learning_rate": 2.0162238896775505e-05, "loss": 0.201, "step": 34905 }, { "epoch": 1.9115150851448284, "grad_norm": 0.11705043911933899, "learning_rate": 2.0157168931251268e-05, "loss": 0.1994, "step": 34910 }, { "epoch": 1.9117888627279198, "grad_norm": 0.11207827925682068, "learning_rate": 2.0152098965727035e-05, "loss": 0.1946, "step": 34915 }, { "epoch": 1.9120626403110115, "grad_norm": 0.12036240100860596, "learning_rate": 2.0147029000202798e-05, "loss": 0.2039, "step": 34920 }, { "epoch": 1.9123364178941027, "grad_norm": 0.11954519152641296, "learning_rate": 2.0141959034678565e-05, "loss": 0.2009, "step": 34925 }, { "epoch": 1.9126101954771944, "grad_norm": 0.1147250235080719, "learning_rate": 2.013688906915433e-05, "loss": 0.1959, "step": 34930 }, { "epoch": 1.9128839730602858, "grad_norm": 0.12245813757181168, "learning_rate": 2.0131819103630095e-05, "loss": 0.2007, "step": 34935 }, { "epoch": 1.9131577506433772, "grad_norm": 0.11564534157514572, "learning_rate": 2.0126749138105865e-05, "loss": 0.1996, "step": 34940 }, { "epoch": 1.913431528226469, "grad_norm": 0.11478875577449799, "learning_rate": 2.0121679172581628e-05, "loss": 0.2106, "step": 34945 }, { "epoch": 1.9137053058095603, "grad_norm": 0.12261900305747986, "learning_rate": 2.0116609207057395e-05, "loss": 0.1911, "step": 34950 }, { "epoch": 1.9139790833926518, "grad_norm": 0.11023017764091492, "learning_rate": 2.0111539241533158e-05, "loss": 0.1981, "step": 34955 }, { "epoch": 1.9142528609757434, "grad_norm": 0.11853543668985367, "learning_rate": 2.0106469276008925e-05, "loss": 0.1983, "step": 34960 }, { "epoch": 1.9145266385588346, "grad_norm": 0.11161856353282928, "learning_rate": 2.0101399310484688e-05, "loss": 0.204, "step": 34965 }, { "epoch": 1.9148004161419263, "grad_norm": 0.11294715106487274, "learning_rate": 2.0096329344960455e-05, "loss": 0.198, "step": 34970 }, { "epoch": 1.9150741937250177, "grad_norm": 0.14448073506355286, "learning_rate": 2.0091259379436218e-05, "loss": 0.1892, "step": 34975 }, { "epoch": 1.9153479713081092, "grad_norm": 0.10579713433980942, "learning_rate": 2.0086189413911988e-05, "loss": 0.1972, "step": 34980 }, { "epoch": 1.9156217488912008, "grad_norm": 0.11176447570323944, "learning_rate": 2.008111944838775e-05, "loss": 0.1991, "step": 34985 }, { "epoch": 1.9158955264742923, "grad_norm": 0.11240289360284805, "learning_rate": 2.0076049482863518e-05, "loss": 0.2035, "step": 34990 }, { "epoch": 1.9161693040573837, "grad_norm": 0.11966845393180847, "learning_rate": 2.0070979517339285e-05, "loss": 0.1897, "step": 34995 }, { "epoch": 1.9164430816404754, "grad_norm": 0.12483011931180954, "learning_rate": 2.0065909551815048e-05, "loss": 0.1968, "step": 35000 }, { "epoch": 1.9167168592235668, "grad_norm": 0.13643893599510193, "learning_rate": 2.0060839586290815e-05, "loss": 0.2025, "step": 35005 }, { "epoch": 1.9169906368066583, "grad_norm": 0.10982363671064377, "learning_rate": 2.0055769620766578e-05, "loss": 0.1965, "step": 35010 }, { "epoch": 1.91726441438975, "grad_norm": 0.10459936410188675, "learning_rate": 2.0050699655242345e-05, "loss": 0.193, "step": 35015 }, { "epoch": 1.9175381919728411, "grad_norm": 0.11260470747947693, "learning_rate": 2.0045629689718108e-05, "loss": 0.1972, "step": 35020 }, { "epoch": 1.9178119695559328, "grad_norm": 0.12506835162639618, "learning_rate": 2.0040559724193878e-05, "loss": 0.1965, "step": 35025 }, { "epoch": 1.9180857471390242, "grad_norm": 0.11023157089948654, "learning_rate": 2.003548975866964e-05, "loss": 0.2005, "step": 35030 }, { "epoch": 1.9183595247221157, "grad_norm": 0.11533322930335999, "learning_rate": 2.0030419793145408e-05, "loss": 0.1927, "step": 35035 }, { "epoch": 1.9186333023052073, "grad_norm": 0.1237012967467308, "learning_rate": 2.002534982762117e-05, "loss": 0.1969, "step": 35040 }, { "epoch": 1.9189070798882988, "grad_norm": 0.11407732218503952, "learning_rate": 2.0020279862096938e-05, "loss": 0.1965, "step": 35045 }, { "epoch": 1.9191808574713902, "grad_norm": 0.11862650513648987, "learning_rate": 2.0015209896572705e-05, "loss": 0.2052, "step": 35050 }, { "epoch": 1.9194546350544819, "grad_norm": 0.13681341707706451, "learning_rate": 2.0010139931048468e-05, "loss": 0.1985, "step": 35055 }, { "epoch": 1.919728412637573, "grad_norm": 0.10342103987932205, "learning_rate": 2.0005069965524235e-05, "loss": 0.2004, "step": 35060 }, { "epoch": 1.9200021902206648, "grad_norm": 0.11945755779743195, "learning_rate": 2e-05, "loss": 0.1947, "step": 35065 }, { "epoch": 1.9202759678037562, "grad_norm": 0.12389388680458069, "learning_rate": 1.999493003447577e-05, "loss": 0.2037, "step": 35070 }, { "epoch": 1.9205497453868476, "grad_norm": 0.13059614598751068, "learning_rate": 1.998986006895153e-05, "loss": 0.2054, "step": 35075 }, { "epoch": 1.9208235229699393, "grad_norm": 0.11453570425510406, "learning_rate": 1.99847901034273e-05, "loss": 0.2003, "step": 35080 }, { "epoch": 1.9210973005530307, "grad_norm": 0.1279679536819458, "learning_rate": 1.997972013790306e-05, "loss": 0.2045, "step": 35085 }, { "epoch": 1.9213710781361222, "grad_norm": 0.1199006587266922, "learning_rate": 1.997465017237883e-05, "loss": 0.1975, "step": 35090 }, { "epoch": 1.9216448557192138, "grad_norm": 0.10709433257579803, "learning_rate": 1.9969580206854595e-05, "loss": 0.2001, "step": 35095 }, { "epoch": 1.921918633302305, "grad_norm": 0.11266077309846878, "learning_rate": 1.996451024133036e-05, "loss": 0.1995, "step": 35100 }, { "epoch": 1.9221924108853967, "grad_norm": 0.12666140496730804, "learning_rate": 1.9959440275806125e-05, "loss": 0.2045, "step": 35105 }, { "epoch": 1.9224661884684882, "grad_norm": 0.12225411832332611, "learning_rate": 1.9954370310281892e-05, "loss": 0.1992, "step": 35110 }, { "epoch": 1.9227399660515796, "grad_norm": 0.10647204518318176, "learning_rate": 1.994930034475766e-05, "loss": 0.2016, "step": 35115 }, { "epoch": 1.9230137436346713, "grad_norm": 0.1107003316283226, "learning_rate": 1.9944230379233422e-05, "loss": 0.199, "step": 35120 }, { "epoch": 1.9232875212177627, "grad_norm": 0.12728719413280487, "learning_rate": 1.993916041370919e-05, "loss": 0.2057, "step": 35125 }, { "epoch": 1.9235612988008541, "grad_norm": 0.11882712692022324, "learning_rate": 1.9934090448184952e-05, "loss": 0.2001, "step": 35130 }, { "epoch": 1.9238350763839458, "grad_norm": 0.10862667858600616, "learning_rate": 1.992902048266072e-05, "loss": 0.2017, "step": 35135 }, { "epoch": 1.924108853967037, "grad_norm": 0.12714754045009613, "learning_rate": 1.9923950517136482e-05, "loss": 0.1988, "step": 35140 }, { "epoch": 1.9243826315501287, "grad_norm": 0.13943620026111603, "learning_rate": 1.9918880551612252e-05, "loss": 0.2035, "step": 35145 }, { "epoch": 1.9246564091332203, "grad_norm": 0.1068180501461029, "learning_rate": 1.9913810586088015e-05, "loss": 0.199, "step": 35150 }, { "epoch": 1.9249301867163116, "grad_norm": 0.1126459464430809, "learning_rate": 1.9908740620563782e-05, "loss": 0.1924, "step": 35155 }, { "epoch": 1.9252039642994032, "grad_norm": 0.1484026461839676, "learning_rate": 1.990367065503955e-05, "loss": 0.1942, "step": 35160 }, { "epoch": 1.9254777418824947, "grad_norm": 0.11027566343545914, "learning_rate": 1.9898600689515312e-05, "loss": 0.2015, "step": 35165 }, { "epoch": 1.925751519465586, "grad_norm": 0.14671607315540314, "learning_rate": 1.989353072399108e-05, "loss": 0.2009, "step": 35170 }, { "epoch": 1.9260252970486778, "grad_norm": 0.12746459245681763, "learning_rate": 1.9888460758466842e-05, "loss": 0.2026, "step": 35175 }, { "epoch": 1.9262990746317692, "grad_norm": 0.11085615307092667, "learning_rate": 1.988339079294261e-05, "loss": 0.2032, "step": 35180 }, { "epoch": 1.9265728522148606, "grad_norm": 0.10431914776563644, "learning_rate": 1.9878320827418375e-05, "loss": 0.1989, "step": 35185 }, { "epoch": 1.9268466297979523, "grad_norm": 0.10669360309839249, "learning_rate": 1.9873250861894142e-05, "loss": 0.1907, "step": 35190 }, { "epoch": 1.9271204073810435, "grad_norm": 0.13779239356517792, "learning_rate": 1.9868180896369905e-05, "loss": 0.1956, "step": 35195 }, { "epoch": 1.9273941849641352, "grad_norm": 0.1185617744922638, "learning_rate": 1.9863110930845672e-05, "loss": 0.196, "step": 35200 }, { "epoch": 1.9276679625472266, "grad_norm": 0.11705485731363297, "learning_rate": 1.9858040965321435e-05, "loss": 0.2001, "step": 35205 }, { "epoch": 1.927941740130318, "grad_norm": 0.11583719402551651, "learning_rate": 1.9852970999797202e-05, "loss": 0.2043, "step": 35210 }, { "epoch": 1.9282155177134097, "grad_norm": 0.12771551311016083, "learning_rate": 1.984790103427297e-05, "loss": 0.2075, "step": 35215 }, { "epoch": 1.9284892952965011, "grad_norm": 0.14701810479164124, "learning_rate": 1.9842831068748732e-05, "loss": 0.2006, "step": 35220 }, { "epoch": 1.9287630728795926, "grad_norm": 0.1064361184835434, "learning_rate": 1.9837761103224502e-05, "loss": 0.1935, "step": 35225 }, { "epoch": 1.9290368504626842, "grad_norm": 0.098638154566288, "learning_rate": 1.9832691137700265e-05, "loss": 0.1961, "step": 35230 }, { "epoch": 1.9293106280457755, "grad_norm": 0.11216079443693161, "learning_rate": 1.9827621172176032e-05, "loss": 0.1979, "step": 35235 }, { "epoch": 1.9295844056288671, "grad_norm": 0.12840186059474945, "learning_rate": 1.9822551206651795e-05, "loss": 0.1955, "step": 35240 }, { "epoch": 1.9298581832119586, "grad_norm": 0.13429082930088043, "learning_rate": 1.9817481241127562e-05, "loss": 0.1986, "step": 35245 }, { "epoch": 1.93013196079505, "grad_norm": 0.11879726499319077, "learning_rate": 1.9812411275603325e-05, "loss": 0.1995, "step": 35250 }, { "epoch": 1.9304057383781417, "grad_norm": 0.10392232239246368, "learning_rate": 1.9807341310079092e-05, "loss": 0.1901, "step": 35255 }, { "epoch": 1.930679515961233, "grad_norm": 0.11619976162910461, "learning_rate": 1.9802271344554855e-05, "loss": 0.2072, "step": 35260 }, { "epoch": 1.9309532935443245, "grad_norm": 0.11562761664390564, "learning_rate": 1.9797201379030625e-05, "loss": 0.1982, "step": 35265 }, { "epoch": 1.9312270711274162, "grad_norm": 0.11606224626302719, "learning_rate": 1.979213141350639e-05, "loss": 0.1963, "step": 35270 }, { "epoch": 1.9315008487105074, "grad_norm": 0.11367302387952805, "learning_rate": 1.9787061447982155e-05, "loss": 0.2019, "step": 35275 }, { "epoch": 1.931774626293599, "grad_norm": 0.10090304166078568, "learning_rate": 1.9781991482457922e-05, "loss": 0.2039, "step": 35280 }, { "epoch": 1.9320484038766905, "grad_norm": 0.12315189838409424, "learning_rate": 1.9776921516933685e-05, "loss": 0.1938, "step": 35285 }, { "epoch": 1.932322181459782, "grad_norm": 0.12982220947742462, "learning_rate": 1.9771851551409452e-05, "loss": 0.1984, "step": 35290 }, { "epoch": 1.9325959590428736, "grad_norm": 0.11785957217216492, "learning_rate": 1.9766781585885215e-05, "loss": 0.2033, "step": 35295 }, { "epoch": 1.932869736625965, "grad_norm": 0.1116907075047493, "learning_rate": 1.9761711620360982e-05, "loss": 0.2101, "step": 35300 }, { "epoch": 1.9331435142090565, "grad_norm": 0.13454270362854004, "learning_rate": 1.9756641654836745e-05, "loss": 0.1966, "step": 35305 }, { "epoch": 1.9334172917921482, "grad_norm": 0.12205381691455841, "learning_rate": 1.9751571689312515e-05, "loss": 0.1968, "step": 35310 }, { "epoch": 1.9336910693752396, "grad_norm": 0.11428706347942352, "learning_rate": 1.974650172378828e-05, "loss": 0.1961, "step": 35315 }, { "epoch": 1.933964846958331, "grad_norm": 0.11900696158409119, "learning_rate": 1.9741431758264045e-05, "loss": 0.2042, "step": 35320 }, { "epoch": 1.9342386245414227, "grad_norm": 0.1157333180308342, "learning_rate": 1.973636179273981e-05, "loss": 0.1996, "step": 35325 }, { "epoch": 1.934512402124514, "grad_norm": 0.14778271317481995, "learning_rate": 1.9731291827215575e-05, "loss": 0.201, "step": 35330 }, { "epoch": 1.9347861797076056, "grad_norm": 0.10887763649225235, "learning_rate": 1.9726221861691342e-05, "loss": 0.2001, "step": 35335 }, { "epoch": 1.935059957290697, "grad_norm": 0.10082723945379257, "learning_rate": 1.9721151896167105e-05, "loss": 0.2042, "step": 35340 }, { "epoch": 1.9353337348737885, "grad_norm": 0.11316993832588196, "learning_rate": 1.9716081930642872e-05, "loss": 0.2001, "step": 35345 }, { "epoch": 1.9356075124568801, "grad_norm": 0.12818925082683563, "learning_rate": 1.971101196511864e-05, "loss": 0.1993, "step": 35350 }, { "epoch": 1.9358812900399716, "grad_norm": 0.11991366744041443, "learning_rate": 1.9705941999594405e-05, "loss": 0.2068, "step": 35355 }, { "epoch": 1.936155067623063, "grad_norm": 0.11023017019033432, "learning_rate": 1.970087203407017e-05, "loss": 0.2077, "step": 35360 }, { "epoch": 1.9364288452061547, "grad_norm": 0.10893020778894424, "learning_rate": 1.9695802068545935e-05, "loss": 0.1984, "step": 35365 }, { "epoch": 1.9367026227892459, "grad_norm": 0.1205700933933258, "learning_rate": 1.96907321030217e-05, "loss": 0.2096, "step": 35370 }, { "epoch": 1.9369764003723375, "grad_norm": 0.12556397914886475, "learning_rate": 1.9685662137497465e-05, "loss": 0.2048, "step": 35375 }, { "epoch": 1.937250177955429, "grad_norm": 0.10707902908325195, "learning_rate": 1.9680592171973232e-05, "loss": 0.2046, "step": 35380 }, { "epoch": 1.9375239555385204, "grad_norm": 0.10245898365974426, "learning_rate": 1.9675522206448995e-05, "loss": 0.1917, "step": 35385 }, { "epoch": 1.937797733121612, "grad_norm": 0.10841188579797745, "learning_rate": 1.9670452240924762e-05, "loss": 0.1987, "step": 35390 }, { "epoch": 1.9380715107047035, "grad_norm": 0.11359228193759918, "learning_rate": 1.966538227540053e-05, "loss": 0.2094, "step": 35395 }, { "epoch": 1.938345288287795, "grad_norm": 0.10343876481056213, "learning_rate": 1.9660312309876296e-05, "loss": 0.1994, "step": 35400 }, { "epoch": 1.9386190658708866, "grad_norm": 0.12536944448947906, "learning_rate": 1.965524234435206e-05, "loss": 0.2048, "step": 35405 }, { "epoch": 1.9388928434539778, "grad_norm": 0.11146146059036255, "learning_rate": 1.9650172378827826e-05, "loss": 0.2074, "step": 35410 }, { "epoch": 1.9391666210370695, "grad_norm": 0.12695428729057312, "learning_rate": 1.964510241330359e-05, "loss": 0.195, "step": 35415 }, { "epoch": 1.939440398620161, "grad_norm": 0.11515969038009644, "learning_rate": 1.9640032447779356e-05, "loss": 0.1981, "step": 35420 }, { "epoch": 1.9397141762032524, "grad_norm": 0.10513901710510254, "learning_rate": 1.963496248225512e-05, "loss": 0.1923, "step": 35425 }, { "epoch": 1.939987953786344, "grad_norm": 0.10950743407011032, "learning_rate": 1.962989251673089e-05, "loss": 0.2067, "step": 35430 }, { "epoch": 1.9402617313694355, "grad_norm": 0.11302995681762695, "learning_rate": 1.9624822551206652e-05, "loss": 0.2077, "step": 35435 }, { "epoch": 1.940535508952527, "grad_norm": 0.11055571585893631, "learning_rate": 1.961975258568242e-05, "loss": 0.2022, "step": 35440 }, { "epoch": 1.9408092865356186, "grad_norm": 0.11473860591650009, "learning_rate": 1.9614682620158186e-05, "loss": 0.208, "step": 35445 }, { "epoch": 1.94108306411871, "grad_norm": 0.10234501212835312, "learning_rate": 1.960961265463395e-05, "loss": 0.1932, "step": 35450 }, { "epoch": 1.9413568417018015, "grad_norm": 0.10667123645544052, "learning_rate": 1.9604542689109716e-05, "loss": 0.2074, "step": 35455 }, { "epoch": 1.9416306192848931, "grad_norm": 0.11249510198831558, "learning_rate": 1.959947272358548e-05, "loss": 0.194, "step": 35460 }, { "epoch": 1.9419043968679843, "grad_norm": 0.12097690254449844, "learning_rate": 1.9594402758061246e-05, "loss": 0.2083, "step": 35465 }, { "epoch": 1.942178174451076, "grad_norm": 0.1381942480802536, "learning_rate": 1.9589332792537012e-05, "loss": 0.1935, "step": 35470 }, { "epoch": 1.9424519520341674, "grad_norm": 0.12511368095874786, "learning_rate": 1.958426282701278e-05, "loss": 0.2013, "step": 35475 }, { "epoch": 1.9427257296172589, "grad_norm": 0.12324976921081543, "learning_rate": 1.9579192861488542e-05, "loss": 0.1932, "step": 35480 }, { "epoch": 1.9429995072003505, "grad_norm": 0.12416751682758331, "learning_rate": 1.957412289596431e-05, "loss": 0.199, "step": 35485 }, { "epoch": 1.943273284783442, "grad_norm": 0.11393250524997711, "learning_rate": 1.9569052930440072e-05, "loss": 0.201, "step": 35490 }, { "epoch": 1.9435470623665334, "grad_norm": 0.12608304619789124, "learning_rate": 1.956398296491584e-05, "loss": 0.2121, "step": 35495 }, { "epoch": 1.943820839949625, "grad_norm": 0.12344170361757278, "learning_rate": 1.9558912999391606e-05, "loss": 0.1962, "step": 35500 }, { "epoch": 1.9440946175327163, "grad_norm": 0.11234962940216064, "learning_rate": 1.955384303386737e-05, "loss": 0.1897, "step": 35505 }, { "epoch": 1.944368395115808, "grad_norm": 0.12059460580348969, "learning_rate": 1.954877306834314e-05, "loss": 0.208, "step": 35510 }, { "epoch": 1.9446421726988994, "grad_norm": 0.1176193505525589, "learning_rate": 1.9543703102818902e-05, "loss": 0.1972, "step": 35515 }, { "epoch": 1.9449159502819908, "grad_norm": 0.1342698633670807, "learning_rate": 1.953863313729467e-05, "loss": 0.203, "step": 35520 }, { "epoch": 1.9451897278650825, "grad_norm": 0.11705771833658218, "learning_rate": 1.9533563171770432e-05, "loss": 0.1988, "step": 35525 }, { "epoch": 1.945463505448174, "grad_norm": 0.10228165239095688, "learning_rate": 1.95284932062462e-05, "loss": 0.1934, "step": 35530 }, { "epoch": 1.9457372830312654, "grad_norm": 0.11840846389532089, "learning_rate": 1.9523423240721962e-05, "loss": 0.204, "step": 35535 }, { "epoch": 1.946011060614357, "grad_norm": 0.10577970743179321, "learning_rate": 1.951835327519773e-05, "loss": 0.1839, "step": 35540 }, { "epoch": 1.9462848381974482, "grad_norm": 0.12361057847738266, "learning_rate": 1.9513283309673496e-05, "loss": 0.1979, "step": 35545 }, { "epoch": 1.94655861578054, "grad_norm": 0.10665567219257355, "learning_rate": 1.9508213344149262e-05, "loss": 0.189, "step": 35550 }, { "epoch": 1.9468323933636313, "grad_norm": 0.13459248840808868, "learning_rate": 1.9503143378625026e-05, "loss": 0.2015, "step": 35555 }, { "epoch": 1.9471061709467228, "grad_norm": 0.12566950917243958, "learning_rate": 1.9498073413100792e-05, "loss": 0.1982, "step": 35560 }, { "epoch": 1.9473799485298144, "grad_norm": 0.11417513340711594, "learning_rate": 1.949300344757656e-05, "loss": 0.207, "step": 35565 }, { "epoch": 1.9476537261129059, "grad_norm": 0.10839232802391052, "learning_rate": 1.9487933482052322e-05, "loss": 0.2136, "step": 35570 }, { "epoch": 1.9479275036959973, "grad_norm": 0.11170551180839539, "learning_rate": 1.948286351652809e-05, "loss": 0.1956, "step": 35575 }, { "epoch": 1.948201281279089, "grad_norm": 0.11581030488014221, "learning_rate": 1.9477793551003852e-05, "loss": 0.2063, "step": 35580 }, { "epoch": 1.9484750588621804, "grad_norm": 0.1467932015657425, "learning_rate": 1.947272358547962e-05, "loss": 0.1978, "step": 35585 }, { "epoch": 1.9487488364452719, "grad_norm": 0.12872356176376343, "learning_rate": 1.9467653619955386e-05, "loss": 0.2005, "step": 35590 }, { "epoch": 1.9490226140283635, "grad_norm": 0.11914677917957306, "learning_rate": 1.9462583654431152e-05, "loss": 0.1948, "step": 35595 }, { "epoch": 1.9492963916114547, "grad_norm": 0.10800829529762268, "learning_rate": 1.9457513688906916e-05, "loss": 0.1957, "step": 35600 }, { "epoch": 1.9495701691945464, "grad_norm": 0.13474899530410767, "learning_rate": 1.9452443723382682e-05, "loss": 0.2094, "step": 35605 }, { "epoch": 1.9498439467776378, "grad_norm": 0.11249689012765884, "learning_rate": 1.944737375785845e-05, "loss": 0.1993, "step": 35610 }, { "epoch": 1.9501177243607293, "grad_norm": 0.11764787882566452, "learning_rate": 1.9442303792334212e-05, "loss": 0.1997, "step": 35615 }, { "epoch": 1.950391501943821, "grad_norm": 0.11713830381631851, "learning_rate": 1.943723382680998e-05, "loss": 0.1907, "step": 35620 }, { "epoch": 1.9506652795269124, "grad_norm": 0.12141213566064835, "learning_rate": 1.9432163861285742e-05, "loss": 0.1959, "step": 35625 }, { "epoch": 1.9509390571100038, "grad_norm": 0.13080503046512604, "learning_rate": 1.942709389576151e-05, "loss": 0.1926, "step": 35630 }, { "epoch": 1.9512128346930955, "grad_norm": 0.11775415390729904, "learning_rate": 1.9422023930237276e-05, "loss": 0.1969, "step": 35635 }, { "epoch": 1.9514866122761867, "grad_norm": 0.11382921785116196, "learning_rate": 1.9416953964713043e-05, "loss": 0.1994, "step": 35640 }, { "epoch": 1.9517603898592784, "grad_norm": 0.12734250724315643, "learning_rate": 1.9411883999188806e-05, "loss": 0.2027, "step": 35645 }, { "epoch": 1.9520341674423698, "grad_norm": 0.11073010414838791, "learning_rate": 1.9406814033664573e-05, "loss": 0.2038, "step": 35650 }, { "epoch": 1.9523079450254612, "grad_norm": 0.12465716153383255, "learning_rate": 1.9401744068140336e-05, "loss": 0.203, "step": 35655 }, { "epoch": 1.952581722608553, "grad_norm": 0.1322498917579651, "learning_rate": 1.9396674102616103e-05, "loss": 0.1942, "step": 35660 }, { "epoch": 1.9528555001916443, "grad_norm": 0.11933471262454987, "learning_rate": 1.939160413709187e-05, "loss": 0.1983, "step": 35665 }, { "epoch": 1.9531292777747358, "grad_norm": 0.11740487068891525, "learning_rate": 1.9386534171567633e-05, "loss": 0.2001, "step": 35670 }, { "epoch": 1.9534030553578274, "grad_norm": 0.11626984924077988, "learning_rate": 1.9381464206043403e-05, "loss": 0.1948, "step": 35675 }, { "epoch": 1.9536768329409187, "grad_norm": 0.10493987053632736, "learning_rate": 1.9376394240519166e-05, "loss": 0.2053, "step": 35680 }, { "epoch": 1.9539506105240103, "grad_norm": 0.11413602530956268, "learning_rate": 1.9371324274994933e-05, "loss": 0.1984, "step": 35685 }, { "epoch": 1.9542243881071018, "grad_norm": 0.10926701873540878, "learning_rate": 1.9366254309470696e-05, "loss": 0.1906, "step": 35690 }, { "epoch": 1.9544981656901932, "grad_norm": 0.11532521992921829, "learning_rate": 1.9361184343946463e-05, "loss": 0.2038, "step": 35695 }, { "epoch": 1.9547719432732849, "grad_norm": 0.12156158685684204, "learning_rate": 1.9356114378422226e-05, "loss": 0.2076, "step": 35700 }, { "epoch": 1.9550457208563763, "grad_norm": 0.11245828121900558, "learning_rate": 1.9351044412897993e-05, "loss": 0.201, "step": 35705 }, { "epoch": 1.9553194984394677, "grad_norm": 0.11404264718294144, "learning_rate": 1.9345974447373756e-05, "loss": 0.2041, "step": 35710 }, { "epoch": 1.9555932760225594, "grad_norm": 0.11337865889072418, "learning_rate": 1.9340904481849526e-05, "loss": 0.1964, "step": 35715 }, { "epoch": 1.9558670536056506, "grad_norm": 0.10260611027479172, "learning_rate": 1.933583451632529e-05, "loss": 0.2074, "step": 35720 }, { "epoch": 1.9561408311887423, "grad_norm": 0.13473689556121826, "learning_rate": 1.9330764550801056e-05, "loss": 0.1951, "step": 35725 }, { "epoch": 1.9564146087718337, "grad_norm": 0.12547604739665985, "learning_rate": 1.9325694585276823e-05, "loss": 0.1985, "step": 35730 }, { "epoch": 1.9566883863549251, "grad_norm": 0.12848228216171265, "learning_rate": 1.9320624619752586e-05, "loss": 0.1969, "step": 35735 }, { "epoch": 1.9569621639380168, "grad_norm": 0.12434971332550049, "learning_rate": 1.9315554654228353e-05, "loss": 0.1995, "step": 35740 }, { "epoch": 1.9572359415211082, "grad_norm": 0.10496009141206741, "learning_rate": 1.9310484688704116e-05, "loss": 0.1978, "step": 35745 }, { "epoch": 1.9575097191041997, "grad_norm": 0.10484972596168518, "learning_rate": 1.9305414723179883e-05, "loss": 0.1997, "step": 35750 }, { "epoch": 1.9577834966872913, "grad_norm": 0.10292582213878632, "learning_rate": 1.930034475765565e-05, "loss": 0.1984, "step": 35755 }, { "epoch": 1.9580572742703828, "grad_norm": 0.10935252159833908, "learning_rate": 1.9295274792131416e-05, "loss": 0.1964, "step": 35760 }, { "epoch": 1.9583310518534742, "grad_norm": 0.11869202554225922, "learning_rate": 1.929020482660718e-05, "loss": 0.1954, "step": 35765 }, { "epoch": 1.9586048294365659, "grad_norm": 0.12519606947898865, "learning_rate": 1.9285134861082946e-05, "loss": 0.1996, "step": 35770 }, { "epoch": 1.958878607019657, "grad_norm": 0.1207905188202858, "learning_rate": 1.928006489555871e-05, "loss": 0.1944, "step": 35775 }, { "epoch": 1.9591523846027488, "grad_norm": 0.10818508267402649, "learning_rate": 1.9274994930034476e-05, "loss": 0.1991, "step": 35780 }, { "epoch": 1.9594261621858402, "grad_norm": 0.11653348803520203, "learning_rate": 1.9269924964510243e-05, "loss": 0.2149, "step": 35785 }, { "epoch": 1.9596999397689316, "grad_norm": 0.12311088293790817, "learning_rate": 1.9264854998986006e-05, "loss": 0.1921, "step": 35790 }, { "epoch": 1.9599737173520233, "grad_norm": 0.10383331775665283, "learning_rate": 1.9259785033461776e-05, "loss": 0.1964, "step": 35795 }, { "epoch": 1.9602474949351147, "grad_norm": 0.11197567731142044, "learning_rate": 1.925471506793754e-05, "loss": 0.2006, "step": 35800 }, { "epoch": 1.9605212725182062, "grad_norm": 0.09833533316850662, "learning_rate": 1.9249645102413306e-05, "loss": 0.1962, "step": 35805 }, { "epoch": 1.9607950501012978, "grad_norm": 0.11702683568000793, "learning_rate": 1.924457513688907e-05, "loss": 0.2021, "step": 35810 }, { "epoch": 1.961068827684389, "grad_norm": 0.11331284046173096, "learning_rate": 1.9239505171364836e-05, "loss": 0.1965, "step": 35815 }, { "epoch": 1.9613426052674807, "grad_norm": 0.11228696256875992, "learning_rate": 1.92344352058406e-05, "loss": 0.1941, "step": 35820 }, { "epoch": 1.9616163828505722, "grad_norm": 0.12437669932842255, "learning_rate": 1.9229365240316366e-05, "loss": 0.2009, "step": 35825 }, { "epoch": 1.9618901604336636, "grad_norm": 0.11181319504976273, "learning_rate": 1.9224295274792133e-05, "loss": 0.1982, "step": 35830 }, { "epoch": 1.9621639380167553, "grad_norm": 0.11747244745492935, "learning_rate": 1.92192253092679e-05, "loss": 0.1996, "step": 35835 }, { "epoch": 1.9624377155998467, "grad_norm": 0.1357382833957672, "learning_rate": 1.9214155343743663e-05, "loss": 0.1982, "step": 35840 }, { "epoch": 1.9627114931829381, "grad_norm": 0.12568825483322144, "learning_rate": 1.920908537821943e-05, "loss": 0.2087, "step": 35845 }, { "epoch": 1.9629852707660298, "grad_norm": 0.10052300244569778, "learning_rate": 1.9204015412695196e-05, "loss": 0.2061, "step": 35850 }, { "epoch": 1.963259048349121, "grad_norm": 0.11821973323822021, "learning_rate": 1.919894544717096e-05, "loss": 0.1971, "step": 35855 }, { "epoch": 1.9635328259322127, "grad_norm": 0.12573423981666565, "learning_rate": 1.9193875481646726e-05, "loss": 0.2064, "step": 35860 }, { "epoch": 1.9638066035153041, "grad_norm": 0.12152290344238281, "learning_rate": 1.918880551612249e-05, "loss": 0.202, "step": 35865 }, { "epoch": 1.9640803810983956, "grad_norm": 0.12620607018470764, "learning_rate": 1.9183735550598256e-05, "loss": 0.2054, "step": 35870 }, { "epoch": 1.9643541586814872, "grad_norm": 0.12713463604450226, "learning_rate": 1.9178665585074023e-05, "loss": 0.2086, "step": 35875 }, { "epoch": 1.9646279362645787, "grad_norm": 0.13029374182224274, "learning_rate": 1.917359561954979e-05, "loss": 0.1949, "step": 35880 }, { "epoch": 1.96490171384767, "grad_norm": 0.12310991436243057, "learning_rate": 1.9168525654025553e-05, "loss": 0.1983, "step": 35885 }, { "epoch": 1.9651754914307618, "grad_norm": 0.12861399352550507, "learning_rate": 1.916345568850132e-05, "loss": 0.2077, "step": 35890 }, { "epoch": 1.9654492690138532, "grad_norm": 0.10948207974433899, "learning_rate": 1.9158385722977086e-05, "loss": 0.1931, "step": 35895 }, { "epoch": 1.9657230465969446, "grad_norm": 0.11143393069505692, "learning_rate": 1.915331575745285e-05, "loss": 0.2035, "step": 35900 }, { "epoch": 1.9659968241800363, "grad_norm": 0.12042929977178574, "learning_rate": 1.9148245791928616e-05, "loss": 0.2068, "step": 35905 }, { "epoch": 1.9662706017631275, "grad_norm": 0.10230347514152527, "learning_rate": 1.914317582640438e-05, "loss": 0.1878, "step": 35910 }, { "epoch": 1.9665443793462192, "grad_norm": 0.1292700618505478, "learning_rate": 1.9138105860880146e-05, "loss": 0.1964, "step": 35915 }, { "epoch": 1.9668181569293106, "grad_norm": 0.12474816292524338, "learning_rate": 1.9133035895355913e-05, "loss": 0.2101, "step": 35920 }, { "epoch": 1.967091934512402, "grad_norm": 0.14495985209941864, "learning_rate": 1.912796592983168e-05, "loss": 0.2011, "step": 35925 }, { "epoch": 1.9673657120954937, "grad_norm": 0.1144600436091423, "learning_rate": 1.9122895964307443e-05, "loss": 0.2108, "step": 35930 }, { "epoch": 1.9676394896785852, "grad_norm": 0.12237971276044846, "learning_rate": 1.911782599878321e-05, "loss": 0.1986, "step": 35935 }, { "epoch": 1.9679132672616766, "grad_norm": 0.1257680505514145, "learning_rate": 1.9112756033258973e-05, "loss": 0.2008, "step": 35940 }, { "epoch": 1.9681870448447683, "grad_norm": 0.11959938704967499, "learning_rate": 1.910768606773474e-05, "loss": 0.1978, "step": 35945 }, { "epoch": 1.9684608224278595, "grad_norm": 0.11010350286960602, "learning_rate": 1.9102616102210506e-05, "loss": 0.204, "step": 35950 }, { "epoch": 1.9687346000109511, "grad_norm": 0.10933307558298111, "learning_rate": 1.909754613668627e-05, "loss": 0.1975, "step": 35955 }, { "epoch": 1.9690083775940426, "grad_norm": 0.13856405019760132, "learning_rate": 1.909247617116204e-05, "loss": 0.1983, "step": 35960 }, { "epoch": 1.969282155177134, "grad_norm": 0.12368268519639969, "learning_rate": 1.9087406205637803e-05, "loss": 0.2012, "step": 35965 }, { "epoch": 1.9695559327602257, "grad_norm": 0.11333741992712021, "learning_rate": 1.908233624011357e-05, "loss": 0.1951, "step": 35970 }, { "epoch": 1.969829710343317, "grad_norm": 0.13491398096084595, "learning_rate": 1.9077266274589333e-05, "loss": 0.2095, "step": 35975 }, { "epoch": 1.9701034879264085, "grad_norm": 0.11697246879339218, "learning_rate": 1.90721963090651e-05, "loss": 0.199, "step": 35980 }, { "epoch": 1.9703772655095002, "grad_norm": 0.11402148753404617, "learning_rate": 1.9067126343540863e-05, "loss": 0.2085, "step": 35985 }, { "epoch": 1.9706510430925914, "grad_norm": 0.12261570245027542, "learning_rate": 1.906205637801663e-05, "loss": 0.1949, "step": 35990 }, { "epoch": 1.970924820675683, "grad_norm": 0.1067100241780281, "learning_rate": 1.9056986412492393e-05, "loss": 0.2028, "step": 35995 }, { "epoch": 1.9711985982587745, "grad_norm": 0.11165715754032135, "learning_rate": 1.9051916446968163e-05, "loss": 0.196, "step": 36000 }, { "epoch": 1.971472375841866, "grad_norm": 0.13166207075119019, "learning_rate": 1.9046846481443926e-05, "loss": 0.2061, "step": 36005 }, { "epoch": 1.9717461534249576, "grad_norm": 0.15046168863773346, "learning_rate": 1.9041776515919693e-05, "loss": 0.1913, "step": 36010 }, { "epoch": 1.972019931008049, "grad_norm": 0.1227826476097107, "learning_rate": 1.903670655039546e-05, "loss": 0.2125, "step": 36015 }, { "epoch": 1.9722937085911405, "grad_norm": 0.11889000236988068, "learning_rate": 1.9031636584871223e-05, "loss": 0.1985, "step": 36020 }, { "epoch": 1.9725674861742322, "grad_norm": 0.11288944631814957, "learning_rate": 1.902656661934699e-05, "loss": 0.1945, "step": 36025 }, { "epoch": 1.9728412637573236, "grad_norm": 0.12970209121704102, "learning_rate": 1.9021496653822753e-05, "loss": 0.2063, "step": 36030 }, { "epoch": 1.973115041340415, "grad_norm": 0.12118294090032578, "learning_rate": 1.901642668829852e-05, "loss": 0.2036, "step": 36035 }, { "epoch": 1.9733888189235067, "grad_norm": 0.11415355652570724, "learning_rate": 1.9011356722774287e-05, "loss": 0.1976, "step": 36040 }, { "epoch": 1.973662596506598, "grad_norm": 0.12567874789237976, "learning_rate": 1.9006286757250053e-05, "loss": 0.2049, "step": 36045 }, { "epoch": 1.9739363740896896, "grad_norm": 0.10461548715829849, "learning_rate": 1.9001216791725816e-05, "loss": 0.2014, "step": 36050 }, { "epoch": 1.974210151672781, "grad_norm": 0.10739488899707794, "learning_rate": 1.8996146826201583e-05, "loss": 0.1932, "step": 36055 }, { "epoch": 1.9744839292558725, "grad_norm": 0.10964275896549225, "learning_rate": 1.8991076860677346e-05, "loss": 0.2027, "step": 36060 }, { "epoch": 1.9747577068389641, "grad_norm": 0.1300467550754547, "learning_rate": 1.8986006895153113e-05, "loss": 0.1991, "step": 36065 }, { "epoch": 1.9750314844220556, "grad_norm": 0.1092514619231224, "learning_rate": 1.898093692962888e-05, "loss": 0.2073, "step": 36070 }, { "epoch": 1.975305262005147, "grad_norm": 0.12467580288648605, "learning_rate": 1.8975866964104643e-05, "loss": 0.1899, "step": 36075 }, { "epoch": 1.9755790395882387, "grad_norm": 0.11190284043550491, "learning_rate": 1.8970796998580413e-05, "loss": 0.1975, "step": 36080 }, { "epoch": 1.9758528171713299, "grad_norm": 0.09683739393949509, "learning_rate": 1.8965727033056177e-05, "loss": 0.2001, "step": 36085 }, { "epoch": 1.9761265947544215, "grad_norm": 0.12574605643749237, "learning_rate": 1.8960657067531943e-05, "loss": 0.1983, "step": 36090 }, { "epoch": 1.976400372337513, "grad_norm": 0.13856646418571472, "learning_rate": 1.8955587102007707e-05, "loss": 0.2082, "step": 36095 }, { "epoch": 1.9766741499206044, "grad_norm": 0.11306066066026688, "learning_rate": 1.8950517136483473e-05, "loss": 0.2128, "step": 36100 }, { "epoch": 1.976947927503696, "grad_norm": 0.1194499209523201, "learning_rate": 1.8945447170959237e-05, "loss": 0.1999, "step": 36105 }, { "epoch": 1.9772217050867875, "grad_norm": 0.1147579625248909, "learning_rate": 1.8940377205435003e-05, "loss": 0.2104, "step": 36110 }, { "epoch": 1.977495482669879, "grad_norm": 0.10808674246072769, "learning_rate": 1.893530723991077e-05, "loss": 0.2009, "step": 36115 }, { "epoch": 1.9777692602529706, "grad_norm": 0.12042011320590973, "learning_rate": 1.8930237274386537e-05, "loss": 0.2033, "step": 36120 }, { "epoch": 1.9780430378360618, "grad_norm": 0.11600857973098755, "learning_rate": 1.89251673088623e-05, "loss": 0.1954, "step": 36125 }, { "epoch": 1.9783168154191535, "grad_norm": 0.09158486872911453, "learning_rate": 1.8920097343338067e-05, "loss": 0.1892, "step": 36130 }, { "epoch": 1.978590593002245, "grad_norm": 0.13868069648742676, "learning_rate": 1.8915027377813833e-05, "loss": 0.1983, "step": 36135 }, { "epoch": 1.9788643705853364, "grad_norm": 0.11865727603435516, "learning_rate": 1.8909957412289597e-05, "loss": 0.201, "step": 36140 }, { "epoch": 1.979138148168428, "grad_norm": 0.1162828728556633, "learning_rate": 1.8904887446765363e-05, "loss": 0.2059, "step": 36145 }, { "epoch": 1.9794119257515195, "grad_norm": 0.14072713255882263, "learning_rate": 1.8899817481241127e-05, "loss": 0.2011, "step": 36150 }, { "epoch": 1.979685703334611, "grad_norm": 0.1002449095249176, "learning_rate": 1.8894747515716893e-05, "loss": 0.1998, "step": 36155 }, { "epoch": 1.9799594809177026, "grad_norm": 0.11691279709339142, "learning_rate": 1.888967755019266e-05, "loss": 0.1942, "step": 36160 }, { "epoch": 1.9802332585007938, "grad_norm": 0.12631411850452423, "learning_rate": 1.8884607584668427e-05, "loss": 0.2033, "step": 36165 }, { "epoch": 1.9805070360838855, "grad_norm": 0.11573415994644165, "learning_rate": 1.887953761914419e-05, "loss": 0.2149, "step": 36170 }, { "epoch": 1.9807808136669771, "grad_norm": 0.1147134080529213, "learning_rate": 1.8874467653619957e-05, "loss": 0.1977, "step": 36175 }, { "epoch": 1.9810545912500683, "grad_norm": 0.14986629784107208, "learning_rate": 1.8869397688095723e-05, "loss": 0.198, "step": 36180 }, { "epoch": 1.98132836883316, "grad_norm": 0.11329303681850433, "learning_rate": 1.8864327722571487e-05, "loss": 0.1985, "step": 36185 }, { "epoch": 1.9816021464162514, "grad_norm": 0.11525125801563263, "learning_rate": 1.8859257757047253e-05, "loss": 0.2129, "step": 36190 }, { "epoch": 1.9818759239993429, "grad_norm": 0.13645218312740326, "learning_rate": 1.8854187791523017e-05, "loss": 0.2005, "step": 36195 }, { "epoch": 1.9821497015824345, "grad_norm": 0.10933941602706909, "learning_rate": 1.8849117825998783e-05, "loss": 0.1938, "step": 36200 }, { "epoch": 1.982423479165526, "grad_norm": 0.11643391847610474, "learning_rate": 1.884404786047455e-05, "loss": 0.2014, "step": 36205 }, { "epoch": 1.9826972567486174, "grad_norm": 0.09949152171611786, "learning_rate": 1.8838977894950317e-05, "loss": 0.1946, "step": 36210 }, { "epoch": 1.982971034331709, "grad_norm": 0.11964214593172073, "learning_rate": 1.883390792942608e-05, "loss": 0.2048, "step": 36215 }, { "epoch": 1.9832448119148003, "grad_norm": 0.1072273775935173, "learning_rate": 1.8828837963901847e-05, "loss": 0.2015, "step": 36220 }, { "epoch": 1.983518589497892, "grad_norm": 0.1135999783873558, "learning_rate": 1.882376799837761e-05, "loss": 0.2022, "step": 36225 }, { "epoch": 1.9837923670809834, "grad_norm": 0.11143501847982407, "learning_rate": 1.8818698032853377e-05, "loss": 0.1977, "step": 36230 }, { "epoch": 1.9840661446640748, "grad_norm": 0.13432396948337555, "learning_rate": 1.8813628067329143e-05, "loss": 0.2085, "step": 36235 }, { "epoch": 1.9843399222471665, "grad_norm": 0.10617323219776154, "learning_rate": 1.8808558101804907e-05, "loss": 0.2007, "step": 36240 }, { "epoch": 1.984613699830258, "grad_norm": 0.11027207225561142, "learning_rate": 1.8803488136280677e-05, "loss": 0.1946, "step": 36245 }, { "epoch": 1.9848874774133494, "grad_norm": 0.12417039275169373, "learning_rate": 1.879841817075644e-05, "loss": 0.2005, "step": 36250 }, { "epoch": 1.985161254996441, "grad_norm": 0.13110551238059998, "learning_rate": 1.8793348205232207e-05, "loss": 0.1992, "step": 36255 }, { "epoch": 1.9854350325795322, "grad_norm": 0.12009645253419876, "learning_rate": 1.878827823970797e-05, "loss": 0.1979, "step": 36260 }, { "epoch": 1.985708810162624, "grad_norm": 0.11020415276288986, "learning_rate": 1.8783208274183737e-05, "loss": 0.2033, "step": 36265 }, { "epoch": 1.9859825877457153, "grad_norm": 0.10447253286838531, "learning_rate": 1.87781383086595e-05, "loss": 0.2037, "step": 36270 }, { "epoch": 1.9862563653288068, "grad_norm": 0.11744888126850128, "learning_rate": 1.8773068343135267e-05, "loss": 0.2003, "step": 36275 }, { "epoch": 1.9865301429118984, "grad_norm": 0.12142102420330048, "learning_rate": 1.8767998377611034e-05, "loss": 0.1948, "step": 36280 }, { "epoch": 1.9868039204949899, "grad_norm": 0.11507678776979446, "learning_rate": 1.87629284120868e-05, "loss": 0.2012, "step": 36285 }, { "epoch": 1.9870776980780813, "grad_norm": 0.11227278411388397, "learning_rate": 1.8757858446562564e-05, "loss": 0.2056, "step": 36290 }, { "epoch": 1.987351475661173, "grad_norm": 0.12092763930559158, "learning_rate": 1.875278848103833e-05, "loss": 0.1887, "step": 36295 }, { "epoch": 1.9876252532442642, "grad_norm": 0.12709257006645203, "learning_rate": 1.8747718515514097e-05, "loss": 0.205, "step": 36300 }, { "epoch": 1.9878990308273559, "grad_norm": 0.10921268165111542, "learning_rate": 1.874264854998986e-05, "loss": 0.1932, "step": 36305 }, { "epoch": 1.9881728084104473, "grad_norm": 0.1288956254720688, "learning_rate": 1.8737578584465627e-05, "loss": 0.1997, "step": 36310 }, { "epoch": 1.9884465859935387, "grad_norm": 0.11290805041790009, "learning_rate": 1.873250861894139e-05, "loss": 0.2064, "step": 36315 }, { "epoch": 1.9887203635766304, "grad_norm": 0.11866036802530289, "learning_rate": 1.8727438653417157e-05, "loss": 0.2055, "step": 36320 }, { "epoch": 1.9889941411597218, "grad_norm": 0.13611803948879242, "learning_rate": 1.8722368687892924e-05, "loss": 0.1935, "step": 36325 }, { "epoch": 1.9892679187428133, "grad_norm": 0.10041315853595734, "learning_rate": 1.871729872236869e-05, "loss": 0.1947, "step": 36330 }, { "epoch": 1.989541696325905, "grad_norm": 0.10828723758459091, "learning_rate": 1.8712228756844454e-05, "loss": 0.1934, "step": 36335 }, { "epoch": 1.9898154739089964, "grad_norm": 0.11674392968416214, "learning_rate": 1.870715879132022e-05, "loss": 0.192, "step": 36340 }, { "epoch": 1.9900892514920878, "grad_norm": 0.12140288949012756, "learning_rate": 1.8702088825795987e-05, "loss": 0.2056, "step": 36345 }, { "epoch": 1.9903630290751795, "grad_norm": 0.12083318084478378, "learning_rate": 1.869701886027175e-05, "loss": 0.2062, "step": 36350 }, { "epoch": 1.9906368066582707, "grad_norm": 0.12619240581989288, "learning_rate": 1.8691948894747517e-05, "loss": 0.2083, "step": 36355 }, { "epoch": 1.9909105842413624, "grad_norm": 0.11962557584047318, "learning_rate": 1.868687892922328e-05, "loss": 0.1921, "step": 36360 }, { "epoch": 1.9911843618244538, "grad_norm": 0.1131710335612297, "learning_rate": 1.868180896369905e-05, "loss": 0.1851, "step": 36365 }, { "epoch": 1.9914581394075452, "grad_norm": 0.10297909379005432, "learning_rate": 1.8676738998174814e-05, "loss": 0.1899, "step": 36370 }, { "epoch": 1.991731916990637, "grad_norm": 0.10625437647104263, "learning_rate": 1.867166903265058e-05, "loss": 0.1997, "step": 36375 }, { "epoch": 1.9920056945737283, "grad_norm": 0.11982033401727676, "learning_rate": 1.8666599067126344e-05, "loss": 0.2017, "step": 36380 }, { "epoch": 1.9922794721568198, "grad_norm": 0.11841816455125809, "learning_rate": 1.866152910160211e-05, "loss": 0.2089, "step": 36385 }, { "epoch": 1.9925532497399114, "grad_norm": 0.10400987416505814, "learning_rate": 1.8656459136077874e-05, "loss": 0.1957, "step": 36390 }, { "epoch": 1.9928270273230027, "grad_norm": 0.1128574088215828, "learning_rate": 1.865138917055364e-05, "loss": 0.1957, "step": 36395 }, { "epoch": 1.9931008049060943, "grad_norm": 0.10472845286130905, "learning_rate": 1.8646319205029407e-05, "loss": 0.2048, "step": 36400 }, { "epoch": 1.9933745824891858, "grad_norm": 0.11846628785133362, "learning_rate": 1.8641249239505174e-05, "loss": 0.2035, "step": 36405 }, { "epoch": 1.9936483600722772, "grad_norm": 0.11875025182962418, "learning_rate": 1.863617927398094e-05, "loss": 0.1973, "step": 36410 }, { "epoch": 1.9939221376553689, "grad_norm": 0.11014831811189651, "learning_rate": 1.8631109308456704e-05, "loss": 0.1999, "step": 36415 }, { "epoch": 1.9941959152384603, "grad_norm": 0.11021525412797928, "learning_rate": 1.862603934293247e-05, "loss": 0.1953, "step": 36420 }, { "epoch": 1.9944696928215517, "grad_norm": 0.11560619622468948, "learning_rate": 1.8620969377408234e-05, "loss": 0.2052, "step": 36425 }, { "epoch": 1.9947434704046434, "grad_norm": 0.11648529767990112, "learning_rate": 1.8615899411884e-05, "loss": 0.1988, "step": 36430 }, { "epoch": 1.9950172479877346, "grad_norm": 0.11633467674255371, "learning_rate": 1.8610829446359764e-05, "loss": 0.1894, "step": 36435 }, { "epoch": 1.9952910255708263, "grad_norm": 0.11852136999368668, "learning_rate": 1.860575948083553e-05, "loss": 0.198, "step": 36440 }, { "epoch": 1.9955648031539177, "grad_norm": 0.11320362985134125, "learning_rate": 1.8600689515311297e-05, "loss": 0.1994, "step": 36445 }, { "epoch": 1.9958385807370091, "grad_norm": 0.12121828645467758, "learning_rate": 1.8595619549787064e-05, "loss": 0.1941, "step": 36450 }, { "epoch": 1.9961123583201008, "grad_norm": 0.1099611297249794, "learning_rate": 1.8590549584262827e-05, "loss": 0.2004, "step": 36455 }, { "epoch": 1.9963861359031922, "grad_norm": 0.1410277634859085, "learning_rate": 1.8585479618738594e-05, "loss": 0.2067, "step": 36460 }, { "epoch": 1.9966599134862837, "grad_norm": 0.11880359053611755, "learning_rate": 1.858040965321436e-05, "loss": 0.204, "step": 36465 }, { "epoch": 1.9969336910693753, "grad_norm": 0.11048094928264618, "learning_rate": 1.8575339687690124e-05, "loss": 0.2081, "step": 36470 }, { "epoch": 1.9972074686524668, "grad_norm": 0.11514881253242493, "learning_rate": 1.857026972216589e-05, "loss": 0.1892, "step": 36475 }, { "epoch": 1.9974812462355582, "grad_norm": 0.12329578399658203, "learning_rate": 1.8565199756641654e-05, "loss": 0.1925, "step": 36480 }, { "epoch": 1.9977550238186499, "grad_norm": 0.11538849025964737, "learning_rate": 1.8560129791117424e-05, "loss": 0.1954, "step": 36485 }, { "epoch": 1.998028801401741, "grad_norm": 0.11354686319828033, "learning_rate": 1.8555059825593187e-05, "loss": 0.2043, "step": 36490 }, { "epoch": 1.9983025789848328, "grad_norm": 0.11807053536176682, "learning_rate": 1.8549989860068954e-05, "loss": 0.2044, "step": 36495 }, { "epoch": 1.9985763565679242, "grad_norm": 0.12007009238004684, "learning_rate": 1.8544919894544717e-05, "loss": 0.2004, "step": 36500 }, { "epoch": 1.9988501341510156, "grad_norm": 0.1223059594631195, "learning_rate": 1.8539849929020484e-05, "loss": 0.2025, "step": 36505 }, { "epoch": 1.9991239117341073, "grad_norm": 0.12284389138221741, "learning_rate": 1.8534779963496247e-05, "loss": 0.1989, "step": 36510 }, { "epoch": 1.9993976893171987, "grad_norm": 0.12396152317523956, "learning_rate": 1.8529709997972014e-05, "loss": 0.199, "step": 36515 }, { "epoch": 1.9996714669002902, "grad_norm": 0.12314889580011368, "learning_rate": 1.852464003244778e-05, "loss": 0.2068, "step": 36520 }, { "epoch": 1.9999452444833818, "grad_norm": 0.11077113449573517, "learning_rate": 1.8519570066923544e-05, "loss": 0.1944, "step": 36525 }, { "epoch": 2.000219022066473, "grad_norm": 0.11980654299259186, "learning_rate": 1.8514500101399314e-05, "loss": 0.1908, "step": 36530 }, { "epoch": 2.0004927996495647, "grad_norm": 0.11131956428289413, "learning_rate": 1.8509430135875077e-05, "loss": 0.1844, "step": 36535 }, { "epoch": 2.0007665772326564, "grad_norm": 0.10849495977163315, "learning_rate": 1.8504360170350844e-05, "loss": 0.1839, "step": 36540 }, { "epoch": 2.0010403548157476, "grad_norm": 0.11028425395488739, "learning_rate": 1.8499290204826607e-05, "loss": 0.1842, "step": 36545 }, { "epoch": 2.0013141323988393, "grad_norm": 0.12701457738876343, "learning_rate": 1.8494220239302374e-05, "loss": 0.1806, "step": 36550 }, { "epoch": 2.0015879099819305, "grad_norm": 0.10340720415115356, "learning_rate": 1.8489150273778137e-05, "loss": 0.1858, "step": 36555 }, { "epoch": 2.001861687565022, "grad_norm": 0.1126629039645195, "learning_rate": 1.8484080308253904e-05, "loss": 0.1847, "step": 36560 }, { "epoch": 2.002135465148114, "grad_norm": 0.11299916356801987, "learning_rate": 1.847901034272967e-05, "loss": 0.1879, "step": 36565 }, { "epoch": 2.002409242731205, "grad_norm": 0.1157100722193718, "learning_rate": 1.8473940377205437e-05, "loss": 0.1854, "step": 36570 }, { "epoch": 2.0026830203142967, "grad_norm": 0.10539805889129639, "learning_rate": 1.84688704116812e-05, "loss": 0.1893, "step": 36575 }, { "epoch": 2.0029567978973883, "grad_norm": 0.11916998773813248, "learning_rate": 1.8463800446156967e-05, "loss": 0.187, "step": 36580 }, { "epoch": 2.0032305754804796, "grad_norm": 0.11170325428247452, "learning_rate": 1.8458730480632734e-05, "loss": 0.1785, "step": 36585 }, { "epoch": 2.003504353063571, "grad_norm": 0.10598218441009521, "learning_rate": 1.8453660515108497e-05, "loss": 0.1848, "step": 36590 }, { "epoch": 2.0037781306466624, "grad_norm": 0.1163366287946701, "learning_rate": 1.8448590549584264e-05, "loss": 0.1809, "step": 36595 }, { "epoch": 2.004051908229754, "grad_norm": 0.10403487831354141, "learning_rate": 1.8443520584060027e-05, "loss": 0.1871, "step": 36600 }, { "epoch": 2.0043256858128458, "grad_norm": 0.10440102964639664, "learning_rate": 1.8438450618535794e-05, "loss": 0.1859, "step": 36605 }, { "epoch": 2.004599463395937, "grad_norm": 0.10686089843511581, "learning_rate": 1.843338065301156e-05, "loss": 0.1893, "step": 36610 }, { "epoch": 2.0048732409790286, "grad_norm": 0.10368625819683075, "learning_rate": 1.8428310687487327e-05, "loss": 0.1806, "step": 36615 }, { "epoch": 2.0051470185621203, "grad_norm": 0.09855940192937851, "learning_rate": 1.842324072196309e-05, "loss": 0.1827, "step": 36620 }, { "epoch": 2.0054207961452115, "grad_norm": 0.11007962375879288, "learning_rate": 1.8418170756438857e-05, "loss": 0.1816, "step": 36625 }, { "epoch": 2.005694573728303, "grad_norm": 0.11022203415632248, "learning_rate": 1.8413100790914624e-05, "loss": 0.1868, "step": 36630 }, { "epoch": 2.005968351311395, "grad_norm": 0.12140152603387833, "learning_rate": 1.8408030825390387e-05, "loss": 0.1802, "step": 36635 }, { "epoch": 2.006242128894486, "grad_norm": 0.10779473930597305, "learning_rate": 1.8402960859866154e-05, "loss": 0.1893, "step": 36640 }, { "epoch": 2.0065159064775777, "grad_norm": 0.1101098582148552, "learning_rate": 1.8397890894341917e-05, "loss": 0.1869, "step": 36645 }, { "epoch": 2.006789684060669, "grad_norm": 0.12799149751663208, "learning_rate": 1.8392820928817687e-05, "loss": 0.1886, "step": 36650 }, { "epoch": 2.0070634616437606, "grad_norm": 0.13411758840084076, "learning_rate": 1.838775096329345e-05, "loss": 0.1842, "step": 36655 }, { "epoch": 2.0073372392268523, "grad_norm": 0.11018586903810501, "learning_rate": 1.8382680997769217e-05, "loss": 0.1852, "step": 36660 }, { "epoch": 2.0076110168099435, "grad_norm": 0.10031341761350632, "learning_rate": 1.837761103224498e-05, "loss": 0.1871, "step": 36665 }, { "epoch": 2.007884794393035, "grad_norm": 0.11002688854932785, "learning_rate": 1.8372541066720747e-05, "loss": 0.1876, "step": 36670 }, { "epoch": 2.008158571976127, "grad_norm": 0.11507706344127655, "learning_rate": 1.836747110119651e-05, "loss": 0.1806, "step": 36675 }, { "epoch": 2.008432349559218, "grad_norm": 0.10103649646043777, "learning_rate": 1.8362401135672277e-05, "loss": 0.1879, "step": 36680 }, { "epoch": 2.0087061271423097, "grad_norm": 0.10373739898204803, "learning_rate": 1.8357331170148044e-05, "loss": 0.1828, "step": 36685 }, { "epoch": 2.008979904725401, "grad_norm": 0.10519316792488098, "learning_rate": 1.835226120462381e-05, "loss": 0.1827, "step": 36690 }, { "epoch": 2.0092536823084926, "grad_norm": 0.10853685438632965, "learning_rate": 1.8347191239099578e-05, "loss": 0.1837, "step": 36695 }, { "epoch": 2.009527459891584, "grad_norm": 0.11961819231510162, "learning_rate": 1.834212127357534e-05, "loss": 0.1838, "step": 36700 }, { "epoch": 2.0098012374746754, "grad_norm": 0.13490213453769684, "learning_rate": 1.8337051308051108e-05, "loss": 0.1865, "step": 36705 }, { "epoch": 2.010075015057767, "grad_norm": 0.1259204000234604, "learning_rate": 1.833198134252687e-05, "loss": 0.1924, "step": 36710 }, { "epoch": 2.0103487926408588, "grad_norm": 0.11566388607025146, "learning_rate": 1.8326911377002638e-05, "loss": 0.186, "step": 36715 }, { "epoch": 2.01062257022395, "grad_norm": 0.10634792596101761, "learning_rate": 1.83218414114784e-05, "loss": 0.1805, "step": 36720 }, { "epoch": 2.0108963478070416, "grad_norm": 0.10010656714439392, "learning_rate": 1.8316771445954168e-05, "loss": 0.1869, "step": 36725 }, { "epoch": 2.011170125390133, "grad_norm": 0.1313575804233551, "learning_rate": 1.8311701480429934e-05, "loss": 0.1929, "step": 36730 }, { "epoch": 2.0114439029732245, "grad_norm": 0.10964900255203247, "learning_rate": 1.83066315149057e-05, "loss": 0.1852, "step": 36735 }, { "epoch": 2.011717680556316, "grad_norm": 0.10389315336942673, "learning_rate": 1.8301561549381464e-05, "loss": 0.184, "step": 36740 }, { "epoch": 2.0119914581394074, "grad_norm": 0.1178760826587677, "learning_rate": 1.829649158385723e-05, "loss": 0.1892, "step": 36745 }, { "epoch": 2.012265235722499, "grad_norm": 0.10326763987541199, "learning_rate": 1.8291421618332998e-05, "loss": 0.1857, "step": 36750 }, { "epoch": 2.0125390133055907, "grad_norm": 0.1246754601597786, "learning_rate": 1.828635165280876e-05, "loss": 0.1868, "step": 36755 }, { "epoch": 2.012812790888682, "grad_norm": 0.10135886818170547, "learning_rate": 1.8281281687284528e-05, "loss": 0.1885, "step": 36760 }, { "epoch": 2.0130865684717736, "grad_norm": 0.10558269917964935, "learning_rate": 1.827621172176029e-05, "loss": 0.1829, "step": 36765 }, { "epoch": 2.0133603460548652, "grad_norm": 0.09900134056806564, "learning_rate": 1.827114175623606e-05, "loss": 0.185, "step": 36770 }, { "epoch": 2.0136341236379565, "grad_norm": 0.10087677836418152, "learning_rate": 1.8266071790711824e-05, "loss": 0.1848, "step": 36775 }, { "epoch": 2.013907901221048, "grad_norm": 0.10711873322725296, "learning_rate": 1.826100182518759e-05, "loss": 0.1852, "step": 36780 }, { "epoch": 2.0141816788041393, "grad_norm": 0.1062476858496666, "learning_rate": 1.8255931859663354e-05, "loss": 0.1867, "step": 36785 }, { "epoch": 2.014455456387231, "grad_norm": 0.11401458084583282, "learning_rate": 1.825086189413912e-05, "loss": 0.1807, "step": 36790 }, { "epoch": 2.0147292339703227, "grad_norm": 0.11004601418972015, "learning_rate": 1.8245791928614884e-05, "loss": 0.1891, "step": 36795 }, { "epoch": 2.015003011553414, "grad_norm": 0.10908740758895874, "learning_rate": 1.824072196309065e-05, "loss": 0.1882, "step": 36800 }, { "epoch": 2.0152767891365055, "grad_norm": 0.1339176893234253, "learning_rate": 1.8235651997566418e-05, "loss": 0.1863, "step": 36805 }, { "epoch": 2.015550566719597, "grad_norm": 0.11169841140508652, "learning_rate": 1.823058203204218e-05, "loss": 0.1829, "step": 36810 }, { "epoch": 2.0158243443026884, "grad_norm": 0.11157535761594772, "learning_rate": 1.822551206651795e-05, "loss": 0.1798, "step": 36815 }, { "epoch": 2.01609812188578, "grad_norm": 0.11585879325866699, "learning_rate": 1.8220442100993714e-05, "loss": 0.1898, "step": 36820 }, { "epoch": 2.0163718994688713, "grad_norm": 0.10300913453102112, "learning_rate": 1.821537213546948e-05, "loss": 0.1788, "step": 36825 }, { "epoch": 2.016645677051963, "grad_norm": 0.11363488435745239, "learning_rate": 1.8210302169945244e-05, "loss": 0.1892, "step": 36830 }, { "epoch": 2.0169194546350546, "grad_norm": 0.13192321360111237, "learning_rate": 1.820523220442101e-05, "loss": 0.1897, "step": 36835 }, { "epoch": 2.017193232218146, "grad_norm": 0.10507949441671371, "learning_rate": 1.8200162238896774e-05, "loss": 0.1837, "step": 36840 }, { "epoch": 2.0174670098012375, "grad_norm": 0.10484914481639862, "learning_rate": 1.819509227337254e-05, "loss": 0.185, "step": 36845 }, { "epoch": 2.017740787384329, "grad_norm": 0.1162559762597084, "learning_rate": 1.8190022307848308e-05, "loss": 0.1767, "step": 36850 }, { "epoch": 2.0180145649674204, "grad_norm": 0.1057514101266861, "learning_rate": 1.8184952342324074e-05, "loss": 0.1952, "step": 36855 }, { "epoch": 2.018288342550512, "grad_norm": 0.09932076185941696, "learning_rate": 1.8179882376799838e-05, "loss": 0.1852, "step": 36860 }, { "epoch": 2.0185621201336033, "grad_norm": 0.1185365840792656, "learning_rate": 1.8174812411275604e-05, "loss": 0.1908, "step": 36865 }, { "epoch": 2.018835897716695, "grad_norm": 0.12438847869634628, "learning_rate": 1.816974244575137e-05, "loss": 0.19, "step": 36870 }, { "epoch": 2.0191096752997866, "grad_norm": 0.10652254521846771, "learning_rate": 1.8164672480227134e-05, "loss": 0.1833, "step": 36875 }, { "epoch": 2.019383452882878, "grad_norm": 0.10398226231336594, "learning_rate": 1.81596025147029e-05, "loss": 0.1786, "step": 36880 }, { "epoch": 2.0196572304659695, "grad_norm": 0.09140579402446747, "learning_rate": 1.8154532549178664e-05, "loss": 0.1854, "step": 36885 }, { "epoch": 2.019931008049061, "grad_norm": 0.11896517872810364, "learning_rate": 1.814946258365443e-05, "loss": 0.1909, "step": 36890 }, { "epoch": 2.0202047856321523, "grad_norm": 0.11182963103055954, "learning_rate": 1.8144392618130198e-05, "loss": 0.1871, "step": 36895 }, { "epoch": 2.020478563215244, "grad_norm": 0.10793524980545044, "learning_rate": 1.8139322652605965e-05, "loss": 0.1861, "step": 36900 }, { "epoch": 2.020752340798335, "grad_norm": 0.10659928619861603, "learning_rate": 1.8134252687081728e-05, "loss": 0.1894, "step": 36905 }, { "epoch": 2.021026118381427, "grad_norm": 0.1284186840057373, "learning_rate": 1.8129182721557495e-05, "loss": 0.1879, "step": 36910 }, { "epoch": 2.0212998959645185, "grad_norm": 0.1089845821261406, "learning_rate": 1.812411275603326e-05, "loss": 0.1908, "step": 36915 }, { "epoch": 2.0215736735476098, "grad_norm": 0.12636658549308777, "learning_rate": 1.8119042790509025e-05, "loss": 0.1891, "step": 36920 }, { "epoch": 2.0218474511307014, "grad_norm": 0.13198800384998322, "learning_rate": 1.811397282498479e-05, "loss": 0.1913, "step": 36925 }, { "epoch": 2.022121228713793, "grad_norm": 0.09714566171169281, "learning_rate": 1.8108902859460555e-05, "loss": 0.1932, "step": 36930 }, { "epoch": 2.0223950062968843, "grad_norm": 0.10848009586334229, "learning_rate": 1.8103832893936325e-05, "loss": 0.1801, "step": 36935 }, { "epoch": 2.022668783879976, "grad_norm": 0.11404915153980255, "learning_rate": 1.8098762928412088e-05, "loss": 0.189, "step": 36940 }, { "epoch": 2.0229425614630676, "grad_norm": 0.09847749024629593, "learning_rate": 1.8093692962887855e-05, "loss": 0.1845, "step": 36945 }, { "epoch": 2.023216339046159, "grad_norm": 0.1053517609834671, "learning_rate": 1.8088622997363618e-05, "loss": 0.1828, "step": 36950 }, { "epoch": 2.0234901166292505, "grad_norm": 0.10306534171104431, "learning_rate": 1.8083553031839385e-05, "loss": 0.1861, "step": 36955 }, { "epoch": 2.0237638942123417, "grad_norm": 0.10229828953742981, "learning_rate": 1.8078483066315148e-05, "loss": 0.1865, "step": 36960 }, { "epoch": 2.0240376717954334, "grad_norm": 0.09406106919050217, "learning_rate": 1.8073413100790915e-05, "loss": 0.1739, "step": 36965 }, { "epoch": 2.024311449378525, "grad_norm": 0.09829128533601761, "learning_rate": 1.806834313526668e-05, "loss": 0.1807, "step": 36970 }, { "epoch": 2.0245852269616162, "grad_norm": 0.10570138692855835, "learning_rate": 1.8063273169742448e-05, "loss": 0.1807, "step": 36975 }, { "epoch": 2.024859004544708, "grad_norm": 0.11678178608417511, "learning_rate": 1.8058203204218215e-05, "loss": 0.19, "step": 36980 }, { "epoch": 2.0251327821277996, "grad_norm": 0.10166459530591965, "learning_rate": 1.8053133238693978e-05, "loss": 0.1861, "step": 36985 }, { "epoch": 2.025406559710891, "grad_norm": 0.09773466736078262, "learning_rate": 1.8048063273169745e-05, "loss": 0.1892, "step": 36990 }, { "epoch": 2.0256803372939824, "grad_norm": 0.09906823933124542, "learning_rate": 1.8042993307645508e-05, "loss": 0.1794, "step": 36995 }, { "epoch": 2.0259541148770737, "grad_norm": 0.10284005850553513, "learning_rate": 1.8037923342121275e-05, "loss": 0.1772, "step": 37000 }, { "epoch": 2.0262278924601653, "grad_norm": 0.09639189392328262, "learning_rate": 1.8032853376597038e-05, "loss": 0.1865, "step": 37005 }, { "epoch": 2.026501670043257, "grad_norm": 0.09536781907081604, "learning_rate": 1.8027783411072805e-05, "loss": 0.1808, "step": 37010 }, { "epoch": 2.026775447626348, "grad_norm": 0.09978749603033066, "learning_rate": 1.802271344554857e-05, "loss": 0.1884, "step": 37015 }, { "epoch": 2.02704922520944, "grad_norm": 0.1286657154560089, "learning_rate": 1.8017643480024338e-05, "loss": 0.194, "step": 37020 }, { "epoch": 2.0273230027925315, "grad_norm": 0.10983192175626755, "learning_rate": 1.80125735145001e-05, "loss": 0.1923, "step": 37025 }, { "epoch": 2.0275967803756227, "grad_norm": 0.11545810103416443, "learning_rate": 1.8007503548975868e-05, "loss": 0.1894, "step": 37030 }, { "epoch": 2.0278705579587144, "grad_norm": 0.11027272790670395, "learning_rate": 1.8002433583451635e-05, "loss": 0.188, "step": 37035 }, { "epoch": 2.0281443355418056, "grad_norm": 0.1044444590806961, "learning_rate": 1.7997363617927398e-05, "loss": 0.1861, "step": 37040 }, { "epoch": 2.0284181131248973, "grad_norm": 0.10381131619215012, "learning_rate": 1.7992293652403165e-05, "loss": 0.194, "step": 37045 }, { "epoch": 2.028691890707989, "grad_norm": 0.09769640117883682, "learning_rate": 1.7987223686878928e-05, "loss": 0.1792, "step": 37050 }, { "epoch": 2.02896566829108, "grad_norm": 0.09131965786218643, "learning_rate": 1.7982153721354698e-05, "loss": 0.1792, "step": 37055 }, { "epoch": 2.029239445874172, "grad_norm": 0.10710328817367554, "learning_rate": 1.797708375583046e-05, "loss": 0.1863, "step": 37060 }, { "epoch": 2.0295132234572635, "grad_norm": 0.10808239132165909, "learning_rate": 1.7972013790306228e-05, "loss": 0.181, "step": 37065 }, { "epoch": 2.0297870010403547, "grad_norm": 0.10235410183668137, "learning_rate": 1.796694382478199e-05, "loss": 0.1849, "step": 37070 }, { "epoch": 2.0300607786234464, "grad_norm": 0.10433840751647949, "learning_rate": 1.7961873859257758e-05, "loss": 0.1872, "step": 37075 }, { "epoch": 2.030334556206538, "grad_norm": 0.09503635764122009, "learning_rate": 1.795680389373352e-05, "loss": 0.1824, "step": 37080 }, { "epoch": 2.0306083337896292, "grad_norm": 0.11306995153427124, "learning_rate": 1.7951733928209288e-05, "loss": 0.1877, "step": 37085 }, { "epoch": 2.030882111372721, "grad_norm": 0.09989356249570847, "learning_rate": 1.7946663962685055e-05, "loss": 0.1805, "step": 37090 }, { "epoch": 2.031155888955812, "grad_norm": 0.1094404086470604, "learning_rate": 1.7941593997160818e-05, "loss": 0.178, "step": 37095 }, { "epoch": 2.0314296665389038, "grad_norm": 0.10614845156669617, "learning_rate": 1.7936524031636588e-05, "loss": 0.1892, "step": 37100 }, { "epoch": 2.0317034441219954, "grad_norm": 0.12901419401168823, "learning_rate": 1.793145406611235e-05, "loss": 0.1898, "step": 37105 }, { "epoch": 2.0319772217050867, "grad_norm": 0.1070905327796936, "learning_rate": 1.7926384100588118e-05, "loss": 0.1793, "step": 37110 }, { "epoch": 2.0322509992881783, "grad_norm": 0.10273000597953796, "learning_rate": 1.792131413506388e-05, "loss": 0.1884, "step": 37115 }, { "epoch": 2.03252477687127, "grad_norm": 0.11220382899045944, "learning_rate": 1.7916244169539648e-05, "loss": 0.1814, "step": 37120 }, { "epoch": 2.032798554454361, "grad_norm": 0.1043035015463829, "learning_rate": 1.791117420401541e-05, "loss": 0.1822, "step": 37125 }, { "epoch": 2.033072332037453, "grad_norm": 0.09990787506103516, "learning_rate": 1.7906104238491178e-05, "loss": 0.1889, "step": 37130 }, { "epoch": 2.033346109620544, "grad_norm": 0.09819093346595764, "learning_rate": 1.7901034272966945e-05, "loss": 0.1878, "step": 37135 }, { "epoch": 2.0336198872036357, "grad_norm": 0.09559361636638641, "learning_rate": 1.789596430744271e-05, "loss": 0.1828, "step": 37140 }, { "epoch": 2.0338936647867274, "grad_norm": 0.09411493688821793, "learning_rate": 1.7890894341918475e-05, "loss": 0.1793, "step": 37145 }, { "epoch": 2.0341674423698186, "grad_norm": 0.101727195084095, "learning_rate": 1.788582437639424e-05, "loss": 0.1856, "step": 37150 }, { "epoch": 2.0344412199529103, "grad_norm": 0.11062688380479813, "learning_rate": 1.7880754410870008e-05, "loss": 0.1877, "step": 37155 }, { "epoch": 2.034714997536002, "grad_norm": 0.11220742762088776, "learning_rate": 1.787568444534577e-05, "loss": 0.1854, "step": 37160 }, { "epoch": 2.034988775119093, "grad_norm": 0.09972076863050461, "learning_rate": 1.7870614479821538e-05, "loss": 0.1833, "step": 37165 }, { "epoch": 2.035262552702185, "grad_norm": 0.10381205379962921, "learning_rate": 1.78655445142973e-05, "loss": 0.1832, "step": 37170 }, { "epoch": 2.035536330285276, "grad_norm": 0.10658256709575653, "learning_rate": 1.7860474548773068e-05, "loss": 0.1834, "step": 37175 }, { "epoch": 2.0358101078683677, "grad_norm": 0.11211571842432022, "learning_rate": 1.7855404583248835e-05, "loss": 0.1885, "step": 37180 }, { "epoch": 2.0360838854514594, "grad_norm": 0.10328131914138794, "learning_rate": 1.78503346177246e-05, "loss": 0.1838, "step": 37185 }, { "epoch": 2.0363576630345506, "grad_norm": 0.09677953273057938, "learning_rate": 1.7845264652200365e-05, "loss": 0.1827, "step": 37190 }, { "epoch": 2.0366314406176422, "grad_norm": 0.10407250374555588, "learning_rate": 1.784019468667613e-05, "loss": 0.1846, "step": 37195 }, { "epoch": 2.036905218200734, "grad_norm": 0.1092122420668602, "learning_rate": 1.78351247211519e-05, "loss": 0.1795, "step": 37200 }, { "epoch": 2.037178995783825, "grad_norm": 0.11141540110111237, "learning_rate": 1.783005475562766e-05, "loss": 0.1831, "step": 37205 }, { "epoch": 2.0374527733669168, "grad_norm": 0.10685522109270096, "learning_rate": 1.782498479010343e-05, "loss": 0.1758, "step": 37210 }, { "epoch": 2.0377265509500084, "grad_norm": 0.10257931053638458, "learning_rate": 1.781991482457919e-05, "loss": 0.1775, "step": 37215 }, { "epoch": 2.0380003285330996, "grad_norm": 0.12469112128019333, "learning_rate": 1.7814844859054962e-05, "loss": 0.1829, "step": 37220 }, { "epoch": 2.0382741061161913, "grad_norm": 0.1120796948671341, "learning_rate": 1.7809774893530725e-05, "loss": 0.1934, "step": 37225 }, { "epoch": 2.0385478836992825, "grad_norm": 0.10212334245443344, "learning_rate": 1.7804704928006492e-05, "loss": 0.175, "step": 37230 }, { "epoch": 2.038821661282374, "grad_norm": 0.10665559768676758, "learning_rate": 1.7799634962482255e-05, "loss": 0.1887, "step": 37235 }, { "epoch": 2.039095438865466, "grad_norm": 0.10829953849315643, "learning_rate": 1.7794564996958022e-05, "loss": 0.1931, "step": 37240 }, { "epoch": 2.039369216448557, "grad_norm": 0.11334723979234695, "learning_rate": 1.7789495031433785e-05, "loss": 0.1872, "step": 37245 }, { "epoch": 2.0396429940316487, "grad_norm": 0.0962260439991951, "learning_rate": 1.7784425065909552e-05, "loss": 0.1763, "step": 37250 }, { "epoch": 2.0399167716147404, "grad_norm": 0.10858875513076782, "learning_rate": 1.777935510038532e-05, "loss": 0.1815, "step": 37255 }, { "epoch": 2.0401905491978316, "grad_norm": 0.0990605279803276, "learning_rate": 1.7774285134861085e-05, "loss": 0.1827, "step": 37260 }, { "epoch": 2.0404643267809233, "grad_norm": 0.10657120496034622, "learning_rate": 1.7769215169336852e-05, "loss": 0.1814, "step": 37265 }, { "epoch": 2.0407381043640145, "grad_norm": 0.11189638823270798, "learning_rate": 1.7764145203812615e-05, "loss": 0.1938, "step": 37270 }, { "epoch": 2.041011881947106, "grad_norm": 0.11330438405275345, "learning_rate": 1.7759075238288382e-05, "loss": 0.1887, "step": 37275 }, { "epoch": 2.041285659530198, "grad_norm": 0.11171632260084152, "learning_rate": 1.7754005272764145e-05, "loss": 0.1793, "step": 37280 }, { "epoch": 2.041559437113289, "grad_norm": 0.1029433161020279, "learning_rate": 1.7748935307239912e-05, "loss": 0.1785, "step": 37285 }, { "epoch": 2.0418332146963807, "grad_norm": 0.10677721351385117, "learning_rate": 1.7743865341715675e-05, "loss": 0.1856, "step": 37290 }, { "epoch": 2.0421069922794723, "grad_norm": 0.10821770131587982, "learning_rate": 1.7738795376191442e-05, "loss": 0.1842, "step": 37295 }, { "epoch": 2.0423807698625636, "grad_norm": 0.11987493187189102, "learning_rate": 1.773372541066721e-05, "loss": 0.1884, "step": 37300 }, { "epoch": 2.0426545474456552, "grad_norm": 0.10622447729110718, "learning_rate": 1.7728655445142975e-05, "loss": 0.1851, "step": 37305 }, { "epoch": 2.0429283250287464, "grad_norm": 0.09968264400959015, "learning_rate": 1.772358547961874e-05, "loss": 0.1832, "step": 37310 }, { "epoch": 2.043202102611838, "grad_norm": 0.1135331392288208, "learning_rate": 1.7718515514094505e-05, "loss": 0.1856, "step": 37315 }, { "epoch": 2.0434758801949298, "grad_norm": 0.09644638746976852, "learning_rate": 1.7713445548570272e-05, "loss": 0.1802, "step": 37320 }, { "epoch": 2.043749657778021, "grad_norm": 0.09904980659484863, "learning_rate": 1.7708375583046035e-05, "loss": 0.1845, "step": 37325 }, { "epoch": 2.0440234353611126, "grad_norm": 0.11482199281454086, "learning_rate": 1.7703305617521802e-05, "loss": 0.1834, "step": 37330 }, { "epoch": 2.0442972129442043, "grad_norm": 0.10014753043651581, "learning_rate": 1.7698235651997565e-05, "loss": 0.1894, "step": 37335 }, { "epoch": 2.0445709905272955, "grad_norm": 0.12362977117300034, "learning_rate": 1.7693165686473335e-05, "loss": 0.1859, "step": 37340 }, { "epoch": 2.044844768110387, "grad_norm": 0.108364038169384, "learning_rate": 1.76880957209491e-05, "loss": 0.1901, "step": 37345 }, { "epoch": 2.0451185456934784, "grad_norm": 0.1151733249425888, "learning_rate": 1.7683025755424865e-05, "loss": 0.1835, "step": 37350 }, { "epoch": 2.04539232327657, "grad_norm": 0.11603135615587234, "learning_rate": 1.767795578990063e-05, "loss": 0.1882, "step": 37355 }, { "epoch": 2.0456661008596617, "grad_norm": 0.1197606697678566, "learning_rate": 1.7672885824376395e-05, "loss": 0.1908, "step": 37360 }, { "epoch": 2.045939878442753, "grad_norm": 0.11783095449209213, "learning_rate": 1.7667815858852162e-05, "loss": 0.1896, "step": 37365 }, { "epoch": 2.0462136560258446, "grad_norm": 0.10308840125799179, "learning_rate": 1.7662745893327925e-05, "loss": 0.1856, "step": 37370 }, { "epoch": 2.0464874336089363, "grad_norm": 0.11641623079776764, "learning_rate": 1.7657675927803692e-05, "loss": 0.1869, "step": 37375 }, { "epoch": 2.0467612111920275, "grad_norm": 0.10999744385480881, "learning_rate": 1.765260596227946e-05, "loss": 0.1916, "step": 37380 }, { "epoch": 2.047034988775119, "grad_norm": 0.10072749853134155, "learning_rate": 1.7647535996755225e-05, "loss": 0.1831, "step": 37385 }, { "epoch": 2.047308766358211, "grad_norm": 0.10523232072591782, "learning_rate": 1.764246603123099e-05, "loss": 0.1784, "step": 37390 }, { "epoch": 2.047582543941302, "grad_norm": 0.0943993404507637, "learning_rate": 1.7637396065706755e-05, "loss": 0.1808, "step": 37395 }, { "epoch": 2.0478563215243937, "grad_norm": 0.09628935903310776, "learning_rate": 1.763232610018252e-05, "loss": 0.1906, "step": 37400 }, { "epoch": 2.048130099107485, "grad_norm": 0.11802823841571808, "learning_rate": 1.7627256134658285e-05, "loss": 0.1871, "step": 37405 }, { "epoch": 2.0484038766905766, "grad_norm": 0.10973605513572693, "learning_rate": 1.762218616913405e-05, "loss": 0.184, "step": 37410 }, { "epoch": 2.048677654273668, "grad_norm": 0.10554825514554977, "learning_rate": 1.7617116203609815e-05, "loss": 0.1909, "step": 37415 }, { "epoch": 2.0489514318567594, "grad_norm": 0.10382809489965439, "learning_rate": 1.7612046238085582e-05, "loss": 0.1923, "step": 37420 }, { "epoch": 2.049225209439851, "grad_norm": 0.1009402945637703, "learning_rate": 1.760697627256135e-05, "loss": 0.1813, "step": 37425 }, { "epoch": 2.0494989870229428, "grad_norm": 0.12206439673900604, "learning_rate": 1.7601906307037115e-05, "loss": 0.193, "step": 37430 }, { "epoch": 2.049772764606034, "grad_norm": 0.11698764562606812, "learning_rate": 1.759683634151288e-05, "loss": 0.1911, "step": 37435 }, { "epoch": 2.0500465421891256, "grad_norm": 0.11146742850542068, "learning_rate": 1.7591766375988645e-05, "loss": 0.1766, "step": 37440 }, { "epoch": 2.050320319772217, "grad_norm": 0.10618100315332413, "learning_rate": 1.758669641046441e-05, "loss": 0.1792, "step": 37445 }, { "epoch": 2.0505940973553085, "grad_norm": 0.10535610467195511, "learning_rate": 1.7581626444940175e-05, "loss": 0.1845, "step": 37450 }, { "epoch": 2.0508678749384, "grad_norm": 0.10939913988113403, "learning_rate": 1.757655647941594e-05, "loss": 0.187, "step": 37455 }, { "epoch": 2.0511416525214914, "grad_norm": 0.09663452953100204, "learning_rate": 1.7571486513891705e-05, "loss": 0.1747, "step": 37460 }, { "epoch": 2.051415430104583, "grad_norm": 0.10901714116334915, "learning_rate": 1.7566416548367472e-05, "loss": 0.1843, "step": 37465 }, { "epoch": 2.0516892076876747, "grad_norm": 0.09609844535589218, "learning_rate": 1.756134658284324e-05, "loss": 0.1794, "step": 37470 }, { "epoch": 2.051962985270766, "grad_norm": 0.09759145230054855, "learning_rate": 1.7556276617319002e-05, "loss": 0.1852, "step": 37475 }, { "epoch": 2.0522367628538576, "grad_norm": 0.09986846148967743, "learning_rate": 1.755120665179477e-05, "loss": 0.1905, "step": 37480 }, { "epoch": 2.052510540436949, "grad_norm": 0.10109385848045349, "learning_rate": 1.7546136686270535e-05, "loss": 0.1831, "step": 37485 }, { "epoch": 2.0527843180200405, "grad_norm": 0.11694563180208206, "learning_rate": 1.75410667207463e-05, "loss": 0.186, "step": 37490 }, { "epoch": 2.053058095603132, "grad_norm": 0.11011926084756851, "learning_rate": 1.7535996755222065e-05, "loss": 0.1863, "step": 37495 }, { "epoch": 2.0533318731862233, "grad_norm": 0.09892313182353973, "learning_rate": 1.753092678969783e-05, "loss": 0.1857, "step": 37500 }, { "epoch": 2.053605650769315, "grad_norm": 0.09958115965127945, "learning_rate": 1.75258568241736e-05, "loss": 0.1791, "step": 37505 }, { "epoch": 2.0538794283524067, "grad_norm": 0.10446624457836151, "learning_rate": 1.7520786858649362e-05, "loss": 0.1804, "step": 37510 }, { "epoch": 2.054153205935498, "grad_norm": 0.10443367063999176, "learning_rate": 1.751571689312513e-05, "loss": 0.187, "step": 37515 }, { "epoch": 2.0544269835185895, "grad_norm": 0.09442794322967529, "learning_rate": 1.7510646927600892e-05, "loss": 0.19, "step": 37520 }, { "epoch": 2.054700761101681, "grad_norm": 0.11467209458351135, "learning_rate": 1.750557696207666e-05, "loss": 0.1871, "step": 37525 }, { "epoch": 2.0549745386847724, "grad_norm": 0.10706394165754318, "learning_rate": 1.7500506996552422e-05, "loss": 0.1845, "step": 37530 }, { "epoch": 2.055248316267864, "grad_norm": 0.10076707601547241, "learning_rate": 1.749543703102819e-05, "loss": 0.1816, "step": 37535 }, { "epoch": 2.0555220938509553, "grad_norm": 0.10736531019210815, "learning_rate": 1.7490367065503956e-05, "loss": 0.1846, "step": 37540 }, { "epoch": 2.055795871434047, "grad_norm": 0.1152850016951561, "learning_rate": 1.7485297099979722e-05, "loss": 0.1857, "step": 37545 }, { "epoch": 2.0560696490171386, "grad_norm": 0.10089332610368729, "learning_rate": 1.748022713445549e-05, "loss": 0.1808, "step": 37550 }, { "epoch": 2.05634342660023, "grad_norm": 0.11150874942541122, "learning_rate": 1.7475157168931252e-05, "loss": 0.1844, "step": 37555 }, { "epoch": 2.0566172041833215, "grad_norm": 0.11179140210151672, "learning_rate": 1.747008720340702e-05, "loss": 0.1898, "step": 37560 }, { "epoch": 2.056890981766413, "grad_norm": 0.10498160123825073, "learning_rate": 1.7465017237882782e-05, "loss": 0.1772, "step": 37565 }, { "epoch": 2.0571647593495044, "grad_norm": 0.10431960225105286, "learning_rate": 1.745994727235855e-05, "loss": 0.1834, "step": 37570 }, { "epoch": 2.057438536932596, "grad_norm": 0.10700464993715286, "learning_rate": 1.7454877306834312e-05, "loss": 0.1899, "step": 37575 }, { "epoch": 2.0577123145156873, "grad_norm": 0.09672199934720993, "learning_rate": 1.744980734131008e-05, "loss": 0.1873, "step": 37580 }, { "epoch": 2.057986092098779, "grad_norm": 0.10483919084072113, "learning_rate": 1.7444737375785846e-05, "loss": 0.1823, "step": 37585 }, { "epoch": 2.0582598696818706, "grad_norm": 0.10838399827480316, "learning_rate": 1.7439667410261612e-05, "loss": 0.1877, "step": 37590 }, { "epoch": 2.058533647264962, "grad_norm": 0.09941903501749039, "learning_rate": 1.7434597444737376e-05, "loss": 0.1816, "step": 37595 }, { "epoch": 2.0588074248480535, "grad_norm": 0.09483379870653152, "learning_rate": 1.7429527479213142e-05, "loss": 0.1804, "step": 37600 }, { "epoch": 2.059081202431145, "grad_norm": 0.1011316105723381, "learning_rate": 1.742445751368891e-05, "loss": 0.1826, "step": 37605 }, { "epoch": 2.0593549800142363, "grad_norm": 0.10163789242506027, "learning_rate": 1.7419387548164672e-05, "loss": 0.18, "step": 37610 }, { "epoch": 2.059628757597328, "grad_norm": 0.10285358875989914, "learning_rate": 1.741431758264044e-05, "loss": 0.1814, "step": 37615 }, { "epoch": 2.059902535180419, "grad_norm": 0.10013741999864578, "learning_rate": 1.7409247617116202e-05, "loss": 0.1837, "step": 37620 }, { "epoch": 2.060176312763511, "grad_norm": 0.09926372021436691, "learning_rate": 1.7404177651591972e-05, "loss": 0.1821, "step": 37625 }, { "epoch": 2.0604500903466025, "grad_norm": 0.10009361058473587, "learning_rate": 1.7399107686067736e-05, "loss": 0.1924, "step": 37630 }, { "epoch": 2.0607238679296938, "grad_norm": 0.11926800012588501, "learning_rate": 1.7394037720543502e-05, "loss": 0.1938, "step": 37635 }, { "epoch": 2.0609976455127854, "grad_norm": 0.10198720544576645, "learning_rate": 1.7388967755019266e-05, "loss": 0.1929, "step": 37640 }, { "epoch": 2.061271423095877, "grad_norm": 0.11261947453022003, "learning_rate": 1.7383897789495032e-05, "loss": 0.1868, "step": 37645 }, { "epoch": 2.0615452006789683, "grad_norm": 0.1104668527841568, "learning_rate": 1.73788278239708e-05, "loss": 0.1915, "step": 37650 }, { "epoch": 2.06181897826206, "grad_norm": 0.1027214452624321, "learning_rate": 1.7373757858446562e-05, "loss": 0.1857, "step": 37655 }, { "epoch": 2.0620927558451516, "grad_norm": 0.10836538672447205, "learning_rate": 1.736868789292233e-05, "loss": 0.1842, "step": 37660 }, { "epoch": 2.062366533428243, "grad_norm": 0.12817290425300598, "learning_rate": 1.7363617927398096e-05, "loss": 0.1826, "step": 37665 }, { "epoch": 2.0626403110113345, "grad_norm": 0.10420025140047073, "learning_rate": 1.7358547961873862e-05, "loss": 0.1795, "step": 37670 }, { "epoch": 2.0629140885944257, "grad_norm": 0.09649033099412918, "learning_rate": 1.7353477996349626e-05, "loss": 0.1808, "step": 37675 }, { "epoch": 2.0631878661775174, "grad_norm": 0.10574838519096375, "learning_rate": 1.7348408030825392e-05, "loss": 0.1832, "step": 37680 }, { "epoch": 2.063461643760609, "grad_norm": 0.1037253811955452, "learning_rate": 1.7343338065301156e-05, "loss": 0.182, "step": 37685 }, { "epoch": 2.0637354213437002, "grad_norm": 0.10902561247348785, "learning_rate": 1.7338268099776922e-05, "loss": 0.1872, "step": 37690 }, { "epoch": 2.064009198926792, "grad_norm": 0.1035037413239479, "learning_rate": 1.7333198134252686e-05, "loss": 0.1903, "step": 37695 }, { "epoch": 2.0642829765098836, "grad_norm": 0.09908920526504517, "learning_rate": 1.7328128168728452e-05, "loss": 0.1825, "step": 37700 }, { "epoch": 2.064556754092975, "grad_norm": 0.09952095150947571, "learning_rate": 1.732305820320422e-05, "loss": 0.1765, "step": 37705 }, { "epoch": 2.0648305316760664, "grad_norm": 0.09781864285469055, "learning_rate": 1.7317988237679986e-05, "loss": 0.1813, "step": 37710 }, { "epoch": 2.0651043092591577, "grad_norm": 0.11135271936655045, "learning_rate": 1.7312918272155753e-05, "loss": 0.187, "step": 37715 }, { "epoch": 2.0653780868422493, "grad_norm": 0.11166691780090332, "learning_rate": 1.7307848306631516e-05, "loss": 0.1897, "step": 37720 }, { "epoch": 2.065651864425341, "grad_norm": 0.09400974214076996, "learning_rate": 1.7302778341107282e-05, "loss": 0.1832, "step": 37725 }, { "epoch": 2.065925642008432, "grad_norm": 0.11590029299259186, "learning_rate": 1.7297708375583046e-05, "loss": 0.1843, "step": 37730 }, { "epoch": 2.066199419591524, "grad_norm": 0.1224733367562294, "learning_rate": 1.7292638410058812e-05, "loss": 0.1868, "step": 37735 }, { "epoch": 2.0664731971746155, "grad_norm": 0.11117291450500488, "learning_rate": 1.7287568444534576e-05, "loss": 0.1845, "step": 37740 }, { "epoch": 2.0667469747577067, "grad_norm": 0.11347877979278564, "learning_rate": 1.7282498479010342e-05, "loss": 0.1895, "step": 37745 }, { "epoch": 2.0670207523407984, "grad_norm": 0.09641212970018387, "learning_rate": 1.727742851348611e-05, "loss": 0.1857, "step": 37750 }, { "epoch": 2.0672945299238896, "grad_norm": 0.09683031588792801, "learning_rate": 1.7272358547961876e-05, "loss": 0.1851, "step": 37755 }, { "epoch": 2.0675683075069813, "grad_norm": 0.10056864470243454, "learning_rate": 1.726728858243764e-05, "loss": 0.1851, "step": 37760 }, { "epoch": 2.067842085090073, "grad_norm": 0.0982591062784195, "learning_rate": 1.7262218616913406e-05, "loss": 0.182, "step": 37765 }, { "epoch": 2.068115862673164, "grad_norm": 0.11589270830154419, "learning_rate": 1.7257148651389173e-05, "loss": 0.1844, "step": 37770 }, { "epoch": 2.068389640256256, "grad_norm": 0.0975986048579216, "learning_rate": 1.7252078685864936e-05, "loss": 0.1825, "step": 37775 }, { "epoch": 2.0686634178393475, "grad_norm": 0.10665585100650787, "learning_rate": 1.7247008720340703e-05, "loss": 0.182, "step": 37780 }, { "epoch": 2.0689371954224387, "grad_norm": 0.09985343366861343, "learning_rate": 1.7241938754816466e-05, "loss": 0.1875, "step": 37785 }, { "epoch": 2.0692109730055304, "grad_norm": 0.11507595330476761, "learning_rate": 1.7236868789292236e-05, "loss": 0.1801, "step": 37790 }, { "epoch": 2.0694847505886216, "grad_norm": 0.10014067590236664, "learning_rate": 1.7231798823768e-05, "loss": 0.1863, "step": 37795 }, { "epoch": 2.0697585281717132, "grad_norm": 0.09808798879384995, "learning_rate": 1.7226728858243766e-05, "loss": 0.1812, "step": 37800 }, { "epoch": 2.070032305754805, "grad_norm": 0.09912259876728058, "learning_rate": 1.722165889271953e-05, "loss": 0.1834, "step": 37805 }, { "epoch": 2.070306083337896, "grad_norm": 0.09890453517436981, "learning_rate": 1.7216588927195296e-05, "loss": 0.1847, "step": 37810 }, { "epoch": 2.070579860920988, "grad_norm": 0.10783401876688004, "learning_rate": 1.721151896167106e-05, "loss": 0.1835, "step": 37815 }, { "epoch": 2.0708536385040794, "grad_norm": 0.10105080157518387, "learning_rate": 1.7206448996146826e-05, "loss": 0.1795, "step": 37820 }, { "epoch": 2.0711274160871707, "grad_norm": 0.12159617990255356, "learning_rate": 1.7201379030622593e-05, "loss": 0.1833, "step": 37825 }, { "epoch": 2.0714011936702623, "grad_norm": 0.105323426425457, "learning_rate": 1.719630906509836e-05, "loss": 0.1987, "step": 37830 }, { "epoch": 2.071674971253354, "grad_norm": 0.09308040142059326, "learning_rate": 1.7191239099574126e-05, "loss": 0.1821, "step": 37835 }, { "epoch": 2.071948748836445, "grad_norm": 0.11123740673065186, "learning_rate": 1.718616913404989e-05, "loss": 0.1832, "step": 37840 }, { "epoch": 2.072222526419537, "grad_norm": 0.09708216041326523, "learning_rate": 1.7181099168525656e-05, "loss": 0.1789, "step": 37845 }, { "epoch": 2.072496304002628, "grad_norm": 0.09725556522607803, "learning_rate": 1.717602920300142e-05, "loss": 0.1846, "step": 37850 }, { "epoch": 2.0727700815857197, "grad_norm": 0.09148304164409637, "learning_rate": 1.7170959237477186e-05, "loss": 0.1809, "step": 37855 }, { "epoch": 2.0730438591688114, "grad_norm": 0.09314518421888351, "learning_rate": 1.716588927195295e-05, "loss": 0.1742, "step": 37860 }, { "epoch": 2.0733176367519026, "grad_norm": 0.09605951607227325, "learning_rate": 1.7160819306428716e-05, "loss": 0.1798, "step": 37865 }, { "epoch": 2.0735914143349943, "grad_norm": 0.10438118875026703, "learning_rate": 1.7155749340904483e-05, "loss": 0.1902, "step": 37870 }, { "epoch": 2.073865191918086, "grad_norm": 0.09941181540489197, "learning_rate": 1.715067937538025e-05, "loss": 0.191, "step": 37875 }, { "epoch": 2.074138969501177, "grad_norm": 0.09729140996932983, "learning_rate": 1.7145609409856013e-05, "loss": 0.1805, "step": 37880 }, { "epoch": 2.074412747084269, "grad_norm": 0.1125316172838211, "learning_rate": 1.714053944433178e-05, "loss": 0.1894, "step": 37885 }, { "epoch": 2.07468652466736, "grad_norm": 0.1651480793952942, "learning_rate": 1.7135469478807546e-05, "loss": 0.1857, "step": 37890 }, { "epoch": 2.0749603022504517, "grad_norm": 0.09938346594572067, "learning_rate": 1.713039951328331e-05, "loss": 0.1824, "step": 37895 }, { "epoch": 2.0752340798335434, "grad_norm": 0.09270168095827103, "learning_rate": 1.7125329547759076e-05, "loss": 0.1879, "step": 37900 }, { "epoch": 2.0755078574166346, "grad_norm": 0.10665760934352875, "learning_rate": 1.712025958223484e-05, "loss": 0.1817, "step": 37905 }, { "epoch": 2.0757816349997262, "grad_norm": 0.09273895621299744, "learning_rate": 1.711518961671061e-05, "loss": 0.1816, "step": 37910 }, { "epoch": 2.076055412582818, "grad_norm": 0.10572884231805801, "learning_rate": 1.7110119651186373e-05, "loss": 0.188, "step": 37915 }, { "epoch": 2.076329190165909, "grad_norm": 0.11370440572500229, "learning_rate": 1.710504968566214e-05, "loss": 0.1853, "step": 37920 }, { "epoch": 2.0766029677490008, "grad_norm": 0.12090325355529785, "learning_rate": 1.7099979720137903e-05, "loss": 0.1879, "step": 37925 }, { "epoch": 2.0768767453320924, "grad_norm": 0.10816255956888199, "learning_rate": 1.709490975461367e-05, "loss": 0.1835, "step": 37930 }, { "epoch": 2.0771505229151837, "grad_norm": 0.09618420898914337, "learning_rate": 1.7089839789089436e-05, "loss": 0.1793, "step": 37935 }, { "epoch": 2.0774243004982753, "grad_norm": 0.111148402094841, "learning_rate": 1.70847698235652e-05, "loss": 0.185, "step": 37940 }, { "epoch": 2.0776980780813665, "grad_norm": 0.12070133537054062, "learning_rate": 1.7079699858040966e-05, "loss": 0.1888, "step": 37945 }, { "epoch": 2.077971855664458, "grad_norm": 0.10892996191978455, "learning_rate": 1.7074629892516733e-05, "loss": 0.1901, "step": 37950 }, { "epoch": 2.07824563324755, "grad_norm": 0.10895252227783203, "learning_rate": 1.70695599269925e-05, "loss": 0.189, "step": 37955 }, { "epoch": 2.078519410830641, "grad_norm": 0.11331746727228165, "learning_rate": 1.7064489961468263e-05, "loss": 0.1859, "step": 37960 }, { "epoch": 2.0787931884137327, "grad_norm": 0.11346716433763504, "learning_rate": 1.705941999594403e-05, "loss": 0.1873, "step": 37965 }, { "epoch": 2.0790669659968244, "grad_norm": 0.10658660531044006, "learning_rate": 1.7054350030419793e-05, "loss": 0.1835, "step": 37970 }, { "epoch": 2.0793407435799156, "grad_norm": 0.10715033859014511, "learning_rate": 1.704928006489556e-05, "loss": 0.1781, "step": 37975 }, { "epoch": 2.0796145211630073, "grad_norm": 0.09760522842407227, "learning_rate": 1.7044210099371323e-05, "loss": 0.1852, "step": 37980 }, { "epoch": 2.0798882987460985, "grad_norm": 0.11545638740062714, "learning_rate": 1.703914013384709e-05, "loss": 0.1903, "step": 37985 }, { "epoch": 2.08016207632919, "grad_norm": 0.0996537134051323, "learning_rate": 1.7034070168322856e-05, "loss": 0.1923, "step": 37990 }, { "epoch": 2.080435853912282, "grad_norm": 0.10729096084833145, "learning_rate": 1.7029000202798623e-05, "loss": 0.1824, "step": 37995 }, { "epoch": 2.080709631495373, "grad_norm": 0.12150322645902634, "learning_rate": 1.702393023727439e-05, "loss": 0.1875, "step": 38000 }, { "epoch": 2.0809834090784647, "grad_norm": 0.10587571561336517, "learning_rate": 1.7018860271750153e-05, "loss": 0.1819, "step": 38005 }, { "epoch": 2.0812571866615563, "grad_norm": 0.11378419399261475, "learning_rate": 1.701379030622592e-05, "loss": 0.182, "step": 38010 }, { "epoch": 2.0815309642446476, "grad_norm": 0.09906535595655441, "learning_rate": 1.7008720340701683e-05, "loss": 0.1803, "step": 38015 }, { "epoch": 2.0818047418277392, "grad_norm": 0.11158961057662964, "learning_rate": 1.700365037517745e-05, "loss": 0.1792, "step": 38020 }, { "epoch": 2.0820785194108304, "grad_norm": 0.11151128262281418, "learning_rate": 1.6998580409653213e-05, "loss": 0.1779, "step": 38025 }, { "epoch": 2.082352296993922, "grad_norm": 0.10147317498922348, "learning_rate": 1.699351044412898e-05, "loss": 0.1797, "step": 38030 }, { "epoch": 2.0826260745770138, "grad_norm": 0.1129341572523117, "learning_rate": 1.6988440478604746e-05, "loss": 0.1903, "step": 38035 }, { "epoch": 2.082899852160105, "grad_norm": 0.10653745383024216, "learning_rate": 1.6983370513080513e-05, "loss": 0.1974, "step": 38040 }, { "epoch": 2.0831736297431966, "grad_norm": 0.09842853248119354, "learning_rate": 1.6978300547556276e-05, "loss": 0.1743, "step": 38045 }, { "epoch": 2.0834474073262883, "grad_norm": 0.11125640571117401, "learning_rate": 1.6973230582032043e-05, "loss": 0.1939, "step": 38050 }, { "epoch": 2.0837211849093795, "grad_norm": 0.09283453226089478, "learning_rate": 1.696816061650781e-05, "loss": 0.1778, "step": 38055 }, { "epoch": 2.083994962492471, "grad_norm": 0.10615187883377075, "learning_rate": 1.6963090650983573e-05, "loss": 0.1848, "step": 38060 }, { "epoch": 2.0842687400755624, "grad_norm": 0.11372986435890198, "learning_rate": 1.695802068545934e-05, "loss": 0.1906, "step": 38065 }, { "epoch": 2.084542517658654, "grad_norm": 0.11331389099359512, "learning_rate": 1.6952950719935103e-05, "loss": 0.1762, "step": 38070 }, { "epoch": 2.0848162952417457, "grad_norm": 0.11913503706455231, "learning_rate": 1.6947880754410873e-05, "loss": 0.1858, "step": 38075 }, { "epoch": 2.085090072824837, "grad_norm": 0.10220987349748611, "learning_rate": 1.6942810788886636e-05, "loss": 0.1903, "step": 38080 }, { "epoch": 2.0853638504079286, "grad_norm": 0.10198331624269485, "learning_rate": 1.6937740823362403e-05, "loss": 0.1856, "step": 38085 }, { "epoch": 2.0856376279910203, "grad_norm": 0.11332394927740097, "learning_rate": 1.6932670857838166e-05, "loss": 0.1827, "step": 38090 }, { "epoch": 2.0859114055741115, "grad_norm": 0.1107875406742096, "learning_rate": 1.6927600892313933e-05, "loss": 0.1805, "step": 38095 }, { "epoch": 2.086185183157203, "grad_norm": 0.11968015879392624, "learning_rate": 1.69225309267897e-05, "loss": 0.1924, "step": 38100 }, { "epoch": 2.086458960740295, "grad_norm": 0.10166548937559128, "learning_rate": 1.6917460961265463e-05, "loss": 0.1839, "step": 38105 }, { "epoch": 2.086732738323386, "grad_norm": 0.1071438416838646, "learning_rate": 1.691239099574123e-05, "loss": 0.1756, "step": 38110 }, { "epoch": 2.0870065159064777, "grad_norm": 0.11275304108858109, "learning_rate": 1.6907321030216996e-05, "loss": 0.1889, "step": 38115 }, { "epoch": 2.087280293489569, "grad_norm": 0.1070980578660965, "learning_rate": 1.6902251064692763e-05, "loss": 0.1937, "step": 38120 }, { "epoch": 2.0875540710726606, "grad_norm": 0.11434422433376312, "learning_rate": 1.6897181099168526e-05, "loss": 0.181, "step": 38125 }, { "epoch": 2.087827848655752, "grad_norm": 0.11367521435022354, "learning_rate": 1.6892111133644293e-05, "loss": 0.1821, "step": 38130 }, { "epoch": 2.0881016262388434, "grad_norm": 0.10230574756860733, "learning_rate": 1.6887041168120056e-05, "loss": 0.1904, "step": 38135 }, { "epoch": 2.088375403821935, "grad_norm": 0.10235457867383957, "learning_rate": 1.6881971202595823e-05, "loss": 0.1861, "step": 38140 }, { "epoch": 2.0886491814050268, "grad_norm": 0.11180038750171661, "learning_rate": 1.6876901237071586e-05, "loss": 0.1906, "step": 38145 }, { "epoch": 2.088922958988118, "grad_norm": 0.11319909989833832, "learning_rate": 1.6871831271547353e-05, "loss": 0.1871, "step": 38150 }, { "epoch": 2.0891967365712096, "grad_norm": 0.10392751544713974, "learning_rate": 1.686676130602312e-05, "loss": 0.185, "step": 38155 }, { "epoch": 2.089470514154301, "grad_norm": 0.1008305549621582, "learning_rate": 1.6861691340498887e-05, "loss": 0.1906, "step": 38160 }, { "epoch": 2.0897442917373925, "grad_norm": 0.10427473485469818, "learning_rate": 1.6856621374974653e-05, "loss": 0.1905, "step": 38165 }, { "epoch": 2.090018069320484, "grad_norm": 0.10879669338464737, "learning_rate": 1.6851551409450417e-05, "loss": 0.1815, "step": 38170 }, { "epoch": 2.0902918469035754, "grad_norm": 0.11659824103116989, "learning_rate": 1.6846481443926183e-05, "loss": 0.1822, "step": 38175 }, { "epoch": 2.090565624486667, "grad_norm": 0.11422441154718399, "learning_rate": 1.6841411478401947e-05, "loss": 0.1894, "step": 38180 }, { "epoch": 2.0908394020697587, "grad_norm": 0.12144828587770462, "learning_rate": 1.6836341512877713e-05, "loss": 0.1854, "step": 38185 }, { "epoch": 2.09111317965285, "grad_norm": 0.11734017729759216, "learning_rate": 1.6831271547353477e-05, "loss": 0.1805, "step": 38190 }, { "epoch": 2.0913869572359416, "grad_norm": 0.10861942172050476, "learning_rate": 1.6826201581829247e-05, "loss": 0.1822, "step": 38195 }, { "epoch": 2.091660734819033, "grad_norm": 0.11895030736923218, "learning_rate": 1.682113161630501e-05, "loss": 0.1816, "step": 38200 }, { "epoch": 2.0919345124021245, "grad_norm": 0.10604124516248703, "learning_rate": 1.6816061650780777e-05, "loss": 0.1839, "step": 38205 }, { "epoch": 2.092208289985216, "grad_norm": 0.10280094295740128, "learning_rate": 1.681099168525654e-05, "loss": 0.1735, "step": 38210 }, { "epoch": 2.0924820675683073, "grad_norm": 0.10024364292621613, "learning_rate": 1.6805921719732307e-05, "loss": 0.1755, "step": 38215 }, { "epoch": 2.092755845151399, "grad_norm": 0.10214376449584961, "learning_rate": 1.6800851754208073e-05, "loss": 0.1884, "step": 38220 }, { "epoch": 2.0930296227344907, "grad_norm": 0.10457012802362442, "learning_rate": 1.6795781788683837e-05, "loss": 0.1838, "step": 38225 }, { "epoch": 2.093303400317582, "grad_norm": 0.11355660855770111, "learning_rate": 1.6790711823159603e-05, "loss": 0.1839, "step": 38230 }, { "epoch": 2.0935771779006735, "grad_norm": 0.1104784905910492, "learning_rate": 1.678564185763537e-05, "loss": 0.1868, "step": 38235 }, { "epoch": 2.0938509554837648, "grad_norm": 0.09993547946214676, "learning_rate": 1.6780571892111137e-05, "loss": 0.1795, "step": 38240 }, { "epoch": 2.0941247330668564, "grad_norm": 0.1312730312347412, "learning_rate": 1.67755019265869e-05, "loss": 0.1892, "step": 38245 }, { "epoch": 2.094398510649948, "grad_norm": 0.10247629880905151, "learning_rate": 1.6770431961062667e-05, "loss": 0.1827, "step": 38250 }, { "epoch": 2.0946722882330393, "grad_norm": 0.10848450660705566, "learning_rate": 1.676536199553843e-05, "loss": 0.1889, "step": 38255 }, { "epoch": 2.094946065816131, "grad_norm": 0.09886088967323303, "learning_rate": 1.6760292030014197e-05, "loss": 0.1835, "step": 38260 }, { "epoch": 2.0952198433992226, "grad_norm": 0.102932870388031, "learning_rate": 1.675522206448996e-05, "loss": 0.1802, "step": 38265 }, { "epoch": 2.095493620982314, "grad_norm": 0.10074256360530853, "learning_rate": 1.6750152098965727e-05, "loss": 0.1813, "step": 38270 }, { "epoch": 2.0957673985654055, "grad_norm": 0.11068911105394363, "learning_rate": 1.6745082133441493e-05, "loss": 0.1823, "step": 38275 }, { "epoch": 2.096041176148497, "grad_norm": 0.0990254357457161, "learning_rate": 1.674001216791726e-05, "loss": 0.1822, "step": 38280 }, { "epoch": 2.0963149537315884, "grad_norm": 0.09755880385637283, "learning_rate": 1.6734942202393027e-05, "loss": 0.182, "step": 38285 }, { "epoch": 2.09658873131468, "grad_norm": 0.10438558459281921, "learning_rate": 1.672987223686879e-05, "loss": 0.1862, "step": 38290 }, { "epoch": 2.0968625088977713, "grad_norm": 0.09901385009288788, "learning_rate": 1.6724802271344557e-05, "loss": 0.1835, "step": 38295 }, { "epoch": 2.097136286480863, "grad_norm": 0.10680180042982101, "learning_rate": 1.671973230582032e-05, "loss": 0.1862, "step": 38300 }, { "epoch": 2.0974100640639546, "grad_norm": 0.10191906243562698, "learning_rate": 1.6714662340296087e-05, "loss": 0.1889, "step": 38305 }, { "epoch": 2.097683841647046, "grad_norm": 0.10041820257902145, "learning_rate": 1.670959237477185e-05, "loss": 0.1865, "step": 38310 }, { "epoch": 2.0979576192301375, "grad_norm": 0.09917423874139786, "learning_rate": 1.6704522409247617e-05, "loss": 0.1836, "step": 38315 }, { "epoch": 2.098231396813229, "grad_norm": 0.10225224494934082, "learning_rate": 1.6699452443723383e-05, "loss": 0.1819, "step": 38320 }, { "epoch": 2.0985051743963203, "grad_norm": 0.09534076601266861, "learning_rate": 1.669438247819915e-05, "loss": 0.1762, "step": 38325 }, { "epoch": 2.098778951979412, "grad_norm": 0.0985129252076149, "learning_rate": 1.6689312512674913e-05, "loss": 0.184, "step": 38330 }, { "epoch": 2.099052729562503, "grad_norm": 0.09724278002977371, "learning_rate": 1.668424254715068e-05, "loss": 0.1817, "step": 38335 }, { "epoch": 2.099326507145595, "grad_norm": 0.09937519580125809, "learning_rate": 1.6679172581626447e-05, "loss": 0.1808, "step": 38340 }, { "epoch": 2.0996002847286865, "grad_norm": 0.10247579962015152, "learning_rate": 1.667410261610221e-05, "loss": 0.1836, "step": 38345 }, { "epoch": 2.0998740623117778, "grad_norm": 0.10170808434486389, "learning_rate": 1.6669032650577977e-05, "loss": 0.1774, "step": 38350 }, { "epoch": 2.1001478398948694, "grad_norm": 0.1240035891532898, "learning_rate": 1.666396268505374e-05, "loss": 0.1878, "step": 38355 }, { "epoch": 2.100421617477961, "grad_norm": 0.10051672160625458, "learning_rate": 1.665889271952951e-05, "loss": 0.1867, "step": 38360 }, { "epoch": 2.1006953950610523, "grad_norm": 0.0950232446193695, "learning_rate": 1.6653822754005273e-05, "loss": 0.1853, "step": 38365 }, { "epoch": 2.100969172644144, "grad_norm": 0.10732164978981018, "learning_rate": 1.664875278848104e-05, "loss": 0.1854, "step": 38370 }, { "epoch": 2.1012429502272356, "grad_norm": 0.10398642718791962, "learning_rate": 1.6643682822956803e-05, "loss": 0.1886, "step": 38375 }, { "epoch": 2.101516727810327, "grad_norm": 0.10699009150266647, "learning_rate": 1.663861285743257e-05, "loss": 0.1778, "step": 38380 }, { "epoch": 2.1017905053934185, "grad_norm": 0.09515600651502609, "learning_rate": 1.6633542891908337e-05, "loss": 0.1801, "step": 38385 }, { "epoch": 2.1020642829765097, "grad_norm": 0.10651636123657227, "learning_rate": 1.66284729263841e-05, "loss": 0.1872, "step": 38390 }, { "epoch": 2.1023380605596014, "grad_norm": 0.10705246776342392, "learning_rate": 1.6623402960859867e-05, "loss": 0.1789, "step": 38395 }, { "epoch": 2.102611838142693, "grad_norm": 0.09633200615644455, "learning_rate": 1.6618332995335634e-05, "loss": 0.1863, "step": 38400 }, { "epoch": 2.1028856157257843, "grad_norm": 0.10995806008577347, "learning_rate": 1.66132630298114e-05, "loss": 0.1939, "step": 38405 }, { "epoch": 2.103159393308876, "grad_norm": 0.11100474745035172, "learning_rate": 1.6608193064287164e-05, "loss": 0.1921, "step": 38410 }, { "epoch": 2.1034331708919676, "grad_norm": 0.11475689709186554, "learning_rate": 1.660312309876293e-05, "loss": 0.1786, "step": 38415 }, { "epoch": 2.103706948475059, "grad_norm": 0.10278704762458801, "learning_rate": 1.6598053133238694e-05, "loss": 0.1878, "step": 38420 }, { "epoch": 2.1039807260581505, "grad_norm": 0.11398862302303314, "learning_rate": 1.659298316771446e-05, "loss": 0.1904, "step": 38425 }, { "epoch": 2.1042545036412417, "grad_norm": 0.1221034824848175, "learning_rate": 1.6587913202190224e-05, "loss": 0.1888, "step": 38430 }, { "epoch": 2.1045282812243333, "grad_norm": 0.11013835668563843, "learning_rate": 1.658284323666599e-05, "loss": 0.1869, "step": 38435 }, { "epoch": 2.104802058807425, "grad_norm": 0.10209217667579651, "learning_rate": 1.6577773271141757e-05, "loss": 0.1843, "step": 38440 }, { "epoch": 2.105075836390516, "grad_norm": 0.09584195166826248, "learning_rate": 1.6572703305617524e-05, "loss": 0.1854, "step": 38445 }, { "epoch": 2.105349613973608, "grad_norm": 0.09710851311683655, "learning_rate": 1.656763334009329e-05, "loss": 0.1892, "step": 38450 }, { "epoch": 2.1056233915566995, "grad_norm": 0.11114978045225143, "learning_rate": 1.6562563374569054e-05, "loss": 0.1878, "step": 38455 }, { "epoch": 2.1058971691397907, "grad_norm": 0.1151353195309639, "learning_rate": 1.655749340904482e-05, "loss": 0.1852, "step": 38460 }, { "epoch": 2.1061709467228824, "grad_norm": 0.09985001385211945, "learning_rate": 1.6552423443520584e-05, "loss": 0.1773, "step": 38465 }, { "epoch": 2.1064447243059736, "grad_norm": 0.10549964010715485, "learning_rate": 1.654735347799635e-05, "loss": 0.183, "step": 38470 }, { "epoch": 2.1067185018890653, "grad_norm": 0.10148415714502335, "learning_rate": 1.6542283512472114e-05, "loss": 0.187, "step": 38475 }, { "epoch": 2.106992279472157, "grad_norm": 0.10410163551568985, "learning_rate": 1.6537213546947884e-05, "loss": 0.1828, "step": 38480 }, { "epoch": 2.107266057055248, "grad_norm": 0.1065419614315033, "learning_rate": 1.6532143581423647e-05, "loss": 0.1891, "step": 38485 }, { "epoch": 2.10753983463834, "grad_norm": 0.10222028940916061, "learning_rate": 1.6527073615899414e-05, "loss": 0.1851, "step": 38490 }, { "epoch": 2.1078136122214315, "grad_norm": 0.11160530894994736, "learning_rate": 1.6522003650375177e-05, "loss": 0.1843, "step": 38495 }, { "epoch": 2.1080873898045227, "grad_norm": 0.0983550176024437, "learning_rate": 1.6516933684850944e-05, "loss": 0.1841, "step": 38500 }, { "epoch": 2.1083611673876144, "grad_norm": 0.0969831570982933, "learning_rate": 1.651186371932671e-05, "loss": 0.1847, "step": 38505 }, { "epoch": 2.1086349449707056, "grad_norm": 0.09765508770942688, "learning_rate": 1.6506793753802474e-05, "loss": 0.1785, "step": 38510 }, { "epoch": 2.1089087225537972, "grad_norm": 0.10050148516893387, "learning_rate": 1.650172378827824e-05, "loss": 0.1913, "step": 38515 }, { "epoch": 2.109182500136889, "grad_norm": 0.10138005018234253, "learning_rate": 1.6496653822754007e-05, "loss": 0.1848, "step": 38520 }, { "epoch": 2.10945627771998, "grad_norm": 0.11455552279949188, "learning_rate": 1.6491583857229774e-05, "loss": 0.1846, "step": 38525 }, { "epoch": 2.109730055303072, "grad_norm": 0.09650366008281708, "learning_rate": 1.6486513891705537e-05, "loss": 0.1798, "step": 38530 }, { "epoch": 2.1100038328861634, "grad_norm": 0.10211431980133057, "learning_rate": 1.6481443926181304e-05, "loss": 0.1786, "step": 38535 }, { "epoch": 2.1102776104692547, "grad_norm": 0.10130520164966583, "learning_rate": 1.6476373960657067e-05, "loss": 0.1912, "step": 38540 }, { "epoch": 2.1105513880523463, "grad_norm": 0.10247281193733215, "learning_rate": 1.6471303995132834e-05, "loss": 0.1911, "step": 38545 }, { "epoch": 2.110825165635438, "grad_norm": 0.10025212913751602, "learning_rate": 1.6466234029608597e-05, "loss": 0.1833, "step": 38550 }, { "epoch": 2.111098943218529, "grad_norm": 0.10393943637609482, "learning_rate": 1.6461164064084364e-05, "loss": 0.1835, "step": 38555 }, { "epoch": 2.111372720801621, "grad_norm": 0.10089893639087677, "learning_rate": 1.645609409856013e-05, "loss": 0.1826, "step": 38560 }, { "epoch": 2.111646498384712, "grad_norm": 0.09788360446691513, "learning_rate": 1.6451024133035897e-05, "loss": 0.1953, "step": 38565 }, { "epoch": 2.1119202759678037, "grad_norm": 0.10886611044406891, "learning_rate": 1.6445954167511664e-05, "loss": 0.1777, "step": 38570 }, { "epoch": 2.1121940535508954, "grad_norm": 0.11126511543989182, "learning_rate": 1.6440884201987427e-05, "loss": 0.188, "step": 38575 }, { "epoch": 2.1124678311339866, "grad_norm": 0.0927673801779747, "learning_rate": 1.6435814236463194e-05, "loss": 0.1768, "step": 38580 }, { "epoch": 2.1127416087170783, "grad_norm": 0.09815074503421783, "learning_rate": 1.6430744270938957e-05, "loss": 0.1836, "step": 38585 }, { "epoch": 2.11301538630017, "grad_norm": 0.1068490743637085, "learning_rate": 1.6425674305414724e-05, "loss": 0.1827, "step": 38590 }, { "epoch": 2.113289163883261, "grad_norm": 0.09997664391994476, "learning_rate": 1.6420604339890487e-05, "loss": 0.183, "step": 38595 }, { "epoch": 2.113562941466353, "grad_norm": 0.1135849803686142, "learning_rate": 1.6415534374366254e-05, "loss": 0.195, "step": 38600 }, { "epoch": 2.113836719049444, "grad_norm": 0.10089685022830963, "learning_rate": 1.641046440884202e-05, "loss": 0.1785, "step": 38605 }, { "epoch": 2.1141104966325357, "grad_norm": 0.10320322215557098, "learning_rate": 1.6405394443317787e-05, "loss": 0.1905, "step": 38610 }, { "epoch": 2.1143842742156274, "grad_norm": 0.09422679990530014, "learning_rate": 1.640032447779355e-05, "loss": 0.1795, "step": 38615 }, { "epoch": 2.1146580517987186, "grad_norm": 0.11297955363988876, "learning_rate": 1.6395254512269317e-05, "loss": 0.1987, "step": 38620 }, { "epoch": 2.1149318293818102, "grad_norm": 0.10294009745121002, "learning_rate": 1.6390184546745084e-05, "loss": 0.1801, "step": 38625 }, { "epoch": 2.115205606964902, "grad_norm": 0.10043984651565552, "learning_rate": 1.6385114581220847e-05, "loss": 0.1905, "step": 38630 }, { "epoch": 2.115479384547993, "grad_norm": 0.11122091114521027, "learning_rate": 1.6380044615696614e-05, "loss": 0.1844, "step": 38635 }, { "epoch": 2.1157531621310848, "grad_norm": 0.11253602802753448, "learning_rate": 1.6374974650172377e-05, "loss": 0.1836, "step": 38640 }, { "epoch": 2.116026939714176, "grad_norm": 0.09620825201272964, "learning_rate": 1.6369904684648147e-05, "loss": 0.1818, "step": 38645 }, { "epoch": 2.1163007172972677, "grad_norm": 0.0993560329079628, "learning_rate": 1.636483471912391e-05, "loss": 0.1811, "step": 38650 }, { "epoch": 2.1165744948803593, "grad_norm": 0.10235144942998886, "learning_rate": 1.6359764753599677e-05, "loss": 0.1871, "step": 38655 }, { "epoch": 2.1168482724634505, "grad_norm": 0.10168028622865677, "learning_rate": 1.635469478807544e-05, "loss": 0.1846, "step": 38660 }, { "epoch": 2.117122050046542, "grad_norm": 0.10541002452373505, "learning_rate": 1.6349624822551207e-05, "loss": 0.1785, "step": 38665 }, { "epoch": 2.117395827629634, "grad_norm": 0.09159104526042938, "learning_rate": 1.6344554857026974e-05, "loss": 0.1812, "step": 38670 }, { "epoch": 2.117669605212725, "grad_norm": 0.10567228496074677, "learning_rate": 1.6339484891502737e-05, "loss": 0.1875, "step": 38675 }, { "epoch": 2.1179433827958167, "grad_norm": 0.0961470827460289, "learning_rate": 1.6334414925978504e-05, "loss": 0.187, "step": 38680 }, { "epoch": 2.118217160378908, "grad_norm": 0.11714893579483032, "learning_rate": 1.632934496045427e-05, "loss": 0.1778, "step": 38685 }, { "epoch": 2.1184909379619996, "grad_norm": 0.10622705519199371, "learning_rate": 1.6324274994930037e-05, "loss": 0.1795, "step": 38690 }, { "epoch": 2.1187647155450913, "grad_norm": 0.10707473009824753, "learning_rate": 1.63192050294058e-05, "loss": 0.1875, "step": 38695 }, { "epoch": 2.1190384931281825, "grad_norm": 0.09493469446897507, "learning_rate": 1.6314135063881567e-05, "loss": 0.1789, "step": 38700 }, { "epoch": 2.119312270711274, "grad_norm": 0.10775158554315567, "learning_rate": 1.630906509835733e-05, "loss": 0.189, "step": 38705 }, { "epoch": 2.119586048294366, "grad_norm": 0.10185819119215012, "learning_rate": 1.6303995132833097e-05, "loss": 0.1842, "step": 38710 }, { "epoch": 2.119859825877457, "grad_norm": 0.10477839410305023, "learning_rate": 1.629892516730886e-05, "loss": 0.1893, "step": 38715 }, { "epoch": 2.1201336034605487, "grad_norm": 0.09935832023620605, "learning_rate": 1.6293855201784627e-05, "loss": 0.1831, "step": 38720 }, { "epoch": 2.1204073810436403, "grad_norm": 0.09128192067146301, "learning_rate": 1.6288785236260394e-05, "loss": 0.1852, "step": 38725 }, { "epoch": 2.1206811586267316, "grad_norm": 0.09883061796426773, "learning_rate": 1.628371527073616e-05, "loss": 0.1867, "step": 38730 }, { "epoch": 2.1209549362098232, "grad_norm": 0.10861195623874664, "learning_rate": 1.6278645305211927e-05, "loss": 0.1859, "step": 38735 }, { "epoch": 2.1212287137929144, "grad_norm": 0.10067669302225113, "learning_rate": 1.627357533968769e-05, "loss": 0.1814, "step": 38740 }, { "epoch": 2.121502491376006, "grad_norm": 0.1044008731842041, "learning_rate": 1.6268505374163457e-05, "loss": 0.1812, "step": 38745 }, { "epoch": 2.1217762689590978, "grad_norm": 0.09268850833177567, "learning_rate": 1.626343540863922e-05, "loss": 0.1928, "step": 38750 }, { "epoch": 2.122050046542189, "grad_norm": 0.1113753616809845, "learning_rate": 1.6258365443114987e-05, "loss": 0.1874, "step": 38755 }, { "epoch": 2.1223238241252806, "grad_norm": 0.11159487813711166, "learning_rate": 1.625329547759075e-05, "loss": 0.1851, "step": 38760 }, { "epoch": 2.1225976017083723, "grad_norm": 0.10026172548532486, "learning_rate": 1.624822551206652e-05, "loss": 0.1928, "step": 38765 }, { "epoch": 2.1228713792914635, "grad_norm": 0.10039138048887253, "learning_rate": 1.6243155546542284e-05, "loss": 0.1855, "step": 38770 }, { "epoch": 2.123145156874555, "grad_norm": 0.10995075851678848, "learning_rate": 1.623808558101805e-05, "loss": 0.1986, "step": 38775 }, { "epoch": 2.1234189344576464, "grad_norm": 0.09935932606458664, "learning_rate": 1.6233015615493814e-05, "loss": 0.1774, "step": 38780 }, { "epoch": 2.123692712040738, "grad_norm": 0.10212473571300507, "learning_rate": 1.622794564996958e-05, "loss": 0.1921, "step": 38785 }, { "epoch": 2.1239664896238297, "grad_norm": 0.09925620257854462, "learning_rate": 1.6222875684445348e-05, "loss": 0.1767, "step": 38790 }, { "epoch": 2.124240267206921, "grad_norm": 0.10600248724222183, "learning_rate": 1.621780571892111e-05, "loss": 0.1866, "step": 38795 }, { "epoch": 2.1245140447900126, "grad_norm": 0.09672253578901291, "learning_rate": 1.6212735753396878e-05, "loss": 0.184, "step": 38800 }, { "epoch": 2.1247878223731043, "grad_norm": 0.09354798495769501, "learning_rate": 1.6207665787872644e-05, "loss": 0.1889, "step": 38805 }, { "epoch": 2.1250615999561955, "grad_norm": 0.09679169207811356, "learning_rate": 1.620259582234841e-05, "loss": 0.1823, "step": 38810 }, { "epoch": 2.125335377539287, "grad_norm": 0.09935320913791656, "learning_rate": 1.6197525856824174e-05, "loss": 0.1854, "step": 38815 }, { "epoch": 2.125609155122379, "grad_norm": 0.10126404464244843, "learning_rate": 1.619245589129994e-05, "loss": 0.188, "step": 38820 }, { "epoch": 2.12588293270547, "grad_norm": 0.10233066231012344, "learning_rate": 1.6187385925775704e-05, "loss": 0.183, "step": 38825 }, { "epoch": 2.1261567102885617, "grad_norm": 0.09901636093854904, "learning_rate": 1.618231596025147e-05, "loss": 0.1867, "step": 38830 }, { "epoch": 2.126430487871653, "grad_norm": 0.09661772102117538, "learning_rate": 1.6177245994727238e-05, "loss": 0.1903, "step": 38835 }, { "epoch": 2.1267042654547446, "grad_norm": 0.09606967866420746, "learning_rate": 1.6172176029203e-05, "loss": 0.1889, "step": 38840 }, { "epoch": 2.126978043037836, "grad_norm": 0.10977518558502197, "learning_rate": 1.6167106063678768e-05, "loss": 0.1839, "step": 38845 }, { "epoch": 2.1272518206209274, "grad_norm": 0.10838300734758377, "learning_rate": 1.6162036098154534e-05, "loss": 0.1864, "step": 38850 }, { "epoch": 2.127525598204019, "grad_norm": 0.10109944641590118, "learning_rate": 1.61569661326303e-05, "loss": 0.1848, "step": 38855 }, { "epoch": 2.1277993757871108, "grad_norm": 0.10132098197937012, "learning_rate": 1.6151896167106064e-05, "loss": 0.1815, "step": 38860 }, { "epoch": 2.128073153370202, "grad_norm": 0.09792722761631012, "learning_rate": 1.614682620158183e-05, "loss": 0.183, "step": 38865 }, { "epoch": 2.1283469309532936, "grad_norm": 0.10015343129634857, "learning_rate": 1.6141756236057594e-05, "loss": 0.1811, "step": 38870 }, { "epoch": 2.128620708536385, "grad_norm": 0.11688145995140076, "learning_rate": 1.613668627053336e-05, "loss": 0.1913, "step": 38875 }, { "epoch": 2.1288944861194765, "grad_norm": 0.10199620574712753, "learning_rate": 1.6131616305009124e-05, "loss": 0.1824, "step": 38880 }, { "epoch": 2.129168263702568, "grad_norm": 0.14012378454208374, "learning_rate": 1.612654633948489e-05, "loss": 0.1918, "step": 38885 }, { "epoch": 2.1294420412856594, "grad_norm": 0.12808993458747864, "learning_rate": 1.6121476373960658e-05, "loss": 0.1763, "step": 38890 }, { "epoch": 2.129715818868751, "grad_norm": 0.11963830888271332, "learning_rate": 1.6116406408436424e-05, "loss": 0.1827, "step": 38895 }, { "epoch": 2.1299895964518427, "grad_norm": 0.0991918221116066, "learning_rate": 1.611133644291219e-05, "loss": 0.1767, "step": 38900 }, { "epoch": 2.130263374034934, "grad_norm": 0.10482951253652573, "learning_rate": 1.6106266477387954e-05, "loss": 0.1982, "step": 38905 }, { "epoch": 2.1305371516180256, "grad_norm": 0.1090136393904686, "learning_rate": 1.610119651186372e-05, "loss": 0.177, "step": 38910 }, { "epoch": 2.130810929201117, "grad_norm": 0.10999777913093567, "learning_rate": 1.6096126546339484e-05, "loss": 0.1845, "step": 38915 }, { "epoch": 2.1310847067842085, "grad_norm": 0.10584862530231476, "learning_rate": 1.609105658081525e-05, "loss": 0.181, "step": 38920 }, { "epoch": 2.1313584843673, "grad_norm": 0.1107875183224678, "learning_rate": 1.6085986615291014e-05, "loss": 0.1797, "step": 38925 }, { "epoch": 2.1316322619503913, "grad_norm": 0.10502752661705017, "learning_rate": 1.6080916649766784e-05, "loss": 0.1915, "step": 38930 }, { "epoch": 2.131906039533483, "grad_norm": 0.1180124431848526, "learning_rate": 1.6075846684242548e-05, "loss": 0.1853, "step": 38935 }, { "epoch": 2.1321798171165747, "grad_norm": 0.10863224416971207, "learning_rate": 1.6070776718718314e-05, "loss": 0.1873, "step": 38940 }, { "epoch": 2.132453594699666, "grad_norm": 0.10173045098781586, "learning_rate": 1.6065706753194078e-05, "loss": 0.1846, "step": 38945 }, { "epoch": 2.1327273722827575, "grad_norm": 0.10066522657871246, "learning_rate": 1.6060636787669844e-05, "loss": 0.1882, "step": 38950 }, { "epoch": 2.1330011498658488, "grad_norm": 0.10018596798181534, "learning_rate": 1.605556682214561e-05, "loss": 0.1853, "step": 38955 }, { "epoch": 2.1332749274489404, "grad_norm": 0.10782995820045471, "learning_rate": 1.6050496856621374e-05, "loss": 0.1881, "step": 38960 }, { "epoch": 2.133548705032032, "grad_norm": 0.1013183742761612, "learning_rate": 1.604542689109714e-05, "loss": 0.184, "step": 38965 }, { "epoch": 2.1338224826151233, "grad_norm": 0.10664663463830948, "learning_rate": 1.6040356925572908e-05, "loss": 0.1841, "step": 38970 }, { "epoch": 2.134096260198215, "grad_norm": 0.112313412129879, "learning_rate": 1.6035286960048674e-05, "loss": 0.1938, "step": 38975 }, { "epoch": 2.1343700377813066, "grad_norm": 0.1031661406159401, "learning_rate": 1.6030216994524438e-05, "loss": 0.1887, "step": 38980 }, { "epoch": 2.134643815364398, "grad_norm": 0.0963326096534729, "learning_rate": 1.6025147029000204e-05, "loss": 0.1753, "step": 38985 }, { "epoch": 2.1349175929474895, "grad_norm": 0.09619317948818207, "learning_rate": 1.6020077063475968e-05, "loss": 0.1846, "step": 38990 }, { "epoch": 2.135191370530581, "grad_norm": 0.10865292698144913, "learning_rate": 1.6015007097951734e-05, "loss": 0.1897, "step": 38995 }, { "epoch": 2.1354651481136724, "grad_norm": 0.09160803258419037, "learning_rate": 1.6009937132427498e-05, "loss": 0.1865, "step": 39000 }, { "epoch": 2.135738925696764, "grad_norm": 0.09090422093868256, "learning_rate": 1.6004867166903264e-05, "loss": 0.1781, "step": 39005 }, { "epoch": 2.1360127032798553, "grad_norm": 0.09833754599094391, "learning_rate": 1.599979720137903e-05, "loss": 0.1915, "step": 39010 }, { "epoch": 2.136286480862947, "grad_norm": 0.15113914012908936, "learning_rate": 1.5994727235854798e-05, "loss": 0.1948, "step": 39015 }, { "epoch": 2.1365602584460386, "grad_norm": 0.1009901911020279, "learning_rate": 1.5989657270330565e-05, "loss": 0.1792, "step": 39020 }, { "epoch": 2.13683403602913, "grad_norm": 0.10668907314538956, "learning_rate": 1.5984587304806328e-05, "loss": 0.1817, "step": 39025 }, { "epoch": 2.1371078136122215, "grad_norm": 0.10014167428016663, "learning_rate": 1.5979517339282095e-05, "loss": 0.1843, "step": 39030 }, { "epoch": 2.137381591195313, "grad_norm": 0.11696076393127441, "learning_rate": 1.5974447373757858e-05, "loss": 0.1867, "step": 39035 }, { "epoch": 2.1376553687784043, "grad_norm": 0.10452961176633835, "learning_rate": 1.5969377408233625e-05, "loss": 0.1805, "step": 39040 }, { "epoch": 2.137929146361496, "grad_norm": 0.10382542759180069, "learning_rate": 1.5964307442709388e-05, "loss": 0.1857, "step": 39045 }, { "epoch": 2.138202923944587, "grad_norm": 0.10512217879295349, "learning_rate": 1.5959237477185158e-05, "loss": 0.1886, "step": 39050 }, { "epoch": 2.138476701527679, "grad_norm": 0.11076440662145615, "learning_rate": 1.595416751166092e-05, "loss": 0.1938, "step": 39055 }, { "epoch": 2.1387504791107705, "grad_norm": 0.10822059214115143, "learning_rate": 1.5949097546136688e-05, "loss": 0.1876, "step": 39060 }, { "epoch": 2.1390242566938618, "grad_norm": 0.09834688901901245, "learning_rate": 1.594402758061245e-05, "loss": 0.1848, "step": 39065 }, { "epoch": 2.1392980342769534, "grad_norm": 0.14514705538749695, "learning_rate": 1.5938957615088218e-05, "loss": 0.1823, "step": 39070 }, { "epoch": 2.139571811860045, "grad_norm": 0.10388848185539246, "learning_rate": 1.5933887649563985e-05, "loss": 0.1867, "step": 39075 }, { "epoch": 2.1398455894431363, "grad_norm": 0.10920527577400208, "learning_rate": 1.5928817684039748e-05, "loss": 0.1871, "step": 39080 }, { "epoch": 2.140119367026228, "grad_norm": 0.11066518723964691, "learning_rate": 1.5923747718515515e-05, "loss": 0.1959, "step": 39085 }, { "epoch": 2.1403931446093196, "grad_norm": 0.10666519403457642, "learning_rate": 1.591867775299128e-05, "loss": 0.1766, "step": 39090 }, { "epoch": 2.140666922192411, "grad_norm": 0.08970311284065247, "learning_rate": 1.5913607787467048e-05, "loss": 0.1773, "step": 39095 }, { "epoch": 2.1409406997755025, "grad_norm": 0.11034402251243591, "learning_rate": 1.590853782194281e-05, "loss": 0.187, "step": 39100 }, { "epoch": 2.1412144773585937, "grad_norm": 0.09064363688230515, "learning_rate": 1.5903467856418578e-05, "loss": 0.1787, "step": 39105 }, { "epoch": 2.1414882549416854, "grad_norm": 0.1029854342341423, "learning_rate": 1.589839789089434e-05, "loss": 0.1901, "step": 39110 }, { "epoch": 2.141762032524777, "grad_norm": 0.11389129608869553, "learning_rate": 1.5893327925370108e-05, "loss": 0.1821, "step": 39115 }, { "epoch": 2.1420358101078683, "grad_norm": 0.09721826016902924, "learning_rate": 1.5888257959845875e-05, "loss": 0.1818, "step": 39120 }, { "epoch": 2.14230958769096, "grad_norm": 0.10015609115362167, "learning_rate": 1.5883187994321638e-05, "loss": 0.1861, "step": 39125 }, { "epoch": 2.142583365274051, "grad_norm": 0.12448891997337341, "learning_rate": 1.5878118028797405e-05, "loss": 0.188, "step": 39130 }, { "epoch": 2.142857142857143, "grad_norm": 0.11083988845348358, "learning_rate": 1.587304806327317e-05, "loss": 0.184, "step": 39135 }, { "epoch": 2.1431309204402345, "grad_norm": 0.09555383771657944, "learning_rate": 1.5867978097748938e-05, "loss": 0.1804, "step": 39140 }, { "epoch": 2.1434046980233257, "grad_norm": 0.10586022585630417, "learning_rate": 1.58629081322247e-05, "loss": 0.1838, "step": 39145 }, { "epoch": 2.1436784756064173, "grad_norm": 0.10802409797906876, "learning_rate": 1.5857838166700468e-05, "loss": 0.187, "step": 39150 }, { "epoch": 2.143952253189509, "grad_norm": 0.09515625983476639, "learning_rate": 1.585276820117623e-05, "loss": 0.1789, "step": 39155 }, { "epoch": 2.1442260307726, "grad_norm": 0.11174628138542175, "learning_rate": 1.5847698235651998e-05, "loss": 0.2034, "step": 39160 }, { "epoch": 2.144499808355692, "grad_norm": 0.11216280609369278, "learning_rate": 1.584262827012776e-05, "loss": 0.1924, "step": 39165 }, { "epoch": 2.1447735859387835, "grad_norm": 0.10719379782676697, "learning_rate": 1.5837558304603528e-05, "loss": 0.1885, "step": 39170 }, { "epoch": 2.1450473635218748, "grad_norm": 0.09728197753429413, "learning_rate": 1.5832488339079295e-05, "loss": 0.1917, "step": 39175 }, { "epoch": 2.1453211411049664, "grad_norm": 0.11038800328969955, "learning_rate": 1.582741837355506e-05, "loss": 0.1906, "step": 39180 }, { "epoch": 2.1455949186880576, "grad_norm": 0.11038743704557419, "learning_rate": 1.5822348408030828e-05, "loss": 0.1783, "step": 39185 }, { "epoch": 2.1458686962711493, "grad_norm": 0.11727868020534515, "learning_rate": 1.581727844250659e-05, "loss": 0.1786, "step": 39190 }, { "epoch": 2.146142473854241, "grad_norm": 0.1028253361582756, "learning_rate": 1.5812208476982358e-05, "loss": 0.1822, "step": 39195 }, { "epoch": 2.146416251437332, "grad_norm": 0.09582751989364624, "learning_rate": 1.580713851145812e-05, "loss": 0.1822, "step": 39200 }, { "epoch": 2.146690029020424, "grad_norm": 0.09532082080841064, "learning_rate": 1.5802068545933888e-05, "loss": 0.1818, "step": 39205 }, { "epoch": 2.1469638066035155, "grad_norm": 0.09342372417449951, "learning_rate": 1.579699858040965e-05, "loss": 0.183, "step": 39210 }, { "epoch": 2.1472375841866067, "grad_norm": 0.10974077880382538, "learning_rate": 1.579192861488542e-05, "loss": 0.1897, "step": 39215 }, { "epoch": 2.1475113617696984, "grad_norm": 0.10416245460510254, "learning_rate": 1.5786858649361185e-05, "loss": 0.1829, "step": 39220 }, { "epoch": 2.1477851393527896, "grad_norm": 0.10009033232927322, "learning_rate": 1.578178868383695e-05, "loss": 0.1825, "step": 39225 }, { "epoch": 2.1480589169358812, "grad_norm": 0.09244386106729507, "learning_rate": 1.5776718718312715e-05, "loss": 0.1785, "step": 39230 }, { "epoch": 2.148332694518973, "grad_norm": 0.0904136449098587, "learning_rate": 1.577164875278848e-05, "loss": 0.1827, "step": 39235 }, { "epoch": 2.148606472102064, "grad_norm": 0.10041461139917374, "learning_rate": 1.5766578787264248e-05, "loss": 0.1796, "step": 39240 }, { "epoch": 2.148880249685156, "grad_norm": 0.11244404315948486, "learning_rate": 1.576150882174001e-05, "loss": 0.1931, "step": 39245 }, { "epoch": 2.1491540272682474, "grad_norm": 0.11183919757604599, "learning_rate": 1.5756438856215778e-05, "loss": 0.1845, "step": 39250 }, { "epoch": 2.1494278048513387, "grad_norm": 0.11018578708171844, "learning_rate": 1.5751368890691545e-05, "loss": 0.1852, "step": 39255 }, { "epoch": 2.1497015824344303, "grad_norm": 0.09852013736963272, "learning_rate": 1.574629892516731e-05, "loss": 0.1863, "step": 39260 }, { "epoch": 2.149975360017522, "grad_norm": 0.09894821792840958, "learning_rate": 1.5741228959643075e-05, "loss": 0.1787, "step": 39265 }, { "epoch": 2.150249137600613, "grad_norm": 0.10583722591400146, "learning_rate": 1.573615899411884e-05, "loss": 0.1881, "step": 39270 }, { "epoch": 2.150522915183705, "grad_norm": 0.10106747597455978, "learning_rate": 1.5731089028594605e-05, "loss": 0.1807, "step": 39275 }, { "epoch": 2.150796692766796, "grad_norm": 0.10720410197973251, "learning_rate": 1.572601906307037e-05, "loss": 0.1839, "step": 39280 }, { "epoch": 2.1510704703498877, "grad_norm": 0.10276371985673904, "learning_rate": 1.5720949097546135e-05, "loss": 0.1791, "step": 39285 }, { "epoch": 2.1513442479329794, "grad_norm": 0.09973111748695374, "learning_rate": 1.57158791320219e-05, "loss": 0.1878, "step": 39290 }, { "epoch": 2.1516180255160706, "grad_norm": 0.11422938108444214, "learning_rate": 1.5710809166497668e-05, "loss": 0.1901, "step": 39295 }, { "epoch": 2.1518918030991623, "grad_norm": 0.1042686402797699, "learning_rate": 1.5705739200973435e-05, "loss": 0.1793, "step": 39300 }, { "epoch": 2.152165580682254, "grad_norm": 0.09029609709978104, "learning_rate": 1.57006692354492e-05, "loss": 0.1789, "step": 39305 }, { "epoch": 2.152439358265345, "grad_norm": 0.10137035697698593, "learning_rate": 1.5695599269924965e-05, "loss": 0.1777, "step": 39310 }, { "epoch": 2.152713135848437, "grad_norm": 0.11070159077644348, "learning_rate": 1.569052930440073e-05, "loss": 0.184, "step": 39315 }, { "epoch": 2.152986913431528, "grad_norm": 0.09970613569021225, "learning_rate": 1.5685459338876495e-05, "loss": 0.1817, "step": 39320 }, { "epoch": 2.1532606910146197, "grad_norm": 0.08786715567111969, "learning_rate": 1.568038937335226e-05, "loss": 0.1797, "step": 39325 }, { "epoch": 2.1535344685977114, "grad_norm": 0.09157438576221466, "learning_rate": 1.5675319407828025e-05, "loss": 0.1875, "step": 39330 }, { "epoch": 2.1538082461808026, "grad_norm": 0.10989505052566528, "learning_rate": 1.5670249442303795e-05, "loss": 0.1848, "step": 39335 }, { "epoch": 2.1540820237638942, "grad_norm": 0.10548624396324158, "learning_rate": 1.566517947677956e-05, "loss": 0.1763, "step": 39340 }, { "epoch": 2.154355801346986, "grad_norm": 0.10008352249860764, "learning_rate": 1.5660109511255325e-05, "loss": 0.1816, "step": 39345 }, { "epoch": 2.154629578930077, "grad_norm": 0.11191616952419281, "learning_rate": 1.565503954573109e-05, "loss": 0.1816, "step": 39350 }, { "epoch": 2.1549033565131688, "grad_norm": 0.09348438680171967, "learning_rate": 1.5649969580206855e-05, "loss": 0.1823, "step": 39355 }, { "epoch": 2.15517713409626, "grad_norm": 0.11031501740217209, "learning_rate": 1.5644899614682622e-05, "loss": 0.1924, "step": 39360 }, { "epoch": 2.1554509116793517, "grad_norm": 0.10764501243829727, "learning_rate": 1.5639829649158385e-05, "loss": 0.1897, "step": 39365 }, { "epoch": 2.1557246892624433, "grad_norm": 0.1108761578798294, "learning_rate": 1.5634759683634152e-05, "loss": 0.1856, "step": 39370 }, { "epoch": 2.1559984668455345, "grad_norm": 0.1000797376036644, "learning_rate": 1.562968971810992e-05, "loss": 0.1814, "step": 39375 }, { "epoch": 2.156272244428626, "grad_norm": 0.0942096933722496, "learning_rate": 1.5624619752585685e-05, "loss": 0.1785, "step": 39380 }, { "epoch": 2.156546022011718, "grad_norm": 0.10904290527105331, "learning_rate": 1.561954978706145e-05, "loss": 0.1756, "step": 39385 }, { "epoch": 2.156819799594809, "grad_norm": 0.11077087372541428, "learning_rate": 1.5614479821537215e-05, "loss": 0.186, "step": 39390 }, { "epoch": 2.1570935771779007, "grad_norm": 0.10317523032426834, "learning_rate": 1.560940985601298e-05, "loss": 0.1805, "step": 39395 }, { "epoch": 2.157367354760992, "grad_norm": 0.08923258632421494, "learning_rate": 1.5604339890488745e-05, "loss": 0.1863, "step": 39400 }, { "epoch": 2.1576411323440836, "grad_norm": 0.09939119964838028, "learning_rate": 1.5599269924964512e-05, "loss": 0.1831, "step": 39405 }, { "epoch": 2.1579149099271753, "grad_norm": 0.09455330669879913, "learning_rate": 1.5594199959440275e-05, "loss": 0.1822, "step": 39410 }, { "epoch": 2.1581886875102665, "grad_norm": 0.09720157831907272, "learning_rate": 1.5589129993916042e-05, "loss": 0.1842, "step": 39415 }, { "epoch": 2.158462465093358, "grad_norm": 0.1019645482301712, "learning_rate": 1.558406002839181e-05, "loss": 0.1822, "step": 39420 }, { "epoch": 2.15873624267645, "grad_norm": 0.1137833520770073, "learning_rate": 1.5578990062867575e-05, "loss": 0.1845, "step": 39425 }, { "epoch": 2.159010020259541, "grad_norm": 0.10393640398979187, "learning_rate": 1.557392009734334e-05, "loss": 0.187, "step": 39430 }, { "epoch": 2.1592837978426327, "grad_norm": 0.09278592467308044, "learning_rate": 1.5568850131819105e-05, "loss": 0.1805, "step": 39435 }, { "epoch": 2.1595575754257244, "grad_norm": 0.1109565868973732, "learning_rate": 1.556378016629487e-05, "loss": 0.1797, "step": 39440 }, { "epoch": 2.1598313530088156, "grad_norm": 0.10337568819522858, "learning_rate": 1.5558710200770635e-05, "loss": 0.1871, "step": 39445 }, { "epoch": 2.1601051305919072, "grad_norm": 0.09423023462295532, "learning_rate": 1.55536402352464e-05, "loss": 0.1899, "step": 39450 }, { "epoch": 2.1603789081749984, "grad_norm": 0.10708023607730865, "learning_rate": 1.554857026972217e-05, "loss": 0.1893, "step": 39455 }, { "epoch": 2.16065268575809, "grad_norm": 0.10413410514593124, "learning_rate": 1.5543500304197932e-05, "loss": 0.1886, "step": 39460 }, { "epoch": 2.1609264633411818, "grad_norm": 0.10574109107255936, "learning_rate": 1.55384303386737e-05, "loss": 0.1868, "step": 39465 }, { "epoch": 2.161200240924273, "grad_norm": 0.11037757247686386, "learning_rate": 1.5533360373149465e-05, "loss": 0.1888, "step": 39470 }, { "epoch": 2.1614740185073646, "grad_norm": 0.09804778546094894, "learning_rate": 1.552829040762523e-05, "loss": 0.1772, "step": 39475 }, { "epoch": 2.1617477960904563, "grad_norm": 0.09232217818498611, "learning_rate": 1.5523220442100995e-05, "loss": 0.1759, "step": 39480 }, { "epoch": 2.1620215736735475, "grad_norm": 0.10635889321565628, "learning_rate": 1.551815047657676e-05, "loss": 0.1882, "step": 39485 }, { "epoch": 2.162295351256639, "grad_norm": 0.09960595518350601, "learning_rate": 1.5513080511052525e-05, "loss": 0.1824, "step": 39490 }, { "epoch": 2.1625691288397304, "grad_norm": 0.09730837494134903, "learning_rate": 1.550801054552829e-05, "loss": 0.1872, "step": 39495 }, { "epoch": 2.162842906422822, "grad_norm": 0.09986397624015808, "learning_rate": 1.550294058000406e-05, "loss": 0.1771, "step": 39500 }, { "epoch": 2.1631166840059137, "grad_norm": 0.10496912896633148, "learning_rate": 1.5497870614479822e-05, "loss": 0.1793, "step": 39505 }, { "epoch": 2.163390461589005, "grad_norm": 0.09414724260568619, "learning_rate": 1.549280064895559e-05, "loss": 0.1855, "step": 39510 }, { "epoch": 2.1636642391720966, "grad_norm": 0.110031358897686, "learning_rate": 1.5487730683431352e-05, "loss": 0.1852, "step": 39515 }, { "epoch": 2.1639380167551883, "grad_norm": 0.10642935335636139, "learning_rate": 1.548266071790712e-05, "loss": 0.1891, "step": 39520 }, { "epoch": 2.1642117943382795, "grad_norm": 0.10865288972854614, "learning_rate": 1.5477590752382885e-05, "loss": 0.1945, "step": 39525 }, { "epoch": 2.164485571921371, "grad_norm": 0.09697838127613068, "learning_rate": 1.547252078685865e-05, "loss": 0.1854, "step": 39530 }, { "epoch": 2.164759349504463, "grad_norm": 0.10434098541736603, "learning_rate": 1.5467450821334415e-05, "loss": 0.1811, "step": 39535 }, { "epoch": 2.165033127087554, "grad_norm": 0.1089794859290123, "learning_rate": 1.5462380855810182e-05, "loss": 0.186, "step": 39540 }, { "epoch": 2.1653069046706457, "grad_norm": 0.12051394581794739, "learning_rate": 1.545731089028595e-05, "loss": 0.1969, "step": 39545 }, { "epoch": 2.165580682253737, "grad_norm": 0.10307544469833374, "learning_rate": 1.5452240924761712e-05, "loss": 0.186, "step": 39550 }, { "epoch": 2.1658544598368286, "grad_norm": 0.1324082762002945, "learning_rate": 1.544717095923748e-05, "loss": 0.1923, "step": 39555 }, { "epoch": 2.16612823741992, "grad_norm": 0.10543349385261536, "learning_rate": 1.5442100993713242e-05, "loss": 0.188, "step": 39560 }, { "epoch": 2.1664020150030114, "grad_norm": 0.09835212677717209, "learning_rate": 1.543703102818901e-05, "loss": 0.1857, "step": 39565 }, { "epoch": 2.166675792586103, "grad_norm": 0.10627230256795883, "learning_rate": 1.5431961062664775e-05, "loss": 0.1819, "step": 39570 }, { "epoch": 2.1669495701691943, "grad_norm": 0.09918653964996338, "learning_rate": 1.542689109714054e-05, "loss": 0.1873, "step": 39575 }, { "epoch": 2.167223347752286, "grad_norm": 0.10654430091381073, "learning_rate": 1.5421821131616305e-05, "loss": 0.1854, "step": 39580 }, { "epoch": 2.1674971253353776, "grad_norm": 0.11826574057340622, "learning_rate": 1.5416751166092072e-05, "loss": 0.1864, "step": 39585 }, { "epoch": 2.167770902918469, "grad_norm": 0.095404252409935, "learning_rate": 1.541168120056784e-05, "loss": 0.1757, "step": 39590 }, { "epoch": 2.1680446805015605, "grad_norm": 0.10745777189731598, "learning_rate": 1.5406611235043602e-05, "loss": 0.1783, "step": 39595 }, { "epoch": 2.168318458084652, "grad_norm": 0.09030863642692566, "learning_rate": 1.540154126951937e-05, "loss": 0.1841, "step": 39600 }, { "epoch": 2.1685922356677434, "grad_norm": 0.11168615520000458, "learning_rate": 1.5396471303995132e-05, "loss": 0.1894, "step": 39605 }, { "epoch": 2.168866013250835, "grad_norm": 0.09967119991779327, "learning_rate": 1.53914013384709e-05, "loss": 0.1969, "step": 39610 }, { "epoch": 2.1691397908339267, "grad_norm": 0.10170292854309082, "learning_rate": 1.5386331372946662e-05, "loss": 0.1802, "step": 39615 }, { "epoch": 2.169413568417018, "grad_norm": 0.10869438201189041, "learning_rate": 1.5381261407422432e-05, "loss": 0.1818, "step": 39620 }, { "epoch": 2.1696873460001096, "grad_norm": 0.09739410132169724, "learning_rate": 1.5376191441898195e-05, "loss": 0.179, "step": 39625 }, { "epoch": 2.169961123583201, "grad_norm": 0.10821060091257095, "learning_rate": 1.5371121476373962e-05, "loss": 0.1856, "step": 39630 }, { "epoch": 2.1702349011662925, "grad_norm": 0.09986371546983719, "learning_rate": 1.536605151084973e-05, "loss": 0.1855, "step": 39635 }, { "epoch": 2.170508678749384, "grad_norm": 0.10687069594860077, "learning_rate": 1.5360981545325492e-05, "loss": 0.1823, "step": 39640 }, { "epoch": 2.1707824563324754, "grad_norm": 0.09281111508607864, "learning_rate": 1.535591157980126e-05, "loss": 0.1863, "step": 39645 }, { "epoch": 2.171056233915567, "grad_norm": 0.1035076156258583, "learning_rate": 1.5350841614277022e-05, "loss": 0.1956, "step": 39650 }, { "epoch": 2.1713300114986587, "grad_norm": 0.09813643991947174, "learning_rate": 1.534577164875279e-05, "loss": 0.1945, "step": 39655 }, { "epoch": 2.17160378908175, "grad_norm": 0.0991445779800415, "learning_rate": 1.5340701683228556e-05, "loss": 0.1798, "step": 39660 }, { "epoch": 2.1718775666648416, "grad_norm": 0.1335199922323227, "learning_rate": 1.5335631717704322e-05, "loss": 0.1858, "step": 39665 }, { "epoch": 2.1721513442479328, "grad_norm": 0.09722574800252914, "learning_rate": 1.5330561752180086e-05, "loss": 0.1887, "step": 39670 }, { "epoch": 2.1724251218310244, "grad_norm": 0.10000735521316528, "learning_rate": 1.5325491786655852e-05, "loss": 0.1913, "step": 39675 }, { "epoch": 2.172698899414116, "grad_norm": 0.09907980263233185, "learning_rate": 1.5320421821131616e-05, "loss": 0.1871, "step": 39680 }, { "epoch": 2.1729726769972073, "grad_norm": 0.10044898092746735, "learning_rate": 1.5315351855607382e-05, "loss": 0.1855, "step": 39685 }, { "epoch": 2.173246454580299, "grad_norm": 0.09427400678396225, "learning_rate": 1.531028189008315e-05, "loss": 0.179, "step": 39690 }, { "epoch": 2.1735202321633906, "grad_norm": 0.10058152675628662, "learning_rate": 1.5305211924558912e-05, "loss": 0.1847, "step": 39695 }, { "epoch": 2.173794009746482, "grad_norm": 0.09988921880722046, "learning_rate": 1.5300141959034682e-05, "loss": 0.1876, "step": 39700 }, { "epoch": 2.1740677873295735, "grad_norm": 0.10023220628499985, "learning_rate": 1.5295071993510446e-05, "loss": 0.1787, "step": 39705 }, { "epoch": 2.174341564912665, "grad_norm": 0.10640964657068253, "learning_rate": 1.5290002027986212e-05, "loss": 0.1856, "step": 39710 }, { "epoch": 2.1746153424957564, "grad_norm": 0.11216147243976593, "learning_rate": 1.5284932062461976e-05, "loss": 0.1808, "step": 39715 }, { "epoch": 2.174889120078848, "grad_norm": 0.10707858204841614, "learning_rate": 1.5279862096937742e-05, "loss": 0.1837, "step": 39720 }, { "epoch": 2.1751628976619393, "grad_norm": 0.0957586020231247, "learning_rate": 1.5274792131413506e-05, "loss": 0.1899, "step": 39725 }, { "epoch": 2.175436675245031, "grad_norm": 0.09333062916994095, "learning_rate": 1.5269722165889272e-05, "loss": 0.1829, "step": 39730 }, { "epoch": 2.1757104528281226, "grad_norm": 0.10062284022569656, "learning_rate": 1.5264652200365036e-05, "loss": 0.1803, "step": 39735 }, { "epoch": 2.175984230411214, "grad_norm": 0.11086602509021759, "learning_rate": 1.5259582234840806e-05, "loss": 0.1806, "step": 39740 }, { "epoch": 2.1762580079943055, "grad_norm": 0.10212758183479309, "learning_rate": 1.525451226931657e-05, "loss": 0.1921, "step": 39745 }, { "epoch": 2.176531785577397, "grad_norm": 0.11548256874084473, "learning_rate": 1.5249442303792336e-05, "loss": 0.1901, "step": 39750 }, { "epoch": 2.1768055631604883, "grad_norm": 0.10447871685028076, "learning_rate": 1.52443723382681e-05, "loss": 0.1852, "step": 39755 }, { "epoch": 2.17707934074358, "grad_norm": 0.09301804006099701, "learning_rate": 1.5239302372743866e-05, "loss": 0.1807, "step": 39760 }, { "epoch": 2.177353118326671, "grad_norm": 0.10675886273384094, "learning_rate": 1.523423240721963e-05, "loss": 0.1839, "step": 39765 }, { "epoch": 2.177626895909763, "grad_norm": 0.10073806345462799, "learning_rate": 1.5229162441695397e-05, "loss": 0.1807, "step": 39770 }, { "epoch": 2.1779006734928545, "grad_norm": 0.10470276325941086, "learning_rate": 1.5224092476171162e-05, "loss": 0.1812, "step": 39775 }, { "epoch": 2.1781744510759458, "grad_norm": 0.1010056659579277, "learning_rate": 1.5219022510646927e-05, "loss": 0.1857, "step": 39780 }, { "epoch": 2.1784482286590374, "grad_norm": 0.10877266526222229, "learning_rate": 1.5213952545122696e-05, "loss": 0.1825, "step": 39785 }, { "epoch": 2.178722006242129, "grad_norm": 0.10984375327825546, "learning_rate": 1.520888257959846e-05, "loss": 0.1867, "step": 39790 }, { "epoch": 2.1789957838252203, "grad_norm": 0.09757522493600845, "learning_rate": 1.5203812614074226e-05, "loss": 0.185, "step": 39795 }, { "epoch": 2.179269561408312, "grad_norm": 0.11155889183282852, "learning_rate": 1.519874264854999e-05, "loss": 0.1888, "step": 39800 }, { "epoch": 2.179543338991403, "grad_norm": 0.10513725876808167, "learning_rate": 1.5193672683025756e-05, "loss": 0.1975, "step": 39805 }, { "epoch": 2.179817116574495, "grad_norm": 0.09656667709350586, "learning_rate": 1.518860271750152e-05, "loss": 0.1874, "step": 39810 }, { "epoch": 2.1800908941575865, "grad_norm": 0.09844190627336502, "learning_rate": 1.5183532751977286e-05, "loss": 0.1831, "step": 39815 }, { "epoch": 2.1803646717406777, "grad_norm": 0.09244680404663086, "learning_rate": 1.517846278645305e-05, "loss": 0.1766, "step": 39820 }, { "epoch": 2.1806384493237694, "grad_norm": 0.10209917277097702, "learning_rate": 1.5173392820928819e-05, "loss": 0.1843, "step": 39825 }, { "epoch": 2.180912226906861, "grad_norm": 0.10271991789340973, "learning_rate": 1.5168322855404584e-05, "loss": 0.1811, "step": 39830 }, { "epoch": 2.1811860044899523, "grad_norm": 0.1040230542421341, "learning_rate": 1.516325288988035e-05, "loss": 0.1841, "step": 39835 }, { "epoch": 2.181459782073044, "grad_norm": 0.10604498535394669, "learning_rate": 1.5158182924356116e-05, "loss": 0.1854, "step": 39840 }, { "epoch": 2.181733559656135, "grad_norm": 0.11507530510425568, "learning_rate": 1.515311295883188e-05, "loss": 0.1811, "step": 39845 }, { "epoch": 2.182007337239227, "grad_norm": 0.11294595897197723, "learning_rate": 1.5148042993307646e-05, "loss": 0.1871, "step": 39850 }, { "epoch": 2.1822811148223185, "grad_norm": 0.0952506735920906, "learning_rate": 1.514297302778341e-05, "loss": 0.1794, "step": 39855 }, { "epoch": 2.1825548924054097, "grad_norm": 0.10834860801696777, "learning_rate": 1.5137903062259176e-05, "loss": 0.1848, "step": 39860 }, { "epoch": 2.1828286699885013, "grad_norm": 0.10035626590251923, "learning_rate": 1.5132833096734944e-05, "loss": 0.1922, "step": 39865 }, { "epoch": 2.183102447571593, "grad_norm": 0.09754160046577454, "learning_rate": 1.512776313121071e-05, "loss": 0.1944, "step": 39870 }, { "epoch": 2.183376225154684, "grad_norm": 0.10214075446128845, "learning_rate": 1.5122693165686474e-05, "loss": 0.1832, "step": 39875 }, { "epoch": 2.183650002737776, "grad_norm": 0.10031560808420181, "learning_rate": 1.511762320016224e-05, "loss": 0.1807, "step": 39880 }, { "epoch": 2.1839237803208675, "grad_norm": 0.09118049591779709, "learning_rate": 1.5112553234638004e-05, "loss": 0.189, "step": 39885 }, { "epoch": 2.1841975579039588, "grad_norm": 0.0939457044005394, "learning_rate": 1.5107483269113771e-05, "loss": 0.1838, "step": 39890 }, { "epoch": 2.1844713354870504, "grad_norm": 0.09146524220705032, "learning_rate": 1.5102413303589536e-05, "loss": 0.181, "step": 39895 }, { "epoch": 2.1847451130701416, "grad_norm": 0.09781929850578308, "learning_rate": 1.5097343338065301e-05, "loss": 0.1776, "step": 39900 }, { "epoch": 2.1850188906532333, "grad_norm": 0.09861278533935547, "learning_rate": 1.509227337254107e-05, "loss": 0.1805, "step": 39905 }, { "epoch": 2.185292668236325, "grad_norm": 0.09583495557308197, "learning_rate": 1.5087203407016834e-05, "loss": 0.1753, "step": 39910 }, { "epoch": 2.185566445819416, "grad_norm": 0.11173141002655029, "learning_rate": 1.50821334414926e-05, "loss": 0.1787, "step": 39915 }, { "epoch": 2.185840223402508, "grad_norm": 0.12087766826152802, "learning_rate": 1.5077063475968364e-05, "loss": 0.197, "step": 39920 }, { "epoch": 2.1861140009855995, "grad_norm": 0.09394342452287674, "learning_rate": 1.507199351044413e-05, "loss": 0.185, "step": 39925 }, { "epoch": 2.1863877785686907, "grad_norm": 0.09791036695241928, "learning_rate": 1.5066923544919894e-05, "loss": 0.1867, "step": 39930 }, { "epoch": 2.1866615561517824, "grad_norm": 0.09714122116565704, "learning_rate": 1.506185357939566e-05, "loss": 0.1831, "step": 39935 }, { "epoch": 2.1869353337348736, "grad_norm": 0.09775759279727936, "learning_rate": 1.5056783613871426e-05, "loss": 0.1914, "step": 39940 }, { "epoch": 2.1872091113179652, "grad_norm": 0.09799706935882568, "learning_rate": 1.5051713648347193e-05, "loss": 0.1847, "step": 39945 }, { "epoch": 2.187482888901057, "grad_norm": 0.11018962413072586, "learning_rate": 1.5046643682822958e-05, "loss": 0.189, "step": 39950 }, { "epoch": 2.187756666484148, "grad_norm": 0.09152909368276596, "learning_rate": 1.5041573717298724e-05, "loss": 0.178, "step": 39955 }, { "epoch": 2.18803044406724, "grad_norm": 0.10270945727825165, "learning_rate": 1.503650375177449e-05, "loss": 0.1832, "step": 39960 }, { "epoch": 2.1883042216503314, "grad_norm": 0.09070687741041183, "learning_rate": 1.5031433786250254e-05, "loss": 0.181, "step": 39965 }, { "epoch": 2.1885779992334227, "grad_norm": 0.10334227234125137, "learning_rate": 1.502636382072602e-05, "loss": 0.1805, "step": 39970 }, { "epoch": 2.1888517768165143, "grad_norm": 0.10629154741764069, "learning_rate": 1.5021293855201784e-05, "loss": 0.1973, "step": 39975 }, { "epoch": 2.189125554399606, "grad_norm": 0.11168912053108215, "learning_rate": 1.501622388967755e-05, "loss": 0.1788, "step": 39980 }, { "epoch": 2.189399331982697, "grad_norm": 0.1025480180978775, "learning_rate": 1.5011153924153318e-05, "loss": 0.1863, "step": 39985 }, { "epoch": 2.189673109565789, "grad_norm": 0.10131008177995682, "learning_rate": 1.5006083958629083e-05, "loss": 0.1726, "step": 39990 }, { "epoch": 2.18994688714888, "grad_norm": 0.09924253076314926, "learning_rate": 1.5001013993104848e-05, "loss": 0.1826, "step": 39995 }, { "epoch": 2.1902206647319717, "grad_norm": 0.11863270401954651, "learning_rate": 1.4995944027580613e-05, "loss": 0.1798, "step": 40000 }, { "epoch": 2.1904944423150634, "grad_norm": 0.12550655007362366, "learning_rate": 1.499087406205638e-05, "loss": 0.1873, "step": 40005 }, { "epoch": 2.1907682198981546, "grad_norm": 0.10720142722129822, "learning_rate": 1.4985804096532144e-05, "loss": 0.1825, "step": 40010 }, { "epoch": 2.1910419974812463, "grad_norm": 0.09387122839689255, "learning_rate": 1.498073413100791e-05, "loss": 0.1834, "step": 40015 }, { "epoch": 2.1913157750643375, "grad_norm": 0.09996958076953888, "learning_rate": 1.4975664165483674e-05, "loss": 0.1809, "step": 40020 }, { "epoch": 2.191589552647429, "grad_norm": 0.10469173640012741, "learning_rate": 1.4970594199959443e-05, "loss": 0.182, "step": 40025 }, { "epoch": 2.191863330230521, "grad_norm": 0.10221898555755615, "learning_rate": 1.4965524234435208e-05, "loss": 0.1872, "step": 40030 }, { "epoch": 2.192137107813612, "grad_norm": 0.11116732656955719, "learning_rate": 1.4960454268910973e-05, "loss": 0.1836, "step": 40035 }, { "epoch": 2.1924108853967037, "grad_norm": 0.10481706261634827, "learning_rate": 1.4955384303386738e-05, "loss": 0.1938, "step": 40040 }, { "epoch": 2.1926846629797954, "grad_norm": 0.11507978290319443, "learning_rate": 1.4950314337862503e-05, "loss": 0.1868, "step": 40045 }, { "epoch": 2.1929584405628866, "grad_norm": 0.0982622504234314, "learning_rate": 1.4945244372338268e-05, "loss": 0.186, "step": 40050 }, { "epoch": 2.1932322181459782, "grad_norm": 0.09043512493371964, "learning_rate": 1.4940174406814034e-05, "loss": 0.1806, "step": 40055 }, { "epoch": 2.19350599572907, "grad_norm": 0.11436416953802109, "learning_rate": 1.49351044412898e-05, "loss": 0.1874, "step": 40060 }, { "epoch": 2.193779773312161, "grad_norm": 0.09899690002202988, "learning_rate": 1.4930034475765564e-05, "loss": 0.1813, "step": 40065 }, { "epoch": 2.194053550895253, "grad_norm": 0.10561764240264893, "learning_rate": 1.4924964510241333e-05, "loss": 0.1871, "step": 40070 }, { "epoch": 2.194327328478344, "grad_norm": 0.10549639910459518, "learning_rate": 1.4919894544717098e-05, "loss": 0.1889, "step": 40075 }, { "epoch": 2.1946011060614357, "grad_norm": 0.09622802585363388, "learning_rate": 1.4914824579192863e-05, "loss": 0.189, "step": 40080 }, { "epoch": 2.1948748836445273, "grad_norm": 0.12188003957271576, "learning_rate": 1.4909754613668628e-05, "loss": 0.1827, "step": 40085 }, { "epoch": 2.1951486612276185, "grad_norm": 0.09492774307727814, "learning_rate": 1.4904684648144393e-05, "loss": 0.1881, "step": 40090 }, { "epoch": 2.19542243881071, "grad_norm": 0.10241812467575073, "learning_rate": 1.4899614682620158e-05, "loss": 0.181, "step": 40095 }, { "epoch": 2.195696216393802, "grad_norm": 0.09194514155387878, "learning_rate": 1.4894544717095923e-05, "loss": 0.1815, "step": 40100 }, { "epoch": 2.195969993976893, "grad_norm": 0.09870973229408264, "learning_rate": 1.4889474751571688e-05, "loss": 0.1851, "step": 40105 }, { "epoch": 2.1962437715599847, "grad_norm": 0.10585951060056686, "learning_rate": 1.4884404786047456e-05, "loss": 0.1817, "step": 40110 }, { "epoch": 2.196517549143076, "grad_norm": 0.11449932307004929, "learning_rate": 1.4879334820523221e-05, "loss": 0.1825, "step": 40115 }, { "epoch": 2.1967913267261676, "grad_norm": 0.11094663292169571, "learning_rate": 1.4874264854998988e-05, "loss": 0.1854, "step": 40120 }, { "epoch": 2.1970651043092593, "grad_norm": 0.105802021920681, "learning_rate": 1.4869194889474753e-05, "loss": 0.1855, "step": 40125 }, { "epoch": 2.1973388818923505, "grad_norm": 0.1184224784374237, "learning_rate": 1.4864124923950518e-05, "loss": 0.1872, "step": 40130 }, { "epoch": 2.197612659475442, "grad_norm": 0.10434585809707642, "learning_rate": 1.4859054958426283e-05, "loss": 0.1848, "step": 40135 }, { "epoch": 2.197886437058534, "grad_norm": 0.09766148030757904, "learning_rate": 1.4853984992902048e-05, "loss": 0.1853, "step": 40140 }, { "epoch": 2.198160214641625, "grad_norm": 0.10263407975435257, "learning_rate": 1.4848915027377813e-05, "loss": 0.1883, "step": 40145 }, { "epoch": 2.1984339922247167, "grad_norm": 0.10423167049884796, "learning_rate": 1.4843845061853581e-05, "loss": 0.1838, "step": 40150 }, { "epoch": 2.1987077698078084, "grad_norm": 0.09277244657278061, "learning_rate": 1.4838775096329346e-05, "loss": 0.1875, "step": 40155 }, { "epoch": 2.1989815473908996, "grad_norm": 0.10575610399246216, "learning_rate": 1.4833705130805111e-05, "loss": 0.1878, "step": 40160 }, { "epoch": 2.1992553249739912, "grad_norm": 0.09591691195964813, "learning_rate": 1.4828635165280876e-05, "loss": 0.1853, "step": 40165 }, { "epoch": 2.1995291025570824, "grad_norm": 0.10546210408210754, "learning_rate": 1.4823565199756641e-05, "loss": 0.1794, "step": 40170 }, { "epoch": 2.199802880140174, "grad_norm": 0.119564950466156, "learning_rate": 1.4818495234232408e-05, "loss": 0.1804, "step": 40175 }, { "epoch": 2.2000766577232658, "grad_norm": 0.10447001457214355, "learning_rate": 1.4813425268708173e-05, "loss": 0.1928, "step": 40180 }, { "epoch": 2.200350435306357, "grad_norm": 0.11236561834812164, "learning_rate": 1.4808355303183938e-05, "loss": 0.1914, "step": 40185 }, { "epoch": 2.2006242128894486, "grad_norm": 0.10117977857589722, "learning_rate": 1.4803285337659706e-05, "loss": 0.1893, "step": 40190 }, { "epoch": 2.2008979904725403, "grad_norm": 0.11563816666603088, "learning_rate": 1.4798215372135471e-05, "loss": 0.1805, "step": 40195 }, { "epoch": 2.2011717680556315, "grad_norm": 0.11048925668001175, "learning_rate": 1.4793145406611236e-05, "loss": 0.1866, "step": 40200 }, { "epoch": 2.201445545638723, "grad_norm": 0.10917674750089645, "learning_rate": 1.4788075441087001e-05, "loss": 0.1843, "step": 40205 }, { "epoch": 2.2017193232218144, "grad_norm": 0.10190431028604507, "learning_rate": 1.4783005475562766e-05, "loss": 0.1806, "step": 40210 }, { "epoch": 2.201993100804906, "grad_norm": 0.09668679535388947, "learning_rate": 1.4777935510038531e-05, "loss": 0.1854, "step": 40215 }, { "epoch": 2.2022668783879977, "grad_norm": 0.09532254934310913, "learning_rate": 1.4772865544514296e-05, "loss": 0.1802, "step": 40220 }, { "epoch": 2.202540655971089, "grad_norm": 0.1007944718003273, "learning_rate": 1.4767795578990063e-05, "loss": 0.182, "step": 40225 }, { "epoch": 2.2028144335541806, "grad_norm": 0.09394484013319016, "learning_rate": 1.476272561346583e-05, "loss": 0.1743, "step": 40230 }, { "epoch": 2.2030882111372723, "grad_norm": 0.10280007123947144, "learning_rate": 1.4757655647941595e-05, "loss": 0.1947, "step": 40235 }, { "epoch": 2.2033619887203635, "grad_norm": 0.09905005991458893, "learning_rate": 1.4752585682417361e-05, "loss": 0.1871, "step": 40240 }, { "epoch": 2.203635766303455, "grad_norm": 0.10355281829833984, "learning_rate": 1.4747515716893126e-05, "loss": 0.1812, "step": 40245 }, { "epoch": 2.2039095438865464, "grad_norm": 0.10036640614271164, "learning_rate": 1.4742445751368891e-05, "loss": 0.1858, "step": 40250 }, { "epoch": 2.204183321469638, "grad_norm": 0.09689967334270477, "learning_rate": 1.4737375785844656e-05, "loss": 0.185, "step": 40255 }, { "epoch": 2.2044570990527297, "grad_norm": 0.0870467945933342, "learning_rate": 1.4732305820320421e-05, "loss": 0.1741, "step": 40260 }, { "epoch": 2.204730876635821, "grad_norm": 0.09954776614904404, "learning_rate": 1.4727235854796186e-05, "loss": 0.1842, "step": 40265 }, { "epoch": 2.2050046542189126, "grad_norm": 0.11511655896902084, "learning_rate": 1.4722165889271955e-05, "loss": 0.1808, "step": 40270 }, { "epoch": 2.2052784318020042, "grad_norm": 0.10718125104904175, "learning_rate": 1.471709592374772e-05, "loss": 0.1911, "step": 40275 }, { "epoch": 2.2055522093850954, "grad_norm": 0.09056812524795532, "learning_rate": 1.4712025958223485e-05, "loss": 0.1825, "step": 40280 }, { "epoch": 2.205825986968187, "grad_norm": 0.10049278289079666, "learning_rate": 1.470695599269925e-05, "loss": 0.1874, "step": 40285 }, { "epoch": 2.2060997645512783, "grad_norm": 0.0935632660984993, "learning_rate": 1.4701886027175017e-05, "loss": 0.1805, "step": 40290 }, { "epoch": 2.20637354213437, "grad_norm": 0.10377585142850876, "learning_rate": 1.4696816061650782e-05, "loss": 0.1849, "step": 40295 }, { "epoch": 2.2066473197174616, "grad_norm": 0.09885457903146744, "learning_rate": 1.4691746096126547e-05, "loss": 0.1945, "step": 40300 }, { "epoch": 2.206921097300553, "grad_norm": 0.10135332494974136, "learning_rate": 1.4686676130602312e-05, "loss": 0.1768, "step": 40305 }, { "epoch": 2.2071948748836445, "grad_norm": 0.10239486396312714, "learning_rate": 1.468160616507808e-05, "loss": 0.1777, "step": 40310 }, { "epoch": 2.207468652466736, "grad_norm": 0.11055764555931091, "learning_rate": 1.4676536199553845e-05, "loss": 0.1838, "step": 40315 }, { "epoch": 2.2077424300498274, "grad_norm": 0.11178875714540482, "learning_rate": 1.467146623402961e-05, "loss": 0.1845, "step": 40320 }, { "epoch": 2.208016207632919, "grad_norm": 0.09728924185037613, "learning_rate": 1.4666396268505375e-05, "loss": 0.1773, "step": 40325 }, { "epoch": 2.2082899852160107, "grad_norm": 0.10573067516088486, "learning_rate": 1.466132630298114e-05, "loss": 0.1832, "step": 40330 }, { "epoch": 2.208563762799102, "grad_norm": 0.11831295490264893, "learning_rate": 1.4656256337456905e-05, "loss": 0.1838, "step": 40335 }, { "epoch": 2.2088375403821936, "grad_norm": 0.11109837144613266, "learning_rate": 1.4651186371932672e-05, "loss": 0.1884, "step": 40340 }, { "epoch": 2.209111317965285, "grad_norm": 0.11077888309955597, "learning_rate": 1.4646116406408437e-05, "loss": 0.1912, "step": 40345 }, { "epoch": 2.2093850955483765, "grad_norm": 0.10311539471149445, "learning_rate": 1.4641046440884203e-05, "loss": 0.1861, "step": 40350 }, { "epoch": 2.209658873131468, "grad_norm": 0.11044403165578842, "learning_rate": 1.463597647535997e-05, "loss": 0.1887, "step": 40355 }, { "epoch": 2.2099326507145594, "grad_norm": 0.107412189245224, "learning_rate": 1.4630906509835735e-05, "loss": 0.1819, "step": 40360 }, { "epoch": 2.210206428297651, "grad_norm": 0.09496244788169861, "learning_rate": 1.46258365443115e-05, "loss": 0.185, "step": 40365 }, { "epoch": 2.2104802058807427, "grad_norm": 0.10530674457550049, "learning_rate": 1.4620766578787265e-05, "loss": 0.1847, "step": 40370 }, { "epoch": 2.210753983463834, "grad_norm": 0.09033943712711334, "learning_rate": 1.461569661326303e-05, "loss": 0.1847, "step": 40375 }, { "epoch": 2.2110277610469256, "grad_norm": 0.09693527221679688, "learning_rate": 1.4610626647738795e-05, "loss": 0.1893, "step": 40380 }, { "epoch": 2.2113015386300168, "grad_norm": 0.10753358155488968, "learning_rate": 1.460555668221456e-05, "loss": 0.1972, "step": 40385 }, { "epoch": 2.2115753162131084, "grad_norm": 0.10271114110946655, "learning_rate": 1.4600486716690327e-05, "loss": 0.1856, "step": 40390 }, { "epoch": 2.2118490937962, "grad_norm": 0.09229617565870285, "learning_rate": 1.4595416751166093e-05, "loss": 0.1914, "step": 40395 }, { "epoch": 2.2121228713792913, "grad_norm": 0.10105101764202118, "learning_rate": 1.4590346785641858e-05, "loss": 0.1783, "step": 40400 }, { "epoch": 2.212396648962383, "grad_norm": 0.10061480849981308, "learning_rate": 1.4585276820117625e-05, "loss": 0.1874, "step": 40405 }, { "epoch": 2.2126704265454746, "grad_norm": 0.10203287750482559, "learning_rate": 1.458020685459339e-05, "loss": 0.1805, "step": 40410 }, { "epoch": 2.212944204128566, "grad_norm": 0.10872232913970947, "learning_rate": 1.4575136889069155e-05, "loss": 0.1773, "step": 40415 }, { "epoch": 2.2132179817116575, "grad_norm": 0.09730243682861328, "learning_rate": 1.457006692354492e-05, "loss": 0.1867, "step": 40420 }, { "epoch": 2.213491759294749, "grad_norm": 0.11582788825035095, "learning_rate": 1.4564996958020685e-05, "loss": 0.187, "step": 40425 }, { "epoch": 2.2137655368778404, "grad_norm": 0.14083679020404816, "learning_rate": 1.455992699249645e-05, "loss": 0.1911, "step": 40430 }, { "epoch": 2.214039314460932, "grad_norm": 0.10227849334478378, "learning_rate": 1.4554857026972218e-05, "loss": 0.1855, "step": 40435 }, { "epoch": 2.2143130920440233, "grad_norm": 0.09923503547906876, "learning_rate": 1.4549787061447983e-05, "loss": 0.1812, "step": 40440 }, { "epoch": 2.214586869627115, "grad_norm": 0.1120222955942154, "learning_rate": 1.4544717095923748e-05, "loss": 0.1808, "step": 40445 }, { "epoch": 2.2148606472102066, "grad_norm": 0.10459109395742416, "learning_rate": 1.4539647130399513e-05, "loss": 0.1849, "step": 40450 }, { "epoch": 2.215134424793298, "grad_norm": 0.09549857676029205, "learning_rate": 1.453457716487528e-05, "loss": 0.1828, "step": 40455 }, { "epoch": 2.2154082023763895, "grad_norm": 0.10753439366817474, "learning_rate": 1.4529507199351045e-05, "loss": 0.1876, "step": 40460 }, { "epoch": 2.2156819799594807, "grad_norm": 0.09169381111860275, "learning_rate": 1.452443723382681e-05, "loss": 0.1802, "step": 40465 }, { "epoch": 2.2159557575425723, "grad_norm": 0.09711705893278122, "learning_rate": 1.4519367268302575e-05, "loss": 0.1901, "step": 40470 }, { "epoch": 2.216229535125664, "grad_norm": 0.09102559089660645, "learning_rate": 1.4514297302778344e-05, "loss": 0.1784, "step": 40475 }, { "epoch": 2.2165033127087552, "grad_norm": 0.10338005423545837, "learning_rate": 1.4509227337254108e-05, "loss": 0.1846, "step": 40480 }, { "epoch": 2.216777090291847, "grad_norm": 0.09729132801294327, "learning_rate": 1.4504157371729873e-05, "loss": 0.1787, "step": 40485 }, { "epoch": 2.2170508678749385, "grad_norm": 0.1030944213271141, "learning_rate": 1.4499087406205638e-05, "loss": 0.1872, "step": 40490 }, { "epoch": 2.2173246454580298, "grad_norm": 0.09800469875335693, "learning_rate": 1.4494017440681403e-05, "loss": 0.1852, "step": 40495 }, { "epoch": 2.2175984230411214, "grad_norm": 0.09545637667179108, "learning_rate": 1.4488947475157168e-05, "loss": 0.196, "step": 40500 }, { "epoch": 2.217872200624213, "grad_norm": 0.09549784660339355, "learning_rate": 1.4483877509632933e-05, "loss": 0.1846, "step": 40505 }, { "epoch": 2.2181459782073043, "grad_norm": 0.10195209830999374, "learning_rate": 1.44788075441087e-05, "loss": 0.1871, "step": 40510 }, { "epoch": 2.218419755790396, "grad_norm": 0.10982973128557205, "learning_rate": 1.4473737578584467e-05, "loss": 0.1862, "step": 40515 }, { "epoch": 2.218693533373487, "grad_norm": 0.13300514221191406, "learning_rate": 1.4468667613060234e-05, "loss": 0.1921, "step": 40520 }, { "epoch": 2.218967310956579, "grad_norm": 0.09874072670936584, "learning_rate": 1.4463597647535999e-05, "loss": 0.1853, "step": 40525 }, { "epoch": 2.2192410885396705, "grad_norm": 0.1016993448138237, "learning_rate": 1.4458527682011764e-05, "loss": 0.185, "step": 40530 }, { "epoch": 2.2195148661227617, "grad_norm": 0.10614906251430511, "learning_rate": 1.4453457716487529e-05, "loss": 0.1857, "step": 40535 }, { "epoch": 2.2197886437058534, "grad_norm": 0.09938119351863861, "learning_rate": 1.4448387750963294e-05, "loss": 0.1852, "step": 40540 }, { "epoch": 2.220062421288945, "grad_norm": 0.09691956639289856, "learning_rate": 1.4443317785439059e-05, "loss": 0.1863, "step": 40545 }, { "epoch": 2.2203361988720363, "grad_norm": 0.09546605497598648, "learning_rate": 1.4438247819914824e-05, "loss": 0.1776, "step": 40550 }, { "epoch": 2.220609976455128, "grad_norm": 0.10708858072757721, "learning_rate": 1.4433177854390592e-05, "loss": 0.1852, "step": 40555 }, { "epoch": 2.220883754038219, "grad_norm": 0.1050800308585167, "learning_rate": 1.4428107888866357e-05, "loss": 0.1816, "step": 40560 }, { "epoch": 2.221157531621311, "grad_norm": 0.09988722205162048, "learning_rate": 1.4423037923342122e-05, "loss": 0.1833, "step": 40565 }, { "epoch": 2.2214313092044025, "grad_norm": 0.10214812308549881, "learning_rate": 1.4417967957817887e-05, "loss": 0.189, "step": 40570 }, { "epoch": 2.2217050867874937, "grad_norm": 0.11007920652627945, "learning_rate": 1.4412897992293654e-05, "loss": 0.1803, "step": 40575 }, { "epoch": 2.2219788643705853, "grad_norm": 0.10998901724815369, "learning_rate": 1.4407828026769419e-05, "loss": 0.1923, "step": 40580 }, { "epoch": 2.222252641953677, "grad_norm": 0.1103132888674736, "learning_rate": 1.4402758061245184e-05, "loss": 0.1823, "step": 40585 }, { "epoch": 2.222526419536768, "grad_norm": 0.10736851394176483, "learning_rate": 1.4397688095720949e-05, "loss": 0.1873, "step": 40590 }, { "epoch": 2.22280019711986, "grad_norm": 0.10253860056400299, "learning_rate": 1.4392618130196717e-05, "loss": 0.1737, "step": 40595 }, { "epoch": 2.2230739747029515, "grad_norm": 0.09916577488183975, "learning_rate": 1.4387548164672482e-05, "loss": 0.1783, "step": 40600 }, { "epoch": 2.2233477522860428, "grad_norm": 0.11456939578056335, "learning_rate": 1.4382478199148247e-05, "loss": 0.1932, "step": 40605 }, { "epoch": 2.2236215298691344, "grad_norm": 0.09508129209280014, "learning_rate": 1.4377408233624012e-05, "loss": 0.1872, "step": 40610 }, { "epoch": 2.2238953074522256, "grad_norm": 0.09632720798254013, "learning_rate": 1.4372338268099777e-05, "loss": 0.1817, "step": 40615 }, { "epoch": 2.2241690850353173, "grad_norm": 0.10708621144294739, "learning_rate": 1.4367268302575542e-05, "loss": 0.1867, "step": 40620 }, { "epoch": 2.224442862618409, "grad_norm": 0.09675903618335724, "learning_rate": 1.4362198337051309e-05, "loss": 0.1895, "step": 40625 }, { "epoch": 2.2247166402015, "grad_norm": 0.1026313528418541, "learning_rate": 1.4357128371527074e-05, "loss": 0.1885, "step": 40630 }, { "epoch": 2.224990417784592, "grad_norm": 0.10234194993972778, "learning_rate": 1.435205840600284e-05, "loss": 0.1902, "step": 40635 }, { "epoch": 2.2252641953676835, "grad_norm": 0.10640605539083481, "learning_rate": 1.4346988440478607e-05, "loss": 0.1894, "step": 40640 }, { "epoch": 2.2255379729507747, "grad_norm": 0.10120875388383865, "learning_rate": 1.4341918474954372e-05, "loss": 0.1834, "step": 40645 }, { "epoch": 2.2258117505338664, "grad_norm": 0.10251607745885849, "learning_rate": 1.4336848509430137e-05, "loss": 0.1866, "step": 40650 }, { "epoch": 2.2260855281169576, "grad_norm": 0.09488438069820404, "learning_rate": 1.4331778543905902e-05, "loss": 0.1829, "step": 40655 }, { "epoch": 2.2263593057000493, "grad_norm": 0.09940876811742783, "learning_rate": 1.4326708578381667e-05, "loss": 0.1695, "step": 40660 }, { "epoch": 2.226633083283141, "grad_norm": 0.10883704572916031, "learning_rate": 1.4321638612857432e-05, "loss": 0.1797, "step": 40665 }, { "epoch": 2.226906860866232, "grad_norm": 0.10295246541500092, "learning_rate": 1.4316568647333197e-05, "loss": 0.181, "step": 40670 }, { "epoch": 2.227180638449324, "grad_norm": 0.0956542119383812, "learning_rate": 1.4311498681808964e-05, "loss": 0.1909, "step": 40675 }, { "epoch": 2.2274544160324155, "grad_norm": 0.0940789207816124, "learning_rate": 1.430642871628473e-05, "loss": 0.1829, "step": 40680 }, { "epoch": 2.2277281936155067, "grad_norm": 0.1110207587480545, "learning_rate": 1.4301358750760495e-05, "loss": 0.1784, "step": 40685 }, { "epoch": 2.2280019711985983, "grad_norm": 0.11023125797510147, "learning_rate": 1.4296288785236262e-05, "loss": 0.1882, "step": 40690 }, { "epoch": 2.22827574878169, "grad_norm": 0.10587551444768906, "learning_rate": 1.4291218819712027e-05, "loss": 0.1853, "step": 40695 }, { "epoch": 2.228549526364781, "grad_norm": 0.11073237657546997, "learning_rate": 1.4286148854187792e-05, "loss": 0.1861, "step": 40700 }, { "epoch": 2.228823303947873, "grad_norm": 0.09783535450696945, "learning_rate": 1.4281078888663557e-05, "loss": 0.1847, "step": 40705 }, { "epoch": 2.229097081530964, "grad_norm": 0.09674584865570068, "learning_rate": 1.4276008923139322e-05, "loss": 0.1811, "step": 40710 }, { "epoch": 2.2293708591140557, "grad_norm": 0.10393312573432922, "learning_rate": 1.4270938957615087e-05, "loss": 0.1837, "step": 40715 }, { "epoch": 2.2296446366971474, "grad_norm": 0.1104050949215889, "learning_rate": 1.4265868992090856e-05, "loss": 0.1893, "step": 40720 }, { "epoch": 2.2299184142802386, "grad_norm": 0.10283147543668747, "learning_rate": 1.426079902656662e-05, "loss": 0.1875, "step": 40725 }, { "epoch": 2.2301921918633303, "grad_norm": 0.10418841242790222, "learning_rate": 1.4255729061042386e-05, "loss": 0.1807, "step": 40730 }, { "epoch": 2.2304659694464215, "grad_norm": 0.09924541413784027, "learning_rate": 1.425065909551815e-05, "loss": 0.187, "step": 40735 }, { "epoch": 2.230739747029513, "grad_norm": 0.09796470403671265, "learning_rate": 1.4245589129993917e-05, "loss": 0.1826, "step": 40740 }, { "epoch": 2.231013524612605, "grad_norm": 0.11747688055038452, "learning_rate": 1.4240519164469682e-05, "loss": 0.1948, "step": 40745 }, { "epoch": 2.231287302195696, "grad_norm": 0.10987100005149841, "learning_rate": 1.4235449198945447e-05, "loss": 0.1854, "step": 40750 }, { "epoch": 2.2315610797787877, "grad_norm": 0.09986120462417603, "learning_rate": 1.4230379233421212e-05, "loss": 0.1803, "step": 40755 }, { "epoch": 2.2318348573618794, "grad_norm": 0.10501012206077576, "learning_rate": 1.422530926789698e-05, "loss": 0.187, "step": 40760 }, { "epoch": 2.2321086349449706, "grad_norm": 0.09658972918987274, "learning_rate": 1.4220239302372746e-05, "loss": 0.1796, "step": 40765 }, { "epoch": 2.2323824125280622, "grad_norm": 0.11871190369129181, "learning_rate": 1.421516933684851e-05, "loss": 0.18, "step": 40770 }, { "epoch": 2.232656190111154, "grad_norm": 0.10077200829982758, "learning_rate": 1.4210099371324276e-05, "loss": 0.1903, "step": 40775 }, { "epoch": 2.232929967694245, "grad_norm": 0.09089154750108719, "learning_rate": 1.420502940580004e-05, "loss": 0.1794, "step": 40780 }, { "epoch": 2.233203745277337, "grad_norm": 0.12282971292734146, "learning_rate": 1.4199959440275806e-05, "loss": 0.1868, "step": 40785 }, { "epoch": 2.233477522860428, "grad_norm": 0.0947708860039711, "learning_rate": 1.4194889474751572e-05, "loss": 0.1768, "step": 40790 }, { "epoch": 2.2337513004435197, "grad_norm": 0.10398297756910324, "learning_rate": 1.4189819509227337e-05, "loss": 0.1791, "step": 40795 }, { "epoch": 2.2340250780266113, "grad_norm": 0.10915011912584305, "learning_rate": 1.4184749543703104e-05, "loss": 0.19, "step": 40800 }, { "epoch": 2.2342988556097025, "grad_norm": 0.09309860318899155, "learning_rate": 1.417967957817887e-05, "loss": 0.1817, "step": 40805 }, { "epoch": 2.234572633192794, "grad_norm": 0.09796267747879028, "learning_rate": 1.4174609612654636e-05, "loss": 0.18, "step": 40810 }, { "epoch": 2.234846410775886, "grad_norm": 0.0938730239868164, "learning_rate": 1.41695396471304e-05, "loss": 0.1885, "step": 40815 }, { "epoch": 2.235120188358977, "grad_norm": 0.09720876067876816, "learning_rate": 1.4164469681606166e-05, "loss": 0.1919, "step": 40820 }, { "epoch": 2.2353939659420687, "grad_norm": 0.10082047432661057, "learning_rate": 1.415939971608193e-05, "loss": 0.1878, "step": 40825 }, { "epoch": 2.23566774352516, "grad_norm": 0.10297410190105438, "learning_rate": 1.4154329750557696e-05, "loss": 0.1843, "step": 40830 }, { "epoch": 2.2359415211082516, "grad_norm": 0.09788815677165985, "learning_rate": 1.414925978503346e-05, "loss": 0.1873, "step": 40835 }, { "epoch": 2.2362152986913433, "grad_norm": 0.1204514279961586, "learning_rate": 1.4144189819509229e-05, "loss": 0.1868, "step": 40840 }, { "epoch": 2.2364890762744345, "grad_norm": 0.10465264320373535, "learning_rate": 1.4139119853984994e-05, "loss": 0.1882, "step": 40845 }, { "epoch": 2.236762853857526, "grad_norm": 0.09636274725198746, "learning_rate": 1.4134049888460759e-05, "loss": 0.1844, "step": 40850 }, { "epoch": 2.237036631440618, "grad_norm": 0.11024843901395798, "learning_rate": 1.4128979922936526e-05, "loss": 0.1792, "step": 40855 }, { "epoch": 2.237310409023709, "grad_norm": 0.09799376130104065, "learning_rate": 1.412390995741229e-05, "loss": 0.1823, "step": 40860 }, { "epoch": 2.2375841866068007, "grad_norm": 0.1065327376127243, "learning_rate": 1.4118839991888056e-05, "loss": 0.1906, "step": 40865 }, { "epoch": 2.2378579641898924, "grad_norm": 0.10127045959234238, "learning_rate": 1.411377002636382e-05, "loss": 0.1866, "step": 40870 }, { "epoch": 2.2381317417729836, "grad_norm": 0.09033747762441635, "learning_rate": 1.4108700060839586e-05, "loss": 0.1815, "step": 40875 }, { "epoch": 2.2384055193560752, "grad_norm": 0.11822931468486786, "learning_rate": 1.4103630095315354e-05, "loss": 0.1913, "step": 40880 }, { "epoch": 2.2386792969391665, "grad_norm": 0.10091247409582138, "learning_rate": 1.4098560129791119e-05, "loss": 0.1898, "step": 40885 }, { "epoch": 2.238953074522258, "grad_norm": 0.09074476361274719, "learning_rate": 1.4093490164266884e-05, "loss": 0.1792, "step": 40890 }, { "epoch": 2.2392268521053498, "grad_norm": 0.10279318690299988, "learning_rate": 1.4088420198742649e-05, "loss": 0.1809, "step": 40895 }, { "epoch": 2.239500629688441, "grad_norm": 0.09650986641645432, "learning_rate": 1.4083350233218414e-05, "loss": 0.1834, "step": 40900 }, { "epoch": 2.2397744072715327, "grad_norm": 0.09436120092868805, "learning_rate": 1.4078280267694179e-05, "loss": 0.1871, "step": 40905 }, { "epoch": 2.240048184854624, "grad_norm": 0.09716974943876266, "learning_rate": 1.4073210302169946e-05, "loss": 0.18, "step": 40910 }, { "epoch": 2.2403219624377155, "grad_norm": 0.10934276133775711, "learning_rate": 1.406814033664571e-05, "loss": 0.1828, "step": 40915 }, { "epoch": 2.240595740020807, "grad_norm": 0.10416768491268158, "learning_rate": 1.406307037112148e-05, "loss": 0.1985, "step": 40920 }, { "epoch": 2.2408695176038984, "grad_norm": 0.1052105501294136, "learning_rate": 1.4058000405597244e-05, "loss": 0.1857, "step": 40925 }, { "epoch": 2.24114329518699, "grad_norm": 0.1120087057352066, "learning_rate": 1.405293044007301e-05, "loss": 0.1813, "step": 40930 }, { "epoch": 2.2414170727700817, "grad_norm": 0.10048895329236984, "learning_rate": 1.4047860474548774e-05, "loss": 0.1846, "step": 40935 }, { "epoch": 2.241690850353173, "grad_norm": 0.10569155216217041, "learning_rate": 1.404279050902454e-05, "loss": 0.1845, "step": 40940 }, { "epoch": 2.2419646279362646, "grad_norm": 0.10814271122217178, "learning_rate": 1.4037720543500304e-05, "loss": 0.178, "step": 40945 }, { "epoch": 2.2422384055193563, "grad_norm": 0.09914222359657288, "learning_rate": 1.403265057797607e-05, "loss": 0.1885, "step": 40950 }, { "epoch": 2.2425121831024475, "grad_norm": 0.1317281275987625, "learning_rate": 1.4027580612451834e-05, "loss": 0.1775, "step": 40955 }, { "epoch": 2.242785960685539, "grad_norm": 0.10537948459386826, "learning_rate": 1.4022510646927601e-05, "loss": 0.1925, "step": 40960 }, { "epoch": 2.2430597382686304, "grad_norm": 0.10730981081724167, "learning_rate": 1.4017440681403368e-05, "loss": 0.1833, "step": 40965 }, { "epoch": 2.243333515851722, "grad_norm": 0.09878210723400116, "learning_rate": 1.4012370715879133e-05, "loss": 0.1918, "step": 40970 }, { "epoch": 2.2436072934348137, "grad_norm": 0.1013360396027565, "learning_rate": 1.40073007503549e-05, "loss": 0.177, "step": 40975 }, { "epoch": 2.243881071017905, "grad_norm": 0.1126856803894043, "learning_rate": 1.4002230784830664e-05, "loss": 0.1907, "step": 40980 }, { "epoch": 2.2441548486009966, "grad_norm": 0.09773527085781097, "learning_rate": 1.399716081930643e-05, "loss": 0.1887, "step": 40985 }, { "epoch": 2.2444286261840882, "grad_norm": 0.0974610298871994, "learning_rate": 1.3992090853782194e-05, "loss": 0.1842, "step": 40990 }, { "epoch": 2.2447024037671794, "grad_norm": 0.09470871090888977, "learning_rate": 1.398702088825796e-05, "loss": 0.1841, "step": 40995 }, { "epoch": 2.244976181350271, "grad_norm": 0.09959852695465088, "learning_rate": 1.3981950922733724e-05, "loss": 0.1777, "step": 41000 }, { "epoch": 2.2452499589333623, "grad_norm": 0.0987277552485466, "learning_rate": 1.3976880957209493e-05, "loss": 0.1826, "step": 41005 }, { "epoch": 2.245523736516454, "grad_norm": 0.10086732357740402, "learning_rate": 1.3971810991685258e-05, "loss": 0.1801, "step": 41010 }, { "epoch": 2.2457975140995456, "grad_norm": 0.11413417756557465, "learning_rate": 1.3966741026161023e-05, "loss": 0.1853, "step": 41015 }, { "epoch": 2.246071291682637, "grad_norm": 0.10650953650474548, "learning_rate": 1.3961671060636788e-05, "loss": 0.1874, "step": 41020 }, { "epoch": 2.2463450692657285, "grad_norm": 0.09821593016386032, "learning_rate": 1.3956601095112554e-05, "loss": 0.1871, "step": 41025 }, { "epoch": 2.24661884684882, "grad_norm": 0.10032104700803757, "learning_rate": 1.395153112958832e-05, "loss": 0.1817, "step": 41030 }, { "epoch": 2.2468926244319114, "grad_norm": 0.0963156595826149, "learning_rate": 1.3946461164064084e-05, "loss": 0.1951, "step": 41035 }, { "epoch": 2.247166402015003, "grad_norm": 0.09586146473884583, "learning_rate": 1.394139119853985e-05, "loss": 0.1862, "step": 41040 }, { "epoch": 2.2474401795980947, "grad_norm": 0.09945333003997803, "learning_rate": 1.3936321233015618e-05, "loss": 0.1797, "step": 41045 }, { "epoch": 2.247713957181186, "grad_norm": 0.10803482681512833, "learning_rate": 1.3931251267491383e-05, "loss": 0.1934, "step": 41050 }, { "epoch": 2.2479877347642776, "grad_norm": 0.09671062976121902, "learning_rate": 1.3926181301967148e-05, "loss": 0.1807, "step": 41055 }, { "epoch": 2.248261512347369, "grad_norm": 0.10877598822116852, "learning_rate": 1.3921111336442913e-05, "loss": 0.1811, "step": 41060 }, { "epoch": 2.2485352899304605, "grad_norm": 0.10898376256227493, "learning_rate": 1.3916041370918678e-05, "loss": 0.1884, "step": 41065 }, { "epoch": 2.248809067513552, "grad_norm": 0.11518579721450806, "learning_rate": 1.3910971405394443e-05, "loss": 0.182, "step": 41070 }, { "epoch": 2.2490828450966434, "grad_norm": 0.09886952489614487, "learning_rate": 1.390590143987021e-05, "loss": 0.1877, "step": 41075 }, { "epoch": 2.249356622679735, "grad_norm": 0.08899125456809998, "learning_rate": 1.3900831474345974e-05, "loss": 0.1782, "step": 41080 }, { "epoch": 2.2496304002628267, "grad_norm": 0.10680528730154037, "learning_rate": 1.3895761508821741e-05, "loss": 0.1927, "step": 41085 }, { "epoch": 2.249904177845918, "grad_norm": 0.11012183129787445, "learning_rate": 1.3890691543297508e-05, "loss": 0.1845, "step": 41090 }, { "epoch": 2.2501779554290096, "grad_norm": 0.11059250682592392, "learning_rate": 1.3885621577773273e-05, "loss": 0.1855, "step": 41095 }, { "epoch": 2.2504517330121008, "grad_norm": 0.09933041036128998, "learning_rate": 1.3880551612249038e-05, "loss": 0.1877, "step": 41100 }, { "epoch": 2.2507255105951924, "grad_norm": 0.11326534301042557, "learning_rate": 1.3875481646724803e-05, "loss": 0.191, "step": 41105 }, { "epoch": 2.250999288178284, "grad_norm": 0.10013479739427567, "learning_rate": 1.3870411681200568e-05, "loss": 0.1825, "step": 41110 }, { "epoch": 2.2512730657613753, "grad_norm": 0.10434222966432571, "learning_rate": 1.3865341715676333e-05, "loss": 0.1833, "step": 41115 }, { "epoch": 2.251546843344467, "grad_norm": 0.10152486711740494, "learning_rate": 1.3860271750152098e-05, "loss": 0.1857, "step": 41120 }, { "epoch": 2.2518206209275586, "grad_norm": 0.09344097971916199, "learning_rate": 1.3855201784627866e-05, "loss": 0.1791, "step": 41125 }, { "epoch": 2.25209439851065, "grad_norm": 0.09327199310064316, "learning_rate": 1.3850131819103631e-05, "loss": 0.177, "step": 41130 }, { "epoch": 2.2523681760937415, "grad_norm": 0.11261142790317535, "learning_rate": 1.3845061853579396e-05, "loss": 0.1901, "step": 41135 }, { "epoch": 2.252641953676833, "grad_norm": 0.10262145847082138, "learning_rate": 1.3839991888055163e-05, "loss": 0.1893, "step": 41140 }, { "epoch": 2.2529157312599244, "grad_norm": 0.1070597842335701, "learning_rate": 1.3834921922530928e-05, "loss": 0.1919, "step": 41145 }, { "epoch": 2.253189508843016, "grad_norm": 0.10460013151168823, "learning_rate": 1.3829851957006693e-05, "loss": 0.1722, "step": 41150 }, { "epoch": 2.2534632864261073, "grad_norm": 0.09794186800718307, "learning_rate": 1.3824781991482458e-05, "loss": 0.1762, "step": 41155 }, { "epoch": 2.253737064009199, "grad_norm": 0.10266757011413574, "learning_rate": 1.3819712025958223e-05, "loss": 0.1803, "step": 41160 }, { "epoch": 2.2540108415922906, "grad_norm": 0.09962110221385956, "learning_rate": 1.3814642060433991e-05, "loss": 0.184, "step": 41165 }, { "epoch": 2.254284619175382, "grad_norm": 0.1022232249379158, "learning_rate": 1.3809572094909756e-05, "loss": 0.1844, "step": 41170 }, { "epoch": 2.2545583967584735, "grad_norm": 0.11350217461585999, "learning_rate": 1.3804502129385521e-05, "loss": 0.1867, "step": 41175 }, { "epoch": 2.2548321743415647, "grad_norm": 0.11348321288824081, "learning_rate": 1.3799432163861286e-05, "loss": 0.1799, "step": 41180 }, { "epoch": 2.2551059519246563, "grad_norm": 0.09685854613780975, "learning_rate": 1.3794362198337051e-05, "loss": 0.1912, "step": 41185 }, { "epoch": 2.255379729507748, "grad_norm": 0.09307413548231125, "learning_rate": 1.3789292232812818e-05, "loss": 0.1774, "step": 41190 }, { "epoch": 2.2556535070908392, "grad_norm": 0.09929604828357697, "learning_rate": 1.3784222267288583e-05, "loss": 0.1792, "step": 41195 }, { "epoch": 2.255927284673931, "grad_norm": 0.0890815407037735, "learning_rate": 1.3779152301764348e-05, "loss": 0.1843, "step": 41200 }, { "epoch": 2.2562010622570225, "grad_norm": 0.10674907267093658, "learning_rate": 1.3774082336240116e-05, "loss": 0.1761, "step": 41205 }, { "epoch": 2.2564748398401138, "grad_norm": 0.10424605756998062, "learning_rate": 1.3769012370715881e-05, "loss": 0.188, "step": 41210 }, { "epoch": 2.2567486174232054, "grad_norm": 0.09541741758584976, "learning_rate": 1.3763942405191646e-05, "loss": 0.1803, "step": 41215 }, { "epoch": 2.257022395006297, "grad_norm": 0.1221439316868782, "learning_rate": 1.3758872439667411e-05, "loss": 0.1897, "step": 41220 }, { "epoch": 2.2572961725893883, "grad_norm": 0.09832751750946045, "learning_rate": 1.3753802474143176e-05, "loss": 0.1855, "step": 41225 }, { "epoch": 2.25756995017248, "grad_norm": 0.0963604599237442, "learning_rate": 1.3748732508618941e-05, "loss": 0.1802, "step": 41230 }, { "epoch": 2.257843727755571, "grad_norm": 0.09617149084806442, "learning_rate": 1.3743662543094706e-05, "loss": 0.1822, "step": 41235 }, { "epoch": 2.258117505338663, "grad_norm": 0.1036391407251358, "learning_rate": 1.3738592577570471e-05, "loss": 0.183, "step": 41240 }, { "epoch": 2.2583912829217545, "grad_norm": 0.09904374182224274, "learning_rate": 1.3733522612046238e-05, "loss": 0.1795, "step": 41245 }, { "epoch": 2.2586650605048457, "grad_norm": 0.09770121425390244, "learning_rate": 1.3728452646522005e-05, "loss": 0.1892, "step": 41250 }, { "epoch": 2.2589388380879374, "grad_norm": 0.1066843569278717, "learning_rate": 1.3723382680997771e-05, "loss": 0.1865, "step": 41255 }, { "epoch": 2.259212615671029, "grad_norm": 0.10027941316366196, "learning_rate": 1.3718312715473536e-05, "loss": 0.184, "step": 41260 }, { "epoch": 2.2594863932541203, "grad_norm": 0.0986623466014862, "learning_rate": 1.3713242749949301e-05, "loss": 0.1807, "step": 41265 }, { "epoch": 2.259760170837212, "grad_norm": 0.09822666645050049, "learning_rate": 1.3708172784425066e-05, "loss": 0.1738, "step": 41270 }, { "epoch": 2.260033948420303, "grad_norm": 0.10092794895172119, "learning_rate": 1.3703102818900831e-05, "loss": 0.1761, "step": 41275 }, { "epoch": 2.260307726003395, "grad_norm": 0.11301422864198685, "learning_rate": 1.3698032853376596e-05, "loss": 0.1787, "step": 41280 }, { "epoch": 2.2605815035864865, "grad_norm": 0.08750457316637039, "learning_rate": 1.3692962887852361e-05, "loss": 0.1827, "step": 41285 }, { "epoch": 2.2608552811695777, "grad_norm": 0.10094507038593292, "learning_rate": 1.368789292232813e-05, "loss": 0.1809, "step": 41290 }, { "epoch": 2.2611290587526693, "grad_norm": 0.0907929390668869, "learning_rate": 1.3682822956803895e-05, "loss": 0.1788, "step": 41295 }, { "epoch": 2.261402836335761, "grad_norm": 0.10004288703203201, "learning_rate": 1.367775299127966e-05, "loss": 0.1852, "step": 41300 }, { "epoch": 2.261676613918852, "grad_norm": 0.10423902422189713, "learning_rate": 1.3672683025755425e-05, "loss": 0.1906, "step": 41305 }, { "epoch": 2.261950391501944, "grad_norm": 0.10592292249202728, "learning_rate": 1.3667613060231191e-05, "loss": 0.1868, "step": 41310 }, { "epoch": 2.2622241690850355, "grad_norm": 0.1017584577202797, "learning_rate": 1.3662543094706956e-05, "loss": 0.1825, "step": 41315 }, { "epoch": 2.2624979466681268, "grad_norm": 0.1030174195766449, "learning_rate": 1.3657473129182721e-05, "loss": 0.1869, "step": 41320 }, { "epoch": 2.2627717242512184, "grad_norm": 0.09808380901813507, "learning_rate": 1.3652403163658486e-05, "loss": 0.1966, "step": 41325 }, { "epoch": 2.2630455018343096, "grad_norm": 0.10664746165275574, "learning_rate": 1.3647333198134255e-05, "loss": 0.1774, "step": 41330 }, { "epoch": 2.2633192794174013, "grad_norm": 0.10596118122339249, "learning_rate": 1.364226323261002e-05, "loss": 0.1912, "step": 41335 }, { "epoch": 2.263593057000493, "grad_norm": 0.09722492843866348, "learning_rate": 1.3637193267085785e-05, "loss": 0.1777, "step": 41340 }, { "epoch": 2.263866834583584, "grad_norm": 0.09466185420751572, "learning_rate": 1.363212330156155e-05, "loss": 0.1892, "step": 41345 }, { "epoch": 2.264140612166676, "grad_norm": 0.09185106307268143, "learning_rate": 1.3627053336037315e-05, "loss": 0.1825, "step": 41350 }, { "epoch": 2.264414389749767, "grad_norm": 0.09927211701869965, "learning_rate": 1.362198337051308e-05, "loss": 0.1836, "step": 41355 }, { "epoch": 2.2646881673328587, "grad_norm": 0.10275242477655411, "learning_rate": 1.3616913404988847e-05, "loss": 0.1927, "step": 41360 }, { "epoch": 2.2649619449159504, "grad_norm": 0.11221839487552643, "learning_rate": 1.3611843439464612e-05, "loss": 0.1812, "step": 41365 }, { "epoch": 2.2652357224990416, "grad_norm": 0.10393305867910385, "learning_rate": 1.3606773473940378e-05, "loss": 0.1834, "step": 41370 }, { "epoch": 2.2655095000821333, "grad_norm": 0.10703404247760773, "learning_rate": 1.3601703508416145e-05, "loss": 0.186, "step": 41375 }, { "epoch": 2.265783277665225, "grad_norm": 0.10817929357290268, "learning_rate": 1.359663354289191e-05, "loss": 0.1885, "step": 41380 }, { "epoch": 2.266057055248316, "grad_norm": 0.09635531157255173, "learning_rate": 1.3591563577367675e-05, "loss": 0.1845, "step": 41385 }, { "epoch": 2.266330832831408, "grad_norm": 0.11056249588727951, "learning_rate": 1.358649361184344e-05, "loss": 0.1812, "step": 41390 }, { "epoch": 2.2666046104144995, "grad_norm": 0.09871792048215866, "learning_rate": 1.3581423646319205e-05, "loss": 0.179, "step": 41395 }, { "epoch": 2.2668783879975907, "grad_norm": 0.10209421068429947, "learning_rate": 1.357635368079497e-05, "loss": 0.1911, "step": 41400 }, { "epoch": 2.2671521655806823, "grad_norm": 0.10503952950239182, "learning_rate": 1.3571283715270735e-05, "loss": 0.1841, "step": 41405 }, { "epoch": 2.267425943163774, "grad_norm": 0.09939110279083252, "learning_rate": 1.3566213749746503e-05, "loss": 0.18, "step": 41410 }, { "epoch": 2.267699720746865, "grad_norm": 0.099647156894207, "learning_rate": 1.3561143784222268e-05, "loss": 0.1754, "step": 41415 }, { "epoch": 2.267973498329957, "grad_norm": 0.09178414940834045, "learning_rate": 1.3556073818698033e-05, "loss": 0.1794, "step": 41420 }, { "epoch": 2.268247275913048, "grad_norm": 0.09849527478218079, "learning_rate": 1.35510038531738e-05, "loss": 0.1775, "step": 41425 }, { "epoch": 2.2685210534961397, "grad_norm": 0.10432475060224533, "learning_rate": 1.3545933887649565e-05, "loss": 0.1837, "step": 41430 }, { "epoch": 2.2687948310792314, "grad_norm": 0.10211288928985596, "learning_rate": 1.354086392212533e-05, "loss": 0.1874, "step": 41435 }, { "epoch": 2.2690686086623226, "grad_norm": 0.09708049148321152, "learning_rate": 1.3535793956601095e-05, "loss": 0.1873, "step": 41440 }, { "epoch": 2.2693423862454143, "grad_norm": 0.10120492428541183, "learning_rate": 1.353072399107686e-05, "loss": 0.1814, "step": 41445 }, { "epoch": 2.2696161638285055, "grad_norm": 0.09256169945001602, "learning_rate": 1.3525654025552628e-05, "loss": 0.1806, "step": 41450 }, { "epoch": 2.269889941411597, "grad_norm": 0.10353324562311172, "learning_rate": 1.3520584060028393e-05, "loss": 0.1824, "step": 41455 }, { "epoch": 2.270163718994689, "grad_norm": 0.1253502517938614, "learning_rate": 1.3515514094504158e-05, "loss": 0.1841, "step": 41460 }, { "epoch": 2.27043749657778, "grad_norm": 0.10939069837331772, "learning_rate": 1.3510444128979923e-05, "loss": 0.1792, "step": 41465 }, { "epoch": 2.2707112741608717, "grad_norm": 0.10369033366441727, "learning_rate": 1.3505374163455688e-05, "loss": 0.1865, "step": 41470 }, { "epoch": 2.2709850517439634, "grad_norm": 0.09362860023975372, "learning_rate": 1.3500304197931455e-05, "loss": 0.1825, "step": 41475 }, { "epoch": 2.2712588293270546, "grad_norm": 0.11431045085191727, "learning_rate": 1.349523423240722e-05, "loss": 0.1916, "step": 41480 }, { "epoch": 2.2715326069101462, "grad_norm": 0.10556834936141968, "learning_rate": 1.3490164266882985e-05, "loss": 0.1799, "step": 41485 }, { "epoch": 2.271806384493238, "grad_norm": 0.10675966739654541, "learning_rate": 1.3485094301358753e-05, "loss": 0.1929, "step": 41490 }, { "epoch": 2.272080162076329, "grad_norm": 0.17052631080150604, "learning_rate": 1.3480024335834518e-05, "loss": 0.1825, "step": 41495 }, { "epoch": 2.272353939659421, "grad_norm": 0.09897313266992569, "learning_rate": 1.3474954370310283e-05, "loss": 0.1866, "step": 41500 }, { "epoch": 2.272627717242512, "grad_norm": 0.10302560031414032, "learning_rate": 1.3469884404786048e-05, "loss": 0.176, "step": 41505 }, { "epoch": 2.2729014948256037, "grad_norm": 0.11443886160850525, "learning_rate": 1.3464814439261813e-05, "loss": 0.1862, "step": 41510 }, { "epoch": 2.2731752724086953, "grad_norm": 0.10330365598201752, "learning_rate": 1.3459744473737578e-05, "loss": 0.1819, "step": 41515 }, { "epoch": 2.2734490499917865, "grad_norm": 0.09994687885046005, "learning_rate": 1.3454674508213343e-05, "loss": 0.1864, "step": 41520 }, { "epoch": 2.273722827574878, "grad_norm": 0.11613750457763672, "learning_rate": 1.344960454268911e-05, "loss": 0.1879, "step": 41525 }, { "epoch": 2.2739966051579694, "grad_norm": 0.10729033499956131, "learning_rate": 1.3444534577164877e-05, "loss": 0.1888, "step": 41530 }, { "epoch": 2.274270382741061, "grad_norm": 0.1070038303732872, "learning_rate": 1.3439464611640642e-05, "loss": 0.1872, "step": 41535 }, { "epoch": 2.2745441603241527, "grad_norm": 0.10997626185417175, "learning_rate": 1.3434394646116409e-05, "loss": 0.19, "step": 41540 }, { "epoch": 2.274817937907244, "grad_norm": 0.10524243861436844, "learning_rate": 1.3429324680592174e-05, "loss": 0.1832, "step": 41545 }, { "epoch": 2.2750917154903356, "grad_norm": 0.10015732795000076, "learning_rate": 1.3424254715067939e-05, "loss": 0.1923, "step": 41550 }, { "epoch": 2.2753654930734273, "grad_norm": 0.09941622614860535, "learning_rate": 1.3419184749543704e-05, "loss": 0.1806, "step": 41555 }, { "epoch": 2.2756392706565185, "grad_norm": 0.09553741663694382, "learning_rate": 1.3414114784019469e-05, "loss": 0.1816, "step": 41560 }, { "epoch": 2.27591304823961, "grad_norm": 0.08845698088407516, "learning_rate": 1.3409044818495233e-05, "loss": 0.1846, "step": 41565 }, { "epoch": 2.276186825822702, "grad_norm": 0.10140558332204819, "learning_rate": 1.3403974852970998e-05, "loss": 0.1925, "step": 41570 }, { "epoch": 2.276460603405793, "grad_norm": 0.10169810801744461, "learning_rate": 1.3398904887446767e-05, "loss": 0.1862, "step": 41575 }, { "epoch": 2.2767343809888847, "grad_norm": 0.09892179816961288, "learning_rate": 1.3393834921922532e-05, "loss": 0.1814, "step": 41580 }, { "epoch": 2.2770081585719764, "grad_norm": 0.12042652815580368, "learning_rate": 1.3388764956398297e-05, "loss": 0.1838, "step": 41585 }, { "epoch": 2.2772819361550676, "grad_norm": 0.10682898759841919, "learning_rate": 1.3383694990874064e-05, "loss": 0.1932, "step": 41590 }, { "epoch": 2.2775557137381592, "grad_norm": 0.10168872773647308, "learning_rate": 1.3378625025349829e-05, "loss": 0.1856, "step": 41595 }, { "epoch": 2.2778294913212505, "grad_norm": 0.09592802822589874, "learning_rate": 1.3373555059825594e-05, "loss": 0.183, "step": 41600 }, { "epoch": 2.278103268904342, "grad_norm": 0.09560118615627289, "learning_rate": 1.3368485094301359e-05, "loss": 0.1828, "step": 41605 }, { "epoch": 2.2783770464874338, "grad_norm": 0.09633562713861465, "learning_rate": 1.3363415128777124e-05, "loss": 0.187, "step": 41610 }, { "epoch": 2.278650824070525, "grad_norm": 0.09355694055557251, "learning_rate": 1.3358345163252892e-05, "loss": 0.1858, "step": 41615 }, { "epoch": 2.2789246016536167, "grad_norm": 0.1105257198214531, "learning_rate": 1.3353275197728657e-05, "loss": 0.1828, "step": 41620 }, { "epoch": 2.279198379236708, "grad_norm": 0.09894584119319916, "learning_rate": 1.3348205232204422e-05, "loss": 0.192, "step": 41625 }, { "epoch": 2.2794721568197995, "grad_norm": 0.09538181871175766, "learning_rate": 1.3343135266680187e-05, "loss": 0.1742, "step": 41630 }, { "epoch": 2.279745934402891, "grad_norm": 0.08996082097291946, "learning_rate": 1.3338065301155952e-05, "loss": 0.1789, "step": 41635 }, { "epoch": 2.2800197119859824, "grad_norm": 0.10024190694093704, "learning_rate": 1.3332995335631717e-05, "loss": 0.1814, "step": 41640 }, { "epoch": 2.280293489569074, "grad_norm": 0.11865492910146713, "learning_rate": 1.3327925370107484e-05, "loss": 0.1875, "step": 41645 }, { "epoch": 2.2805672671521657, "grad_norm": 0.09830183535814285, "learning_rate": 1.3322855404583249e-05, "loss": 0.1866, "step": 41650 }, { "epoch": 2.280841044735257, "grad_norm": 0.10851959884166718, "learning_rate": 1.3317785439059017e-05, "loss": 0.1817, "step": 41655 }, { "epoch": 2.2811148223183486, "grad_norm": 0.10964789241552353, "learning_rate": 1.3312715473534782e-05, "loss": 0.1876, "step": 41660 }, { "epoch": 2.2813885999014403, "grad_norm": 0.1085992380976677, "learning_rate": 1.3307645508010547e-05, "loss": 0.1882, "step": 41665 }, { "epoch": 2.2816623774845315, "grad_norm": 0.11176404356956482, "learning_rate": 1.3302575542486312e-05, "loss": 0.1943, "step": 41670 }, { "epoch": 2.281936155067623, "grad_norm": 0.10791496187448502, "learning_rate": 1.3297505576962077e-05, "loss": 0.1856, "step": 41675 }, { "epoch": 2.2822099326507144, "grad_norm": 0.0884869247674942, "learning_rate": 1.3292435611437842e-05, "loss": 0.1751, "step": 41680 }, { "epoch": 2.282483710233806, "grad_norm": 0.10172625631093979, "learning_rate": 1.3287365645913607e-05, "loss": 0.1829, "step": 41685 }, { "epoch": 2.2827574878168977, "grad_norm": 0.10998955368995667, "learning_rate": 1.3282295680389372e-05, "loss": 0.1811, "step": 41690 }, { "epoch": 2.283031265399989, "grad_norm": 0.15126655995845795, "learning_rate": 1.327722571486514e-05, "loss": 0.1843, "step": 41695 }, { "epoch": 2.2833050429830806, "grad_norm": 0.09676673263311386, "learning_rate": 1.3272155749340905e-05, "loss": 0.1852, "step": 41700 }, { "epoch": 2.2835788205661722, "grad_norm": 0.09932487457990646, "learning_rate": 1.326708578381667e-05, "loss": 0.1757, "step": 41705 }, { "epoch": 2.2838525981492634, "grad_norm": 0.09175457805395126, "learning_rate": 1.3262015818292437e-05, "loss": 0.184, "step": 41710 }, { "epoch": 2.284126375732355, "grad_norm": 0.10188382118940353, "learning_rate": 1.3256945852768202e-05, "loss": 0.1888, "step": 41715 }, { "epoch": 2.2844001533154463, "grad_norm": 0.10390207171440125, "learning_rate": 1.3251875887243967e-05, "loss": 0.1828, "step": 41720 }, { "epoch": 2.284673930898538, "grad_norm": 0.09597169607877731, "learning_rate": 1.3246805921719732e-05, "loss": 0.1831, "step": 41725 }, { "epoch": 2.2849477084816296, "grad_norm": 0.10212693363428116, "learning_rate": 1.3241735956195497e-05, "loss": 0.1821, "step": 41730 }, { "epoch": 2.285221486064721, "grad_norm": 0.0933665856719017, "learning_rate": 1.3236665990671265e-05, "loss": 0.1802, "step": 41735 }, { "epoch": 2.2854952636478125, "grad_norm": 0.09649015963077545, "learning_rate": 1.323159602514703e-05, "loss": 0.1862, "step": 41740 }, { "epoch": 2.285769041230904, "grad_norm": 0.11980509012937546, "learning_rate": 1.3226526059622795e-05, "loss": 0.1939, "step": 41745 }, { "epoch": 2.2860428188139954, "grad_norm": 0.10492952913045883, "learning_rate": 1.322145609409856e-05, "loss": 0.1848, "step": 41750 }, { "epoch": 2.286316596397087, "grad_norm": 0.0965087041258812, "learning_rate": 1.3216386128574325e-05, "loss": 0.1734, "step": 41755 }, { "epoch": 2.2865903739801787, "grad_norm": 0.1126910075545311, "learning_rate": 1.3211316163050092e-05, "loss": 0.1826, "step": 41760 }, { "epoch": 2.28686415156327, "grad_norm": 0.09400046616792679, "learning_rate": 1.3206246197525857e-05, "loss": 0.1812, "step": 41765 }, { "epoch": 2.2871379291463616, "grad_norm": 0.09910719841718674, "learning_rate": 1.3201176232001622e-05, "loss": 0.1893, "step": 41770 }, { "epoch": 2.287411706729453, "grad_norm": 0.09706379473209381, "learning_rate": 1.319610626647739e-05, "loss": 0.1852, "step": 41775 }, { "epoch": 2.2876854843125445, "grad_norm": 0.09236827492713928, "learning_rate": 1.3191036300953156e-05, "loss": 0.1906, "step": 41780 }, { "epoch": 2.287959261895636, "grad_norm": 0.09966307133436203, "learning_rate": 1.318596633542892e-05, "loss": 0.1873, "step": 41785 }, { "epoch": 2.2882330394787274, "grad_norm": 0.08822400122880936, "learning_rate": 1.3180896369904686e-05, "loss": 0.185, "step": 41790 }, { "epoch": 2.288506817061819, "grad_norm": 0.1097472533583641, "learning_rate": 1.317582640438045e-05, "loss": 0.1854, "step": 41795 }, { "epoch": 2.2887805946449102, "grad_norm": 0.09811652451753616, "learning_rate": 1.3170756438856216e-05, "loss": 0.1834, "step": 41800 }, { "epoch": 2.289054372228002, "grad_norm": 0.09847734123468399, "learning_rate": 1.316568647333198e-05, "loss": 0.1798, "step": 41805 }, { "epoch": 2.2893281498110936, "grad_norm": 0.09796130657196045, "learning_rate": 1.3160616507807747e-05, "loss": 0.1886, "step": 41810 }, { "epoch": 2.2896019273941848, "grad_norm": 0.09520271420478821, "learning_rate": 1.3155546542283514e-05, "loss": 0.1804, "step": 41815 }, { "epoch": 2.2898757049772764, "grad_norm": 0.0975230485200882, "learning_rate": 1.3150476576759279e-05, "loss": 0.188, "step": 41820 }, { "epoch": 2.290149482560368, "grad_norm": 0.10113546252250671, "learning_rate": 1.3145406611235046e-05, "loss": 0.1857, "step": 41825 }, { "epoch": 2.2904232601434593, "grad_norm": 0.09204521030187607, "learning_rate": 1.314033664571081e-05, "loss": 0.1897, "step": 41830 }, { "epoch": 2.290697037726551, "grad_norm": 0.12157327681779861, "learning_rate": 1.3135266680186576e-05, "loss": 0.1887, "step": 41835 }, { "epoch": 2.2909708153096426, "grad_norm": 0.09136530011892319, "learning_rate": 1.313019671466234e-05, "loss": 0.1786, "step": 41840 }, { "epoch": 2.291244592892734, "grad_norm": 0.10053446888923645, "learning_rate": 1.3125126749138106e-05, "loss": 0.1787, "step": 41845 }, { "epoch": 2.2915183704758255, "grad_norm": 0.10549941658973694, "learning_rate": 1.312005678361387e-05, "loss": 0.1819, "step": 41850 }, { "epoch": 2.291792148058917, "grad_norm": 0.10350437462329865, "learning_rate": 1.3114986818089636e-05, "loss": 0.1948, "step": 41855 }, { "epoch": 2.2920659256420084, "grad_norm": 0.12756343185901642, "learning_rate": 1.3109916852565404e-05, "loss": 0.1927, "step": 41860 }, { "epoch": 2.2923397032251, "grad_norm": 0.117339588701725, "learning_rate": 1.3104846887041169e-05, "loss": 0.1765, "step": 41865 }, { "epoch": 2.2926134808081913, "grad_norm": 0.10649541020393372, "learning_rate": 1.3099776921516934e-05, "loss": 0.183, "step": 41870 }, { "epoch": 2.292887258391283, "grad_norm": 0.1033749058842659, "learning_rate": 1.30947069559927e-05, "loss": 0.1841, "step": 41875 }, { "epoch": 2.2931610359743746, "grad_norm": 0.10713876038789749, "learning_rate": 1.3089636990468466e-05, "loss": 0.1821, "step": 41880 }, { "epoch": 2.293434813557466, "grad_norm": 0.09294908493757248, "learning_rate": 1.308456702494423e-05, "loss": 0.1909, "step": 41885 }, { "epoch": 2.2937085911405575, "grad_norm": 0.08930008858442307, "learning_rate": 1.3079497059419996e-05, "loss": 0.1893, "step": 41890 }, { "epoch": 2.2939823687236487, "grad_norm": 0.11334573477506638, "learning_rate": 1.307442709389576e-05, "loss": 0.1871, "step": 41895 }, { "epoch": 2.2942561463067404, "grad_norm": 0.10268614441156387, "learning_rate": 1.3069357128371529e-05, "loss": 0.1826, "step": 41900 }, { "epoch": 2.294529923889832, "grad_norm": 0.10661248117685318, "learning_rate": 1.3064287162847294e-05, "loss": 0.1798, "step": 41905 }, { "epoch": 2.2948037014729232, "grad_norm": 0.11570168286561966, "learning_rate": 1.3059217197323059e-05, "loss": 0.1971, "step": 41910 }, { "epoch": 2.295077479056015, "grad_norm": 0.0936882346868515, "learning_rate": 1.3054147231798824e-05, "loss": 0.1936, "step": 41915 }, { "epoch": 2.2953512566391066, "grad_norm": 0.09431619197130203, "learning_rate": 1.3049077266274589e-05, "loss": 0.1827, "step": 41920 }, { "epoch": 2.2956250342221978, "grad_norm": 0.10910343378782272, "learning_rate": 1.3044007300750356e-05, "loss": 0.182, "step": 41925 }, { "epoch": 2.2958988118052894, "grad_norm": 0.11203954368829727, "learning_rate": 1.303893733522612e-05, "loss": 0.1868, "step": 41930 }, { "epoch": 2.296172589388381, "grad_norm": 0.10382761061191559, "learning_rate": 1.3033867369701886e-05, "loss": 0.1812, "step": 41935 }, { "epoch": 2.2964463669714723, "grad_norm": 0.09604097902774811, "learning_rate": 1.3028797404177654e-05, "loss": 0.1723, "step": 41940 }, { "epoch": 2.296720144554564, "grad_norm": 0.1111835464835167, "learning_rate": 1.3023727438653419e-05, "loss": 0.1842, "step": 41945 }, { "epoch": 2.296993922137655, "grad_norm": 0.10183115303516388, "learning_rate": 1.3018657473129184e-05, "loss": 0.1779, "step": 41950 }, { "epoch": 2.297267699720747, "grad_norm": 0.09450449794530869, "learning_rate": 1.3013587507604949e-05, "loss": 0.1784, "step": 41955 }, { "epoch": 2.2975414773038385, "grad_norm": 0.09867876023054123, "learning_rate": 1.3008517542080714e-05, "loss": 0.1827, "step": 41960 }, { "epoch": 2.2978152548869297, "grad_norm": 0.11268699169158936, "learning_rate": 1.3003447576556479e-05, "loss": 0.1877, "step": 41965 }, { "epoch": 2.2980890324700214, "grad_norm": 0.1130729466676712, "learning_rate": 1.2998377611032244e-05, "loss": 0.1801, "step": 41970 }, { "epoch": 2.2983628100531126, "grad_norm": 0.10051940381526947, "learning_rate": 1.2993307645508009e-05, "loss": 0.1784, "step": 41975 }, { "epoch": 2.2986365876362043, "grad_norm": 0.11171168833971024, "learning_rate": 1.2988237679983778e-05, "loss": 0.1851, "step": 41980 }, { "epoch": 2.298910365219296, "grad_norm": 0.10188854485750198, "learning_rate": 1.2983167714459543e-05, "loss": 0.1781, "step": 41985 }, { "epoch": 2.299184142802387, "grad_norm": 0.09636097401380539, "learning_rate": 1.297809774893531e-05, "loss": 0.1854, "step": 41990 }, { "epoch": 2.299457920385479, "grad_norm": 0.09871707111597061, "learning_rate": 1.2973027783411074e-05, "loss": 0.1891, "step": 41995 }, { "epoch": 2.2997316979685705, "grad_norm": 0.11053470522165298, "learning_rate": 1.296795781788684e-05, "loss": 0.1982, "step": 42000 }, { "epoch": 2.3000054755516617, "grad_norm": 0.10069114714860916, "learning_rate": 1.2962887852362604e-05, "loss": 0.1802, "step": 42005 }, { "epoch": 2.3002792531347533, "grad_norm": 0.11412873864173889, "learning_rate": 1.295781788683837e-05, "loss": 0.1845, "step": 42010 }, { "epoch": 2.300553030717845, "grad_norm": 0.10542377829551697, "learning_rate": 1.2952747921314134e-05, "loss": 0.1746, "step": 42015 }, { "epoch": 2.300826808300936, "grad_norm": 0.09467948973178864, "learning_rate": 1.2947677955789903e-05, "loss": 0.19, "step": 42020 }, { "epoch": 2.301100585884028, "grad_norm": 0.10681160539388657, "learning_rate": 1.2942607990265668e-05, "loss": 0.1895, "step": 42025 }, { "epoch": 2.3013743634671195, "grad_norm": 0.09555335342884064, "learning_rate": 1.2937538024741433e-05, "loss": 0.1871, "step": 42030 }, { "epoch": 2.3016481410502108, "grad_norm": 0.1393038034439087, "learning_rate": 1.2932468059217198e-05, "loss": 0.1801, "step": 42035 }, { "epoch": 2.3019219186333024, "grad_norm": 0.09932119399309158, "learning_rate": 1.2927398093692963e-05, "loss": 0.1759, "step": 42040 }, { "epoch": 2.3021956962163936, "grad_norm": 0.11119747906923294, "learning_rate": 1.292232812816873e-05, "loss": 0.1867, "step": 42045 }, { "epoch": 2.3024694737994853, "grad_norm": 0.09837325662374496, "learning_rate": 1.2917258162644494e-05, "loss": 0.184, "step": 42050 }, { "epoch": 2.302743251382577, "grad_norm": 0.13609673082828522, "learning_rate": 1.291218819712026e-05, "loss": 0.181, "step": 42055 }, { "epoch": 2.303017028965668, "grad_norm": 0.10189847648143768, "learning_rate": 1.2907118231596028e-05, "loss": 0.1947, "step": 42060 }, { "epoch": 2.30329080654876, "grad_norm": 0.10156822949647903, "learning_rate": 1.2902048266071793e-05, "loss": 0.1926, "step": 42065 }, { "epoch": 2.303564584131851, "grad_norm": 0.10925108194351196, "learning_rate": 1.2896978300547558e-05, "loss": 0.1852, "step": 42070 }, { "epoch": 2.3038383617149427, "grad_norm": 0.10776301473379135, "learning_rate": 1.2891908335023323e-05, "loss": 0.1884, "step": 42075 }, { "epoch": 2.3041121392980344, "grad_norm": 0.09825704991817474, "learning_rate": 1.2886838369499088e-05, "loss": 0.1851, "step": 42080 }, { "epoch": 2.3043859168811256, "grad_norm": 0.10819975286722183, "learning_rate": 1.2881768403974853e-05, "loss": 0.1854, "step": 42085 }, { "epoch": 2.3046596944642173, "grad_norm": 0.10295402258634567, "learning_rate": 1.2876698438450618e-05, "loss": 0.1847, "step": 42090 }, { "epoch": 2.304933472047309, "grad_norm": 0.11023037880659103, "learning_rate": 1.2871628472926384e-05, "loss": 0.1916, "step": 42095 }, { "epoch": 2.3052072496304, "grad_norm": 0.12402240186929703, "learning_rate": 1.2866558507402151e-05, "loss": 0.1821, "step": 42100 }, { "epoch": 2.305481027213492, "grad_norm": 0.0909523293375969, "learning_rate": 1.2861488541877916e-05, "loss": 0.1867, "step": 42105 }, { "epoch": 2.3057548047965835, "grad_norm": 0.09475123882293701, "learning_rate": 1.2856418576353683e-05, "loss": 0.1895, "step": 42110 }, { "epoch": 2.3060285823796747, "grad_norm": 0.11299292743206024, "learning_rate": 1.2851348610829448e-05, "loss": 0.178, "step": 42115 }, { "epoch": 2.3063023599627663, "grad_norm": 0.1015717089176178, "learning_rate": 1.2846278645305213e-05, "loss": 0.1937, "step": 42120 }, { "epoch": 2.3065761375458576, "grad_norm": 0.09631261229515076, "learning_rate": 1.2841208679780978e-05, "loss": 0.1809, "step": 42125 }, { "epoch": 2.306849915128949, "grad_norm": 0.10458290576934814, "learning_rate": 1.2836138714256743e-05, "loss": 0.1846, "step": 42130 }, { "epoch": 2.307123692712041, "grad_norm": 0.11140696704387665, "learning_rate": 1.2831068748732508e-05, "loss": 0.192, "step": 42135 }, { "epoch": 2.307397470295132, "grad_norm": 0.10651320964097977, "learning_rate": 1.2825998783208273e-05, "loss": 0.1764, "step": 42140 }, { "epoch": 2.3076712478782238, "grad_norm": 0.11305992305278778, "learning_rate": 1.2820928817684041e-05, "loss": 0.1821, "step": 42145 }, { "epoch": 2.3079450254613154, "grad_norm": 0.10921566188335419, "learning_rate": 1.2815858852159806e-05, "loss": 0.1826, "step": 42150 }, { "epoch": 2.3082188030444066, "grad_norm": 0.08963719755411148, "learning_rate": 1.2810788886635571e-05, "loss": 0.1785, "step": 42155 }, { "epoch": 2.3084925806274983, "grad_norm": 0.10765636712312698, "learning_rate": 1.2805718921111338e-05, "loss": 0.1778, "step": 42160 }, { "epoch": 2.3087663582105895, "grad_norm": 0.10060500353574753, "learning_rate": 1.2800648955587103e-05, "loss": 0.1826, "step": 42165 }, { "epoch": 2.309040135793681, "grad_norm": 0.12844416499137878, "learning_rate": 1.2795578990062868e-05, "loss": 0.1841, "step": 42170 }, { "epoch": 2.309313913376773, "grad_norm": 0.09553253650665283, "learning_rate": 1.2790509024538633e-05, "loss": 0.1858, "step": 42175 }, { "epoch": 2.309587690959864, "grad_norm": 0.10399026423692703, "learning_rate": 1.2785439059014398e-05, "loss": 0.1789, "step": 42180 }, { "epoch": 2.3098614685429557, "grad_norm": 0.11231612414121628, "learning_rate": 1.2780369093490166e-05, "loss": 0.1787, "step": 42185 }, { "epoch": 2.3101352461260474, "grad_norm": 0.10528168082237244, "learning_rate": 1.2775299127965931e-05, "loss": 0.1842, "step": 42190 }, { "epoch": 2.3104090237091386, "grad_norm": 0.11038260161876678, "learning_rate": 1.2770229162441696e-05, "loss": 0.1907, "step": 42195 }, { "epoch": 2.3106828012922302, "grad_norm": 0.10079120099544525, "learning_rate": 1.2765159196917461e-05, "loss": 0.1875, "step": 42200 }, { "epoch": 2.310956578875322, "grad_norm": 0.09574858099222183, "learning_rate": 1.2760089231393226e-05, "loss": 0.1828, "step": 42205 }, { "epoch": 2.311230356458413, "grad_norm": 0.13822217285633087, "learning_rate": 1.2755019265868993e-05, "loss": 0.1852, "step": 42210 }, { "epoch": 2.311504134041505, "grad_norm": 0.0990309864282608, "learning_rate": 1.2749949300344758e-05, "loss": 0.1865, "step": 42215 }, { "epoch": 2.311777911624596, "grad_norm": 0.10466358810663223, "learning_rate": 1.2744879334820523e-05, "loss": 0.1879, "step": 42220 }, { "epoch": 2.3120516892076877, "grad_norm": 0.10320354253053665, "learning_rate": 1.2739809369296291e-05, "loss": 0.1747, "step": 42225 }, { "epoch": 2.3123254667907793, "grad_norm": 0.10584297776222229, "learning_rate": 1.2734739403772056e-05, "loss": 0.1759, "step": 42230 }, { "epoch": 2.3125992443738705, "grad_norm": 0.15501973032951355, "learning_rate": 1.2729669438247821e-05, "loss": 0.1906, "step": 42235 }, { "epoch": 2.312873021956962, "grad_norm": 0.10010617226362228, "learning_rate": 1.2724599472723586e-05, "loss": 0.1871, "step": 42240 }, { "epoch": 2.3131467995400534, "grad_norm": 0.1261000782251358, "learning_rate": 1.2719529507199351e-05, "loss": 0.1936, "step": 42245 }, { "epoch": 2.313420577123145, "grad_norm": 0.10951226204633713, "learning_rate": 1.2714459541675116e-05, "loss": 0.1795, "step": 42250 }, { "epoch": 2.3136943547062367, "grad_norm": 0.09832091629505157, "learning_rate": 1.2709389576150881e-05, "loss": 0.1813, "step": 42255 }, { "epoch": 2.313968132289328, "grad_norm": 0.1011776477098465, "learning_rate": 1.2704319610626648e-05, "loss": 0.1849, "step": 42260 }, { "epoch": 2.3142419098724196, "grad_norm": 0.10170815140008926, "learning_rate": 1.2699249645102415e-05, "loss": 0.1907, "step": 42265 }, { "epoch": 2.3145156874555113, "grad_norm": 0.10500691086053848, "learning_rate": 1.269417967957818e-05, "loss": 0.1906, "step": 42270 }, { "epoch": 2.3147894650386025, "grad_norm": 0.10548223555088043, "learning_rate": 1.2689109714053946e-05, "loss": 0.1925, "step": 42275 }, { "epoch": 2.315063242621694, "grad_norm": 0.09993984550237656, "learning_rate": 1.2684039748529711e-05, "loss": 0.1844, "step": 42280 }, { "epoch": 2.315337020204786, "grad_norm": 0.0910000205039978, "learning_rate": 1.2678969783005476e-05, "loss": 0.1898, "step": 42285 }, { "epoch": 2.315610797787877, "grad_norm": 0.1150604709982872, "learning_rate": 1.2673899817481241e-05, "loss": 0.1847, "step": 42290 }, { "epoch": 2.3158845753709687, "grad_norm": 0.11053592711687088, "learning_rate": 1.2668829851957006e-05, "loss": 0.1853, "step": 42295 }, { "epoch": 2.3161583529540604, "grad_norm": 0.1213083267211914, "learning_rate": 1.2663759886432771e-05, "loss": 0.1811, "step": 42300 }, { "epoch": 2.3164321305371516, "grad_norm": 0.09586504846811295, "learning_rate": 1.265868992090854e-05, "loss": 0.1897, "step": 42305 }, { "epoch": 2.3167059081202432, "grad_norm": 0.11713362485170364, "learning_rate": 1.2653619955384305e-05, "loss": 0.1895, "step": 42310 }, { "epoch": 2.3169796857033345, "grad_norm": 0.11195772141218185, "learning_rate": 1.264854998986007e-05, "loss": 0.1824, "step": 42315 }, { "epoch": 2.317253463286426, "grad_norm": 0.10192997753620148, "learning_rate": 1.2643480024335835e-05, "loss": 0.1745, "step": 42320 }, { "epoch": 2.317527240869518, "grad_norm": 0.11311356723308563, "learning_rate": 1.2638410058811601e-05, "loss": 0.1854, "step": 42325 }, { "epoch": 2.317801018452609, "grad_norm": 0.10259610414505005, "learning_rate": 1.2633340093287366e-05, "loss": 0.1899, "step": 42330 }, { "epoch": 2.3180747960357007, "grad_norm": 0.11246968060731888, "learning_rate": 1.2628270127763131e-05, "loss": 0.1925, "step": 42335 }, { "epoch": 2.318348573618792, "grad_norm": 0.10099630802869797, "learning_rate": 1.2623200162238896e-05, "loss": 0.1796, "step": 42340 }, { "epoch": 2.3186223512018835, "grad_norm": 0.09408827126026154, "learning_rate": 1.2618130196714665e-05, "loss": 0.1831, "step": 42345 }, { "epoch": 2.318896128784975, "grad_norm": 0.08627378195524216, "learning_rate": 1.261306023119043e-05, "loss": 0.1854, "step": 42350 }, { "epoch": 2.3191699063680664, "grad_norm": 0.11303496360778809, "learning_rate": 1.2607990265666195e-05, "loss": 0.1814, "step": 42355 }, { "epoch": 2.319443683951158, "grad_norm": 0.09476454555988312, "learning_rate": 1.260292030014196e-05, "loss": 0.1782, "step": 42360 }, { "epoch": 2.3197174615342497, "grad_norm": 0.09094591438770294, "learning_rate": 1.2597850334617725e-05, "loss": 0.1844, "step": 42365 }, { "epoch": 2.319991239117341, "grad_norm": 0.09905850887298584, "learning_rate": 1.259278036909349e-05, "loss": 0.1847, "step": 42370 }, { "epoch": 2.3202650167004326, "grad_norm": 0.12671025097370148, "learning_rate": 1.2587710403569255e-05, "loss": 0.1879, "step": 42375 }, { "epoch": 2.3205387942835243, "grad_norm": 0.11947466433048248, "learning_rate": 1.2582640438045021e-05, "loss": 0.1945, "step": 42380 }, { "epoch": 2.3208125718666155, "grad_norm": 0.121427521109581, "learning_rate": 1.2577570472520788e-05, "loss": 0.191, "step": 42385 }, { "epoch": 2.321086349449707, "grad_norm": 0.1054292768239975, "learning_rate": 1.2572500506996555e-05, "loss": 0.1842, "step": 42390 }, { "epoch": 2.3213601270327984, "grad_norm": 0.10222763568162918, "learning_rate": 1.256743054147232e-05, "loss": 0.1819, "step": 42395 }, { "epoch": 2.32163390461589, "grad_norm": 0.10450490564107895, "learning_rate": 1.2562360575948085e-05, "loss": 0.1849, "step": 42400 }, { "epoch": 2.3219076821989817, "grad_norm": 0.10887246578931808, "learning_rate": 1.255729061042385e-05, "loss": 0.1807, "step": 42405 }, { "epoch": 2.322181459782073, "grad_norm": 0.11596322059631348, "learning_rate": 1.2552220644899615e-05, "loss": 0.1863, "step": 42410 }, { "epoch": 2.3224552373651646, "grad_norm": 0.10011108219623566, "learning_rate": 1.254715067937538e-05, "loss": 0.1798, "step": 42415 }, { "epoch": 2.322729014948256, "grad_norm": 0.12159138172864914, "learning_rate": 1.2542080713851145e-05, "loss": 0.188, "step": 42420 }, { "epoch": 2.3230027925313474, "grad_norm": 0.09581341594457626, "learning_rate": 1.2537010748326913e-05, "loss": 0.1887, "step": 42425 }, { "epoch": 2.323276570114439, "grad_norm": 0.09311302751302719, "learning_rate": 1.2531940782802678e-05, "loss": 0.1813, "step": 42430 }, { "epoch": 2.3235503476975303, "grad_norm": 0.10646742582321167, "learning_rate": 1.2526870817278443e-05, "loss": 0.1827, "step": 42435 }, { "epoch": 2.323824125280622, "grad_norm": 0.10202240943908691, "learning_rate": 1.2521800851754208e-05, "loss": 0.1898, "step": 42440 }, { "epoch": 2.3240979028637136, "grad_norm": 0.1145567074418068, "learning_rate": 1.2516730886229975e-05, "loss": 0.1764, "step": 42445 }, { "epoch": 2.324371680446805, "grad_norm": 0.09455665946006775, "learning_rate": 1.251166092070574e-05, "loss": 0.1867, "step": 42450 }, { "epoch": 2.3246454580298965, "grad_norm": 0.10432352870702744, "learning_rate": 1.2506590955181505e-05, "loss": 0.1939, "step": 42455 }, { "epoch": 2.324919235612988, "grad_norm": 0.10912811011075974, "learning_rate": 1.250152098965727e-05, "loss": 0.1864, "step": 42460 }, { "epoch": 2.3251930131960794, "grad_norm": 0.0953117236495018, "learning_rate": 1.2496451024133037e-05, "loss": 0.1872, "step": 42465 }, { "epoch": 2.325466790779171, "grad_norm": 0.09759247303009033, "learning_rate": 1.2491381058608802e-05, "loss": 0.1822, "step": 42470 }, { "epoch": 2.3257405683622627, "grad_norm": 0.11329850554466248, "learning_rate": 1.2486311093084567e-05, "loss": 0.1808, "step": 42475 }, { "epoch": 2.326014345945354, "grad_norm": 0.10914293676614761, "learning_rate": 1.2481241127560333e-05, "loss": 0.1844, "step": 42480 }, { "epoch": 2.3262881235284456, "grad_norm": 0.1099463477730751, "learning_rate": 1.2476171162036098e-05, "loss": 0.1856, "step": 42485 }, { "epoch": 2.326561901111537, "grad_norm": 0.09305243194103241, "learning_rate": 1.2471101196511863e-05, "loss": 0.1936, "step": 42490 }, { "epoch": 2.3268356786946285, "grad_norm": 0.12093575298786163, "learning_rate": 1.246603123098763e-05, "loss": 0.1965, "step": 42495 }, { "epoch": 2.32710945627772, "grad_norm": 0.09311491996049881, "learning_rate": 1.2460961265463397e-05, "loss": 0.1778, "step": 42500 }, { "epoch": 2.3273832338608114, "grad_norm": 0.10419961810112, "learning_rate": 1.2455891299939162e-05, "loss": 0.1817, "step": 42505 }, { "epoch": 2.327657011443903, "grad_norm": 0.09880833327770233, "learning_rate": 1.2450821334414927e-05, "loss": 0.181, "step": 42510 }, { "epoch": 2.3279307890269942, "grad_norm": 0.09761270135641098, "learning_rate": 1.2445751368890692e-05, "loss": 0.1788, "step": 42515 }, { "epoch": 2.328204566610086, "grad_norm": 0.09585311263799667, "learning_rate": 1.2440681403366458e-05, "loss": 0.182, "step": 42520 }, { "epoch": 2.3284783441931776, "grad_norm": 0.09626315534114838, "learning_rate": 1.2435611437842223e-05, "loss": 0.1854, "step": 42525 }, { "epoch": 2.328752121776269, "grad_norm": 0.08701968193054199, "learning_rate": 1.2430541472317988e-05, "loss": 0.1743, "step": 42530 }, { "epoch": 2.3290258993593604, "grad_norm": 0.09815175831317902, "learning_rate": 1.2425471506793753e-05, "loss": 0.182, "step": 42535 }, { "epoch": 2.329299676942452, "grad_norm": 0.0996832549571991, "learning_rate": 1.242040154126952e-05, "loss": 0.1888, "step": 42540 }, { "epoch": 2.3295734545255433, "grad_norm": 0.0912572368979454, "learning_rate": 1.2415331575745285e-05, "loss": 0.1761, "step": 42545 }, { "epoch": 2.329847232108635, "grad_norm": 0.10278236865997314, "learning_rate": 1.241026161022105e-05, "loss": 0.1846, "step": 42550 }, { "epoch": 2.3301210096917266, "grad_norm": 0.10444007813930511, "learning_rate": 1.2405191644696817e-05, "loss": 0.1871, "step": 42555 }, { "epoch": 2.330394787274818, "grad_norm": 0.12306033074855804, "learning_rate": 1.2400121679172583e-05, "loss": 0.189, "step": 42560 }, { "epoch": 2.3306685648579095, "grad_norm": 0.09350033849477768, "learning_rate": 1.2395051713648348e-05, "loss": 0.1847, "step": 42565 }, { "epoch": 2.330942342441001, "grad_norm": 0.10406801104545593, "learning_rate": 1.2389981748124113e-05, "loss": 0.1832, "step": 42570 }, { "epoch": 2.3312161200240924, "grad_norm": 0.11148430407047272, "learning_rate": 1.2384911782599878e-05, "loss": 0.1889, "step": 42575 }, { "epoch": 2.331489897607184, "grad_norm": 0.11580171436071396, "learning_rate": 1.2379841817075645e-05, "loss": 0.1845, "step": 42580 }, { "epoch": 2.3317636751902753, "grad_norm": 0.10646098107099533, "learning_rate": 1.237477185155141e-05, "loss": 0.1844, "step": 42585 }, { "epoch": 2.332037452773367, "grad_norm": 0.10178215801715851, "learning_rate": 1.2369701886027175e-05, "loss": 0.1813, "step": 42590 }, { "epoch": 2.3323112303564586, "grad_norm": 0.11169946938753128, "learning_rate": 1.236463192050294e-05, "loss": 0.1907, "step": 42595 }, { "epoch": 2.33258500793955, "grad_norm": 0.12070885300636292, "learning_rate": 1.2359561954978707e-05, "loss": 0.189, "step": 42600 }, { "epoch": 2.3328587855226415, "grad_norm": 0.11159852892160416, "learning_rate": 1.2354491989454472e-05, "loss": 0.1845, "step": 42605 }, { "epoch": 2.3331325631057327, "grad_norm": 0.10085802525281906, "learning_rate": 1.2349422023930239e-05, "loss": 0.1829, "step": 42610 }, { "epoch": 2.3334063406888244, "grad_norm": 0.10466018319129944, "learning_rate": 1.2344352058406004e-05, "loss": 0.1831, "step": 42615 }, { "epoch": 2.333680118271916, "grad_norm": 0.09807305037975311, "learning_rate": 1.233928209288177e-05, "loss": 0.1805, "step": 42620 }, { "epoch": 2.3339538958550072, "grad_norm": 0.10921042412519455, "learning_rate": 1.2334212127357535e-05, "loss": 0.185, "step": 42625 }, { "epoch": 2.334227673438099, "grad_norm": 0.10786208510398865, "learning_rate": 1.23291421618333e-05, "loss": 0.1846, "step": 42630 }, { "epoch": 2.3345014510211906, "grad_norm": 0.09775426238775253, "learning_rate": 1.2324072196309065e-05, "loss": 0.1889, "step": 42635 }, { "epoch": 2.3347752286042818, "grad_norm": 0.11357773095369339, "learning_rate": 1.2319002230784832e-05, "loss": 0.1803, "step": 42640 }, { "epoch": 2.3350490061873734, "grad_norm": 0.10839413106441498, "learning_rate": 1.2313932265260597e-05, "loss": 0.1786, "step": 42645 }, { "epoch": 2.335322783770465, "grad_norm": 0.10434532165527344, "learning_rate": 1.2308862299736362e-05, "loss": 0.1779, "step": 42650 }, { "epoch": 2.3355965613535563, "grad_norm": 0.09662126749753952, "learning_rate": 1.2303792334212127e-05, "loss": 0.1818, "step": 42655 }, { "epoch": 2.335870338936648, "grad_norm": 0.10417747497558594, "learning_rate": 1.2298722368687894e-05, "loss": 0.1914, "step": 42660 }, { "epoch": 2.336144116519739, "grad_norm": 0.1015123575925827, "learning_rate": 1.2293652403163659e-05, "loss": 0.1857, "step": 42665 }, { "epoch": 2.336417894102831, "grad_norm": 0.09650708734989166, "learning_rate": 1.2288582437639425e-05, "loss": 0.1812, "step": 42670 }, { "epoch": 2.3366916716859225, "grad_norm": 0.11295370012521744, "learning_rate": 1.228351247211519e-05, "loss": 0.1812, "step": 42675 }, { "epoch": 2.3369654492690137, "grad_norm": 0.09937256574630737, "learning_rate": 1.2278442506590957e-05, "loss": 0.1937, "step": 42680 }, { "epoch": 2.3372392268521054, "grad_norm": 0.09844447672367096, "learning_rate": 1.2273372541066722e-05, "loss": 0.1809, "step": 42685 }, { "epoch": 2.3375130044351966, "grad_norm": 0.10856789350509644, "learning_rate": 1.2268302575542487e-05, "loss": 0.1842, "step": 42690 }, { "epoch": 2.3377867820182883, "grad_norm": 0.10876243561506271, "learning_rate": 1.2263232610018252e-05, "loss": 0.1911, "step": 42695 }, { "epoch": 2.33806055960138, "grad_norm": 0.10297167301177979, "learning_rate": 1.2258162644494019e-05, "loss": 0.1876, "step": 42700 }, { "epoch": 2.338334337184471, "grad_norm": 0.09670429676771164, "learning_rate": 1.2253092678969784e-05, "loss": 0.1958, "step": 42705 }, { "epoch": 2.338608114767563, "grad_norm": 0.09294413030147552, "learning_rate": 1.2248022713445549e-05, "loss": 0.1813, "step": 42710 }, { "epoch": 2.3388818923506545, "grad_norm": 0.08885323256254196, "learning_rate": 1.2242952747921314e-05, "loss": 0.1857, "step": 42715 }, { "epoch": 2.3391556699337457, "grad_norm": 0.10349293798208237, "learning_rate": 1.223788278239708e-05, "loss": 0.1829, "step": 42720 }, { "epoch": 2.3394294475168373, "grad_norm": 0.10318946838378906, "learning_rate": 1.2232812816872847e-05, "loss": 0.1843, "step": 42725 }, { "epoch": 2.339703225099929, "grad_norm": 0.09883656352758408, "learning_rate": 1.2227742851348612e-05, "loss": 0.1825, "step": 42730 }, { "epoch": 2.3399770026830202, "grad_norm": 0.09421002119779587, "learning_rate": 1.2222672885824377e-05, "loss": 0.1795, "step": 42735 }, { "epoch": 2.340250780266112, "grad_norm": 0.11094127595424652, "learning_rate": 1.2217602920300142e-05, "loss": 0.1907, "step": 42740 }, { "epoch": 2.3405245578492035, "grad_norm": 0.1236969456076622, "learning_rate": 1.2212532954775909e-05, "loss": 0.2012, "step": 42745 }, { "epoch": 2.3407983354322948, "grad_norm": 0.11102225631475449, "learning_rate": 1.2207462989251674e-05, "loss": 0.184, "step": 42750 }, { "epoch": 2.3410721130153864, "grad_norm": 0.11006765812635422, "learning_rate": 1.2202393023727439e-05, "loss": 0.1936, "step": 42755 }, { "epoch": 2.3413458905984776, "grad_norm": 0.10555166006088257, "learning_rate": 1.2197323058203204e-05, "loss": 0.185, "step": 42760 }, { "epoch": 2.3416196681815693, "grad_norm": 0.10297536849975586, "learning_rate": 1.219225309267897e-05, "loss": 0.1909, "step": 42765 }, { "epoch": 2.341893445764661, "grad_norm": 0.10530006885528564, "learning_rate": 1.2187183127154735e-05, "loss": 0.1832, "step": 42770 }, { "epoch": 2.342167223347752, "grad_norm": 0.09946921467781067, "learning_rate": 1.21821131616305e-05, "loss": 0.1839, "step": 42775 }, { "epoch": 2.342441000930844, "grad_norm": 0.11041833460330963, "learning_rate": 1.2177043196106267e-05, "loss": 0.1796, "step": 42780 }, { "epoch": 2.342714778513935, "grad_norm": 0.11400364339351654, "learning_rate": 1.2171973230582034e-05, "loss": 0.1847, "step": 42785 }, { "epoch": 2.3429885560970267, "grad_norm": 0.11780079454183578, "learning_rate": 1.2166903265057799e-05, "loss": 0.174, "step": 42790 }, { "epoch": 2.3432623336801184, "grad_norm": 0.09473682940006256, "learning_rate": 1.2161833299533564e-05, "loss": 0.1744, "step": 42795 }, { "epoch": 2.3435361112632096, "grad_norm": 0.10140616446733475, "learning_rate": 1.2156763334009329e-05, "loss": 0.1807, "step": 42800 }, { "epoch": 2.3438098888463013, "grad_norm": 0.08890209347009659, "learning_rate": 1.2151693368485095e-05, "loss": 0.1766, "step": 42805 }, { "epoch": 2.344083666429393, "grad_norm": 0.11322863399982452, "learning_rate": 1.214662340296086e-05, "loss": 0.1877, "step": 42810 }, { "epoch": 2.344357444012484, "grad_norm": 0.10113923996686935, "learning_rate": 1.2141553437436625e-05, "loss": 0.1929, "step": 42815 }, { "epoch": 2.344631221595576, "grad_norm": 0.09251666814088821, "learning_rate": 1.213648347191239e-05, "loss": 0.1812, "step": 42820 }, { "epoch": 2.3449049991786675, "grad_norm": 0.10783341526985168, "learning_rate": 1.2131413506388157e-05, "loss": 0.1841, "step": 42825 }, { "epoch": 2.3451787767617587, "grad_norm": 0.10661858320236206, "learning_rate": 1.2126343540863922e-05, "loss": 0.1793, "step": 42830 }, { "epoch": 2.3454525543448503, "grad_norm": 0.09872846305370331, "learning_rate": 1.2121273575339689e-05, "loss": 0.1906, "step": 42835 }, { "epoch": 2.3457263319279416, "grad_norm": 0.09194169193506241, "learning_rate": 1.2116203609815454e-05, "loss": 0.1735, "step": 42840 }, { "epoch": 2.346000109511033, "grad_norm": 0.11665606498718262, "learning_rate": 1.211113364429122e-05, "loss": 0.1794, "step": 42845 }, { "epoch": 2.346273887094125, "grad_norm": 0.10244005173444748, "learning_rate": 1.2106063678766986e-05, "loss": 0.1797, "step": 42850 }, { "epoch": 2.346547664677216, "grad_norm": 0.11313418298959732, "learning_rate": 1.210099371324275e-05, "loss": 0.1849, "step": 42855 }, { "epoch": 2.3468214422603078, "grad_norm": 0.1462046504020691, "learning_rate": 1.2095923747718516e-05, "loss": 0.1852, "step": 42860 }, { "epoch": 2.3470952198433994, "grad_norm": 0.10848557204008102, "learning_rate": 1.2090853782194282e-05, "loss": 0.183, "step": 42865 }, { "epoch": 2.3473689974264906, "grad_norm": 0.09049802273511887, "learning_rate": 1.2085783816670047e-05, "loss": 0.1866, "step": 42870 }, { "epoch": 2.3476427750095823, "grad_norm": 0.12192422151565552, "learning_rate": 1.2080713851145812e-05, "loss": 0.1846, "step": 42875 }, { "epoch": 2.3479165525926735, "grad_norm": 0.09760908782482147, "learning_rate": 1.2075643885621577e-05, "loss": 0.181, "step": 42880 }, { "epoch": 2.348190330175765, "grad_norm": 0.09751598536968231, "learning_rate": 1.2070573920097344e-05, "loss": 0.1812, "step": 42885 }, { "epoch": 2.348464107758857, "grad_norm": 0.09807012230157852, "learning_rate": 1.2065503954573109e-05, "loss": 0.1857, "step": 42890 }, { "epoch": 2.348737885341948, "grad_norm": 0.09081977605819702, "learning_rate": 1.2060433989048876e-05, "loss": 0.1884, "step": 42895 }, { "epoch": 2.3490116629250397, "grad_norm": 0.09438766539096832, "learning_rate": 1.205536402352464e-05, "loss": 0.1795, "step": 42900 }, { "epoch": 2.3492854405081314, "grad_norm": 0.10497968643903732, "learning_rate": 1.2050294058000407e-05, "loss": 0.1806, "step": 42905 }, { "epoch": 2.3495592180912226, "grad_norm": 0.12065248936414719, "learning_rate": 1.2045224092476172e-05, "loss": 0.1845, "step": 42910 }, { "epoch": 2.3498329956743143, "grad_norm": 0.12056642025709152, "learning_rate": 1.2040154126951937e-05, "loss": 0.1891, "step": 42915 }, { "epoch": 2.350106773257406, "grad_norm": 0.10699226707220078, "learning_rate": 1.2035084161427702e-05, "loss": 0.188, "step": 42920 }, { "epoch": 2.350380550840497, "grad_norm": 0.10558564215898514, "learning_rate": 1.2030014195903469e-05, "loss": 0.1883, "step": 42925 }, { "epoch": 2.350654328423589, "grad_norm": 0.09887826442718506, "learning_rate": 1.2024944230379234e-05, "loss": 0.1789, "step": 42930 }, { "epoch": 2.35092810600668, "grad_norm": 0.1059398204088211, "learning_rate": 1.2019874264854999e-05, "loss": 0.1899, "step": 42935 }, { "epoch": 2.3512018835897717, "grad_norm": 0.11079888045787811, "learning_rate": 1.2014804299330764e-05, "loss": 0.1842, "step": 42940 }, { "epoch": 2.3514756611728633, "grad_norm": 0.10711003839969635, "learning_rate": 1.200973433380653e-05, "loss": 0.1894, "step": 42945 }, { "epoch": 2.3517494387559545, "grad_norm": 0.12083917111158371, "learning_rate": 1.2004664368282296e-05, "loss": 0.1813, "step": 42950 }, { "epoch": 2.352023216339046, "grad_norm": 0.09752889722585678, "learning_rate": 1.1999594402758062e-05, "loss": 0.1864, "step": 42955 }, { "epoch": 2.3522969939221374, "grad_norm": 0.10346141457557678, "learning_rate": 1.1994524437233827e-05, "loss": 0.1862, "step": 42960 }, { "epoch": 2.352570771505229, "grad_norm": 0.0994110107421875, "learning_rate": 1.1989454471709594e-05, "loss": 0.188, "step": 42965 }, { "epoch": 2.3528445490883207, "grad_norm": 0.10806885361671448, "learning_rate": 1.1984384506185359e-05, "loss": 0.1856, "step": 42970 }, { "epoch": 2.353118326671412, "grad_norm": 0.10603707283735275, "learning_rate": 1.1979314540661124e-05, "loss": 0.1945, "step": 42975 }, { "epoch": 2.3533921042545036, "grad_norm": 0.11285820603370667, "learning_rate": 1.1974244575136889e-05, "loss": 0.1796, "step": 42980 }, { "epoch": 2.3536658818375953, "grad_norm": 0.12353909015655518, "learning_rate": 1.1969174609612656e-05, "loss": 0.1823, "step": 42985 }, { "epoch": 2.3539396594206865, "grad_norm": 0.10736898332834244, "learning_rate": 1.196410464408842e-05, "loss": 0.1848, "step": 42990 }, { "epoch": 2.354213437003778, "grad_norm": 0.09339068830013275, "learning_rate": 1.1959034678564186e-05, "loss": 0.1842, "step": 42995 }, { "epoch": 2.35448721458687, "grad_norm": 0.106609046459198, "learning_rate": 1.195396471303995e-05, "loss": 0.1882, "step": 43000 }, { "epoch": 2.354760992169961, "grad_norm": 0.0923377275466919, "learning_rate": 1.1948894747515717e-05, "loss": 0.1857, "step": 43005 }, { "epoch": 2.3550347697530527, "grad_norm": 0.10346216708421707, "learning_rate": 1.1943824781991484e-05, "loss": 0.1833, "step": 43010 }, { "epoch": 2.3553085473361444, "grad_norm": 0.09708941727876663, "learning_rate": 1.1938754816467249e-05, "loss": 0.1858, "step": 43015 }, { "epoch": 2.3555823249192356, "grad_norm": 0.10756726562976837, "learning_rate": 1.1933684850943014e-05, "loss": 0.1885, "step": 43020 }, { "epoch": 2.3558561025023272, "grad_norm": 0.091959148645401, "learning_rate": 1.192861488541878e-05, "loss": 0.1886, "step": 43025 }, { "epoch": 2.3561298800854185, "grad_norm": 0.10706361383199692, "learning_rate": 1.1923544919894546e-05, "loss": 0.1814, "step": 43030 }, { "epoch": 2.35640365766851, "grad_norm": 0.09792657941579819, "learning_rate": 1.191847495437031e-05, "loss": 0.1872, "step": 43035 }, { "epoch": 2.356677435251602, "grad_norm": 0.10834742337465286, "learning_rate": 1.1913404988846076e-05, "loss": 0.1838, "step": 43040 }, { "epoch": 2.356951212834693, "grad_norm": 0.10156799107789993, "learning_rate": 1.190833502332184e-05, "loss": 0.1825, "step": 43045 }, { "epoch": 2.3572249904177847, "grad_norm": 0.11334888637065887, "learning_rate": 1.1903265057797608e-05, "loss": 0.1808, "step": 43050 }, { "epoch": 2.357498768000876, "grad_norm": 0.09851308166980743, "learning_rate": 1.1898195092273373e-05, "loss": 0.1822, "step": 43055 }, { "epoch": 2.3577725455839675, "grad_norm": 0.09914346039295197, "learning_rate": 1.1893125126749138e-05, "loss": 0.1878, "step": 43060 }, { "epoch": 2.358046323167059, "grad_norm": 0.09787947684526443, "learning_rate": 1.1888055161224904e-05, "loss": 0.1804, "step": 43065 }, { "epoch": 2.3583201007501504, "grad_norm": 0.10250119864940643, "learning_rate": 1.1882985195700671e-05, "loss": 0.1935, "step": 43070 }, { "epoch": 2.358593878333242, "grad_norm": 0.11021445691585541, "learning_rate": 1.1877915230176436e-05, "loss": 0.1852, "step": 43075 }, { "epoch": 2.3588676559163337, "grad_norm": 0.10332202166318893, "learning_rate": 1.1872845264652201e-05, "loss": 0.1936, "step": 43080 }, { "epoch": 2.359141433499425, "grad_norm": 0.09465283900499344, "learning_rate": 1.1867775299127966e-05, "loss": 0.1838, "step": 43085 }, { "epoch": 2.3594152110825166, "grad_norm": 0.11103805899620056, "learning_rate": 1.1862705333603733e-05, "loss": 0.1851, "step": 43090 }, { "epoch": 2.3596889886656083, "grad_norm": 0.10741152614355087, "learning_rate": 1.1857635368079498e-05, "loss": 0.1888, "step": 43095 }, { "epoch": 2.3599627662486995, "grad_norm": 0.11551866680383682, "learning_rate": 1.1852565402555263e-05, "loss": 0.1814, "step": 43100 }, { "epoch": 2.360236543831791, "grad_norm": 0.09658064693212509, "learning_rate": 1.1847495437031028e-05, "loss": 0.1803, "step": 43105 }, { "epoch": 2.3605103214148824, "grad_norm": 0.11222827434539795, "learning_rate": 1.1842425471506794e-05, "loss": 0.1855, "step": 43110 }, { "epoch": 2.360784098997974, "grad_norm": 0.12637296319007874, "learning_rate": 1.183735550598256e-05, "loss": 0.1894, "step": 43115 }, { "epoch": 2.3610578765810657, "grad_norm": 0.09392838925123215, "learning_rate": 1.1832285540458326e-05, "loss": 0.1862, "step": 43120 }, { "epoch": 2.361331654164157, "grad_norm": 0.10175977647304535, "learning_rate": 1.1827215574934091e-05, "loss": 0.1789, "step": 43125 }, { "epoch": 2.3616054317472486, "grad_norm": 0.1031806692481041, "learning_rate": 1.1822145609409858e-05, "loss": 0.1816, "step": 43130 }, { "epoch": 2.36187920933034, "grad_norm": 0.09809906035661697, "learning_rate": 1.1817075643885623e-05, "loss": 0.1881, "step": 43135 }, { "epoch": 2.3621529869134315, "grad_norm": 0.11147385090589523, "learning_rate": 1.1812005678361388e-05, "loss": 0.183, "step": 43140 }, { "epoch": 2.362426764496523, "grad_norm": 0.10939628630876541, "learning_rate": 1.1806935712837153e-05, "loss": 0.1785, "step": 43145 }, { "epoch": 2.3627005420796143, "grad_norm": 0.09727059304714203, "learning_rate": 1.180186574731292e-05, "loss": 0.1876, "step": 43150 }, { "epoch": 2.362974319662706, "grad_norm": 0.10285986959934235, "learning_rate": 1.1796795781788684e-05, "loss": 0.1761, "step": 43155 }, { "epoch": 2.3632480972457977, "grad_norm": 0.08718287944793701, "learning_rate": 1.179172581626445e-05, "loss": 0.182, "step": 43160 }, { "epoch": 2.363521874828889, "grad_norm": 0.10108456760644913, "learning_rate": 1.1786655850740214e-05, "loss": 0.1789, "step": 43165 }, { "epoch": 2.3637956524119805, "grad_norm": 0.09660159796476364, "learning_rate": 1.1781585885215981e-05, "loss": 0.1843, "step": 43170 }, { "epoch": 2.364069429995072, "grad_norm": 0.09394776821136475, "learning_rate": 1.1776515919691746e-05, "loss": 0.1856, "step": 43175 }, { "epoch": 2.3643432075781634, "grad_norm": 0.11993741244077682, "learning_rate": 1.1771445954167513e-05, "loss": 0.1942, "step": 43180 }, { "epoch": 2.364616985161255, "grad_norm": 0.10989251732826233, "learning_rate": 1.1766375988643278e-05, "loss": 0.1854, "step": 43185 }, { "epoch": 2.3648907627443467, "grad_norm": 0.0935320109128952, "learning_rate": 1.1761306023119044e-05, "loss": 0.181, "step": 43190 }, { "epoch": 2.365164540327438, "grad_norm": 0.09788829833269119, "learning_rate": 1.175623605759481e-05, "loss": 0.1752, "step": 43195 }, { "epoch": 2.3654383179105296, "grad_norm": 0.09321321547031403, "learning_rate": 1.1751166092070574e-05, "loss": 0.1799, "step": 43200 }, { "epoch": 2.365712095493621, "grad_norm": 0.10435386747121811, "learning_rate": 1.174609612654634e-05, "loss": 0.1743, "step": 43205 }, { "epoch": 2.3659858730767125, "grad_norm": 0.104353167116642, "learning_rate": 1.1741026161022106e-05, "loss": 0.1832, "step": 43210 }, { "epoch": 2.366259650659804, "grad_norm": 0.1076701432466507, "learning_rate": 1.1735956195497871e-05, "loss": 0.1816, "step": 43215 }, { "epoch": 2.3665334282428954, "grad_norm": 0.09490232169628143, "learning_rate": 1.1730886229973636e-05, "loss": 0.1756, "step": 43220 }, { "epoch": 2.366807205825987, "grad_norm": 0.0903131291270256, "learning_rate": 1.1725816264449401e-05, "loss": 0.1809, "step": 43225 }, { "epoch": 2.3670809834090782, "grad_norm": 0.10959084331989288, "learning_rate": 1.1720746298925168e-05, "loss": 0.1805, "step": 43230 }, { "epoch": 2.36735476099217, "grad_norm": 0.10200972110033035, "learning_rate": 1.1715676333400935e-05, "loss": 0.1804, "step": 43235 }, { "epoch": 2.3676285385752616, "grad_norm": 0.0973200798034668, "learning_rate": 1.17106063678767e-05, "loss": 0.1941, "step": 43240 }, { "epoch": 2.367902316158353, "grad_norm": 0.10536003857851028, "learning_rate": 1.1705536402352464e-05, "loss": 0.1836, "step": 43245 }, { "epoch": 2.3681760937414444, "grad_norm": 0.09245020896196365, "learning_rate": 1.1700466436828231e-05, "loss": 0.1759, "step": 43250 }, { "epoch": 2.368449871324536, "grad_norm": 0.1006908118724823, "learning_rate": 1.1695396471303996e-05, "loss": 0.183, "step": 43255 }, { "epoch": 2.3687236489076273, "grad_norm": 0.09650878608226776, "learning_rate": 1.1690326505779761e-05, "loss": 0.1896, "step": 43260 }, { "epoch": 2.368997426490719, "grad_norm": 0.11385229229927063, "learning_rate": 1.1685256540255526e-05, "loss": 0.1811, "step": 43265 }, { "epoch": 2.3692712040738106, "grad_norm": 0.09427885711193085, "learning_rate": 1.1680186574731293e-05, "loss": 0.1839, "step": 43270 }, { "epoch": 2.369544981656902, "grad_norm": 0.09301048517227173, "learning_rate": 1.1675116609207058e-05, "loss": 0.1885, "step": 43275 }, { "epoch": 2.3698187592399935, "grad_norm": 0.10991901904344559, "learning_rate": 1.1670046643682823e-05, "loss": 0.1864, "step": 43280 }, { "epoch": 2.3700925368230847, "grad_norm": 0.09830068051815033, "learning_rate": 1.1664976678158588e-05, "loss": 0.1806, "step": 43285 }, { "epoch": 2.3703663144061764, "grad_norm": 0.09111793339252472, "learning_rate": 1.1659906712634355e-05, "loss": 0.1795, "step": 43290 }, { "epoch": 2.370640091989268, "grad_norm": 0.10209548473358154, "learning_rate": 1.1654836747110121e-05, "loss": 0.1781, "step": 43295 }, { "epoch": 2.3709138695723593, "grad_norm": 0.11341918259859085, "learning_rate": 1.1649766781585886e-05, "loss": 0.1914, "step": 43300 }, { "epoch": 2.371187647155451, "grad_norm": 0.094599150121212, "learning_rate": 1.1644696816061651e-05, "loss": 0.1831, "step": 43305 }, { "epoch": 2.3714614247385426, "grad_norm": 0.10382118821144104, "learning_rate": 1.1639626850537418e-05, "loss": 0.1887, "step": 43310 }, { "epoch": 2.371735202321634, "grad_norm": 0.09582307934761047, "learning_rate": 1.1634556885013183e-05, "loss": 0.1807, "step": 43315 }, { "epoch": 2.3720089799047255, "grad_norm": 0.10594218969345093, "learning_rate": 1.1629486919488948e-05, "loss": 0.1876, "step": 43320 }, { "epoch": 2.3722827574878167, "grad_norm": 0.09728143364191055, "learning_rate": 1.1624416953964713e-05, "loss": 0.1884, "step": 43325 }, { "epoch": 2.3725565350709084, "grad_norm": 0.10540354996919632, "learning_rate": 1.1619346988440478e-05, "loss": 0.1905, "step": 43330 }, { "epoch": 2.372830312654, "grad_norm": 0.09318291395902634, "learning_rate": 1.1614277022916245e-05, "loss": 0.1812, "step": 43335 }, { "epoch": 2.3731040902370912, "grad_norm": 0.11210469156503677, "learning_rate": 1.160920705739201e-05, "loss": 0.1857, "step": 43340 }, { "epoch": 2.373377867820183, "grad_norm": 0.10530359297990799, "learning_rate": 1.1604137091867776e-05, "loss": 0.1857, "step": 43345 }, { "epoch": 2.3736516454032746, "grad_norm": 0.09828812628984451, "learning_rate": 1.1599067126343541e-05, "loss": 0.1822, "step": 43350 }, { "epoch": 2.3739254229863658, "grad_norm": 0.09739947319030762, "learning_rate": 1.1593997160819308e-05, "loss": 0.1803, "step": 43355 }, { "epoch": 2.3741992005694574, "grad_norm": 0.09132849425077438, "learning_rate": 1.1588927195295073e-05, "loss": 0.1889, "step": 43360 }, { "epoch": 2.374472978152549, "grad_norm": 0.11189767718315125, "learning_rate": 1.1583857229770838e-05, "loss": 0.1907, "step": 43365 }, { "epoch": 2.3747467557356403, "grad_norm": 0.09502337872982025, "learning_rate": 1.1578787264246603e-05, "loss": 0.1861, "step": 43370 }, { "epoch": 2.375020533318732, "grad_norm": 0.09717239439487457, "learning_rate": 1.157371729872237e-05, "loss": 0.1866, "step": 43375 }, { "epoch": 2.375294310901823, "grad_norm": 0.09609148651361465, "learning_rate": 1.1568647333198135e-05, "loss": 0.1751, "step": 43380 }, { "epoch": 2.375568088484915, "grad_norm": 0.08998480439186096, "learning_rate": 1.15635773676739e-05, "loss": 0.1872, "step": 43385 }, { "epoch": 2.3758418660680065, "grad_norm": 0.08929883688688278, "learning_rate": 1.1558507402149665e-05, "loss": 0.1842, "step": 43390 }, { "epoch": 2.3761156436510977, "grad_norm": 0.10341551899909973, "learning_rate": 1.1553437436625431e-05, "loss": 0.1928, "step": 43395 }, { "epoch": 2.3763894212341894, "grad_norm": 0.1060805395245552, "learning_rate": 1.1548367471101196e-05, "loss": 0.1889, "step": 43400 }, { "epoch": 2.3766631988172806, "grad_norm": 0.09725610166788101, "learning_rate": 1.1543297505576963e-05, "loss": 0.1776, "step": 43405 }, { "epoch": 2.3769369764003723, "grad_norm": 0.09450256824493408, "learning_rate": 1.1538227540052728e-05, "loss": 0.1825, "step": 43410 }, { "epoch": 2.377210753983464, "grad_norm": 0.09780421853065491, "learning_rate": 1.1533157574528495e-05, "loss": 0.1849, "step": 43415 }, { "epoch": 2.377484531566555, "grad_norm": 0.09059769660234451, "learning_rate": 1.152808760900426e-05, "loss": 0.1846, "step": 43420 }, { "epoch": 2.377758309149647, "grad_norm": 0.0997864380478859, "learning_rate": 1.1523017643480025e-05, "loss": 0.1886, "step": 43425 }, { "epoch": 2.3780320867327385, "grad_norm": 0.11087071895599365, "learning_rate": 1.151794767795579e-05, "loss": 0.1836, "step": 43430 }, { "epoch": 2.3783058643158297, "grad_norm": 0.10201950371265411, "learning_rate": 1.1512877712431556e-05, "loss": 0.1812, "step": 43435 }, { "epoch": 2.3785796418989213, "grad_norm": 0.09096484631299973, "learning_rate": 1.1507807746907321e-05, "loss": 0.1757, "step": 43440 }, { "epoch": 2.378853419482013, "grad_norm": 0.09099985659122467, "learning_rate": 1.1502737781383086e-05, "loss": 0.1785, "step": 43445 }, { "epoch": 2.3791271970651042, "grad_norm": 0.10395406931638718, "learning_rate": 1.1497667815858851e-05, "loss": 0.1883, "step": 43450 }, { "epoch": 2.379400974648196, "grad_norm": 0.09091079980134964, "learning_rate": 1.1492597850334618e-05, "loss": 0.1819, "step": 43455 }, { "epoch": 2.3796747522312875, "grad_norm": 0.09625767916440964, "learning_rate": 1.1487527884810383e-05, "loss": 0.1797, "step": 43460 }, { "epoch": 2.3799485298143788, "grad_norm": 0.09963355213403702, "learning_rate": 1.148245791928615e-05, "loss": 0.1823, "step": 43465 }, { "epoch": 2.3802223073974704, "grad_norm": 0.09287748485803604, "learning_rate": 1.1477387953761915e-05, "loss": 0.1835, "step": 43470 }, { "epoch": 2.3804960849805616, "grad_norm": 0.10339187830686569, "learning_rate": 1.1472317988237682e-05, "loss": 0.1854, "step": 43475 }, { "epoch": 2.3807698625636533, "grad_norm": 0.10911250859498978, "learning_rate": 1.1467248022713447e-05, "loss": 0.1893, "step": 43480 }, { "epoch": 2.381043640146745, "grad_norm": 0.0994696170091629, "learning_rate": 1.1462178057189212e-05, "loss": 0.187, "step": 43485 }, { "epoch": 2.381317417729836, "grad_norm": 0.09921379387378693, "learning_rate": 1.1457108091664977e-05, "loss": 0.1897, "step": 43490 }, { "epoch": 2.381591195312928, "grad_norm": 0.1049942597746849, "learning_rate": 1.1452038126140743e-05, "loss": 0.186, "step": 43495 }, { "epoch": 2.381864972896019, "grad_norm": 0.09589684754610062, "learning_rate": 1.1446968160616508e-05, "loss": 0.1838, "step": 43500 }, { "epoch": 2.3821387504791107, "grad_norm": 0.08923256397247314, "learning_rate": 1.1441898195092273e-05, "loss": 0.1794, "step": 43505 }, { "epoch": 2.3824125280622024, "grad_norm": 0.10282454639673233, "learning_rate": 1.1436828229568038e-05, "loss": 0.185, "step": 43510 }, { "epoch": 2.3826863056452936, "grad_norm": 0.106877900660038, "learning_rate": 1.1431758264043805e-05, "loss": 0.1851, "step": 43515 }, { "epoch": 2.3829600832283853, "grad_norm": 0.1254856437444687, "learning_rate": 1.1426688298519572e-05, "loss": 0.1861, "step": 43520 }, { "epoch": 2.383233860811477, "grad_norm": 0.1212785616517067, "learning_rate": 1.1421618332995337e-05, "loss": 0.1882, "step": 43525 }, { "epoch": 2.383507638394568, "grad_norm": 0.10728441923856735, "learning_rate": 1.1416548367471102e-05, "loss": 0.188, "step": 43530 }, { "epoch": 2.38378141597766, "grad_norm": 0.1003468856215477, "learning_rate": 1.1411478401946868e-05, "loss": 0.184, "step": 43535 }, { "epoch": 2.3840551935607515, "grad_norm": 0.09858320653438568, "learning_rate": 1.1406408436422633e-05, "loss": 0.1747, "step": 43540 }, { "epoch": 2.3843289711438427, "grad_norm": 0.10051478445529938, "learning_rate": 1.1401338470898398e-05, "loss": 0.1824, "step": 43545 }, { "epoch": 2.3846027487269343, "grad_norm": 0.10304851830005646, "learning_rate": 1.1396268505374163e-05, "loss": 0.1957, "step": 43550 }, { "epoch": 2.3848765263100256, "grad_norm": 0.09176000952720642, "learning_rate": 1.139119853984993e-05, "loss": 0.1845, "step": 43555 }, { "epoch": 2.385150303893117, "grad_norm": 0.0943320095539093, "learning_rate": 1.1386128574325695e-05, "loss": 0.1753, "step": 43560 }, { "epoch": 2.385424081476209, "grad_norm": 0.09906484931707382, "learning_rate": 1.138105860880146e-05, "loss": 0.1838, "step": 43565 }, { "epoch": 2.3856978590593, "grad_norm": 0.08938904106616974, "learning_rate": 1.1375988643277227e-05, "loss": 0.1822, "step": 43570 }, { "epoch": 2.3859716366423918, "grad_norm": 0.09979480504989624, "learning_rate": 1.1370918677752992e-05, "loss": 0.1886, "step": 43575 }, { "epoch": 2.386245414225483, "grad_norm": 0.10037898272275925, "learning_rate": 1.1365848712228758e-05, "loss": 0.1877, "step": 43580 }, { "epoch": 2.3865191918085746, "grad_norm": 0.0926484540104866, "learning_rate": 1.1360778746704523e-05, "loss": 0.1789, "step": 43585 }, { "epoch": 2.3867929693916663, "grad_norm": 0.10051947087049484, "learning_rate": 1.1355708781180288e-05, "loss": 0.1814, "step": 43590 }, { "epoch": 2.3870667469747575, "grad_norm": 0.09232907742261887, "learning_rate": 1.1350638815656055e-05, "loss": 0.1735, "step": 43595 }, { "epoch": 2.387340524557849, "grad_norm": 0.09933362156152725, "learning_rate": 1.134556885013182e-05, "loss": 0.1789, "step": 43600 }, { "epoch": 2.387614302140941, "grad_norm": 0.09885677695274353, "learning_rate": 1.1340498884607585e-05, "loss": 0.185, "step": 43605 }, { "epoch": 2.387888079724032, "grad_norm": 0.0985121876001358, "learning_rate": 1.133542891908335e-05, "loss": 0.1894, "step": 43610 }, { "epoch": 2.3881618573071237, "grad_norm": 0.11458995193243027, "learning_rate": 1.1330358953559117e-05, "loss": 0.1882, "step": 43615 }, { "epoch": 2.3884356348902154, "grad_norm": 0.10536766052246094, "learning_rate": 1.1325288988034882e-05, "loss": 0.1885, "step": 43620 }, { "epoch": 2.3887094124733066, "grad_norm": 0.09186401218175888, "learning_rate": 1.1320219022510647e-05, "loss": 0.1832, "step": 43625 }, { "epoch": 2.3889831900563983, "grad_norm": 0.08957720547914505, "learning_rate": 1.1315149056986413e-05, "loss": 0.1781, "step": 43630 }, { "epoch": 2.38925696763949, "grad_norm": 0.11372649669647217, "learning_rate": 1.1310079091462178e-05, "loss": 0.1865, "step": 43635 }, { "epoch": 2.389530745222581, "grad_norm": 0.09110240638256073, "learning_rate": 1.1305009125937945e-05, "loss": 0.18, "step": 43640 }, { "epoch": 2.389804522805673, "grad_norm": 0.09642402827739716, "learning_rate": 1.129993916041371e-05, "loss": 0.1786, "step": 43645 }, { "epoch": 2.390078300388764, "grad_norm": 0.10552529990673065, "learning_rate": 1.1294869194889475e-05, "loss": 0.1861, "step": 43650 }, { "epoch": 2.3903520779718557, "grad_norm": 0.10388562828302383, "learning_rate": 1.128979922936524e-05, "loss": 0.1828, "step": 43655 }, { "epoch": 2.3906258555549473, "grad_norm": 0.09257858991622925, "learning_rate": 1.1284729263841007e-05, "loss": 0.1822, "step": 43660 }, { "epoch": 2.3908996331380385, "grad_norm": 0.09497532248497009, "learning_rate": 1.1279659298316772e-05, "loss": 0.1795, "step": 43665 }, { "epoch": 2.39117341072113, "grad_norm": 0.09024639427661896, "learning_rate": 1.1274589332792537e-05, "loss": 0.1855, "step": 43670 }, { "epoch": 2.3914471883042214, "grad_norm": 0.09755907207727432, "learning_rate": 1.1269519367268302e-05, "loss": 0.1821, "step": 43675 }, { "epoch": 2.391720965887313, "grad_norm": 0.10398638248443604, "learning_rate": 1.1264449401744069e-05, "loss": 0.1924, "step": 43680 }, { "epoch": 2.3919947434704047, "grad_norm": 0.11691278219223022, "learning_rate": 1.1259379436219834e-05, "loss": 0.1922, "step": 43685 }, { "epoch": 2.392268521053496, "grad_norm": 0.09476988762617111, "learning_rate": 1.12543094706956e-05, "loss": 0.1829, "step": 43690 }, { "epoch": 2.3925422986365876, "grad_norm": 0.11651644855737686, "learning_rate": 1.1249239505171365e-05, "loss": 0.181, "step": 43695 }, { "epoch": 2.3928160762196793, "grad_norm": 0.09500043839216232, "learning_rate": 1.1244169539647132e-05, "loss": 0.1829, "step": 43700 }, { "epoch": 2.3930898538027705, "grad_norm": 0.09499546885490417, "learning_rate": 1.1239099574122897e-05, "loss": 0.1876, "step": 43705 }, { "epoch": 2.393363631385862, "grad_norm": 0.10773197561502457, "learning_rate": 1.1234029608598662e-05, "loss": 0.1918, "step": 43710 }, { "epoch": 2.393637408968954, "grad_norm": 0.10157009959220886, "learning_rate": 1.1228959643074427e-05, "loss": 0.1865, "step": 43715 }, { "epoch": 2.393911186552045, "grad_norm": 0.09113695472478867, "learning_rate": 1.1223889677550194e-05, "loss": 0.1811, "step": 43720 }, { "epoch": 2.3941849641351367, "grad_norm": 0.09483330696821213, "learning_rate": 1.1218819712025959e-05, "loss": 0.1867, "step": 43725 }, { "epoch": 2.394458741718228, "grad_norm": 0.09984166920185089, "learning_rate": 1.1213749746501724e-05, "loss": 0.178, "step": 43730 }, { "epoch": 2.3947325193013196, "grad_norm": 0.10545103251934052, "learning_rate": 1.1208679780977489e-05, "loss": 0.1816, "step": 43735 }, { "epoch": 2.3950062968844112, "grad_norm": 0.10500838607549667, "learning_rate": 1.1203609815453255e-05, "loss": 0.1816, "step": 43740 }, { "epoch": 2.3952800744675025, "grad_norm": 0.10179710388183594, "learning_rate": 1.1198539849929022e-05, "loss": 0.1799, "step": 43745 }, { "epoch": 2.395553852050594, "grad_norm": 0.10075302422046661, "learning_rate": 1.1193469884404787e-05, "loss": 0.1805, "step": 43750 }, { "epoch": 2.395827629633686, "grad_norm": 0.09655048698186874, "learning_rate": 1.1188399918880552e-05, "loss": 0.1775, "step": 43755 }, { "epoch": 2.396101407216777, "grad_norm": 0.09366731345653534, "learning_rate": 1.1183329953356319e-05, "loss": 0.1808, "step": 43760 }, { "epoch": 2.3963751847998687, "grad_norm": 0.10153594613075256, "learning_rate": 1.1178259987832084e-05, "loss": 0.1807, "step": 43765 }, { "epoch": 2.39664896238296, "grad_norm": 0.09812310338020325, "learning_rate": 1.1173190022307849e-05, "loss": 0.1865, "step": 43770 }, { "epoch": 2.3969227399660515, "grad_norm": 0.10331487655639648, "learning_rate": 1.1168120056783614e-05, "loss": 0.1795, "step": 43775 }, { "epoch": 2.397196517549143, "grad_norm": 0.09614002704620361, "learning_rate": 1.116305009125938e-05, "loss": 0.1814, "step": 43780 }, { "epoch": 2.3974702951322344, "grad_norm": 0.10609035938978195, "learning_rate": 1.1157980125735145e-05, "loss": 0.1867, "step": 43785 }, { "epoch": 2.397744072715326, "grad_norm": 0.08928687870502472, "learning_rate": 1.115291016021091e-05, "loss": 0.1853, "step": 43790 }, { "epoch": 2.3980178502984177, "grad_norm": 0.10577816516160965, "learning_rate": 1.1147840194686675e-05, "loss": 0.1845, "step": 43795 }, { "epoch": 2.398291627881509, "grad_norm": 0.097402423620224, "learning_rate": 1.1142770229162442e-05, "loss": 0.1809, "step": 43800 }, { "epoch": 2.3985654054646006, "grad_norm": 0.10083486884832382, "learning_rate": 1.1137700263638209e-05, "loss": 0.1887, "step": 43805 }, { "epoch": 2.3988391830476923, "grad_norm": 0.09267036616802216, "learning_rate": 1.1132630298113974e-05, "loss": 0.1837, "step": 43810 }, { "epoch": 2.3991129606307835, "grad_norm": 0.10177182406187057, "learning_rate": 1.1127560332589739e-05, "loss": 0.1868, "step": 43815 }, { "epoch": 2.399386738213875, "grad_norm": 0.09322796761989594, "learning_rate": 1.1122490367065505e-05, "loss": 0.1802, "step": 43820 }, { "epoch": 2.3996605157969664, "grad_norm": 0.09371720999479294, "learning_rate": 1.111742040154127e-05, "loss": 0.1763, "step": 43825 }, { "epoch": 2.399934293380058, "grad_norm": 0.09897329658269882, "learning_rate": 1.1112350436017035e-05, "loss": 0.1863, "step": 43830 }, { "epoch": 2.4002080709631497, "grad_norm": 0.10653197765350342, "learning_rate": 1.11072804704928e-05, "loss": 0.1843, "step": 43835 }, { "epoch": 2.400481848546241, "grad_norm": 0.0979604721069336, "learning_rate": 1.1102210504968567e-05, "loss": 0.1762, "step": 43840 }, { "epoch": 2.4007556261293326, "grad_norm": 0.10285837948322296, "learning_rate": 1.1097140539444332e-05, "loss": 0.1744, "step": 43845 }, { "epoch": 2.401029403712424, "grad_norm": 0.10361108183860779, "learning_rate": 1.1092070573920097e-05, "loss": 0.1843, "step": 43850 }, { "epoch": 2.4013031812955155, "grad_norm": 0.10550296306610107, "learning_rate": 1.1087000608395864e-05, "loss": 0.1876, "step": 43855 }, { "epoch": 2.401576958878607, "grad_norm": 0.0932803824543953, "learning_rate": 1.1081930642871629e-05, "loss": 0.1888, "step": 43860 }, { "epoch": 2.4018507364616983, "grad_norm": 0.11518201231956482, "learning_rate": 1.1076860677347395e-05, "loss": 0.1837, "step": 43865 }, { "epoch": 2.40212451404479, "grad_norm": 0.09207317978143692, "learning_rate": 1.107179071182316e-05, "loss": 0.1786, "step": 43870 }, { "epoch": 2.4023982916278817, "grad_norm": 0.09879032522439957, "learning_rate": 1.1066720746298925e-05, "loss": 0.1839, "step": 43875 }, { "epoch": 2.402672069210973, "grad_norm": 0.09819487482309341, "learning_rate": 1.1061650780774692e-05, "loss": 0.1802, "step": 43880 }, { "epoch": 2.4029458467940645, "grad_norm": 0.10910527408123016, "learning_rate": 1.1056580815250457e-05, "loss": 0.1912, "step": 43885 }, { "epoch": 2.403219624377156, "grad_norm": 0.09123333543539047, "learning_rate": 1.1051510849726222e-05, "loss": 0.1809, "step": 43890 }, { "epoch": 2.4034934019602474, "grad_norm": 0.09594841301441193, "learning_rate": 1.1046440884201987e-05, "loss": 0.1891, "step": 43895 }, { "epoch": 2.403767179543339, "grad_norm": 0.09781014919281006, "learning_rate": 1.1041370918677754e-05, "loss": 0.1856, "step": 43900 }, { "epoch": 2.4040409571264307, "grad_norm": 0.10533323884010315, "learning_rate": 1.1036300953153519e-05, "loss": 0.1933, "step": 43905 }, { "epoch": 2.404314734709522, "grad_norm": 0.10192050784826279, "learning_rate": 1.1031230987629284e-05, "loss": 0.1796, "step": 43910 }, { "epoch": 2.4045885122926136, "grad_norm": 0.09791847318410873, "learning_rate": 1.102616102210505e-05, "loss": 0.1882, "step": 43915 }, { "epoch": 2.404862289875705, "grad_norm": 0.09771980345249176, "learning_rate": 1.1021091056580816e-05, "loss": 0.1848, "step": 43920 }, { "epoch": 2.4051360674587965, "grad_norm": 0.10088096559047699, "learning_rate": 1.1016021091056582e-05, "loss": 0.181, "step": 43925 }, { "epoch": 2.405409845041888, "grad_norm": 0.09836041927337646, "learning_rate": 1.1010951125532347e-05, "loss": 0.1863, "step": 43930 }, { "epoch": 2.4056836226249794, "grad_norm": 0.12786641716957092, "learning_rate": 1.1005881160008112e-05, "loss": 0.1834, "step": 43935 }, { "epoch": 2.405957400208071, "grad_norm": 0.11007501929998398, "learning_rate": 1.1000811194483877e-05, "loss": 0.1836, "step": 43940 }, { "epoch": 2.4062311777911622, "grad_norm": 0.10759413242340088, "learning_rate": 1.0995741228959644e-05, "loss": 0.1859, "step": 43945 }, { "epoch": 2.406504955374254, "grad_norm": 0.10227707773447037, "learning_rate": 1.0990671263435409e-05, "loss": 0.1767, "step": 43950 }, { "epoch": 2.4067787329573456, "grad_norm": 0.1100749671459198, "learning_rate": 1.0985601297911174e-05, "loss": 0.1859, "step": 43955 }, { "epoch": 2.407052510540437, "grad_norm": 0.09656435996294022, "learning_rate": 1.0980531332386939e-05, "loss": 0.1931, "step": 43960 }, { "epoch": 2.4073262881235284, "grad_norm": 0.08812649548053741, "learning_rate": 1.0975461366862706e-05, "loss": 0.1824, "step": 43965 }, { "epoch": 2.40760006570662, "grad_norm": 0.10635994374752045, "learning_rate": 1.0970391401338472e-05, "loss": 0.1844, "step": 43970 }, { "epoch": 2.4078738432897113, "grad_norm": 0.10026815533638, "learning_rate": 1.0965321435814237e-05, "loss": 0.1853, "step": 43975 }, { "epoch": 2.408147620872803, "grad_norm": 0.09803330153226852, "learning_rate": 1.0960251470290002e-05, "loss": 0.1819, "step": 43980 }, { "epoch": 2.4084213984558946, "grad_norm": 0.08714905381202698, "learning_rate": 1.0955181504765769e-05, "loss": 0.1842, "step": 43985 }, { "epoch": 2.408695176038986, "grad_norm": 0.10079178214073181, "learning_rate": 1.0950111539241534e-05, "loss": 0.1879, "step": 43990 }, { "epoch": 2.4089689536220775, "grad_norm": 0.11605609953403473, "learning_rate": 1.0945041573717299e-05, "loss": 0.1881, "step": 43995 }, { "epoch": 2.4092427312051687, "grad_norm": 0.09860466420650482, "learning_rate": 1.0939971608193064e-05, "loss": 0.1879, "step": 44000 }, { "epoch": 2.4095165087882604, "grad_norm": 0.0924578383564949, "learning_rate": 1.093490164266883e-05, "loss": 0.1781, "step": 44005 }, { "epoch": 2.409790286371352, "grad_norm": 0.09708289802074432, "learning_rate": 1.0929831677144596e-05, "loss": 0.1812, "step": 44010 }, { "epoch": 2.4100640639544433, "grad_norm": 0.09688571840524673, "learning_rate": 1.092476171162036e-05, "loss": 0.1822, "step": 44015 }, { "epoch": 2.410337841537535, "grad_norm": 0.10198963433504105, "learning_rate": 1.0919691746096126e-05, "loss": 0.1872, "step": 44020 }, { "epoch": 2.410611619120626, "grad_norm": 0.09602273255586624, "learning_rate": 1.0914621780571892e-05, "loss": 0.1822, "step": 44025 }, { "epoch": 2.410885396703718, "grad_norm": 0.09841657429933548, "learning_rate": 1.0909551815047659e-05, "loss": 0.1852, "step": 44030 }, { "epoch": 2.4111591742868095, "grad_norm": 0.09447453171014786, "learning_rate": 1.0904481849523424e-05, "loss": 0.1797, "step": 44035 }, { "epoch": 2.4114329518699007, "grad_norm": 0.09775770455598831, "learning_rate": 1.0899411883999189e-05, "loss": 0.19, "step": 44040 }, { "epoch": 2.4117067294529924, "grad_norm": 0.11312292516231537, "learning_rate": 1.0894341918474956e-05, "loss": 0.1848, "step": 44045 }, { "epoch": 2.411980507036084, "grad_norm": 0.09416934847831726, "learning_rate": 1.088927195295072e-05, "loss": 0.184, "step": 44050 }, { "epoch": 2.4122542846191752, "grad_norm": 0.10607683658599854, "learning_rate": 1.0884201987426486e-05, "loss": 0.1789, "step": 44055 }, { "epoch": 2.412528062202267, "grad_norm": 0.09931603074073792, "learning_rate": 1.087913202190225e-05, "loss": 0.1856, "step": 44060 }, { "epoch": 2.4128018397853586, "grad_norm": 0.09390635788440704, "learning_rate": 1.0874062056378017e-05, "loss": 0.1817, "step": 44065 }, { "epoch": 2.4130756173684498, "grad_norm": 0.10164294391870499, "learning_rate": 1.0868992090853782e-05, "loss": 0.1839, "step": 44070 }, { "epoch": 2.4133493949515414, "grad_norm": 0.09887994080781937, "learning_rate": 1.0863922125329547e-05, "loss": 0.1859, "step": 44075 }, { "epoch": 2.413623172534633, "grad_norm": 0.09981602430343628, "learning_rate": 1.0858852159805314e-05, "loss": 0.1884, "step": 44080 }, { "epoch": 2.4138969501177243, "grad_norm": 0.09894982725381851, "learning_rate": 1.0853782194281079e-05, "loss": 0.1845, "step": 44085 }, { "epoch": 2.414170727700816, "grad_norm": 0.0944930762052536, "learning_rate": 1.0848712228756846e-05, "loss": 0.1878, "step": 44090 }, { "epoch": 2.414444505283907, "grad_norm": 0.10118921846151352, "learning_rate": 1.084364226323261e-05, "loss": 0.1789, "step": 44095 }, { "epoch": 2.414718282866999, "grad_norm": 0.09755910933017731, "learning_rate": 1.0838572297708376e-05, "loss": 0.1842, "step": 44100 }, { "epoch": 2.4149920604500905, "grad_norm": 0.10645733773708344, "learning_rate": 1.0833502332184143e-05, "loss": 0.1851, "step": 44105 }, { "epoch": 2.4152658380331817, "grad_norm": 0.09906741231679916, "learning_rate": 1.0828432366659908e-05, "loss": 0.181, "step": 44110 }, { "epoch": 2.4155396156162734, "grad_norm": 0.09692629426717758, "learning_rate": 1.0823362401135673e-05, "loss": 0.1775, "step": 44115 }, { "epoch": 2.4158133931993646, "grad_norm": 0.10192862898111343, "learning_rate": 1.0818292435611438e-05, "loss": 0.1867, "step": 44120 }, { "epoch": 2.4160871707824563, "grad_norm": 0.09359747171401978, "learning_rate": 1.0813222470087204e-05, "loss": 0.1825, "step": 44125 }, { "epoch": 2.416360948365548, "grad_norm": 0.09503020346164703, "learning_rate": 1.080815250456297e-05, "loss": 0.1841, "step": 44130 }, { "epoch": 2.416634725948639, "grad_norm": 0.09123598039150238, "learning_rate": 1.0803082539038734e-05, "loss": 0.1851, "step": 44135 }, { "epoch": 2.416908503531731, "grad_norm": 0.09531624615192413, "learning_rate": 1.0798012573514501e-05, "loss": 0.184, "step": 44140 }, { "epoch": 2.4171822811148225, "grad_norm": 0.11146391928195953, "learning_rate": 1.0792942607990268e-05, "loss": 0.1808, "step": 44145 }, { "epoch": 2.4174560586979137, "grad_norm": 0.09768811613321304, "learning_rate": 1.0787872642466033e-05, "loss": 0.1829, "step": 44150 }, { "epoch": 2.4177298362810054, "grad_norm": 0.09265401214361191, "learning_rate": 1.0782802676941798e-05, "loss": 0.1841, "step": 44155 }, { "epoch": 2.418003613864097, "grad_norm": 0.10153961926698685, "learning_rate": 1.0777732711417563e-05, "loss": 0.1799, "step": 44160 }, { "epoch": 2.4182773914471882, "grad_norm": 0.09007935225963593, "learning_rate": 1.077266274589333e-05, "loss": 0.1825, "step": 44165 }, { "epoch": 2.41855116903028, "grad_norm": 0.09610670059919357, "learning_rate": 1.0767592780369094e-05, "loss": 0.188, "step": 44170 }, { "epoch": 2.418824946613371, "grad_norm": 0.0869923010468483, "learning_rate": 1.076252281484486e-05, "loss": 0.1811, "step": 44175 }, { "epoch": 2.4190987241964628, "grad_norm": 0.09662626683712006, "learning_rate": 1.0757452849320624e-05, "loss": 0.1882, "step": 44180 }, { "epoch": 2.4193725017795544, "grad_norm": 0.09763441234827042, "learning_rate": 1.0752382883796391e-05, "loss": 0.1863, "step": 44185 }, { "epoch": 2.4196462793626456, "grad_norm": 0.09926164150238037, "learning_rate": 1.0747312918272156e-05, "loss": 0.1834, "step": 44190 }, { "epoch": 2.4199200569457373, "grad_norm": 0.10765910148620605, "learning_rate": 1.0742242952747921e-05, "loss": 0.1883, "step": 44195 }, { "epoch": 2.420193834528829, "grad_norm": 0.1038895845413208, "learning_rate": 1.0737172987223688e-05, "loss": 0.1878, "step": 44200 }, { "epoch": 2.42046761211192, "grad_norm": 0.10275372117757797, "learning_rate": 1.0732103021699454e-05, "loss": 0.1877, "step": 44205 }, { "epoch": 2.420741389695012, "grad_norm": 0.105167455971241, "learning_rate": 1.072703305617522e-05, "loss": 0.1805, "step": 44210 }, { "epoch": 2.421015167278103, "grad_norm": 0.1005648821592331, "learning_rate": 1.0721963090650984e-05, "loss": 0.1818, "step": 44215 }, { "epoch": 2.4212889448611947, "grad_norm": 0.09113462269306183, "learning_rate": 1.071689312512675e-05, "loss": 0.1772, "step": 44220 }, { "epoch": 2.4215627224442864, "grad_norm": 0.11100755631923676, "learning_rate": 1.0711823159602514e-05, "loss": 0.1836, "step": 44225 }, { "epoch": 2.4218365000273776, "grad_norm": 0.1024274155497551, "learning_rate": 1.0706753194078281e-05, "loss": 0.1864, "step": 44230 }, { "epoch": 2.4221102776104693, "grad_norm": 0.0955883264541626, "learning_rate": 1.0701683228554046e-05, "loss": 0.1907, "step": 44235 }, { "epoch": 2.422384055193561, "grad_norm": 0.10509393364191055, "learning_rate": 1.0696613263029811e-05, "loss": 0.183, "step": 44240 }, { "epoch": 2.422657832776652, "grad_norm": 0.10597097128629684, "learning_rate": 1.0691543297505576e-05, "loss": 0.1874, "step": 44245 }, { "epoch": 2.422931610359744, "grad_norm": 0.103736512362957, "learning_rate": 1.0686473331981343e-05, "loss": 0.1872, "step": 44250 }, { "epoch": 2.4232053879428355, "grad_norm": 0.09553594142198563, "learning_rate": 1.068140336645711e-05, "loss": 0.1895, "step": 44255 }, { "epoch": 2.4234791655259267, "grad_norm": 0.09410140663385391, "learning_rate": 1.0676333400932874e-05, "loss": 0.1845, "step": 44260 }, { "epoch": 2.4237529431090183, "grad_norm": 0.0985005795955658, "learning_rate": 1.067126343540864e-05, "loss": 0.1895, "step": 44265 }, { "epoch": 2.4240267206921096, "grad_norm": 0.11494415253400803, "learning_rate": 1.0666193469884406e-05, "loss": 0.1836, "step": 44270 }, { "epoch": 2.424300498275201, "grad_norm": 0.10065501928329468, "learning_rate": 1.0661123504360171e-05, "loss": 0.187, "step": 44275 }, { "epoch": 2.424574275858293, "grad_norm": 0.10370209068059921, "learning_rate": 1.0656053538835936e-05, "loss": 0.1902, "step": 44280 }, { "epoch": 2.424848053441384, "grad_norm": 0.09911768138408661, "learning_rate": 1.0650983573311701e-05, "loss": 0.1774, "step": 44285 }, { "epoch": 2.4251218310244758, "grad_norm": 0.09758676588535309, "learning_rate": 1.0645913607787468e-05, "loss": 0.1891, "step": 44290 }, { "epoch": 2.425395608607567, "grad_norm": 0.09478362649679184, "learning_rate": 1.0640843642263233e-05, "loss": 0.1859, "step": 44295 }, { "epoch": 2.4256693861906586, "grad_norm": 0.10352443158626556, "learning_rate": 1.0635773676738998e-05, "loss": 0.1835, "step": 44300 }, { "epoch": 2.4259431637737503, "grad_norm": 0.09924434870481491, "learning_rate": 1.0630703711214765e-05, "loss": 0.1854, "step": 44305 }, { "epoch": 2.4262169413568415, "grad_norm": 0.11130892485380173, "learning_rate": 1.062563374569053e-05, "loss": 0.1783, "step": 44310 }, { "epoch": 2.426490718939933, "grad_norm": 0.10374338179826736, "learning_rate": 1.0620563780166296e-05, "loss": 0.1781, "step": 44315 }, { "epoch": 2.426764496523025, "grad_norm": 0.09663322567939758, "learning_rate": 1.0615493814642061e-05, "loss": 0.1829, "step": 44320 }, { "epoch": 2.427038274106116, "grad_norm": 0.08898560702800751, "learning_rate": 1.0610423849117826e-05, "loss": 0.1798, "step": 44325 }, { "epoch": 2.4273120516892077, "grad_norm": 0.10074219107627869, "learning_rate": 1.0605353883593593e-05, "loss": 0.1883, "step": 44330 }, { "epoch": 2.4275858292722994, "grad_norm": 0.0918743908405304, "learning_rate": 1.0600283918069358e-05, "loss": 0.1855, "step": 44335 }, { "epoch": 2.4278596068553906, "grad_norm": 0.09903676807880402, "learning_rate": 1.0595213952545123e-05, "loss": 0.1902, "step": 44340 }, { "epoch": 2.4281333844384823, "grad_norm": 0.09124469012022018, "learning_rate": 1.0590143987020888e-05, "loss": 0.1768, "step": 44345 }, { "epoch": 2.428407162021574, "grad_norm": 0.09908977150917053, "learning_rate": 1.0585074021496655e-05, "loss": 0.1899, "step": 44350 }, { "epoch": 2.428680939604665, "grad_norm": 0.08971045166254044, "learning_rate": 1.058000405597242e-05, "loss": 0.1822, "step": 44355 }, { "epoch": 2.428954717187757, "grad_norm": 0.09279836714267731, "learning_rate": 1.0574934090448185e-05, "loss": 0.1823, "step": 44360 }, { "epoch": 2.429228494770848, "grad_norm": 0.10299209505319595, "learning_rate": 1.0569864124923951e-05, "loss": 0.1764, "step": 44365 }, { "epoch": 2.4295022723539397, "grad_norm": 0.0957610160112381, "learning_rate": 1.0564794159399718e-05, "loss": 0.1869, "step": 44370 }, { "epoch": 2.4297760499370313, "grad_norm": 0.10710897296667099, "learning_rate": 1.0559724193875483e-05, "loss": 0.189, "step": 44375 }, { "epoch": 2.4300498275201226, "grad_norm": 0.0983012467622757, "learning_rate": 1.0554654228351248e-05, "loss": 0.1819, "step": 44380 }, { "epoch": 2.430323605103214, "grad_norm": 0.08858396857976913, "learning_rate": 1.0549584262827013e-05, "loss": 0.184, "step": 44385 }, { "epoch": 2.4305973826863054, "grad_norm": 0.0933871790766716, "learning_rate": 1.054451429730278e-05, "loss": 0.1802, "step": 44390 }, { "epoch": 2.430871160269397, "grad_norm": 0.08572863042354584, "learning_rate": 1.0539444331778545e-05, "loss": 0.1829, "step": 44395 }, { "epoch": 2.4311449378524888, "grad_norm": 0.09445881843566895, "learning_rate": 1.053437436625431e-05, "loss": 0.1804, "step": 44400 }, { "epoch": 2.43141871543558, "grad_norm": 0.08886948972940445, "learning_rate": 1.0529304400730075e-05, "loss": 0.1844, "step": 44405 }, { "epoch": 2.4316924930186716, "grad_norm": 0.0947953462600708, "learning_rate": 1.0524234435205841e-05, "loss": 0.1861, "step": 44410 }, { "epoch": 2.4319662706017633, "grad_norm": 0.09270256012678146, "learning_rate": 1.0519164469681606e-05, "loss": 0.186, "step": 44415 }, { "epoch": 2.4322400481848545, "grad_norm": 0.10398959368467331, "learning_rate": 1.0514094504157371e-05, "loss": 0.1774, "step": 44420 }, { "epoch": 2.432513825767946, "grad_norm": 0.09613058716058731, "learning_rate": 1.0509024538633138e-05, "loss": 0.1848, "step": 44425 }, { "epoch": 2.432787603351038, "grad_norm": 0.09089330583810806, "learning_rate": 1.0503954573108905e-05, "loss": 0.1786, "step": 44430 }, { "epoch": 2.433061380934129, "grad_norm": 0.10037349164485931, "learning_rate": 1.049888460758467e-05, "loss": 0.1832, "step": 44435 }, { "epoch": 2.4333351585172207, "grad_norm": 0.11027474701404572, "learning_rate": 1.0493814642060435e-05, "loss": 0.1875, "step": 44440 }, { "epoch": 2.433608936100312, "grad_norm": 0.1134481132030487, "learning_rate": 1.04887446765362e-05, "loss": 0.1877, "step": 44445 }, { "epoch": 2.4338827136834036, "grad_norm": 0.09879853576421738, "learning_rate": 1.0483674711011966e-05, "loss": 0.1839, "step": 44450 }, { "epoch": 2.4341564912664952, "grad_norm": 0.10033170878887177, "learning_rate": 1.0478604745487731e-05, "loss": 0.192, "step": 44455 }, { "epoch": 2.4344302688495865, "grad_norm": 0.10092251002788544, "learning_rate": 1.0473534779963496e-05, "loss": 0.1755, "step": 44460 }, { "epoch": 2.434704046432678, "grad_norm": 0.11301536858081818, "learning_rate": 1.0468464814439261e-05, "loss": 0.1875, "step": 44465 }, { "epoch": 2.4349778240157693, "grad_norm": 0.09668336063623428, "learning_rate": 1.0463394848915028e-05, "loss": 0.1832, "step": 44470 }, { "epoch": 2.435251601598861, "grad_norm": 0.11081787943840027, "learning_rate": 1.0458324883390793e-05, "loss": 0.179, "step": 44475 }, { "epoch": 2.4355253791819527, "grad_norm": 0.09453868120908737, "learning_rate": 1.045325491786656e-05, "loss": 0.1806, "step": 44480 }, { "epoch": 2.435799156765044, "grad_norm": 0.09247345477342606, "learning_rate": 1.0448184952342325e-05, "loss": 0.1822, "step": 44485 }, { "epoch": 2.4360729343481355, "grad_norm": 0.0994827002286911, "learning_rate": 1.0443114986818091e-05, "loss": 0.1807, "step": 44490 }, { "epoch": 2.436346711931227, "grad_norm": 0.09372078627347946, "learning_rate": 1.0438045021293856e-05, "loss": 0.1829, "step": 44495 }, { "epoch": 2.4366204895143184, "grad_norm": 0.0975797101855278, "learning_rate": 1.0432975055769621e-05, "loss": 0.1797, "step": 44500 }, { "epoch": 2.43689426709741, "grad_norm": 0.09751947969198227, "learning_rate": 1.0427905090245386e-05, "loss": 0.1783, "step": 44505 }, { "epoch": 2.4371680446805017, "grad_norm": 0.09657566249370575, "learning_rate": 1.0422835124721153e-05, "loss": 0.1842, "step": 44510 }, { "epoch": 2.437441822263593, "grad_norm": 0.10270494222640991, "learning_rate": 1.0417765159196918e-05, "loss": 0.1855, "step": 44515 }, { "epoch": 2.4377155998466846, "grad_norm": 0.10887613892555237, "learning_rate": 1.0412695193672683e-05, "loss": 0.188, "step": 44520 }, { "epoch": 2.4379893774297763, "grad_norm": 0.09539604932069778, "learning_rate": 1.0407625228148448e-05, "loss": 0.1746, "step": 44525 }, { "epoch": 2.4382631550128675, "grad_norm": 0.09431631118059158, "learning_rate": 1.0402555262624213e-05, "loss": 0.1821, "step": 44530 }, { "epoch": 2.438536932595959, "grad_norm": 0.11015740782022476, "learning_rate": 1.039748529709998e-05, "loss": 0.1856, "step": 44535 }, { "epoch": 2.4388107101790504, "grad_norm": 0.09746122360229492, "learning_rate": 1.0392415331575747e-05, "loss": 0.1736, "step": 44540 }, { "epoch": 2.439084487762142, "grad_norm": 0.09422391653060913, "learning_rate": 1.0387345366051512e-05, "loss": 0.1901, "step": 44545 }, { "epoch": 2.4393582653452337, "grad_norm": 0.10537172108888626, "learning_rate": 1.0382275400527277e-05, "loss": 0.1918, "step": 44550 }, { "epoch": 2.439632042928325, "grad_norm": 0.0987621620297432, "learning_rate": 1.0377205435003043e-05, "loss": 0.177, "step": 44555 }, { "epoch": 2.4399058205114166, "grad_norm": 0.10688835382461548, "learning_rate": 1.0372135469478808e-05, "loss": 0.1862, "step": 44560 }, { "epoch": 2.440179598094508, "grad_norm": 0.1049848273396492, "learning_rate": 1.0367065503954573e-05, "loss": 0.1899, "step": 44565 }, { "epoch": 2.4404533756775995, "grad_norm": 0.0920770987868309, "learning_rate": 1.0361995538430338e-05, "loss": 0.1834, "step": 44570 }, { "epoch": 2.440727153260691, "grad_norm": 0.10194705426692963, "learning_rate": 1.0356925572906105e-05, "loss": 0.1865, "step": 44575 }, { "epoch": 2.4410009308437823, "grad_norm": 0.09814043343067169, "learning_rate": 1.035185560738187e-05, "loss": 0.186, "step": 44580 }, { "epoch": 2.441274708426874, "grad_norm": 0.1029568612575531, "learning_rate": 1.0346785641857635e-05, "loss": 0.1864, "step": 44585 }, { "epoch": 2.4415484860099657, "grad_norm": 0.10039433091878891, "learning_rate": 1.0341715676333402e-05, "loss": 0.1839, "step": 44590 }, { "epoch": 2.441822263593057, "grad_norm": 0.08858130872249603, "learning_rate": 1.0336645710809167e-05, "loss": 0.1838, "step": 44595 }, { "epoch": 2.4420960411761485, "grad_norm": 0.09131894260644913, "learning_rate": 1.0331575745284933e-05, "loss": 0.184, "step": 44600 }, { "epoch": 2.44236981875924, "grad_norm": 0.10280438512563705, "learning_rate": 1.0326505779760698e-05, "loss": 0.1859, "step": 44605 }, { "epoch": 2.4426435963423314, "grad_norm": 0.09752482920885086, "learning_rate": 1.0321435814236463e-05, "loss": 0.1831, "step": 44610 }, { "epoch": 2.442917373925423, "grad_norm": 0.09720644354820251, "learning_rate": 1.031636584871223e-05, "loss": 0.1795, "step": 44615 }, { "epoch": 2.4431911515085143, "grad_norm": 0.09878677129745483, "learning_rate": 1.0311295883187995e-05, "loss": 0.1918, "step": 44620 }, { "epoch": 2.443464929091606, "grad_norm": 0.09148482233285904, "learning_rate": 1.030622591766376e-05, "loss": 0.1793, "step": 44625 }, { "epoch": 2.4437387066746976, "grad_norm": 0.10234277695417404, "learning_rate": 1.0301155952139525e-05, "loss": 0.1802, "step": 44630 }, { "epoch": 2.444012484257789, "grad_norm": 0.1055539920926094, "learning_rate": 1.0296085986615292e-05, "loss": 0.1858, "step": 44635 }, { "epoch": 2.4442862618408805, "grad_norm": 0.09690556675195694, "learning_rate": 1.0291016021091057e-05, "loss": 0.1794, "step": 44640 }, { "epoch": 2.444560039423972, "grad_norm": 0.08955512940883636, "learning_rate": 1.0285946055566822e-05, "loss": 0.1914, "step": 44645 }, { "epoch": 2.4448338170070634, "grad_norm": 0.10920418053865433, "learning_rate": 1.0280876090042588e-05, "loss": 0.1811, "step": 44650 }, { "epoch": 2.445107594590155, "grad_norm": 0.10905066132545471, "learning_rate": 1.0275806124518355e-05, "loss": 0.1892, "step": 44655 }, { "epoch": 2.4453813721732462, "grad_norm": 0.09040121734142303, "learning_rate": 1.027073615899412e-05, "loss": 0.1747, "step": 44660 }, { "epoch": 2.445655149756338, "grad_norm": 0.0932900458574295, "learning_rate": 1.0265666193469885e-05, "loss": 0.1857, "step": 44665 }, { "epoch": 2.4459289273394296, "grad_norm": 0.0984555259346962, "learning_rate": 1.026059622794565e-05, "loss": 0.1838, "step": 44670 }, { "epoch": 2.446202704922521, "grad_norm": 0.10314640402793884, "learning_rate": 1.0255526262421417e-05, "loss": 0.1869, "step": 44675 }, { "epoch": 2.4464764825056124, "grad_norm": 0.10848243534564972, "learning_rate": 1.0250456296897182e-05, "loss": 0.1746, "step": 44680 }, { "epoch": 2.446750260088704, "grad_norm": 0.11036952584981918, "learning_rate": 1.0245386331372947e-05, "loss": 0.1905, "step": 44685 }, { "epoch": 2.4470240376717953, "grad_norm": 0.09666375070810318, "learning_rate": 1.0240316365848712e-05, "loss": 0.188, "step": 44690 }, { "epoch": 2.447297815254887, "grad_norm": 0.1141592264175415, "learning_rate": 1.0235246400324478e-05, "loss": 0.1898, "step": 44695 }, { "epoch": 2.4475715928379786, "grad_norm": 0.10345862805843353, "learning_rate": 1.0230176434800243e-05, "loss": 0.1815, "step": 44700 }, { "epoch": 2.44784537042107, "grad_norm": 0.1021902859210968, "learning_rate": 1.0225106469276008e-05, "loss": 0.1869, "step": 44705 }, { "epoch": 2.4481191480041615, "grad_norm": 0.1068696454167366, "learning_rate": 1.0220036503751775e-05, "loss": 0.1814, "step": 44710 }, { "epoch": 2.4483929255872527, "grad_norm": 0.09892097115516663, "learning_rate": 1.0214966538227542e-05, "loss": 0.1812, "step": 44715 }, { "epoch": 2.4486667031703444, "grad_norm": 0.10242102295160294, "learning_rate": 1.0209896572703307e-05, "loss": 0.186, "step": 44720 }, { "epoch": 2.448940480753436, "grad_norm": 0.09425865113735199, "learning_rate": 1.0204826607179072e-05, "loss": 0.1921, "step": 44725 }, { "epoch": 2.4492142583365273, "grad_norm": 0.1010228618979454, "learning_rate": 1.0199756641654837e-05, "loss": 0.1832, "step": 44730 }, { "epoch": 2.449488035919619, "grad_norm": 0.1125209853053093, "learning_rate": 1.0194686676130604e-05, "loss": 0.1928, "step": 44735 }, { "epoch": 2.44976181350271, "grad_norm": 0.12068604677915573, "learning_rate": 1.0189616710606369e-05, "loss": 0.1891, "step": 44740 }, { "epoch": 2.450035591085802, "grad_norm": 0.09571874886751175, "learning_rate": 1.0184546745082134e-05, "loss": 0.182, "step": 44745 }, { "epoch": 2.4503093686688935, "grad_norm": 0.09241928160190582, "learning_rate": 1.0179476779557899e-05, "loss": 0.1997, "step": 44750 }, { "epoch": 2.4505831462519847, "grad_norm": 0.09965410828590393, "learning_rate": 1.0174406814033665e-05, "loss": 0.1845, "step": 44755 }, { "epoch": 2.4508569238350764, "grad_norm": 0.09174860268831253, "learning_rate": 1.016933684850943e-05, "loss": 0.1894, "step": 44760 }, { "epoch": 2.451130701418168, "grad_norm": 0.09522981196641922, "learning_rate": 1.0164266882985197e-05, "loss": 0.1806, "step": 44765 }, { "epoch": 2.4514044790012592, "grad_norm": 0.1017475575208664, "learning_rate": 1.0159196917460962e-05, "loss": 0.1809, "step": 44770 }, { "epoch": 2.451678256584351, "grad_norm": 0.09511157125234604, "learning_rate": 1.0154126951936729e-05, "loss": 0.1882, "step": 44775 }, { "epoch": 2.4519520341674426, "grad_norm": 0.10094235837459564, "learning_rate": 1.0149056986412494e-05, "loss": 0.1807, "step": 44780 }, { "epoch": 2.452225811750534, "grad_norm": 0.09670593589544296, "learning_rate": 1.0143987020888259e-05, "loss": 0.1877, "step": 44785 }, { "epoch": 2.4524995893336254, "grad_norm": 0.10362095385789871, "learning_rate": 1.0138917055364024e-05, "loss": 0.1937, "step": 44790 }, { "epoch": 2.452773366916717, "grad_norm": 0.1005074679851532, "learning_rate": 1.013384708983979e-05, "loss": 0.1927, "step": 44795 }, { "epoch": 2.4530471444998083, "grad_norm": 0.09232202172279358, "learning_rate": 1.0128777124315555e-05, "loss": 0.1816, "step": 44800 }, { "epoch": 2.4533209220829, "grad_norm": 0.09857380390167236, "learning_rate": 1.012370715879132e-05, "loss": 0.1837, "step": 44805 }, { "epoch": 2.453594699665991, "grad_norm": 0.09562329202890396, "learning_rate": 1.0118637193267085e-05, "loss": 0.1795, "step": 44810 }, { "epoch": 2.453868477249083, "grad_norm": 0.09200378507375717, "learning_rate": 1.0113567227742852e-05, "loss": 0.1836, "step": 44815 }, { "epoch": 2.4541422548321745, "grad_norm": 0.09164101630449295, "learning_rate": 1.0108497262218617e-05, "loss": 0.1851, "step": 44820 }, { "epoch": 2.4544160324152657, "grad_norm": 0.09279116988182068, "learning_rate": 1.0103427296694384e-05, "loss": 0.1858, "step": 44825 }, { "epoch": 2.4546898099983574, "grad_norm": 0.09737528860569, "learning_rate": 1.0098357331170149e-05, "loss": 0.1849, "step": 44830 }, { "epoch": 2.4549635875814486, "grad_norm": 0.11701182276010513, "learning_rate": 1.0093287365645914e-05, "loss": 0.1788, "step": 44835 }, { "epoch": 2.4552373651645403, "grad_norm": 0.09342233836650848, "learning_rate": 1.008821740012168e-05, "loss": 0.1767, "step": 44840 }, { "epoch": 2.455511142747632, "grad_norm": 0.09432172775268555, "learning_rate": 1.0083147434597445e-05, "loss": 0.1907, "step": 44845 }, { "epoch": 2.455784920330723, "grad_norm": 0.1038605272769928, "learning_rate": 1.007807746907321e-05, "loss": 0.1834, "step": 44850 }, { "epoch": 2.456058697913815, "grad_norm": 0.09640506654977798, "learning_rate": 1.0073007503548975e-05, "loss": 0.1787, "step": 44855 }, { "epoch": 2.4563324754969065, "grad_norm": 0.10412587970495224, "learning_rate": 1.0067937538024742e-05, "loss": 0.1843, "step": 44860 }, { "epoch": 2.4566062530799977, "grad_norm": 0.09125671535730362, "learning_rate": 1.0062867572500507e-05, "loss": 0.1847, "step": 44865 }, { "epoch": 2.4568800306630894, "grad_norm": 0.11183050274848938, "learning_rate": 1.0057797606976272e-05, "loss": 0.1789, "step": 44870 }, { "epoch": 2.457153808246181, "grad_norm": 0.10303555428981781, "learning_rate": 1.0052727641452039e-05, "loss": 0.1853, "step": 44875 }, { "epoch": 2.4574275858292722, "grad_norm": 0.10583444684743881, "learning_rate": 1.0047657675927805e-05, "loss": 0.1826, "step": 44880 }, { "epoch": 2.457701363412364, "grad_norm": 0.10162948817014694, "learning_rate": 1.004258771040357e-05, "loss": 0.1835, "step": 44885 }, { "epoch": 2.457975140995455, "grad_norm": 0.10287231206893921, "learning_rate": 1.0037517744879335e-05, "loss": 0.1924, "step": 44890 }, { "epoch": 2.4582489185785468, "grad_norm": 0.09574539214372635, "learning_rate": 1.00324477793551e-05, "loss": 0.1815, "step": 44895 }, { "epoch": 2.4585226961616384, "grad_norm": 0.10300237685441971, "learning_rate": 1.0027377813830867e-05, "loss": 0.1826, "step": 44900 }, { "epoch": 2.4587964737447296, "grad_norm": 0.1081659272313118, "learning_rate": 1.0022307848306632e-05, "loss": 0.1834, "step": 44905 }, { "epoch": 2.4590702513278213, "grad_norm": 0.10344961285591125, "learning_rate": 1.0017237882782397e-05, "loss": 0.1901, "step": 44910 }, { "epoch": 2.4593440289109125, "grad_norm": 0.09442053735256195, "learning_rate": 1.0012167917258162e-05, "loss": 0.1801, "step": 44915 }, { "epoch": 2.459617806494004, "grad_norm": 0.09799553453922272, "learning_rate": 1.0007097951733929e-05, "loss": 0.1912, "step": 44920 }, { "epoch": 2.459891584077096, "grad_norm": 0.09497150033712387, "learning_rate": 1.0002027986209694e-05, "loss": 0.1904, "step": 44925 }, { "epoch": 2.460165361660187, "grad_norm": 0.0983889102935791, "learning_rate": 9.996958020685459e-06, "loss": 0.1822, "step": 44930 }, { "epoch": 2.4604391392432787, "grad_norm": 0.09735898673534393, "learning_rate": 9.991888055161225e-06, "loss": 0.1806, "step": 44935 }, { "epoch": 2.4607129168263704, "grad_norm": 0.10699759423732758, "learning_rate": 9.986818089636992e-06, "loss": 0.1858, "step": 44940 }, { "epoch": 2.4609866944094616, "grad_norm": 0.09821256250143051, "learning_rate": 9.981748124112757e-06, "loss": 0.1835, "step": 44945 }, { "epoch": 2.4612604719925533, "grad_norm": 0.09798555821180344, "learning_rate": 9.976678158588522e-06, "loss": 0.1794, "step": 44950 }, { "epoch": 2.461534249575645, "grad_norm": 0.09882152825593948, "learning_rate": 9.971608193064287e-06, "loss": 0.1825, "step": 44955 }, { "epoch": 2.461808027158736, "grad_norm": 0.1180984377861023, "learning_rate": 9.966538227540054e-06, "loss": 0.1847, "step": 44960 }, { "epoch": 2.462081804741828, "grad_norm": 0.10285110026597977, "learning_rate": 9.961468262015819e-06, "loss": 0.1889, "step": 44965 }, { "epoch": 2.4623555823249195, "grad_norm": 0.10875725001096725, "learning_rate": 9.956398296491584e-06, "loss": 0.1894, "step": 44970 }, { "epoch": 2.4626293599080107, "grad_norm": 0.0887237936258316, "learning_rate": 9.951328330967349e-06, "loss": 0.178, "step": 44975 }, { "epoch": 2.4629031374911023, "grad_norm": 0.10283070057630539, "learning_rate": 9.946258365443116e-06, "loss": 0.1746, "step": 44980 }, { "epoch": 2.4631769150741936, "grad_norm": 0.0948372334241867, "learning_rate": 9.94118839991888e-06, "loss": 0.1872, "step": 44985 }, { "epoch": 2.4634506926572852, "grad_norm": 0.09255865216255188, "learning_rate": 9.936118434394647e-06, "loss": 0.1885, "step": 44990 }, { "epoch": 2.463724470240377, "grad_norm": 0.10909353196620941, "learning_rate": 9.931048468870412e-06, "loss": 0.1809, "step": 44995 }, { "epoch": 2.463998247823468, "grad_norm": 0.08937609940767288, "learning_rate": 9.925978503346179e-06, "loss": 0.1796, "step": 45000 }, { "epoch": 2.4642720254065598, "grad_norm": 0.10075701773166656, "learning_rate": 9.920908537821944e-06, "loss": 0.1823, "step": 45005 }, { "epoch": 2.464545802989651, "grad_norm": 0.09312217682600021, "learning_rate": 9.915838572297709e-06, "loss": 0.1832, "step": 45010 }, { "epoch": 2.4648195805727426, "grad_norm": 0.11116830259561539, "learning_rate": 9.910768606773474e-06, "loss": 0.1814, "step": 45015 }, { "epoch": 2.4650933581558343, "grad_norm": 0.09877711534500122, "learning_rate": 9.90569864124924e-06, "loss": 0.1859, "step": 45020 }, { "epoch": 2.4653671357389255, "grad_norm": 0.09742273390293121, "learning_rate": 9.900628675725006e-06, "loss": 0.1877, "step": 45025 }, { "epoch": 2.465640913322017, "grad_norm": 0.09315041452646255, "learning_rate": 9.89555871020077e-06, "loss": 0.1834, "step": 45030 }, { "epoch": 2.465914690905109, "grad_norm": 0.10531903803348541, "learning_rate": 9.890488744676536e-06, "loss": 0.1875, "step": 45035 }, { "epoch": 2.4661884684882, "grad_norm": 0.1230914294719696, "learning_rate": 9.885418779152302e-06, "loss": 0.1912, "step": 45040 }, { "epoch": 2.4664622460712917, "grad_norm": 0.09804712980985641, "learning_rate": 9.880348813628067e-06, "loss": 0.1905, "step": 45045 }, { "epoch": 2.4667360236543834, "grad_norm": 0.10265281051397324, "learning_rate": 9.875278848103834e-06, "loss": 0.1854, "step": 45050 }, { "epoch": 2.4670098012374746, "grad_norm": 0.09592879563570023, "learning_rate": 9.870208882579599e-06, "loss": 0.1793, "step": 45055 }, { "epoch": 2.4672835788205663, "grad_norm": 0.09831004589796066, "learning_rate": 9.865138917055366e-06, "loss": 0.18, "step": 45060 }, { "epoch": 2.4675573564036575, "grad_norm": 0.10142052173614502, "learning_rate": 9.86006895153113e-06, "loss": 0.1852, "step": 45065 }, { "epoch": 2.467831133986749, "grad_norm": 0.08694496005773544, "learning_rate": 9.854998986006896e-06, "loss": 0.1789, "step": 45070 }, { "epoch": 2.468104911569841, "grad_norm": 0.09263142198324203, "learning_rate": 9.84992902048266e-06, "loss": 0.1811, "step": 45075 }, { "epoch": 2.468378689152932, "grad_norm": 0.10248815268278122, "learning_rate": 9.844859054958427e-06, "loss": 0.1807, "step": 45080 }, { "epoch": 2.4686524667360237, "grad_norm": 0.09923840314149857, "learning_rate": 9.839789089434192e-06, "loss": 0.1849, "step": 45085 }, { "epoch": 2.4689262443191153, "grad_norm": 0.10950980335474014, "learning_rate": 9.834719123909957e-06, "loss": 0.1899, "step": 45090 }, { "epoch": 2.4692000219022066, "grad_norm": 0.10669814050197601, "learning_rate": 9.829649158385722e-06, "loss": 0.1762, "step": 45095 }, { "epoch": 2.469473799485298, "grad_norm": 0.09146038442850113, "learning_rate": 9.824579192861489e-06, "loss": 0.1754, "step": 45100 }, { "epoch": 2.4697475770683894, "grad_norm": 0.10271377116441727, "learning_rate": 9.819509227337254e-06, "loss": 0.1871, "step": 45105 }, { "epoch": 2.470021354651481, "grad_norm": 0.09219491481781006, "learning_rate": 9.81443926181302e-06, "loss": 0.1813, "step": 45110 }, { "epoch": 2.4702951322345728, "grad_norm": 0.10155389457941055, "learning_rate": 9.809369296288786e-06, "loss": 0.1871, "step": 45115 }, { "epoch": 2.470568909817664, "grad_norm": 0.10728074610233307, "learning_rate": 9.80429933076455e-06, "loss": 0.1842, "step": 45120 }, { "epoch": 2.4708426874007556, "grad_norm": 0.10158652067184448, "learning_rate": 9.799229365240317e-06, "loss": 0.18, "step": 45125 }, { "epoch": 2.4711164649838473, "grad_norm": 0.09134213626384735, "learning_rate": 9.794159399716082e-06, "loss": 0.1866, "step": 45130 }, { "epoch": 2.4713902425669385, "grad_norm": 0.09800365567207336, "learning_rate": 9.789089434191847e-06, "loss": 0.179, "step": 45135 }, { "epoch": 2.47166402015003, "grad_norm": 0.09703680127859116, "learning_rate": 9.784019468667612e-06, "loss": 0.1855, "step": 45140 }, { "epoch": 2.471937797733122, "grad_norm": 0.09668412059545517, "learning_rate": 9.77894950314338e-06, "loss": 0.1801, "step": 45145 }, { "epoch": 2.472211575316213, "grad_norm": 0.12384715676307678, "learning_rate": 9.773879537619144e-06, "loss": 0.1889, "step": 45150 }, { "epoch": 2.4724853528993047, "grad_norm": 0.09269735217094421, "learning_rate": 9.76880957209491e-06, "loss": 0.1819, "step": 45155 }, { "epoch": 2.472759130482396, "grad_norm": 0.0906321331858635, "learning_rate": 9.763739606570676e-06, "loss": 0.1767, "step": 45160 }, { "epoch": 2.4730329080654876, "grad_norm": 0.09325920790433884, "learning_rate": 9.758669641046443e-06, "loss": 0.1777, "step": 45165 }, { "epoch": 2.4733066856485793, "grad_norm": 0.09290842711925507, "learning_rate": 9.753599675522208e-06, "loss": 0.1852, "step": 45170 }, { "epoch": 2.4735804632316705, "grad_norm": 0.09599005430936813, "learning_rate": 9.748529709997973e-06, "loss": 0.1891, "step": 45175 }, { "epoch": 2.473854240814762, "grad_norm": 0.09815891087055206, "learning_rate": 9.743459744473738e-06, "loss": 0.1927, "step": 45180 }, { "epoch": 2.4741280183978533, "grad_norm": 0.11939562112092972, "learning_rate": 9.738389778949504e-06, "loss": 0.1838, "step": 45185 }, { "epoch": 2.474401795980945, "grad_norm": 0.0995275005698204, "learning_rate": 9.73331981342527e-06, "loss": 0.1849, "step": 45190 }, { "epoch": 2.4746755735640367, "grad_norm": 0.11410612612962723, "learning_rate": 9.728249847901034e-06, "loss": 0.1855, "step": 45195 }, { "epoch": 2.474949351147128, "grad_norm": 0.09719367325305939, "learning_rate": 9.7231798823768e-06, "loss": 0.1824, "step": 45200 }, { "epoch": 2.4752231287302195, "grad_norm": 0.10319675505161285, "learning_rate": 9.718109916852566e-06, "loss": 0.1913, "step": 45205 }, { "epoch": 2.475496906313311, "grad_norm": 0.09511204808950424, "learning_rate": 9.713039951328331e-06, "loss": 0.1827, "step": 45210 }, { "epoch": 2.4757706838964024, "grad_norm": 0.09528202563524246, "learning_rate": 9.707969985804098e-06, "loss": 0.1833, "step": 45215 }, { "epoch": 2.476044461479494, "grad_norm": 0.10289203375577927, "learning_rate": 9.702900020279863e-06, "loss": 0.1893, "step": 45220 }, { "epoch": 2.4763182390625857, "grad_norm": 0.09378179907798767, "learning_rate": 9.69783005475563e-06, "loss": 0.1813, "step": 45225 }, { "epoch": 2.476592016645677, "grad_norm": 0.10316365957260132, "learning_rate": 9.692760089231394e-06, "loss": 0.1841, "step": 45230 }, { "epoch": 2.4768657942287686, "grad_norm": 0.09063053131103516, "learning_rate": 9.68769012370716e-06, "loss": 0.1845, "step": 45235 }, { "epoch": 2.4771395718118603, "grad_norm": 0.09492834657430649, "learning_rate": 9.682620158182924e-06, "loss": 0.1782, "step": 45240 }, { "epoch": 2.4774133493949515, "grad_norm": 0.09121669083833694, "learning_rate": 9.677550192658691e-06, "loss": 0.181, "step": 45245 }, { "epoch": 2.477687126978043, "grad_norm": 0.09585941582918167, "learning_rate": 9.672480227134456e-06, "loss": 0.1805, "step": 45250 }, { "epoch": 2.4779609045611344, "grad_norm": 0.0983998253941536, "learning_rate": 9.667410261610221e-06, "loss": 0.1826, "step": 45255 }, { "epoch": 2.478234682144226, "grad_norm": 0.12233388423919678, "learning_rate": 9.662340296085986e-06, "loss": 0.1842, "step": 45260 }, { "epoch": 2.4785084597273177, "grad_norm": 0.09943482279777527, "learning_rate": 9.657270330561753e-06, "loss": 0.1837, "step": 45265 }, { "epoch": 2.478782237310409, "grad_norm": 0.10332293808460236, "learning_rate": 9.652200365037518e-06, "loss": 0.1854, "step": 45270 }, { "epoch": 2.4790560148935006, "grad_norm": 0.08969467878341675, "learning_rate": 9.647130399513284e-06, "loss": 0.1829, "step": 45275 }, { "epoch": 2.479329792476592, "grad_norm": 0.09229618310928345, "learning_rate": 9.64206043398905e-06, "loss": 0.176, "step": 45280 }, { "epoch": 2.4796035700596835, "grad_norm": 0.10911329835653305, "learning_rate": 9.636990468464816e-06, "loss": 0.187, "step": 45285 }, { "epoch": 2.479877347642775, "grad_norm": 0.10718198865652084, "learning_rate": 9.631920502940581e-06, "loss": 0.1858, "step": 45290 }, { "epoch": 2.4801511252258663, "grad_norm": 0.09156215935945511, "learning_rate": 9.626850537416346e-06, "loss": 0.1834, "step": 45295 }, { "epoch": 2.480424902808958, "grad_norm": 0.09582200646400452, "learning_rate": 9.621780571892111e-06, "loss": 0.1915, "step": 45300 }, { "epoch": 2.4806986803920497, "grad_norm": 0.09974609315395355, "learning_rate": 9.616710606367878e-06, "loss": 0.1976, "step": 45305 }, { "epoch": 2.480972457975141, "grad_norm": 0.09938428550958633, "learning_rate": 9.611640640843643e-06, "loss": 0.186, "step": 45310 }, { "epoch": 2.4812462355582325, "grad_norm": 0.10364086925983429, "learning_rate": 9.606570675319408e-06, "loss": 0.1863, "step": 45315 }, { "epoch": 2.481520013141324, "grad_norm": 0.09958130121231079, "learning_rate": 9.601500709795173e-06, "loss": 0.1856, "step": 45320 }, { "epoch": 2.4817937907244154, "grad_norm": 0.09544459730386734, "learning_rate": 9.59643074427094e-06, "loss": 0.1857, "step": 45325 }, { "epoch": 2.482067568307507, "grad_norm": 0.10510308295488358, "learning_rate": 9.591360778746704e-06, "loss": 0.1853, "step": 45330 }, { "epoch": 2.4823413458905983, "grad_norm": 0.09264272451400757, "learning_rate": 9.586290813222471e-06, "loss": 0.1801, "step": 45335 }, { "epoch": 2.48261512347369, "grad_norm": 0.08949818462133408, "learning_rate": 9.581220847698236e-06, "loss": 0.1815, "step": 45340 }, { "epoch": 2.4828889010567816, "grad_norm": 0.09564720839262009, "learning_rate": 9.576150882174003e-06, "loss": 0.1802, "step": 45345 }, { "epoch": 2.483162678639873, "grad_norm": 0.10158824920654297, "learning_rate": 9.571080916649768e-06, "loss": 0.1797, "step": 45350 }, { "epoch": 2.4834364562229645, "grad_norm": 0.10266555100679398, "learning_rate": 9.566010951125533e-06, "loss": 0.1928, "step": 45355 }, { "epoch": 2.4837102338060557, "grad_norm": 0.09699103236198425, "learning_rate": 9.560940985601298e-06, "loss": 0.1774, "step": 45360 }, { "epoch": 2.4839840113891474, "grad_norm": 0.10540973395109177, "learning_rate": 9.555871020077065e-06, "loss": 0.1865, "step": 45365 }, { "epoch": 2.484257788972239, "grad_norm": 0.09763877838850021, "learning_rate": 9.55080105455283e-06, "loss": 0.1841, "step": 45370 }, { "epoch": 2.4845315665553303, "grad_norm": 0.09356526285409927, "learning_rate": 9.545731089028595e-06, "loss": 0.1814, "step": 45375 }, { "epoch": 2.484805344138422, "grad_norm": 0.09422016143798828, "learning_rate": 9.54066112350436e-06, "loss": 0.1802, "step": 45380 }, { "epoch": 2.4850791217215136, "grad_norm": 0.08850964903831482, "learning_rate": 9.535591157980126e-06, "loss": 0.1891, "step": 45385 }, { "epoch": 2.485352899304605, "grad_norm": 0.09256679564714432, "learning_rate": 9.530521192455893e-06, "loss": 0.1781, "step": 45390 }, { "epoch": 2.4856266768876965, "grad_norm": 0.0931025817990303, "learning_rate": 9.525451226931658e-06, "loss": 0.1805, "step": 45395 }, { "epoch": 2.485900454470788, "grad_norm": 0.0960596427321434, "learning_rate": 9.520381261407423e-06, "loss": 0.1837, "step": 45400 }, { "epoch": 2.4861742320538793, "grad_norm": 0.10466765612363815, "learning_rate": 9.515311295883188e-06, "loss": 0.1878, "step": 45405 }, { "epoch": 2.486448009636971, "grad_norm": 0.09369178116321564, "learning_rate": 9.510241330358955e-06, "loss": 0.1767, "step": 45410 }, { "epoch": 2.4867217872200627, "grad_norm": 0.09695934504270554, "learning_rate": 9.50517136483472e-06, "loss": 0.1837, "step": 45415 }, { "epoch": 2.486995564803154, "grad_norm": 0.1037040501832962, "learning_rate": 9.500101399310485e-06, "loss": 0.1814, "step": 45420 }, { "epoch": 2.4872693423862455, "grad_norm": 0.09306695312261581, "learning_rate": 9.49503143378625e-06, "loss": 0.185, "step": 45425 }, { "epoch": 2.4875431199693367, "grad_norm": 0.09553488343954086, "learning_rate": 9.489961468262016e-06, "loss": 0.1793, "step": 45430 }, { "epoch": 2.4878168975524284, "grad_norm": 0.09834404289722443, "learning_rate": 9.484891502737781e-06, "loss": 0.1905, "step": 45435 }, { "epoch": 2.48809067513552, "grad_norm": 0.1047791838645935, "learning_rate": 9.479821537213546e-06, "loss": 0.1832, "step": 45440 }, { "epoch": 2.4883644527186113, "grad_norm": 0.10275452584028244, "learning_rate": 9.474751571689313e-06, "loss": 0.1924, "step": 45445 }, { "epoch": 2.488638230301703, "grad_norm": 0.09810779243707657, "learning_rate": 9.46968160616508e-06, "loss": 0.1869, "step": 45450 }, { "epoch": 2.488912007884794, "grad_norm": 0.10490139573812485, "learning_rate": 9.464611640640845e-06, "loss": 0.1842, "step": 45455 }, { "epoch": 2.489185785467886, "grad_norm": 0.10573983192443848, "learning_rate": 9.45954167511661e-06, "loss": 0.1857, "step": 45460 }, { "epoch": 2.4894595630509775, "grad_norm": 0.09395800530910492, "learning_rate": 9.454471709592375e-06, "loss": 0.1831, "step": 45465 }, { "epoch": 2.4897333406340687, "grad_norm": 0.0926181823015213, "learning_rate": 9.449401744068141e-06, "loss": 0.1862, "step": 45470 }, { "epoch": 2.4900071182171604, "grad_norm": 0.09384844452142715, "learning_rate": 9.444331778543906e-06, "loss": 0.1821, "step": 45475 }, { "epoch": 2.490280895800252, "grad_norm": 0.1030534952878952, "learning_rate": 9.439261813019671e-06, "loss": 0.1861, "step": 45480 }, { "epoch": 2.4905546733833432, "grad_norm": 0.10189472883939743, "learning_rate": 9.434191847495436e-06, "loss": 0.182, "step": 45485 }, { "epoch": 2.490828450966435, "grad_norm": 0.10274852812290192, "learning_rate": 9.429121881971203e-06, "loss": 0.1844, "step": 45490 }, { "epoch": 2.4911022285495266, "grad_norm": 0.09676801413297653, "learning_rate": 9.424051916446968e-06, "loss": 0.1843, "step": 45495 }, { "epoch": 2.491376006132618, "grad_norm": 0.09430935233831406, "learning_rate": 9.418981950922735e-06, "loss": 0.1835, "step": 45500 }, { "epoch": 2.4916497837157094, "grad_norm": 0.09109923243522644, "learning_rate": 9.4139119853985e-06, "loss": 0.1858, "step": 45505 }, { "epoch": 2.4919235612988007, "grad_norm": 0.0975646823644638, "learning_rate": 9.408842019874266e-06, "loss": 0.1861, "step": 45510 }, { "epoch": 2.4921973388818923, "grad_norm": 0.1036713719367981, "learning_rate": 9.403772054350031e-06, "loss": 0.1833, "step": 45515 }, { "epoch": 2.492471116464984, "grad_norm": 0.09679851680994034, "learning_rate": 9.398702088825796e-06, "loss": 0.1758, "step": 45520 }, { "epoch": 2.492744894048075, "grad_norm": 0.10085119307041168, "learning_rate": 9.393632123301561e-06, "loss": 0.1799, "step": 45525 }, { "epoch": 2.493018671631167, "grad_norm": 0.09689656645059586, "learning_rate": 9.388562157777328e-06, "loss": 0.1911, "step": 45530 }, { "epoch": 2.4932924492142585, "grad_norm": 0.09280238300561905, "learning_rate": 9.383492192253093e-06, "loss": 0.1829, "step": 45535 }, { "epoch": 2.4935662267973497, "grad_norm": 0.09247510135173798, "learning_rate": 9.378422226728858e-06, "loss": 0.183, "step": 45540 }, { "epoch": 2.4938400043804414, "grad_norm": 0.0941535159945488, "learning_rate": 9.373352261204623e-06, "loss": 0.1871, "step": 45545 }, { "epoch": 2.4941137819635326, "grad_norm": 0.11652912199497223, "learning_rate": 9.36828229568039e-06, "loss": 0.1839, "step": 45550 }, { "epoch": 2.4943875595466243, "grad_norm": 0.10667169839143753, "learning_rate": 9.363212330156155e-06, "loss": 0.1805, "step": 45555 }, { "epoch": 2.494661337129716, "grad_norm": 0.09617473185062408, "learning_rate": 9.358142364631921e-06, "loss": 0.188, "step": 45560 }, { "epoch": 2.494935114712807, "grad_norm": 0.10857920348644257, "learning_rate": 9.353072399107686e-06, "loss": 0.1804, "step": 45565 }, { "epoch": 2.495208892295899, "grad_norm": 0.09818733483552933, "learning_rate": 9.348002433583453e-06, "loss": 0.1813, "step": 45570 }, { "epoch": 2.4954826698789905, "grad_norm": 0.09786468744277954, "learning_rate": 9.342932468059218e-06, "loss": 0.1804, "step": 45575 }, { "epoch": 2.4957564474620817, "grad_norm": 0.09990128129720688, "learning_rate": 9.337862502534983e-06, "loss": 0.1843, "step": 45580 }, { "epoch": 2.4960302250451734, "grad_norm": 0.12417761981487274, "learning_rate": 9.332792537010748e-06, "loss": 0.187, "step": 45585 }, { "epoch": 2.496304002628265, "grad_norm": 0.11484319716691971, "learning_rate": 9.327722571486515e-06, "loss": 0.1887, "step": 45590 }, { "epoch": 2.4965777802113562, "grad_norm": 0.09539178013801575, "learning_rate": 9.32265260596228e-06, "loss": 0.188, "step": 45595 }, { "epoch": 2.496851557794448, "grad_norm": 0.09905629605054855, "learning_rate": 9.317582640438045e-06, "loss": 0.185, "step": 45600 }, { "epoch": 2.497125335377539, "grad_norm": 0.10565432161092758, "learning_rate": 9.31251267491381e-06, "loss": 0.1822, "step": 45605 }, { "epoch": 2.4973991129606308, "grad_norm": 0.09992078691720963, "learning_rate": 9.307442709389577e-06, "loss": 0.1926, "step": 45610 }, { "epoch": 2.4976728905437224, "grad_norm": 0.09015887975692749, "learning_rate": 9.302372743865343e-06, "loss": 0.1899, "step": 45615 }, { "epoch": 2.4979466681268137, "grad_norm": 0.09090900421142578, "learning_rate": 9.297302778341108e-06, "loss": 0.1795, "step": 45620 }, { "epoch": 2.4982204457099053, "grad_norm": 0.10473987460136414, "learning_rate": 9.292232812816873e-06, "loss": 0.1847, "step": 45625 }, { "epoch": 2.4984942232929965, "grad_norm": 0.10710205137729645, "learning_rate": 9.28716284729264e-06, "loss": 0.1832, "step": 45630 }, { "epoch": 2.498768000876088, "grad_norm": 0.09754358977079391, "learning_rate": 9.282092881768405e-06, "loss": 0.1842, "step": 45635 }, { "epoch": 2.49904177845918, "grad_norm": 0.10112626105546951, "learning_rate": 9.27702291624417e-06, "loss": 0.1805, "step": 45640 }, { "epoch": 2.499315556042271, "grad_norm": 0.0993490219116211, "learning_rate": 9.271952950719935e-06, "loss": 0.1899, "step": 45645 }, { "epoch": 2.4995893336253627, "grad_norm": 0.10595618933439255, "learning_rate": 9.266882985195702e-06, "loss": 0.1862, "step": 45650 }, { "epoch": 2.4998631112084544, "grad_norm": 0.09680825471878052, "learning_rate": 9.261813019671467e-06, "loss": 0.1863, "step": 45655 }, { "epoch": 2.5001368887915456, "grad_norm": 0.09169302135705948, "learning_rate": 9.256743054147232e-06, "loss": 0.1833, "step": 45660 }, { "epoch": 2.5004106663746373, "grad_norm": 0.10362884402275085, "learning_rate": 9.251673088622997e-06, "loss": 0.1886, "step": 45665 }, { "epoch": 2.500684443957729, "grad_norm": 0.10492584109306335, "learning_rate": 9.246603123098763e-06, "loss": 0.1845, "step": 45670 }, { "epoch": 2.50095822154082, "grad_norm": 0.10507108271121979, "learning_rate": 9.24153315757453e-06, "loss": 0.1828, "step": 45675 }, { "epoch": 2.501231999123912, "grad_norm": 0.10324845463037491, "learning_rate": 9.236463192050295e-06, "loss": 0.1829, "step": 45680 }, { "epoch": 2.5015057767070035, "grad_norm": 0.09275482594966888, "learning_rate": 9.23139322652606e-06, "loss": 0.181, "step": 45685 }, { "epoch": 2.5017795542900947, "grad_norm": 0.09631923586130142, "learning_rate": 9.226323261001827e-06, "loss": 0.179, "step": 45690 }, { "epoch": 2.5020533318731863, "grad_norm": 0.09374179691076279, "learning_rate": 9.221253295477592e-06, "loss": 0.1845, "step": 45695 }, { "epoch": 2.5023271094562776, "grad_norm": 0.09281674772500992, "learning_rate": 9.216183329953357e-06, "loss": 0.1881, "step": 45700 }, { "epoch": 2.5026008870393692, "grad_norm": 0.09147520363330841, "learning_rate": 9.211113364429122e-06, "loss": 0.1811, "step": 45705 }, { "epoch": 2.5028746646224604, "grad_norm": 0.09788766503334045, "learning_rate": 9.206043398904887e-06, "loss": 0.174, "step": 45710 }, { "epoch": 2.503148442205552, "grad_norm": 0.09905875474214554, "learning_rate": 9.200973433380653e-06, "loss": 0.1814, "step": 45715 }, { "epoch": 2.5034222197886438, "grad_norm": 0.096535824239254, "learning_rate": 9.195903467856418e-06, "loss": 0.1888, "step": 45720 }, { "epoch": 2.503695997371735, "grad_norm": 0.10099546611309052, "learning_rate": 9.190833502332185e-06, "loss": 0.1766, "step": 45725 }, { "epoch": 2.5039697749548266, "grad_norm": 0.09180213510990143, "learning_rate": 9.18576353680795e-06, "loss": 0.1834, "step": 45730 }, { "epoch": 2.5042435525379183, "grad_norm": 0.09233308583498001, "learning_rate": 9.180693571283717e-06, "loss": 0.1832, "step": 45735 }, { "epoch": 2.5045173301210095, "grad_norm": 0.08844134211540222, "learning_rate": 9.175623605759482e-06, "loss": 0.1748, "step": 45740 }, { "epoch": 2.504791107704101, "grad_norm": 0.09935373067855835, "learning_rate": 9.170553640235247e-06, "loss": 0.1792, "step": 45745 }, { "epoch": 2.505064885287193, "grad_norm": 0.09874221682548523, "learning_rate": 9.165483674711012e-06, "loss": 0.1892, "step": 45750 }, { "epoch": 2.505338662870284, "grad_norm": 0.09528393298387527, "learning_rate": 9.160413709186778e-06, "loss": 0.1866, "step": 45755 }, { "epoch": 2.5056124404533757, "grad_norm": 0.09953179210424423, "learning_rate": 9.155343743662543e-06, "loss": 0.1896, "step": 45760 }, { "epoch": 2.5058862180364674, "grad_norm": 0.09642617404460907, "learning_rate": 9.150273778138308e-06, "loss": 0.1786, "step": 45765 }, { "epoch": 2.5061599956195586, "grad_norm": 0.10002191364765167, "learning_rate": 9.145203812614073e-06, "loss": 0.188, "step": 45770 }, { "epoch": 2.5064337732026503, "grad_norm": 0.09613684564828873, "learning_rate": 9.14013384708984e-06, "loss": 0.186, "step": 45775 }, { "epoch": 2.506707550785742, "grad_norm": 0.0879923403263092, "learning_rate": 9.135063881565605e-06, "loss": 0.1768, "step": 45780 }, { "epoch": 2.506981328368833, "grad_norm": 0.09330998361110687, "learning_rate": 9.129993916041372e-06, "loss": 0.1829, "step": 45785 }, { "epoch": 2.507255105951925, "grad_norm": 0.09952162951231003, "learning_rate": 9.124923950517137e-06, "loss": 0.1731, "step": 45790 }, { "epoch": 2.507528883535016, "grad_norm": 0.09014544636011124, "learning_rate": 9.119853984992904e-06, "loss": 0.1819, "step": 45795 }, { "epoch": 2.5078026611181077, "grad_norm": 0.10539740324020386, "learning_rate": 9.114784019468669e-06, "loss": 0.1916, "step": 45800 }, { "epoch": 2.508076438701199, "grad_norm": 0.09479734301567078, "learning_rate": 9.109714053944434e-06, "loss": 0.1768, "step": 45805 }, { "epoch": 2.5083502162842906, "grad_norm": 0.096542589366436, "learning_rate": 9.104644088420199e-06, "loss": 0.1794, "step": 45810 }, { "epoch": 2.508623993867382, "grad_norm": 0.09979352355003357, "learning_rate": 9.099574122895965e-06, "loss": 0.1844, "step": 45815 }, { "epoch": 2.5088977714504734, "grad_norm": 0.09904731065034866, "learning_rate": 9.09450415737173e-06, "loss": 0.1839, "step": 45820 }, { "epoch": 2.509171549033565, "grad_norm": 0.09454433619976044, "learning_rate": 9.089434191847495e-06, "loss": 0.1841, "step": 45825 }, { "epoch": 2.5094453266166568, "grad_norm": 0.09244538843631744, "learning_rate": 9.08436422632326e-06, "loss": 0.1815, "step": 45830 }, { "epoch": 2.509719104199748, "grad_norm": 0.10349372029304504, "learning_rate": 9.079294260799027e-06, "loss": 0.184, "step": 45835 }, { "epoch": 2.5099928817828396, "grad_norm": 0.09928891807794571, "learning_rate": 9.074224295274792e-06, "loss": 0.186, "step": 45840 }, { "epoch": 2.5102666593659313, "grad_norm": 0.09587191045284271, "learning_rate": 9.069154329750559e-06, "loss": 0.1836, "step": 45845 }, { "epoch": 2.5105404369490225, "grad_norm": 0.10478594154119492, "learning_rate": 9.064084364226324e-06, "loss": 0.1945, "step": 45850 }, { "epoch": 2.510814214532114, "grad_norm": 0.10137370973825455, "learning_rate": 9.05901439870209e-06, "loss": 0.1811, "step": 45855 }, { "epoch": 2.511087992115206, "grad_norm": 0.09143252670764923, "learning_rate": 9.053944433177855e-06, "loss": 0.1804, "step": 45860 }, { "epoch": 2.511361769698297, "grad_norm": 0.09073338657617569, "learning_rate": 9.04887446765362e-06, "loss": 0.1818, "step": 45865 }, { "epoch": 2.5116355472813887, "grad_norm": 0.09908824414014816, "learning_rate": 9.043804502129385e-06, "loss": 0.1781, "step": 45870 }, { "epoch": 2.51190932486448, "grad_norm": 0.10170744359493256, "learning_rate": 9.038734536605152e-06, "loss": 0.1841, "step": 45875 }, { "epoch": 2.5121831024475716, "grad_norm": 0.10288862138986588, "learning_rate": 9.033664571080917e-06, "loss": 0.1821, "step": 45880 }, { "epoch": 2.5124568800306633, "grad_norm": 0.09341003000736237, "learning_rate": 9.028594605556682e-06, "loss": 0.1865, "step": 45885 }, { "epoch": 2.5127306576137545, "grad_norm": 0.0999240055680275, "learning_rate": 9.023524640032447e-06, "loss": 0.1886, "step": 45890 }, { "epoch": 2.513004435196846, "grad_norm": 0.10003675520420074, "learning_rate": 9.018454674508214e-06, "loss": 0.1791, "step": 45895 }, { "epoch": 2.5132782127799373, "grad_norm": 0.10059544444084167, "learning_rate": 9.01338470898398e-06, "loss": 0.1752, "step": 45900 }, { "epoch": 2.513551990363029, "grad_norm": 0.09026261419057846, "learning_rate": 9.008314743459745e-06, "loss": 0.176, "step": 45905 }, { "epoch": 2.5138257679461207, "grad_norm": 0.10124162584543228, "learning_rate": 9.00324477793551e-06, "loss": 0.182, "step": 45910 }, { "epoch": 2.514099545529212, "grad_norm": 0.10872842371463776, "learning_rate": 8.998174812411277e-06, "loss": 0.1808, "step": 45915 }, { "epoch": 2.5143733231123035, "grad_norm": 0.09967955946922302, "learning_rate": 8.993104846887042e-06, "loss": 0.1878, "step": 45920 }, { "epoch": 2.514647100695395, "grad_norm": 0.09533744305372238, "learning_rate": 8.988034881362807e-06, "loss": 0.1823, "step": 45925 }, { "epoch": 2.5149208782784864, "grad_norm": 0.10142547637224197, "learning_rate": 8.982964915838572e-06, "loss": 0.1836, "step": 45930 }, { "epoch": 2.515194655861578, "grad_norm": 0.08935622125864029, "learning_rate": 8.977894950314339e-06, "loss": 0.1779, "step": 45935 }, { "epoch": 2.5154684334446697, "grad_norm": 0.10625868290662766, "learning_rate": 8.972824984790104e-06, "loss": 0.1828, "step": 45940 }, { "epoch": 2.515742211027761, "grad_norm": 0.09242301434278488, "learning_rate": 8.967755019265869e-06, "loss": 0.185, "step": 45945 }, { "epoch": 2.5160159886108526, "grad_norm": 0.10038433223962784, "learning_rate": 8.962685053741635e-06, "loss": 0.1865, "step": 45950 }, { "epoch": 2.5162897661939443, "grad_norm": 0.09781737625598907, "learning_rate": 8.9576150882174e-06, "loss": 0.1849, "step": 45955 }, { "epoch": 2.5165635437770355, "grad_norm": 0.09938959777355194, "learning_rate": 8.952545122693167e-06, "loss": 0.1885, "step": 45960 }, { "epoch": 2.516837321360127, "grad_norm": 0.09486674517393112, "learning_rate": 8.947475157168932e-06, "loss": 0.1726, "step": 45965 }, { "epoch": 2.5171110989432184, "grad_norm": 0.091450534760952, "learning_rate": 8.942405191644697e-06, "loss": 0.1865, "step": 45970 }, { "epoch": 2.51738487652631, "grad_norm": 0.09934838861227036, "learning_rate": 8.937335226120464e-06, "loss": 0.1825, "step": 45975 }, { "epoch": 2.5176586541094013, "grad_norm": 0.10094112902879715, "learning_rate": 8.932265260596229e-06, "loss": 0.18, "step": 45980 }, { "epoch": 2.517932431692493, "grad_norm": 0.10005932301282883, "learning_rate": 8.927195295071994e-06, "loss": 0.1846, "step": 45985 }, { "epoch": 2.5182062092755846, "grad_norm": 0.09203176945447922, "learning_rate": 8.922125329547759e-06, "loss": 0.1812, "step": 45990 }, { "epoch": 2.518479986858676, "grad_norm": 0.09429121762514114, "learning_rate": 8.917055364023526e-06, "loss": 0.1887, "step": 45995 }, { "epoch": 2.5187537644417675, "grad_norm": 0.10667461156845093, "learning_rate": 8.91198539849929e-06, "loss": 0.1827, "step": 46000 }, { "epoch": 2.519027542024859, "grad_norm": 0.09838208556175232, "learning_rate": 8.906915432975055e-06, "loss": 0.1844, "step": 46005 }, { "epoch": 2.5193013196079503, "grad_norm": 0.1042695865035057, "learning_rate": 8.901845467450822e-06, "loss": 0.1913, "step": 46010 }, { "epoch": 2.519575097191042, "grad_norm": 0.08049789816141129, "learning_rate": 8.896775501926587e-06, "loss": 0.1811, "step": 46015 }, { "epoch": 2.5198488747741337, "grad_norm": 0.09445443749427795, "learning_rate": 8.891705536402354e-06, "loss": 0.1835, "step": 46020 }, { "epoch": 2.520122652357225, "grad_norm": 0.09814327955245972, "learning_rate": 8.886635570878119e-06, "loss": 0.1851, "step": 46025 }, { "epoch": 2.5203964299403165, "grad_norm": 0.08400911837816238, "learning_rate": 8.881565605353884e-06, "loss": 0.171, "step": 46030 }, { "epoch": 2.520670207523408, "grad_norm": 0.10294274240732193, "learning_rate": 8.876495639829649e-06, "loss": 0.1853, "step": 46035 }, { "epoch": 2.5209439851064994, "grad_norm": 0.10271543264389038, "learning_rate": 8.871425674305416e-06, "loss": 0.1946, "step": 46040 }, { "epoch": 2.521217762689591, "grad_norm": 0.09490421414375305, "learning_rate": 8.86635570878118e-06, "loss": 0.1793, "step": 46045 }, { "epoch": 2.5214915402726827, "grad_norm": 0.10035412758588791, "learning_rate": 8.861285743256946e-06, "loss": 0.189, "step": 46050 }, { "epoch": 2.521765317855774, "grad_norm": 0.09924974292516708, "learning_rate": 8.85621577773271e-06, "loss": 0.1904, "step": 46055 }, { "epoch": 2.5220390954388656, "grad_norm": 0.11272735148668289, "learning_rate": 8.851145812208477e-06, "loss": 0.182, "step": 46060 }, { "epoch": 2.522312873021957, "grad_norm": 0.10944925993680954, "learning_rate": 8.846075846684242e-06, "loss": 0.1998, "step": 46065 }, { "epoch": 2.5225866506050485, "grad_norm": 0.09237989783287048, "learning_rate": 8.841005881160009e-06, "loss": 0.186, "step": 46070 }, { "epoch": 2.5228604281881397, "grad_norm": 0.09363611787557602, "learning_rate": 8.835935915635774e-06, "loss": 0.1772, "step": 46075 }, { "epoch": 2.5231342057712314, "grad_norm": 0.10235628485679626, "learning_rate": 8.83086595011154e-06, "loss": 0.186, "step": 46080 }, { "epoch": 2.523407983354323, "grad_norm": 0.0897279754281044, "learning_rate": 8.825795984587306e-06, "loss": 0.1772, "step": 46085 }, { "epoch": 2.5236817609374143, "grad_norm": 0.09485730528831482, "learning_rate": 8.82072601906307e-06, "loss": 0.1834, "step": 46090 }, { "epoch": 2.523955538520506, "grad_norm": 0.09361961483955383, "learning_rate": 8.815656053538836e-06, "loss": 0.191, "step": 46095 }, { "epoch": 2.5242293161035976, "grad_norm": 0.09105076640844345, "learning_rate": 8.810586088014602e-06, "loss": 0.1814, "step": 46100 }, { "epoch": 2.524503093686689, "grad_norm": 0.10769902914762497, "learning_rate": 8.805516122490367e-06, "loss": 0.1878, "step": 46105 }, { "epoch": 2.5247768712697805, "grad_norm": 0.09561935812234879, "learning_rate": 8.800446156966132e-06, "loss": 0.1853, "step": 46110 }, { "epoch": 2.525050648852872, "grad_norm": 0.09287001192569733, "learning_rate": 8.795376191441897e-06, "loss": 0.1835, "step": 46115 }, { "epoch": 2.5253244264359633, "grad_norm": 0.0959891602396965, "learning_rate": 8.790306225917664e-06, "loss": 0.1803, "step": 46120 }, { "epoch": 2.525598204019055, "grad_norm": 0.10063081234693527, "learning_rate": 8.78523626039343e-06, "loss": 0.1825, "step": 46125 }, { "epoch": 2.5258719816021467, "grad_norm": 0.0993543490767479, "learning_rate": 8.780166294869196e-06, "loss": 0.1893, "step": 46130 }, { "epoch": 2.526145759185238, "grad_norm": 0.09986844658851624, "learning_rate": 8.77509632934496e-06, "loss": 0.1825, "step": 46135 }, { "epoch": 2.5264195367683295, "grad_norm": 0.10096265375614166, "learning_rate": 8.770026363820727e-06, "loss": 0.1924, "step": 46140 }, { "epoch": 2.5266933143514207, "grad_norm": 0.10270325839519501, "learning_rate": 8.764956398296492e-06, "loss": 0.1899, "step": 46145 }, { "epoch": 2.5269670919345124, "grad_norm": 0.09532582759857178, "learning_rate": 8.759886432772257e-06, "loss": 0.1782, "step": 46150 }, { "epoch": 2.5272408695176036, "grad_norm": 0.095036081969738, "learning_rate": 8.754816467248022e-06, "loss": 0.1824, "step": 46155 }, { "epoch": 2.5275146471006953, "grad_norm": 0.09990984946489334, "learning_rate": 8.749746501723789e-06, "loss": 0.1865, "step": 46160 }, { "epoch": 2.527788424683787, "grad_norm": 0.10198966413736343, "learning_rate": 8.744676536199554e-06, "loss": 0.191, "step": 46165 }, { "epoch": 2.528062202266878, "grad_norm": 0.0919814258813858, "learning_rate": 8.739606570675319e-06, "loss": 0.1739, "step": 46170 }, { "epoch": 2.52833597984997, "grad_norm": 0.09423157572746277, "learning_rate": 8.734536605151084e-06, "loss": 0.1826, "step": 46175 }, { "epoch": 2.5286097574330615, "grad_norm": 0.09650100022554398, "learning_rate": 8.72946663962685e-06, "loss": 0.1795, "step": 46180 }, { "epoch": 2.5288835350161527, "grad_norm": 0.09107708185911179, "learning_rate": 8.724396674102617e-06, "loss": 0.184, "step": 46185 }, { "epoch": 2.5291573125992444, "grad_norm": 0.09201566129922867, "learning_rate": 8.719326708578382e-06, "loss": 0.178, "step": 46190 }, { "epoch": 2.529431090182336, "grad_norm": 0.0926818698644638, "learning_rate": 8.714256743054147e-06, "loss": 0.1799, "step": 46195 }, { "epoch": 2.5297048677654272, "grad_norm": 0.09869319200515747, "learning_rate": 8.709186777529914e-06, "loss": 0.1894, "step": 46200 }, { "epoch": 2.529978645348519, "grad_norm": 0.09151874482631683, "learning_rate": 8.70411681200568e-06, "loss": 0.1813, "step": 46205 }, { "epoch": 2.5302524229316106, "grad_norm": 0.08926539123058319, "learning_rate": 8.699046846481444e-06, "loss": 0.1843, "step": 46210 }, { "epoch": 2.530526200514702, "grad_norm": 0.09312546998262405, "learning_rate": 8.69397688095721e-06, "loss": 0.1795, "step": 46215 }, { "epoch": 2.5307999780977934, "grad_norm": 0.10301654785871506, "learning_rate": 8.688906915432976e-06, "loss": 0.1814, "step": 46220 }, { "epoch": 2.531073755680885, "grad_norm": 0.09267055243253708, "learning_rate": 8.683836949908741e-06, "loss": 0.1724, "step": 46225 }, { "epoch": 2.5313475332639763, "grad_norm": 0.0964648500084877, "learning_rate": 8.678766984384506e-06, "loss": 0.1855, "step": 46230 }, { "epoch": 2.531621310847068, "grad_norm": 0.10256678611040115, "learning_rate": 8.673697018860273e-06, "loss": 0.1845, "step": 46235 }, { "epoch": 2.531895088430159, "grad_norm": 0.10903505980968475, "learning_rate": 8.668627053336038e-06, "loss": 0.1813, "step": 46240 }, { "epoch": 2.532168866013251, "grad_norm": 0.09237110614776611, "learning_rate": 8.663557087811804e-06, "loss": 0.1875, "step": 46245 }, { "epoch": 2.532442643596342, "grad_norm": 0.10379835218191147, "learning_rate": 8.65848712228757e-06, "loss": 0.1799, "step": 46250 }, { "epoch": 2.5327164211794337, "grad_norm": 0.09686646610498428, "learning_rate": 8.653417156763334e-06, "loss": 0.1886, "step": 46255 }, { "epoch": 2.5329901987625254, "grad_norm": 0.0863257572054863, "learning_rate": 8.648347191239101e-06, "loss": 0.1806, "step": 46260 }, { "epoch": 2.5332639763456166, "grad_norm": 0.09105030447244644, "learning_rate": 8.643277225714866e-06, "loss": 0.1883, "step": 46265 }, { "epoch": 2.5335377539287083, "grad_norm": 0.11253511905670166, "learning_rate": 8.638207260190631e-06, "loss": 0.1875, "step": 46270 }, { "epoch": 2.5338115315118, "grad_norm": 0.1046103909611702, "learning_rate": 8.633137294666396e-06, "loss": 0.1913, "step": 46275 }, { "epoch": 2.534085309094891, "grad_norm": 0.10340090095996857, "learning_rate": 8.628067329142163e-06, "loss": 0.1866, "step": 46280 }, { "epoch": 2.534359086677983, "grad_norm": 0.08771859854459763, "learning_rate": 8.622997363617928e-06, "loss": 0.1861, "step": 46285 }, { "epoch": 2.5346328642610745, "grad_norm": 0.09328670054674149, "learning_rate": 8.617927398093693e-06, "loss": 0.1823, "step": 46290 }, { "epoch": 2.5349066418441657, "grad_norm": 0.1082613542675972, "learning_rate": 8.61285743256946e-06, "loss": 0.1975, "step": 46295 }, { "epoch": 2.5351804194272574, "grad_norm": 0.09250333160161972, "learning_rate": 8.607787467045224e-06, "loss": 0.1879, "step": 46300 }, { "epoch": 2.535454197010349, "grad_norm": 0.0897076353430748, "learning_rate": 8.602717501520991e-06, "loss": 0.1717, "step": 46305 }, { "epoch": 2.5357279745934402, "grad_norm": 0.09567374736070633, "learning_rate": 8.597647535996756e-06, "loss": 0.1861, "step": 46310 }, { "epoch": 2.536001752176532, "grad_norm": 0.09171520173549652, "learning_rate": 8.592577570472521e-06, "loss": 0.1821, "step": 46315 }, { "epoch": 2.5362755297596236, "grad_norm": 0.10605650395154953, "learning_rate": 8.587507604948286e-06, "loss": 0.1857, "step": 46320 }, { "epoch": 2.5365493073427148, "grad_norm": 0.09811701625585556, "learning_rate": 8.582437639424053e-06, "loss": 0.1793, "step": 46325 }, { "epoch": 2.5368230849258064, "grad_norm": 0.09677726775407791, "learning_rate": 8.577367673899818e-06, "loss": 0.1858, "step": 46330 }, { "epoch": 2.5370968625088977, "grad_norm": 0.10604415088891983, "learning_rate": 8.572297708375583e-06, "loss": 0.1919, "step": 46335 }, { "epoch": 2.5373706400919893, "grad_norm": 0.1005789041519165, "learning_rate": 8.567227742851348e-06, "loss": 0.1923, "step": 46340 }, { "epoch": 2.5376444176750805, "grad_norm": 0.10436242818832397, "learning_rate": 8.562157777327114e-06, "loss": 0.1884, "step": 46345 }, { "epoch": 2.537918195258172, "grad_norm": 0.0985654890537262, "learning_rate": 8.55708781180288e-06, "loss": 0.187, "step": 46350 }, { "epoch": 2.538191972841264, "grad_norm": 0.09802553057670593, "learning_rate": 8.552017846278646e-06, "loss": 0.1835, "step": 46355 }, { "epoch": 2.538465750424355, "grad_norm": 0.10749513655900955, "learning_rate": 8.546947880754411e-06, "loss": 0.1854, "step": 46360 }, { "epoch": 2.5387395280074467, "grad_norm": 0.09854506701231003, "learning_rate": 8.541877915230178e-06, "loss": 0.1857, "step": 46365 }, { "epoch": 2.5390133055905384, "grad_norm": 0.0906066969037056, "learning_rate": 8.536807949705943e-06, "loss": 0.1793, "step": 46370 }, { "epoch": 2.5392870831736296, "grad_norm": 0.09584183990955353, "learning_rate": 8.531737984181708e-06, "loss": 0.1864, "step": 46375 }, { "epoch": 2.5395608607567213, "grad_norm": 0.09592445194721222, "learning_rate": 8.526668018657473e-06, "loss": 0.1815, "step": 46380 }, { "epoch": 2.539834638339813, "grad_norm": 0.09136542677879333, "learning_rate": 8.52159805313324e-06, "loss": 0.1789, "step": 46385 }, { "epoch": 2.540108415922904, "grad_norm": 0.0922069102525711, "learning_rate": 8.516528087609004e-06, "loss": 0.1855, "step": 46390 }, { "epoch": 2.540382193505996, "grad_norm": 0.09752272069454193, "learning_rate": 8.51145812208477e-06, "loss": 0.1763, "step": 46395 }, { "epoch": 2.5406559710890875, "grad_norm": 0.09122978895902634, "learning_rate": 8.506388156560534e-06, "loss": 0.1922, "step": 46400 }, { "epoch": 2.5409297486721787, "grad_norm": 0.09255416691303253, "learning_rate": 8.501318191036301e-06, "loss": 0.1834, "step": 46405 }, { "epoch": 2.5412035262552704, "grad_norm": 0.10206171125173569, "learning_rate": 8.496248225512068e-06, "loss": 0.186, "step": 46410 }, { "epoch": 2.5414773038383616, "grad_norm": 0.09830138832330704, "learning_rate": 8.491178259987833e-06, "loss": 0.1886, "step": 46415 }, { "epoch": 2.5417510814214532, "grad_norm": 0.09338942170143127, "learning_rate": 8.486108294463598e-06, "loss": 0.1782, "step": 46420 }, { "epoch": 2.5420248590045444, "grad_norm": 0.1030440554022789, "learning_rate": 8.481038328939365e-06, "loss": 0.1814, "step": 46425 }, { "epoch": 2.542298636587636, "grad_norm": 0.10401773452758789, "learning_rate": 8.47596836341513e-06, "loss": 0.1773, "step": 46430 }, { "epoch": 2.5425724141707278, "grad_norm": 0.10206165164709091, "learning_rate": 8.470898397890895e-06, "loss": 0.183, "step": 46435 }, { "epoch": 2.542846191753819, "grad_norm": 0.09788591414690018, "learning_rate": 8.46582843236666e-06, "loss": 0.1795, "step": 46440 }, { "epoch": 2.5431199693369106, "grad_norm": 0.09392967820167542, "learning_rate": 8.460758466842426e-06, "loss": 0.1871, "step": 46445 }, { "epoch": 2.5433937469200023, "grad_norm": 0.10133534669876099, "learning_rate": 8.455688501318191e-06, "loss": 0.1863, "step": 46450 }, { "epoch": 2.5436675245030935, "grad_norm": 0.1061534658074379, "learning_rate": 8.450618535793956e-06, "loss": 0.1842, "step": 46455 }, { "epoch": 2.543941302086185, "grad_norm": 0.09149649739265442, "learning_rate": 8.445548570269723e-06, "loss": 0.187, "step": 46460 }, { "epoch": 2.544215079669277, "grad_norm": 0.0934486910700798, "learning_rate": 8.440478604745488e-06, "loss": 0.1817, "step": 46465 }, { "epoch": 2.544488857252368, "grad_norm": 0.09330829977989197, "learning_rate": 8.435408639221255e-06, "loss": 0.1779, "step": 46470 }, { "epoch": 2.5447626348354597, "grad_norm": 0.09395431727170944, "learning_rate": 8.43033867369702e-06, "loss": 0.1794, "step": 46475 }, { "epoch": 2.5450364124185514, "grad_norm": 0.09055626392364502, "learning_rate": 8.425268708172785e-06, "loss": 0.1775, "step": 46480 }, { "epoch": 2.5453101900016426, "grad_norm": 0.09399578720331192, "learning_rate": 8.420198742648551e-06, "loss": 0.1799, "step": 46485 }, { "epoch": 2.5455839675847343, "grad_norm": 0.09491956979036331, "learning_rate": 8.415128777124316e-06, "loss": 0.1817, "step": 46490 }, { "epoch": 2.545857745167826, "grad_norm": 0.09360785782337189, "learning_rate": 8.410058811600081e-06, "loss": 0.19, "step": 46495 }, { "epoch": 2.546131522750917, "grad_norm": 0.0956801176071167, "learning_rate": 8.404988846075846e-06, "loss": 0.1809, "step": 46500 }, { "epoch": 2.546405300334009, "grad_norm": 0.0962790697813034, "learning_rate": 8.399918880551613e-06, "loss": 0.1833, "step": 46505 }, { "epoch": 2.5466790779171, "grad_norm": 0.09680087119340897, "learning_rate": 8.394848915027378e-06, "loss": 0.1772, "step": 46510 }, { "epoch": 2.5469528555001917, "grad_norm": 0.08696702122688293, "learning_rate": 8.389778949503143e-06, "loss": 0.1799, "step": 46515 }, { "epoch": 2.547226633083283, "grad_norm": 0.09460693597793579, "learning_rate": 8.38470898397891e-06, "loss": 0.1754, "step": 46520 }, { "epoch": 2.5475004106663746, "grad_norm": 0.09274940937757492, "learning_rate": 8.379639018454676e-06, "loss": 0.1748, "step": 46525 }, { "epoch": 2.547774188249466, "grad_norm": 0.09655749052762985, "learning_rate": 8.374569052930441e-06, "loss": 0.1829, "step": 46530 }, { "epoch": 2.5480479658325574, "grad_norm": 0.09323755651712418, "learning_rate": 8.369499087406206e-06, "loss": 0.1887, "step": 46535 }, { "epoch": 2.548321743415649, "grad_norm": 0.09966538846492767, "learning_rate": 8.364429121881971e-06, "loss": 0.1789, "step": 46540 }, { "epoch": 2.5485955209987408, "grad_norm": 0.09475985914468765, "learning_rate": 8.359359156357738e-06, "loss": 0.1851, "step": 46545 }, { "epoch": 2.548869298581832, "grad_norm": 0.10034342855215073, "learning_rate": 8.354289190833503e-06, "loss": 0.1916, "step": 46550 }, { "epoch": 2.5491430761649236, "grad_norm": 0.09004765003919601, "learning_rate": 8.349219225309268e-06, "loss": 0.182, "step": 46555 }, { "epoch": 2.5494168537480153, "grad_norm": 0.09323671460151672, "learning_rate": 8.344149259785033e-06, "loss": 0.1862, "step": 46560 }, { "epoch": 2.5496906313311065, "grad_norm": 0.10076852887868881, "learning_rate": 8.3390792942608e-06, "loss": 0.1828, "step": 46565 }, { "epoch": 2.549964408914198, "grad_norm": 0.10937163978815079, "learning_rate": 8.334009328736565e-06, "loss": 0.1849, "step": 46570 }, { "epoch": 2.55023818649729, "grad_norm": 0.10455974191427231, "learning_rate": 8.32893936321233e-06, "loss": 0.1822, "step": 46575 }, { "epoch": 2.550511964080381, "grad_norm": 0.0983656495809555, "learning_rate": 8.323869397688096e-06, "loss": 0.1917, "step": 46580 }, { "epoch": 2.5507857416634727, "grad_norm": 0.11139486730098724, "learning_rate": 8.318799432163863e-06, "loss": 0.1819, "step": 46585 }, { "epoch": 2.551059519246564, "grad_norm": 0.09245181828737259, "learning_rate": 8.313729466639628e-06, "loss": 0.1799, "step": 46590 }, { "epoch": 2.5513332968296556, "grad_norm": 0.10421761125326157, "learning_rate": 8.308659501115393e-06, "loss": 0.1788, "step": 46595 }, { "epoch": 2.551607074412747, "grad_norm": 0.0947800874710083, "learning_rate": 8.303589535591158e-06, "loss": 0.1796, "step": 46600 }, { "epoch": 2.5518808519958385, "grad_norm": 0.09030705690383911, "learning_rate": 8.298519570066923e-06, "loss": 0.1854, "step": 46605 }, { "epoch": 2.55215462957893, "grad_norm": 0.0907343178987503, "learning_rate": 8.29344960454269e-06, "loss": 0.1804, "step": 46610 }, { "epoch": 2.5524284071620214, "grad_norm": 0.10663308948278427, "learning_rate": 8.288379639018455e-06, "loss": 0.1855, "step": 46615 }, { "epoch": 2.552702184745113, "grad_norm": 0.09747827798128128, "learning_rate": 8.28330967349422e-06, "loss": 0.1817, "step": 46620 }, { "epoch": 2.5529759623282047, "grad_norm": 0.09470448642969131, "learning_rate": 8.278239707969985e-06, "loss": 0.1847, "step": 46625 }, { "epoch": 2.553249739911296, "grad_norm": 0.09090986102819443, "learning_rate": 8.273169742445751e-06, "loss": 0.1761, "step": 46630 }, { "epoch": 2.5535235174943876, "grad_norm": 0.09735153615474701, "learning_rate": 8.268099776921518e-06, "loss": 0.1788, "step": 46635 }, { "epoch": 2.553797295077479, "grad_norm": 0.09365372359752655, "learning_rate": 8.263029811397283e-06, "loss": 0.1811, "step": 46640 }, { "epoch": 2.5540710726605704, "grad_norm": 0.10430141538381577, "learning_rate": 8.257959845873048e-06, "loss": 0.1832, "step": 46645 }, { "epoch": 2.554344850243662, "grad_norm": 0.09242446720600128, "learning_rate": 8.252889880348815e-06, "loss": 0.1781, "step": 46650 }, { "epoch": 2.5546186278267538, "grad_norm": 0.10017286986112595, "learning_rate": 8.24781991482458e-06, "loss": 0.192, "step": 46655 }, { "epoch": 2.554892405409845, "grad_norm": 0.10133831202983856, "learning_rate": 8.242749949300345e-06, "loss": 0.1761, "step": 46660 }, { "epoch": 2.5551661829929366, "grad_norm": 0.0972617119550705, "learning_rate": 8.23767998377611e-06, "loss": 0.1798, "step": 46665 }, { "epoch": 2.5554399605760283, "grad_norm": 0.09970255196094513, "learning_rate": 8.232610018251877e-06, "loss": 0.1795, "step": 46670 }, { "epoch": 2.5557137381591195, "grad_norm": 0.08872048556804657, "learning_rate": 8.227540052727642e-06, "loss": 0.1755, "step": 46675 }, { "epoch": 2.555987515742211, "grad_norm": 0.09052705764770508, "learning_rate": 8.222470087203407e-06, "loss": 0.1844, "step": 46680 }, { "epoch": 2.5562612933253024, "grad_norm": 0.09961314499378204, "learning_rate": 8.217400121679172e-06, "loss": 0.1885, "step": 46685 }, { "epoch": 2.556535070908394, "grad_norm": 0.10471028834581375, "learning_rate": 8.212330156154938e-06, "loss": 0.183, "step": 46690 }, { "epoch": 2.5568088484914853, "grad_norm": 0.1098153293132782, "learning_rate": 8.207260190630705e-06, "loss": 0.1873, "step": 46695 }, { "epoch": 2.557082626074577, "grad_norm": 0.10166867077350616, "learning_rate": 8.20219022510647e-06, "loss": 0.1822, "step": 46700 }, { "epoch": 2.5573564036576686, "grad_norm": 0.09023527055978775, "learning_rate": 8.197120259582235e-06, "loss": 0.1784, "step": 46705 }, { "epoch": 2.55763018124076, "grad_norm": 0.09341321140527725, "learning_rate": 8.192050294058002e-06, "loss": 0.1862, "step": 46710 }, { "epoch": 2.5579039588238515, "grad_norm": 0.09557899087667465, "learning_rate": 8.186980328533767e-06, "loss": 0.1889, "step": 46715 }, { "epoch": 2.558177736406943, "grad_norm": 0.0867866575717926, "learning_rate": 8.181910363009532e-06, "loss": 0.1835, "step": 46720 }, { "epoch": 2.5584515139900343, "grad_norm": 0.09272515028715134, "learning_rate": 8.176840397485297e-06, "loss": 0.1901, "step": 46725 }, { "epoch": 2.558725291573126, "grad_norm": 0.09722352027893066, "learning_rate": 8.171770431961063e-06, "loss": 0.1838, "step": 46730 }, { "epoch": 2.5589990691562177, "grad_norm": 0.08785957098007202, "learning_rate": 8.166700466436828e-06, "loss": 0.1873, "step": 46735 }, { "epoch": 2.559272846739309, "grad_norm": 0.10818380117416382, "learning_rate": 8.161630500912593e-06, "loss": 0.1894, "step": 46740 }, { "epoch": 2.5595466243224005, "grad_norm": 0.09179072082042694, "learning_rate": 8.15656053538836e-06, "loss": 0.1791, "step": 46745 }, { "epoch": 2.559820401905492, "grad_norm": 0.10006878525018692, "learning_rate": 8.151490569864125e-06, "loss": 0.1834, "step": 46750 }, { "epoch": 2.5600941794885834, "grad_norm": 0.09447616338729858, "learning_rate": 8.146420604339892e-06, "loss": 0.1903, "step": 46755 }, { "epoch": 2.560367957071675, "grad_norm": 0.09119298309087753, "learning_rate": 8.141350638815657e-06, "loss": 0.1849, "step": 46760 }, { "epoch": 2.5606417346547667, "grad_norm": 0.09849800914525986, "learning_rate": 8.136280673291422e-06, "loss": 0.1929, "step": 46765 }, { "epoch": 2.560915512237858, "grad_norm": 0.10171893239021301, "learning_rate": 8.131210707767188e-06, "loss": 0.1881, "step": 46770 }, { "epoch": 2.5611892898209496, "grad_norm": 0.09284788370132446, "learning_rate": 8.126140742242953e-06, "loss": 0.1862, "step": 46775 }, { "epoch": 2.561463067404041, "grad_norm": 0.09450052678585052, "learning_rate": 8.121070776718718e-06, "loss": 0.1816, "step": 46780 }, { "epoch": 2.5617368449871325, "grad_norm": 0.09015823155641556, "learning_rate": 8.116000811194483e-06, "loss": 0.1894, "step": 46785 }, { "epoch": 2.5620106225702237, "grad_norm": 0.100957952439785, "learning_rate": 8.11093084567025e-06, "loss": 0.1776, "step": 46790 }, { "epoch": 2.5622844001533154, "grad_norm": 0.1008976399898529, "learning_rate": 8.105860880146015e-06, "loss": 0.1877, "step": 46795 }, { "epoch": 2.562558177736407, "grad_norm": 0.093166284263134, "learning_rate": 8.10079091462178e-06, "loss": 0.1705, "step": 46800 }, { "epoch": 2.5628319553194983, "grad_norm": 0.10198678076267242, "learning_rate": 8.095720949097547e-06, "loss": 0.1827, "step": 46805 }, { "epoch": 2.56310573290259, "grad_norm": 0.11633092164993286, "learning_rate": 8.090650983573313e-06, "loss": 0.1848, "step": 46810 }, { "epoch": 2.5633795104856816, "grad_norm": 0.11906242370605469, "learning_rate": 8.085581018049078e-06, "loss": 0.1838, "step": 46815 }, { "epoch": 2.563653288068773, "grad_norm": 0.09908537566661835, "learning_rate": 8.080511052524843e-06, "loss": 0.1814, "step": 46820 }, { "epoch": 2.5639270656518645, "grad_norm": 0.1007881686091423, "learning_rate": 8.075441087000608e-06, "loss": 0.1814, "step": 46825 }, { "epoch": 2.564200843234956, "grad_norm": 0.09654423594474792, "learning_rate": 8.070371121476375e-06, "loss": 0.1819, "step": 46830 }, { "epoch": 2.5644746208180473, "grad_norm": 0.09559611976146698, "learning_rate": 8.06530115595214e-06, "loss": 0.1842, "step": 46835 }, { "epoch": 2.564748398401139, "grad_norm": 0.08584687858819962, "learning_rate": 8.060231190427905e-06, "loss": 0.1787, "step": 46840 }, { "epoch": 2.5650221759842307, "grad_norm": 0.09790747612714767, "learning_rate": 8.05516122490367e-06, "loss": 0.1836, "step": 46845 }, { "epoch": 2.565295953567322, "grad_norm": 0.09214572608470917, "learning_rate": 8.050091259379437e-06, "loss": 0.1765, "step": 46850 }, { "epoch": 2.5655697311504135, "grad_norm": 0.09196576476097107, "learning_rate": 8.045021293855202e-06, "loss": 0.1903, "step": 46855 }, { "epoch": 2.5658435087335048, "grad_norm": 0.096134714782238, "learning_rate": 8.039951328330969e-06, "loss": 0.195, "step": 46860 }, { "epoch": 2.5661172863165964, "grad_norm": 0.09223711490631104, "learning_rate": 8.034881362806734e-06, "loss": 0.18, "step": 46865 }, { "epoch": 2.5663910638996876, "grad_norm": 0.09711170941591263, "learning_rate": 8.0298113972825e-06, "loss": 0.1794, "step": 46870 }, { "epoch": 2.5666648414827793, "grad_norm": 0.09173814952373505, "learning_rate": 8.024741431758265e-06, "loss": 0.1758, "step": 46875 }, { "epoch": 2.566938619065871, "grad_norm": 0.1011313796043396, "learning_rate": 8.01967146623403e-06, "loss": 0.1831, "step": 46880 }, { "epoch": 2.567212396648962, "grad_norm": 0.09138856828212738, "learning_rate": 8.014601500709795e-06, "loss": 0.1848, "step": 46885 }, { "epoch": 2.567486174232054, "grad_norm": 0.09716144949197769, "learning_rate": 8.00953153518556e-06, "loss": 0.1774, "step": 46890 }, { "epoch": 2.5677599518151455, "grad_norm": 0.09873167425394058, "learning_rate": 8.004461569661327e-06, "loss": 0.1853, "step": 46895 }, { "epoch": 2.5680337293982367, "grad_norm": 0.1012844368815422, "learning_rate": 7.999391604137092e-06, "loss": 0.1864, "step": 46900 }, { "epoch": 2.5683075069813284, "grad_norm": 0.09324813634157181, "learning_rate": 7.994321638612857e-06, "loss": 0.1803, "step": 46905 }, { "epoch": 2.56858128456442, "grad_norm": 0.09901668131351471, "learning_rate": 7.989251673088622e-06, "loss": 0.1851, "step": 46910 }, { "epoch": 2.5688550621475112, "grad_norm": 0.10267173498868942, "learning_rate": 7.984181707564389e-06, "loss": 0.1859, "step": 46915 }, { "epoch": 2.569128839730603, "grad_norm": 0.09369897097349167, "learning_rate": 7.979111742040155e-06, "loss": 0.1795, "step": 46920 }, { "epoch": 2.5694026173136946, "grad_norm": 0.08396273106336594, "learning_rate": 7.97404177651592e-06, "loss": 0.1813, "step": 46925 }, { "epoch": 2.569676394896786, "grad_norm": 0.10669997334480286, "learning_rate": 7.968971810991685e-06, "loss": 0.1889, "step": 46930 }, { "epoch": 2.5699501724798774, "grad_norm": 0.0948028638958931, "learning_rate": 7.963901845467452e-06, "loss": 0.1762, "step": 46935 }, { "epoch": 2.570223950062969, "grad_norm": 0.091008760035038, "learning_rate": 7.958831879943217e-06, "loss": 0.1816, "step": 46940 }, { "epoch": 2.5704977276460603, "grad_norm": 0.09249013662338257, "learning_rate": 7.953761914418982e-06, "loss": 0.1799, "step": 46945 }, { "epoch": 2.570771505229152, "grad_norm": 0.08839519321918488, "learning_rate": 7.948691948894747e-06, "loss": 0.1792, "step": 46950 }, { "epoch": 2.571045282812243, "grad_norm": 0.09734942764043808, "learning_rate": 7.943621983370514e-06, "loss": 0.1872, "step": 46955 }, { "epoch": 2.571319060395335, "grad_norm": 0.0979059636592865, "learning_rate": 7.938552017846279e-06, "loss": 0.186, "step": 46960 }, { "epoch": 2.571592837978426, "grad_norm": 0.0911354273557663, "learning_rate": 7.933482052322044e-06, "loss": 0.1871, "step": 46965 }, { "epoch": 2.5718666155615177, "grad_norm": 0.09982336312532425, "learning_rate": 7.92841208679781e-06, "loss": 0.1762, "step": 46970 }, { "epoch": 2.5721403931446094, "grad_norm": 0.09712375700473785, "learning_rate": 7.923342121273575e-06, "loss": 0.1772, "step": 46975 }, { "epoch": 2.5724141707277006, "grad_norm": 0.09883160144090652, "learning_rate": 7.918272155749342e-06, "loss": 0.1755, "step": 46980 }, { "epoch": 2.5726879483107923, "grad_norm": 0.09772272408008575, "learning_rate": 7.913202190225107e-06, "loss": 0.1816, "step": 46985 }, { "epoch": 2.572961725893884, "grad_norm": 0.09754182398319244, "learning_rate": 7.908132224700872e-06, "loss": 0.1777, "step": 46990 }, { "epoch": 2.573235503476975, "grad_norm": 0.11921309679746628, "learning_rate": 7.903062259176639e-06, "loss": 0.1875, "step": 46995 }, { "epoch": 2.573509281060067, "grad_norm": 0.13183243572711945, "learning_rate": 7.897992293652404e-06, "loss": 0.1867, "step": 47000 }, { "epoch": 2.5737830586431585, "grad_norm": 0.10844002664089203, "learning_rate": 7.892922328128169e-06, "loss": 0.1851, "step": 47005 }, { "epoch": 2.5740568362262497, "grad_norm": 0.10206954181194305, "learning_rate": 7.887852362603934e-06, "loss": 0.1756, "step": 47010 }, { "epoch": 2.5743306138093414, "grad_norm": 0.10014459490776062, "learning_rate": 7.8827823970797e-06, "loss": 0.1898, "step": 47015 }, { "epoch": 2.574604391392433, "grad_norm": 0.0931239053606987, "learning_rate": 7.877712431555465e-06, "loss": 0.1827, "step": 47020 }, { "epoch": 2.5748781689755242, "grad_norm": 0.10742704570293427, "learning_rate": 7.87264246603123e-06, "loss": 0.1875, "step": 47025 }, { "epoch": 2.575151946558616, "grad_norm": 0.09327176958322525, "learning_rate": 7.867572500506997e-06, "loss": 0.1775, "step": 47030 }, { "epoch": 2.575425724141707, "grad_norm": 0.09557585418224335, "learning_rate": 7.862502534982764e-06, "loss": 0.1793, "step": 47035 }, { "epoch": 2.575699501724799, "grad_norm": 0.09961674362421036, "learning_rate": 7.857432569458529e-06, "loss": 0.1827, "step": 47040 }, { "epoch": 2.57597327930789, "grad_norm": 0.10287012904882431, "learning_rate": 7.852362603934294e-06, "loss": 0.1931, "step": 47045 }, { "epoch": 2.5762470568909817, "grad_norm": 0.09512171149253845, "learning_rate": 7.847292638410059e-06, "loss": 0.1864, "step": 47050 }, { "epoch": 2.5765208344740733, "grad_norm": 0.09516525268554688, "learning_rate": 7.842222672885826e-06, "loss": 0.1807, "step": 47055 }, { "epoch": 2.5767946120571645, "grad_norm": 0.10517754405736923, "learning_rate": 7.83715270736159e-06, "loss": 0.1861, "step": 47060 }, { "epoch": 2.577068389640256, "grad_norm": 0.10299995541572571, "learning_rate": 7.832082741837356e-06, "loss": 0.1943, "step": 47065 }, { "epoch": 2.577342167223348, "grad_norm": 0.08437943458557129, "learning_rate": 7.82701277631312e-06, "loss": 0.1827, "step": 47070 }, { "epoch": 2.577615944806439, "grad_norm": 0.09423479437828064, "learning_rate": 7.821942810788887e-06, "loss": 0.1867, "step": 47075 }, { "epoch": 2.5778897223895307, "grad_norm": 0.09584926813840866, "learning_rate": 7.816872845264652e-06, "loss": 0.1869, "step": 47080 }, { "epoch": 2.5781634999726224, "grad_norm": 0.09527337551116943, "learning_rate": 7.811802879740417e-06, "loss": 0.189, "step": 47085 }, { "epoch": 2.5784372775557136, "grad_norm": 0.08535526692867279, "learning_rate": 7.806732914216184e-06, "loss": 0.1803, "step": 47090 }, { "epoch": 2.5787110551388053, "grad_norm": 0.0978359505534172, "learning_rate": 7.80166294869195e-06, "loss": 0.1787, "step": 47095 }, { "epoch": 2.578984832721897, "grad_norm": 0.0948052704334259, "learning_rate": 7.796592983167716e-06, "loss": 0.182, "step": 47100 }, { "epoch": 2.579258610304988, "grad_norm": 0.09032424539327621, "learning_rate": 7.79152301764348e-06, "loss": 0.1805, "step": 47105 }, { "epoch": 2.57953238788808, "grad_norm": 0.0899859145283699, "learning_rate": 7.786453052119246e-06, "loss": 0.1819, "step": 47110 }, { "epoch": 2.5798061654711715, "grad_norm": 0.08438614010810852, "learning_rate": 7.781383086595012e-06, "loss": 0.1817, "step": 47115 }, { "epoch": 2.5800799430542627, "grad_norm": 0.08778272569179535, "learning_rate": 7.776313121070777e-06, "loss": 0.1906, "step": 47120 }, { "epoch": 2.5803537206373544, "grad_norm": 0.10685713589191437, "learning_rate": 7.771243155546542e-06, "loss": 0.1829, "step": 47125 }, { "epoch": 2.5806274982204456, "grad_norm": 0.09153585880994797, "learning_rate": 7.766173190022307e-06, "loss": 0.179, "step": 47130 }, { "epoch": 2.5809012758035372, "grad_norm": 0.09726254642009735, "learning_rate": 7.761103224498074e-06, "loss": 0.1893, "step": 47135 }, { "epoch": 2.5811750533866284, "grad_norm": 0.09715701639652252, "learning_rate": 7.756033258973839e-06, "loss": 0.1848, "step": 47140 }, { "epoch": 2.58144883096972, "grad_norm": 0.10312519967556, "learning_rate": 7.750963293449606e-06, "loss": 0.1813, "step": 47145 }, { "epoch": 2.5817226085528118, "grad_norm": 0.11554504185914993, "learning_rate": 7.74589332792537e-06, "loss": 0.1798, "step": 47150 }, { "epoch": 2.581996386135903, "grad_norm": 0.1132844090461731, "learning_rate": 7.740823362401137e-06, "loss": 0.1914, "step": 47155 }, { "epoch": 2.5822701637189946, "grad_norm": 0.10124685615301132, "learning_rate": 7.735753396876902e-06, "loss": 0.1828, "step": 47160 }, { "epoch": 2.5825439413020863, "grad_norm": 0.09969712793827057, "learning_rate": 7.730683431352667e-06, "loss": 0.1899, "step": 47165 }, { "epoch": 2.5828177188851775, "grad_norm": 0.10099068284034729, "learning_rate": 7.725613465828432e-06, "loss": 0.1867, "step": 47170 }, { "epoch": 2.583091496468269, "grad_norm": 0.0922626256942749, "learning_rate": 7.720543500304199e-06, "loss": 0.1839, "step": 47175 }, { "epoch": 2.583365274051361, "grad_norm": 0.10567352175712585, "learning_rate": 7.715473534779964e-06, "loss": 0.181, "step": 47180 }, { "epoch": 2.583639051634452, "grad_norm": 0.09004028141498566, "learning_rate": 7.710403569255729e-06, "loss": 0.1775, "step": 47185 }, { "epoch": 2.5839128292175437, "grad_norm": 0.10016266256570816, "learning_rate": 7.705333603731494e-06, "loss": 0.1839, "step": 47190 }, { "epoch": 2.5841866068006354, "grad_norm": 0.09758652001619339, "learning_rate": 7.70026363820726e-06, "loss": 0.1775, "step": 47195 }, { "epoch": 2.5844603843837266, "grad_norm": 0.10375243425369263, "learning_rate": 7.695193672683026e-06, "loss": 0.1844, "step": 47200 }, { "epoch": 2.5847341619668183, "grad_norm": 0.09194989502429962, "learning_rate": 7.690123707158792e-06, "loss": 0.1855, "step": 47205 }, { "epoch": 2.58500793954991, "grad_norm": 0.0934981033205986, "learning_rate": 7.685053741634557e-06, "loss": 0.1753, "step": 47210 }, { "epoch": 2.585281717133001, "grad_norm": 0.09600929170846939, "learning_rate": 7.679983776110322e-06, "loss": 0.1836, "step": 47215 }, { "epoch": 2.585555494716093, "grad_norm": 0.08629072457551956, "learning_rate": 7.674913810586089e-06, "loss": 0.1824, "step": 47220 }, { "epoch": 2.585829272299184, "grad_norm": 0.09627199172973633, "learning_rate": 7.669843845061854e-06, "loss": 0.1796, "step": 47225 }, { "epoch": 2.5861030498822757, "grad_norm": 0.096328966319561, "learning_rate": 7.664773879537619e-06, "loss": 0.1862, "step": 47230 }, { "epoch": 2.586376827465367, "grad_norm": 0.08697172999382019, "learning_rate": 7.659703914013384e-06, "loss": 0.1834, "step": 47235 }, { "epoch": 2.5866506050484586, "grad_norm": 0.08567039668560028, "learning_rate": 7.65463394848915e-06, "loss": 0.1745, "step": 47240 }, { "epoch": 2.5869243826315502, "grad_norm": 0.09287045896053314, "learning_rate": 7.649563982964916e-06, "loss": 0.1782, "step": 47245 }, { "epoch": 2.5871981602146414, "grad_norm": 0.09433285892009735, "learning_rate": 7.64449401744068e-06, "loss": 0.1776, "step": 47250 }, { "epoch": 2.587471937797733, "grad_norm": 0.09937787055969238, "learning_rate": 7.639424051916447e-06, "loss": 0.1822, "step": 47255 }, { "epoch": 2.5877457153808248, "grad_norm": 0.08740387111902237, "learning_rate": 7.634354086392214e-06, "loss": 0.1867, "step": 47260 }, { "epoch": 2.588019492963916, "grad_norm": 0.09015271067619324, "learning_rate": 7.629284120867978e-06, "loss": 0.1855, "step": 47265 }, { "epoch": 2.5882932705470076, "grad_norm": 0.10276980698108673, "learning_rate": 7.624214155343744e-06, "loss": 0.1802, "step": 47270 }, { "epoch": 2.5885670481300993, "grad_norm": 0.10187222808599472, "learning_rate": 7.619144189819509e-06, "loss": 0.1843, "step": 47275 }, { "epoch": 2.5888408257131905, "grad_norm": 0.09151098132133484, "learning_rate": 7.614074224295276e-06, "loss": 0.1824, "step": 47280 }, { "epoch": 2.589114603296282, "grad_norm": 0.08840280026197433, "learning_rate": 7.609004258771041e-06, "loss": 0.1764, "step": 47285 }, { "epoch": 2.589388380879374, "grad_norm": 0.09278161078691483, "learning_rate": 7.603934293246806e-06, "loss": 0.1811, "step": 47290 }, { "epoch": 2.589662158462465, "grad_norm": 0.1044030636548996, "learning_rate": 7.598864327722572e-06, "loss": 0.1888, "step": 47295 }, { "epoch": 2.5899359360455567, "grad_norm": 0.10011347383260727, "learning_rate": 7.5937943621983375e-06, "loss": 0.1813, "step": 47300 }, { "epoch": 2.590209713628648, "grad_norm": 0.0977468490600586, "learning_rate": 7.588724396674103e-06, "loss": 0.1768, "step": 47305 }, { "epoch": 2.5904834912117396, "grad_norm": 0.10273543000221252, "learning_rate": 7.583654431149868e-06, "loss": 0.1858, "step": 47310 }, { "epoch": 2.590757268794831, "grad_norm": 0.09843078255653381, "learning_rate": 7.578584465625633e-06, "loss": 0.1828, "step": 47315 }, { "epoch": 2.5910310463779225, "grad_norm": 0.09593354910612106, "learning_rate": 7.5735145001014e-06, "loss": 0.179, "step": 47320 }, { "epoch": 2.591304823961014, "grad_norm": 0.09816252440214157, "learning_rate": 7.568444534577165e-06, "loss": 0.1775, "step": 47325 }, { "epoch": 2.5915786015441054, "grad_norm": 0.09697359055280685, "learning_rate": 7.563374569052931e-06, "loss": 0.1843, "step": 47330 }, { "epoch": 2.591852379127197, "grad_norm": 0.08846762031316757, "learning_rate": 7.558304603528696e-06, "loss": 0.1761, "step": 47335 }, { "epoch": 2.5921261567102887, "grad_norm": 0.0963011234998703, "learning_rate": 7.553234638004463e-06, "loss": 0.1822, "step": 47340 }, { "epoch": 2.59239993429338, "grad_norm": 0.094507597386837, "learning_rate": 7.548164672480228e-06, "loss": 0.1801, "step": 47345 }, { "epoch": 2.5926737118764716, "grad_norm": 0.09863227605819702, "learning_rate": 7.543094706955993e-06, "loss": 0.178, "step": 47350 }, { "epoch": 2.592947489459563, "grad_norm": 0.09593823552131653, "learning_rate": 7.5380247414317585e-06, "loss": 0.1823, "step": 47355 }, { "epoch": 2.5932212670426544, "grad_norm": 0.10253502428531647, "learning_rate": 7.532954775907525e-06, "loss": 0.1887, "step": 47360 }, { "epoch": 2.593495044625746, "grad_norm": 0.0986766666173935, "learning_rate": 7.52788481038329e-06, "loss": 0.1862, "step": 47365 }, { "epoch": 2.5937688222088378, "grad_norm": 0.09435071051120758, "learning_rate": 7.522814844859055e-06, "loss": 0.1855, "step": 47370 }, { "epoch": 2.594042599791929, "grad_norm": 0.0908016562461853, "learning_rate": 7.51774487933482e-06, "loss": 0.1838, "step": 47375 }, { "epoch": 2.5943163773750206, "grad_norm": 0.10042384266853333, "learning_rate": 7.512674913810587e-06, "loss": 0.1798, "step": 47380 }, { "epoch": 2.5945901549581123, "grad_norm": 0.10302116721868515, "learning_rate": 7.507604948286352e-06, "loss": 0.1799, "step": 47385 }, { "epoch": 2.5948639325412035, "grad_norm": 0.10467127710580826, "learning_rate": 7.502534982762118e-06, "loss": 0.1849, "step": 47390 }, { "epoch": 2.595137710124295, "grad_norm": 0.09487684816122055, "learning_rate": 7.497465017237883e-06, "loss": 0.1825, "step": 47395 }, { "epoch": 2.5954114877073864, "grad_norm": 0.09080369025468826, "learning_rate": 7.492395051713649e-06, "loss": 0.1771, "step": 47400 }, { "epoch": 2.595685265290478, "grad_norm": 0.10277093946933746, "learning_rate": 7.487325086189414e-06, "loss": 0.187, "step": 47405 }, { "epoch": 2.5959590428735693, "grad_norm": 0.10186535120010376, "learning_rate": 7.482255120665179e-06, "loss": 0.1903, "step": 47410 }, { "epoch": 2.596232820456661, "grad_norm": 0.10154855996370316, "learning_rate": 7.477185155140945e-06, "loss": 0.1867, "step": 47415 }, { "epoch": 2.5965065980397526, "grad_norm": 0.0967351645231247, "learning_rate": 7.472115189616712e-06, "loss": 0.1886, "step": 47420 }, { "epoch": 2.596780375622844, "grad_norm": 0.09800619632005692, "learning_rate": 7.467045224092477e-06, "loss": 0.1834, "step": 47425 }, { "epoch": 2.5970541532059355, "grad_norm": 0.09028860181570053, "learning_rate": 7.461975258568242e-06, "loss": 0.1804, "step": 47430 }, { "epoch": 2.597327930789027, "grad_norm": 0.11313987523317337, "learning_rate": 7.456905293044007e-06, "loss": 0.1864, "step": 47435 }, { "epoch": 2.5976017083721183, "grad_norm": 0.10197969526052475, "learning_rate": 7.451835327519774e-06, "loss": 0.1792, "step": 47440 }, { "epoch": 2.59787548595521, "grad_norm": 0.08730225265026093, "learning_rate": 7.4467653619955395e-06, "loss": 0.1843, "step": 47445 }, { "epoch": 2.5981492635383017, "grad_norm": 0.09022315591573715, "learning_rate": 7.4416953964713045e-06, "loss": 0.1788, "step": 47450 }, { "epoch": 2.598423041121393, "grad_norm": 0.09629978239536285, "learning_rate": 7.4366254309470695e-06, "loss": 0.1785, "step": 47455 }, { "epoch": 2.5986968187044845, "grad_norm": 0.09889764338731766, "learning_rate": 7.431555465422836e-06, "loss": 0.1841, "step": 47460 }, { "epoch": 2.598970596287576, "grad_norm": 0.0972118228673935, "learning_rate": 7.426485499898601e-06, "loss": 0.1873, "step": 47465 }, { "epoch": 2.5992443738706674, "grad_norm": 0.09492696821689606, "learning_rate": 7.421415534374367e-06, "loss": 0.188, "step": 47470 }, { "epoch": 2.599518151453759, "grad_norm": 0.09484413266181946, "learning_rate": 7.416345568850132e-06, "loss": 0.1802, "step": 47475 }, { "epoch": 2.5997919290368503, "grad_norm": 0.09857809543609619, "learning_rate": 7.411275603325899e-06, "loss": 0.1877, "step": 47480 }, { "epoch": 2.600065706619942, "grad_norm": 0.0950707197189331, "learning_rate": 7.406205637801664e-06, "loss": 0.1836, "step": 47485 }, { "epoch": 2.600339484203033, "grad_norm": 0.1010228767991066, "learning_rate": 7.401135672277429e-06, "loss": 0.1788, "step": 47490 }, { "epoch": 2.600613261786125, "grad_norm": 0.09413200616836548, "learning_rate": 7.3960657067531945e-06, "loss": 0.1894, "step": 47495 }, { "epoch": 2.6008870393692165, "grad_norm": 0.09009487181901932, "learning_rate": 7.3909957412289595e-06, "loss": 0.1858, "step": 47500 }, { "epoch": 2.6011608169523077, "grad_norm": 0.08218023926019669, "learning_rate": 7.385925775704726e-06, "loss": 0.1766, "step": 47505 }, { "epoch": 2.6014345945353994, "grad_norm": 0.08745595067739487, "learning_rate": 7.380855810180491e-06, "loss": 0.1832, "step": 47510 }, { "epoch": 2.601708372118491, "grad_norm": 0.0916365310549736, "learning_rate": 7.375785844656256e-06, "loss": 0.1783, "step": 47515 }, { "epoch": 2.6019821497015823, "grad_norm": 0.09318608790636063, "learning_rate": 7.370715879132021e-06, "loss": 0.1817, "step": 47520 }, { "epoch": 2.602255927284674, "grad_norm": 0.08975800126791, "learning_rate": 7.365645913607788e-06, "loss": 0.1873, "step": 47525 }, { "epoch": 2.6025297048677656, "grad_norm": 0.09360107779502869, "learning_rate": 7.360575948083554e-06, "loss": 0.1827, "step": 47530 }, { "epoch": 2.602803482450857, "grad_norm": 0.10227049887180328, "learning_rate": 7.355505982559319e-06, "loss": 0.1817, "step": 47535 }, { "epoch": 2.6030772600339485, "grad_norm": 0.09506077319383621, "learning_rate": 7.350436017035084e-06, "loss": 0.1858, "step": 47540 }, { "epoch": 2.60335103761704, "grad_norm": 0.08998129516839981, "learning_rate": 7.3453660515108504e-06, "loss": 0.1758, "step": 47545 }, { "epoch": 2.6036248152001313, "grad_norm": 0.09131129831075668, "learning_rate": 7.3402960859866154e-06, "loss": 0.1898, "step": 47550 }, { "epoch": 2.603898592783223, "grad_norm": 0.09046870470046997, "learning_rate": 7.335226120462381e-06, "loss": 0.18, "step": 47555 }, { "epoch": 2.6041723703663147, "grad_norm": 0.09706885367631912, "learning_rate": 7.330156154938146e-06, "loss": 0.1778, "step": 47560 }, { "epoch": 2.604446147949406, "grad_norm": 0.10596328973770142, "learning_rate": 7.325086189413913e-06, "loss": 0.1762, "step": 47565 }, { "epoch": 2.6047199255324975, "grad_norm": 0.09500458836555481, "learning_rate": 7.320016223889678e-06, "loss": 0.1886, "step": 47570 }, { "epoch": 2.6049937031155888, "grad_norm": 0.10625462234020233, "learning_rate": 7.314946258365443e-06, "loss": 0.1838, "step": 47575 }, { "epoch": 2.6052674806986804, "grad_norm": 0.10363554954528809, "learning_rate": 7.309876292841209e-06, "loss": 0.1872, "step": 47580 }, { "epoch": 2.6055412582817716, "grad_norm": 0.0967603474855423, "learning_rate": 7.304806327316975e-06, "loss": 0.173, "step": 47585 }, { "epoch": 2.6058150358648633, "grad_norm": 0.09738245606422424, "learning_rate": 7.2997363617927405e-06, "loss": 0.1901, "step": 47590 }, { "epoch": 2.606088813447955, "grad_norm": 0.10224785655736923, "learning_rate": 7.2946663962685055e-06, "loss": 0.1759, "step": 47595 }, { "epoch": 2.606362591031046, "grad_norm": 0.10436955094337463, "learning_rate": 7.2895964307442705e-06, "loss": 0.1842, "step": 47600 }, { "epoch": 2.606636368614138, "grad_norm": 0.09117979556322098, "learning_rate": 7.284526465220037e-06, "loss": 0.1791, "step": 47605 }, { "epoch": 2.6069101461972295, "grad_norm": 0.10331074893474579, "learning_rate": 7.279456499695802e-06, "loss": 0.185, "step": 47610 }, { "epoch": 2.6071839237803207, "grad_norm": 0.08985444158315659, "learning_rate": 7.274386534171568e-06, "loss": 0.1769, "step": 47615 }, { "epoch": 2.6074577013634124, "grad_norm": 0.09674011915922165, "learning_rate": 7.269316568647333e-06, "loss": 0.1777, "step": 47620 }, { "epoch": 2.607731478946504, "grad_norm": 0.09485330432653427, "learning_rate": 7.2642466031231e-06, "loss": 0.1812, "step": 47625 }, { "epoch": 2.6080052565295953, "grad_norm": 0.09346701204776764, "learning_rate": 7.259176637598865e-06, "loss": 0.1918, "step": 47630 }, { "epoch": 2.608279034112687, "grad_norm": 0.10007932782173157, "learning_rate": 7.25410667207463e-06, "loss": 0.1894, "step": 47635 }, { "epoch": 2.6085528116957786, "grad_norm": 0.0987025573849678, "learning_rate": 7.249036706550396e-06, "loss": 0.1891, "step": 47640 }, { "epoch": 2.60882658927887, "grad_norm": 0.09419236332178116, "learning_rate": 7.243966741026162e-06, "loss": 0.1845, "step": 47645 }, { "epoch": 2.6091003668619615, "grad_norm": 0.10057643055915833, "learning_rate": 7.238896775501927e-06, "loss": 0.1909, "step": 47650 }, { "epoch": 2.609374144445053, "grad_norm": 0.10169373452663422, "learning_rate": 7.233826809977692e-06, "loss": 0.1875, "step": 47655 }, { "epoch": 2.6096479220281443, "grad_norm": 0.10744789987802505, "learning_rate": 7.228756844453457e-06, "loss": 0.1811, "step": 47660 }, { "epoch": 2.609921699611236, "grad_norm": 0.09933972358703613, "learning_rate": 7.223686878929224e-06, "loss": 0.183, "step": 47665 }, { "epoch": 2.610195477194327, "grad_norm": 0.0919761210680008, "learning_rate": 7.21861691340499e-06, "loss": 0.1775, "step": 47670 }, { "epoch": 2.610469254777419, "grad_norm": 0.10792794823646545, "learning_rate": 7.213546947880755e-06, "loss": 0.1859, "step": 47675 }, { "epoch": 2.61074303236051, "grad_norm": 0.09365373849868774, "learning_rate": 7.20847698235652e-06, "loss": 0.1849, "step": 47680 }, { "epoch": 2.6110168099436017, "grad_norm": 0.09496499598026276, "learning_rate": 7.2034070168322865e-06, "loss": 0.1817, "step": 47685 }, { "epoch": 2.6112905875266934, "grad_norm": 0.10066934674978256, "learning_rate": 7.1983370513080515e-06, "loss": 0.1798, "step": 47690 }, { "epoch": 2.6115643651097846, "grad_norm": 0.09814495593309402, "learning_rate": 7.1932670857838165e-06, "loss": 0.1794, "step": 47695 }, { "epoch": 2.6118381426928763, "grad_norm": 0.09247198700904846, "learning_rate": 7.188197120259582e-06, "loss": 0.1857, "step": 47700 }, { "epoch": 2.612111920275968, "grad_norm": 0.09804574400186539, "learning_rate": 7.183127154735349e-06, "loss": 0.1842, "step": 47705 }, { "epoch": 2.612385697859059, "grad_norm": 0.10046405345201492, "learning_rate": 7.178057189211114e-06, "loss": 0.1822, "step": 47710 }, { "epoch": 2.612659475442151, "grad_norm": 0.1034899353981018, "learning_rate": 7.172987223686879e-06, "loss": 0.1859, "step": 47715 }, { "epoch": 2.6129332530252425, "grad_norm": 0.10481148213148117, "learning_rate": 7.167917258162644e-06, "loss": 0.1925, "step": 47720 }, { "epoch": 2.6132070306083337, "grad_norm": 0.11496999114751816, "learning_rate": 7.162847292638411e-06, "loss": 0.1841, "step": 47725 }, { "epoch": 2.6134808081914254, "grad_norm": 0.08967955410480499, "learning_rate": 7.1577773271141766e-06, "loss": 0.1842, "step": 47730 }, { "epoch": 2.613754585774517, "grad_norm": 0.10224612802267075, "learning_rate": 7.1527073615899416e-06, "loss": 0.1823, "step": 47735 }, { "epoch": 2.6140283633576082, "grad_norm": 0.11309715360403061, "learning_rate": 7.1476373960657066e-06, "loss": 0.1837, "step": 47740 }, { "epoch": 2.6143021409407, "grad_norm": 0.0974968895316124, "learning_rate": 7.142567430541473e-06, "loss": 0.1838, "step": 47745 }, { "epoch": 2.614575918523791, "grad_norm": 0.09633558243513107, "learning_rate": 7.137497465017238e-06, "loss": 0.1744, "step": 47750 }, { "epoch": 2.614849696106883, "grad_norm": 0.09497210383415222, "learning_rate": 7.132427499493004e-06, "loss": 0.1878, "step": 47755 }, { "epoch": 2.615123473689974, "grad_norm": 0.09416910260915756, "learning_rate": 7.127357533968769e-06, "loss": 0.1749, "step": 47760 }, { "epoch": 2.6153972512730657, "grad_norm": 0.10111500322818756, "learning_rate": 7.122287568444536e-06, "loss": 0.1823, "step": 47765 }, { "epoch": 2.6156710288561573, "grad_norm": 0.09821774065494537, "learning_rate": 7.117217602920301e-06, "loss": 0.1928, "step": 47770 }, { "epoch": 2.6159448064392485, "grad_norm": 0.08491237461566925, "learning_rate": 7.112147637396066e-06, "loss": 0.182, "step": 47775 }, { "epoch": 2.61621858402234, "grad_norm": 0.08784282207489014, "learning_rate": 7.107077671871832e-06, "loss": 0.1778, "step": 47780 }, { "epoch": 2.616492361605432, "grad_norm": 0.1145959421992302, "learning_rate": 7.102007706347597e-06, "loss": 0.1807, "step": 47785 }, { "epoch": 2.616766139188523, "grad_norm": 0.10026398301124573, "learning_rate": 7.096937740823363e-06, "loss": 0.18, "step": 47790 }, { "epoch": 2.6170399167716147, "grad_norm": 0.09628412127494812, "learning_rate": 7.091867775299128e-06, "loss": 0.1813, "step": 47795 }, { "epoch": 2.6173136943547064, "grad_norm": 0.10786837339401245, "learning_rate": 7.086797809774893e-06, "loss": 0.1797, "step": 47800 }, { "epoch": 2.6175874719377976, "grad_norm": 0.10814499109983444, "learning_rate": 7.081727844250659e-06, "loss": 0.1804, "step": 47805 }, { "epoch": 2.6178612495208893, "grad_norm": 0.09619664400815964, "learning_rate": 7.076657878726425e-06, "loss": 0.1838, "step": 47810 }, { "epoch": 2.618135027103981, "grad_norm": 0.09178557991981506, "learning_rate": 7.071587913202191e-06, "loss": 0.1762, "step": 47815 }, { "epoch": 2.618408804687072, "grad_norm": 0.09583760797977448, "learning_rate": 7.066517947677956e-06, "loss": 0.187, "step": 47820 }, { "epoch": 2.618682582270164, "grad_norm": 0.10354863107204437, "learning_rate": 7.061447982153721e-06, "loss": 0.188, "step": 47825 }, { "epoch": 2.6189563598532555, "grad_norm": 0.10844969749450684, "learning_rate": 7.0563780166294876e-06, "loss": 0.1862, "step": 47830 }, { "epoch": 2.6192301374363467, "grad_norm": 0.08916443586349487, "learning_rate": 7.0513080511052526e-06, "loss": 0.1826, "step": 47835 }, { "epoch": 2.6195039150194384, "grad_norm": 0.09671296179294586, "learning_rate": 7.046238085581018e-06, "loss": 0.1835, "step": 47840 }, { "epoch": 2.6197776926025296, "grad_norm": 0.10466774553060532, "learning_rate": 7.041168120056783e-06, "loss": 0.1879, "step": 47845 }, { "epoch": 2.6200514701856212, "grad_norm": 0.09954847395420074, "learning_rate": 7.03609815453255e-06, "loss": 0.1874, "step": 47850 }, { "epoch": 2.6203252477687125, "grad_norm": 0.09700379520654678, "learning_rate": 7.031028189008315e-06, "loss": 0.1831, "step": 47855 }, { "epoch": 2.620599025351804, "grad_norm": 0.08917612582445145, "learning_rate": 7.02595822348408e-06, "loss": 0.1757, "step": 47860 }, { "epoch": 2.6208728029348958, "grad_norm": 0.09021319448947906, "learning_rate": 7.020888257959846e-06, "loss": 0.1834, "step": 47865 }, { "epoch": 2.621146580517987, "grad_norm": 0.10408627241849899, "learning_rate": 7.015818292435613e-06, "loss": 0.1908, "step": 47870 }, { "epoch": 2.6214203581010787, "grad_norm": 0.10528981685638428, "learning_rate": 7.010748326911378e-06, "loss": 0.1899, "step": 47875 }, { "epoch": 2.6216941356841703, "grad_norm": 0.09384175390005112, "learning_rate": 7.005678361387143e-06, "loss": 0.1771, "step": 47880 }, { "epoch": 2.6219679132672615, "grad_norm": 0.10139814019203186, "learning_rate": 7.000608395862908e-06, "loss": 0.1841, "step": 47885 }, { "epoch": 2.622241690850353, "grad_norm": 0.09117363393306732, "learning_rate": 6.995538430338674e-06, "loss": 0.1867, "step": 47890 }, { "epoch": 2.622515468433445, "grad_norm": 0.08778172731399536, "learning_rate": 6.990468464814439e-06, "loss": 0.1886, "step": 47895 }, { "epoch": 2.622789246016536, "grad_norm": 0.1086944192647934, "learning_rate": 6.985398499290205e-06, "loss": 0.186, "step": 47900 }, { "epoch": 2.6230630235996277, "grad_norm": 0.09811646491289139, "learning_rate": 6.98032853376597e-06, "loss": 0.1766, "step": 47905 }, { "epoch": 2.6233368011827194, "grad_norm": 0.11207668483257294, "learning_rate": 6.975258568241737e-06, "loss": 0.1936, "step": 47910 }, { "epoch": 2.6236105787658106, "grad_norm": 0.10771258920431137, "learning_rate": 6.970188602717502e-06, "loss": 0.1832, "step": 47915 }, { "epoch": 2.6238843563489023, "grad_norm": 0.10260029882192612, "learning_rate": 6.965118637193267e-06, "loss": 0.1767, "step": 47920 }, { "epoch": 2.6241581339319935, "grad_norm": 0.1289922595024109, "learning_rate": 6.960048671669033e-06, "loss": 0.1839, "step": 47925 }, { "epoch": 2.624431911515085, "grad_norm": 0.09811894595623016, "learning_rate": 6.954978706144799e-06, "loss": 0.1875, "step": 47930 }, { "epoch": 2.6247056890981764, "grad_norm": 0.10011065751314163, "learning_rate": 6.949908740620564e-06, "loss": 0.1892, "step": 47935 }, { "epoch": 2.624979466681268, "grad_norm": 0.10444431751966476, "learning_rate": 6.944838775096329e-06, "loss": 0.1779, "step": 47940 }, { "epoch": 2.6252532442643597, "grad_norm": 0.08987798541784286, "learning_rate": 6.939768809572094e-06, "loss": 0.1882, "step": 47945 }, { "epoch": 2.625527021847451, "grad_norm": 0.10739441961050034, "learning_rate": 6.934698844047861e-06, "loss": 0.1857, "step": 47950 }, { "epoch": 2.6258007994305426, "grad_norm": 0.09634178876876831, "learning_rate": 6.929628878523627e-06, "loss": 0.1838, "step": 47955 }, { "epoch": 2.6260745770136342, "grad_norm": 0.09022577106952667, "learning_rate": 6.924558912999392e-06, "loss": 0.1853, "step": 47960 }, { "epoch": 2.6263483545967254, "grad_norm": 0.08675994724035263, "learning_rate": 6.919488947475157e-06, "loss": 0.1773, "step": 47965 }, { "epoch": 2.626622132179817, "grad_norm": 0.08820240944623947, "learning_rate": 6.914418981950924e-06, "loss": 0.1746, "step": 47970 }, { "epoch": 2.6268959097629088, "grad_norm": 0.09923912584781647, "learning_rate": 6.909349016426689e-06, "loss": 0.1798, "step": 47975 }, { "epoch": 2.627169687346, "grad_norm": 0.09110752493143082, "learning_rate": 6.9042790509024545e-06, "loss": 0.1831, "step": 47980 }, { "epoch": 2.6274434649290916, "grad_norm": 0.10197506844997406, "learning_rate": 6.8992090853782195e-06, "loss": 0.1887, "step": 47985 }, { "epoch": 2.6277172425121833, "grad_norm": 0.10325668752193451, "learning_rate": 6.894139119853986e-06, "loss": 0.1882, "step": 47990 }, { "epoch": 2.6279910200952745, "grad_norm": 0.09947815537452698, "learning_rate": 6.889069154329751e-06, "loss": 0.1809, "step": 47995 }, { "epoch": 2.628264797678366, "grad_norm": 0.10980818420648575, "learning_rate": 6.883999188805516e-06, "loss": 0.1774, "step": 48000 }, { "epoch": 2.628538575261458, "grad_norm": 0.10606197267770767, "learning_rate": 6.878929223281282e-06, "loss": 0.1848, "step": 48005 }, { "epoch": 2.628812352844549, "grad_norm": 0.0917837843298912, "learning_rate": 6.873859257757048e-06, "loss": 0.1933, "step": 48010 }, { "epoch": 2.6290861304276407, "grad_norm": 0.09744110703468323, "learning_rate": 6.868789292232814e-06, "loss": 0.1866, "step": 48015 }, { "epoch": 2.629359908010732, "grad_norm": 0.1018586978316307, "learning_rate": 6.863719326708579e-06, "loss": 0.1895, "step": 48020 }, { "epoch": 2.6296336855938236, "grad_norm": 0.09373670071363449, "learning_rate": 6.858649361184344e-06, "loss": 0.183, "step": 48025 }, { "epoch": 2.629907463176915, "grad_norm": 0.0950181633234024, "learning_rate": 6.85357939566011e-06, "loss": 0.1743, "step": 48030 }, { "epoch": 2.6301812407600065, "grad_norm": 0.09234602749347687, "learning_rate": 6.848509430135875e-06, "loss": 0.1865, "step": 48035 }, { "epoch": 2.630455018343098, "grad_norm": 0.09652058035135269, "learning_rate": 6.843439464611641e-06, "loss": 0.1821, "step": 48040 }, { "epoch": 2.6307287959261894, "grad_norm": 0.09959696233272552, "learning_rate": 6.838369499087406e-06, "loss": 0.1821, "step": 48045 }, { "epoch": 2.631002573509281, "grad_norm": 0.096981480717659, "learning_rate": 6.833299533563173e-06, "loss": 0.1847, "step": 48050 }, { "epoch": 2.6312763510923727, "grad_norm": 0.08814801275730133, "learning_rate": 6.828229568038938e-06, "loss": 0.1791, "step": 48055 }, { "epoch": 2.631550128675464, "grad_norm": 0.08120590448379517, "learning_rate": 6.823159602514703e-06, "loss": 0.188, "step": 48060 }, { "epoch": 2.6318239062585556, "grad_norm": 0.0983385443687439, "learning_rate": 6.818089636990469e-06, "loss": 0.186, "step": 48065 }, { "epoch": 2.632097683841647, "grad_norm": 0.09270492196083069, "learning_rate": 6.8130196714662354e-06, "loss": 0.1889, "step": 48070 }, { "epoch": 2.6323714614247384, "grad_norm": 0.09355780482292175, "learning_rate": 6.8079497059420004e-06, "loss": 0.1866, "step": 48075 }, { "epoch": 2.63264523900783, "grad_norm": 0.10054336488246918, "learning_rate": 6.8028797404177654e-06, "loss": 0.1847, "step": 48080 }, { "epoch": 2.6329190165909218, "grad_norm": 0.09219471365213394, "learning_rate": 6.7978097748935304e-06, "loss": 0.1875, "step": 48085 }, { "epoch": 2.633192794174013, "grad_norm": 0.09790432453155518, "learning_rate": 6.792739809369296e-06, "loss": 0.1909, "step": 48090 }, { "epoch": 2.6334665717571046, "grad_norm": 0.09981398284435272, "learning_rate": 6.787669843845062e-06, "loss": 0.1847, "step": 48095 }, { "epoch": 2.6337403493401963, "grad_norm": 0.0907161608338356, "learning_rate": 6.782599878320828e-06, "loss": 0.1768, "step": 48100 }, { "epoch": 2.6340141269232875, "grad_norm": 0.08619926869869232, "learning_rate": 6.777529912796593e-06, "loss": 0.1839, "step": 48105 }, { "epoch": 2.634287904506379, "grad_norm": 0.08955077826976776, "learning_rate": 6.772459947272358e-06, "loss": 0.1799, "step": 48110 }, { "epoch": 2.6345616820894704, "grad_norm": 0.09205850213766098, "learning_rate": 6.767389981748125e-06, "loss": 0.1772, "step": 48115 }, { "epoch": 2.634835459672562, "grad_norm": 0.10358592867851257, "learning_rate": 6.76232001622389e-06, "loss": 0.1804, "step": 48120 }, { "epoch": 2.6351092372556533, "grad_norm": 0.10242164880037308, "learning_rate": 6.7572500506996555e-06, "loss": 0.1833, "step": 48125 }, { "epoch": 2.635383014838745, "grad_norm": 0.10889346897602081, "learning_rate": 6.7521800851754205e-06, "loss": 0.1807, "step": 48130 }, { "epoch": 2.6356567924218366, "grad_norm": 0.08838853985071182, "learning_rate": 6.747110119651187e-06, "loss": 0.1757, "step": 48135 }, { "epoch": 2.635930570004928, "grad_norm": 0.10518048703670502, "learning_rate": 6.742040154126952e-06, "loss": 0.1878, "step": 48140 }, { "epoch": 2.6362043475880195, "grad_norm": 0.09635413438081741, "learning_rate": 6.736970188602717e-06, "loss": 0.1792, "step": 48145 }, { "epoch": 2.636478125171111, "grad_norm": 0.08596277236938477, "learning_rate": 6.731900223078483e-06, "loss": 0.1899, "step": 48150 }, { "epoch": 2.6367519027542023, "grad_norm": 0.10315373539924622, "learning_rate": 6.72683025755425e-06, "loss": 0.178, "step": 48155 }, { "epoch": 2.637025680337294, "grad_norm": 0.09785236418247223, "learning_rate": 6.721760292030015e-06, "loss": 0.185, "step": 48160 }, { "epoch": 2.6372994579203857, "grad_norm": 0.09248553216457367, "learning_rate": 6.71669032650578e-06, "loss": 0.1893, "step": 48165 }, { "epoch": 2.637573235503477, "grad_norm": 0.09894034266471863, "learning_rate": 6.711620360981545e-06, "loss": 0.1799, "step": 48170 }, { "epoch": 2.6378470130865685, "grad_norm": 0.1033153235912323, "learning_rate": 6.7065503954573114e-06, "loss": 0.1844, "step": 48175 }, { "epoch": 2.63812079066966, "grad_norm": 0.09518852829933167, "learning_rate": 6.701480429933077e-06, "loss": 0.1914, "step": 48180 }, { "epoch": 2.6383945682527514, "grad_norm": 0.09765000641345978, "learning_rate": 6.696410464408842e-06, "loss": 0.1839, "step": 48185 }, { "epoch": 2.638668345835843, "grad_norm": 0.10133158415555954, "learning_rate": 6.691340498884607e-06, "loss": 0.1862, "step": 48190 }, { "epoch": 2.6389421234189343, "grad_norm": 0.09043958783149719, "learning_rate": 6.686270533360374e-06, "loss": 0.1788, "step": 48195 }, { "epoch": 2.639215901002026, "grad_norm": 0.10959409922361374, "learning_rate": 6.681200567836139e-06, "loss": 0.1847, "step": 48200 }, { "epoch": 2.639489678585117, "grad_norm": 0.08844832330942154, "learning_rate": 6.676130602311905e-06, "loss": 0.174, "step": 48205 }, { "epoch": 2.639763456168209, "grad_norm": 0.10864852368831635, "learning_rate": 6.67106063678767e-06, "loss": 0.1834, "step": 48210 }, { "epoch": 2.6400372337513005, "grad_norm": 0.09397401660680771, "learning_rate": 6.6659906712634365e-06, "loss": 0.1866, "step": 48215 }, { "epoch": 2.6403110113343917, "grad_norm": 0.09030672907829285, "learning_rate": 6.6609207057392015e-06, "loss": 0.1801, "step": 48220 }, { "epoch": 2.6405847889174834, "grad_norm": 0.09340238571166992, "learning_rate": 6.6558507402149665e-06, "loss": 0.1812, "step": 48225 }, { "epoch": 2.640858566500575, "grad_norm": 0.10065654665231705, "learning_rate": 6.6507807746907315e-06, "loss": 0.19, "step": 48230 }, { "epoch": 2.6411323440836663, "grad_norm": 0.10957236588001251, "learning_rate": 6.645710809166498e-06, "loss": 0.1866, "step": 48235 }, { "epoch": 2.641406121666758, "grad_norm": 0.09423689544200897, "learning_rate": 6.640640843642264e-06, "loss": 0.1825, "step": 48240 }, { "epoch": 2.6416798992498496, "grad_norm": 0.09442023932933807, "learning_rate": 6.635570878118029e-06, "loss": 0.1887, "step": 48245 }, { "epoch": 2.641953676832941, "grad_norm": 0.09740644693374634, "learning_rate": 6.630500912593794e-06, "loss": 0.1777, "step": 48250 }, { "epoch": 2.6422274544160325, "grad_norm": 0.10138089954853058, "learning_rate": 6.625430947069561e-06, "loss": 0.1892, "step": 48255 }, { "epoch": 2.642501231999124, "grad_norm": 0.09123947471380234, "learning_rate": 6.620360981545326e-06, "loss": 0.1783, "step": 48260 }, { "epoch": 2.6427750095822153, "grad_norm": 0.1001330018043518, "learning_rate": 6.6152910160210916e-06, "loss": 0.1876, "step": 48265 }, { "epoch": 2.643048787165307, "grad_norm": 0.10337603092193604, "learning_rate": 6.6102210504968566e-06, "loss": 0.1936, "step": 48270 }, { "epoch": 2.6433225647483987, "grad_norm": 0.0914858728647232, "learning_rate": 6.605151084972623e-06, "loss": 0.1821, "step": 48275 }, { "epoch": 2.64359634233149, "grad_norm": 0.09728303551673889, "learning_rate": 6.600081119448388e-06, "loss": 0.1851, "step": 48280 }, { "epoch": 2.6438701199145815, "grad_norm": 0.09684410691261292, "learning_rate": 6.595011153924153e-06, "loss": 0.1914, "step": 48285 }, { "epoch": 2.6441438974976728, "grad_norm": 0.09662895649671555, "learning_rate": 6.589941188399919e-06, "loss": 0.18, "step": 48290 }, { "epoch": 2.6444176750807644, "grad_norm": 0.09774301946163177, "learning_rate": 6.584871222875685e-06, "loss": 0.1814, "step": 48295 }, { "epoch": 2.6446914526638556, "grad_norm": 0.08719372749328613, "learning_rate": 6.579801257351451e-06, "loss": 0.1779, "step": 48300 }, { "epoch": 2.6449652302469473, "grad_norm": 0.09413747489452362, "learning_rate": 6.574731291827216e-06, "loss": 0.1744, "step": 48305 }, { "epoch": 2.645239007830039, "grad_norm": 0.09492949396371841, "learning_rate": 6.569661326302981e-06, "loss": 0.1817, "step": 48310 }, { "epoch": 2.64551278541313, "grad_norm": 0.09516605734825134, "learning_rate": 6.5645913607787475e-06, "loss": 0.1849, "step": 48315 }, { "epoch": 2.645786562996222, "grad_norm": 0.11438240110874176, "learning_rate": 6.5595213952545125e-06, "loss": 0.1807, "step": 48320 }, { "epoch": 2.6460603405793135, "grad_norm": 0.10616292804479599, "learning_rate": 6.554451429730278e-06, "loss": 0.1849, "step": 48325 }, { "epoch": 2.6463341181624047, "grad_norm": 0.09383007138967514, "learning_rate": 6.549381464206043e-06, "loss": 0.1842, "step": 48330 }, { "epoch": 2.6466078957454964, "grad_norm": 0.09429315477609634, "learning_rate": 6.54431149868181e-06, "loss": 0.1859, "step": 48335 }, { "epoch": 2.646881673328588, "grad_norm": 0.09785456210374832, "learning_rate": 6.539241533157575e-06, "loss": 0.191, "step": 48340 }, { "epoch": 2.6471554509116793, "grad_norm": 0.09518570452928543, "learning_rate": 6.53417156763334e-06, "loss": 0.187, "step": 48345 }, { "epoch": 2.647429228494771, "grad_norm": 0.09602168202400208, "learning_rate": 6.529101602109106e-06, "loss": 0.1829, "step": 48350 }, { "epoch": 2.6477030060778626, "grad_norm": 0.10204430669546127, "learning_rate": 6.5240316365848726e-06, "loss": 0.1801, "step": 48355 }, { "epoch": 2.647976783660954, "grad_norm": 0.10297899693250656, "learning_rate": 6.5189616710606376e-06, "loss": 0.1893, "step": 48360 }, { "epoch": 2.6482505612440455, "grad_norm": 0.09949049353599548, "learning_rate": 6.5138917055364026e-06, "loss": 0.1844, "step": 48365 }, { "epoch": 2.6485243388271367, "grad_norm": 0.09067709743976593, "learning_rate": 6.5088217400121676e-06, "loss": 0.1828, "step": 48370 }, { "epoch": 2.6487981164102283, "grad_norm": 0.09984434396028519, "learning_rate": 6.503751774487933e-06, "loss": 0.1833, "step": 48375 }, { "epoch": 2.64907189399332, "grad_norm": 0.09348691999912262, "learning_rate": 6.4986818089637e-06, "loss": 0.1845, "step": 48380 }, { "epoch": 2.649345671576411, "grad_norm": 0.10903113335371017, "learning_rate": 6.493611843439465e-06, "loss": 0.1899, "step": 48385 }, { "epoch": 2.649619449159503, "grad_norm": 0.08625533431768417, "learning_rate": 6.48854187791523e-06, "loss": 0.1777, "step": 48390 }, { "epoch": 2.649893226742594, "grad_norm": 0.09926146268844604, "learning_rate": 6.483471912390995e-06, "loss": 0.1903, "step": 48395 }, { "epoch": 2.6501670043256857, "grad_norm": 0.0991806760430336, "learning_rate": 6.478401946866762e-06, "loss": 0.1782, "step": 48400 }, { "epoch": 2.6504407819087774, "grad_norm": 0.1050604060292244, "learning_rate": 6.473331981342528e-06, "loss": 0.1862, "step": 48405 }, { "epoch": 2.6507145594918686, "grad_norm": 0.1116306483745575, "learning_rate": 6.468262015818293e-06, "loss": 0.1827, "step": 48410 }, { "epoch": 2.6509883370749603, "grad_norm": 0.09216032922267914, "learning_rate": 6.463192050294058e-06, "loss": 0.1818, "step": 48415 }, { "epoch": 2.651262114658052, "grad_norm": 0.09734760969877243, "learning_rate": 6.458122084769824e-06, "loss": 0.179, "step": 48420 }, { "epoch": 2.651535892241143, "grad_norm": 0.09295245260000229, "learning_rate": 6.453052119245589e-06, "loss": 0.182, "step": 48425 }, { "epoch": 2.651809669824235, "grad_norm": 0.09160993993282318, "learning_rate": 6.447982153721354e-06, "loss": 0.183, "step": 48430 }, { "epoch": 2.6520834474073265, "grad_norm": 0.09597142785787582, "learning_rate": 6.44291218819712e-06, "loss": 0.1794, "step": 48435 }, { "epoch": 2.6523572249904177, "grad_norm": 0.09422807395458221, "learning_rate": 6.437842222672887e-06, "loss": 0.1832, "step": 48440 }, { "epoch": 2.6526310025735094, "grad_norm": 0.0972476527094841, "learning_rate": 6.432772257148652e-06, "loss": 0.182, "step": 48445 }, { "epoch": 2.652904780156601, "grad_norm": 0.09743177145719528, "learning_rate": 6.427702291624417e-06, "loss": 0.1835, "step": 48450 }, { "epoch": 2.6531785577396922, "grad_norm": 0.09337285906076431, "learning_rate": 6.422632326100182e-06, "loss": 0.1765, "step": 48455 }, { "epoch": 2.653452335322784, "grad_norm": 0.08972135186195374, "learning_rate": 6.4175623605759485e-06, "loss": 0.1733, "step": 48460 }, { "epoch": 2.653726112905875, "grad_norm": 0.09735027700662613, "learning_rate": 6.412492395051714e-06, "loss": 0.1796, "step": 48465 }, { "epoch": 2.653999890488967, "grad_norm": 0.0841025859117508, "learning_rate": 6.407422429527479e-06, "loss": 0.1821, "step": 48470 }, { "epoch": 2.654273668072058, "grad_norm": 0.0927552729845047, "learning_rate": 6.402352464003244e-06, "loss": 0.1761, "step": 48475 }, { "epoch": 2.6545474456551497, "grad_norm": 0.11738737672567368, "learning_rate": 6.397282498479011e-06, "loss": 0.1882, "step": 48480 }, { "epoch": 2.6548212232382413, "grad_norm": 0.10165845602750778, "learning_rate": 6.392212532954776e-06, "loss": 0.1912, "step": 48485 }, { "epoch": 2.6550950008213325, "grad_norm": 0.09886204451322556, "learning_rate": 6.387142567430542e-06, "loss": 0.1805, "step": 48490 }, { "epoch": 2.655368778404424, "grad_norm": 0.09976603835821152, "learning_rate": 6.382072601906307e-06, "loss": 0.1869, "step": 48495 }, { "epoch": 2.655642555987516, "grad_norm": 0.10115988552570343, "learning_rate": 6.377002636382074e-06, "loss": 0.1846, "step": 48500 }, { "epoch": 2.655916333570607, "grad_norm": 0.09476955980062485, "learning_rate": 6.371932670857839e-06, "loss": 0.1761, "step": 48505 }, { "epoch": 2.6561901111536987, "grad_norm": 0.10591381043195724, "learning_rate": 6.366862705333604e-06, "loss": 0.1828, "step": 48510 }, { "epoch": 2.6564638887367904, "grad_norm": 0.08986928313970566, "learning_rate": 6.3617927398093695e-06, "loss": 0.1782, "step": 48515 }, { "epoch": 2.6567376663198816, "grad_norm": 0.09132757037878036, "learning_rate": 6.356722774285135e-06, "loss": 0.1847, "step": 48520 }, { "epoch": 2.6570114439029733, "grad_norm": 0.09543154388666153, "learning_rate": 6.351652808760901e-06, "loss": 0.1797, "step": 48525 }, { "epoch": 2.657285221486065, "grad_norm": 0.1052432581782341, "learning_rate": 6.346582843236666e-06, "loss": 0.1876, "step": 48530 }, { "epoch": 2.657558999069156, "grad_norm": 0.09240712970495224, "learning_rate": 6.341512877712431e-06, "loss": 0.1785, "step": 48535 }, { "epoch": 2.657832776652248, "grad_norm": 0.09647564589977264, "learning_rate": 6.336442912188198e-06, "loss": 0.1843, "step": 48540 }, { "epoch": 2.6581065542353395, "grad_norm": 0.09466897696256638, "learning_rate": 6.331372946663963e-06, "loss": 0.18, "step": 48545 }, { "epoch": 2.6583803318184307, "grad_norm": 0.10663457214832306, "learning_rate": 6.326302981139729e-06, "loss": 0.1805, "step": 48550 }, { "epoch": 2.6586541094015224, "grad_norm": 0.10324249416589737, "learning_rate": 6.321233015615494e-06, "loss": 0.1809, "step": 48555 }, { "epoch": 2.6589278869846136, "grad_norm": 0.08890042454004288, "learning_rate": 6.31616305009126e-06, "loss": 0.1854, "step": 48560 }, { "epoch": 2.6592016645677052, "grad_norm": 0.09880994260311127, "learning_rate": 6.311093084567025e-06, "loss": 0.1829, "step": 48565 }, { "epoch": 2.6594754421507965, "grad_norm": 0.08938590437173843, "learning_rate": 6.30602311904279e-06, "loss": 0.1859, "step": 48570 }, { "epoch": 2.659749219733888, "grad_norm": 0.10394978523254395, "learning_rate": 6.300953153518556e-06, "loss": 0.1804, "step": 48575 }, { "epoch": 2.6600229973169798, "grad_norm": 0.09497026354074478, "learning_rate": 6.295883187994323e-06, "loss": 0.1765, "step": 48580 }, { "epoch": 2.660296774900071, "grad_norm": 0.09639635682106018, "learning_rate": 6.290813222470088e-06, "loss": 0.1864, "step": 48585 }, { "epoch": 2.6605705524831627, "grad_norm": 0.09545451402664185, "learning_rate": 6.285743256945853e-06, "loss": 0.1807, "step": 48590 }, { "epoch": 2.6608443300662543, "grad_norm": 0.08934608846902847, "learning_rate": 6.280673291421618e-06, "loss": 0.1821, "step": 48595 }, { "epoch": 2.6611181076493455, "grad_norm": 0.10536621510982513, "learning_rate": 6.275603325897385e-06, "loss": 0.18, "step": 48600 }, { "epoch": 2.661391885232437, "grad_norm": 0.09748970717191696, "learning_rate": 6.2705333603731504e-06, "loss": 0.1874, "step": 48605 }, { "epoch": 2.661665662815529, "grad_norm": 0.09053418785333633, "learning_rate": 6.2654633948489154e-06, "loss": 0.1836, "step": 48610 }, { "epoch": 2.66193944039862, "grad_norm": 0.09614627063274384, "learning_rate": 6.2603934293246804e-06, "loss": 0.1778, "step": 48615 }, { "epoch": 2.6622132179817117, "grad_norm": 0.10181377083063126, "learning_rate": 6.255323463800447e-06, "loss": 0.1851, "step": 48620 }, { "epoch": 2.6624869955648034, "grad_norm": 0.10351444035768509, "learning_rate": 6.250253498276212e-06, "loss": 0.1876, "step": 48625 }, { "epoch": 2.6627607731478946, "grad_norm": 0.09353110194206238, "learning_rate": 6.245183532751977e-06, "loss": 0.1872, "step": 48630 }, { "epoch": 2.6630345507309863, "grad_norm": 0.11235107481479645, "learning_rate": 6.240113567227744e-06, "loss": 0.1896, "step": 48635 }, { "epoch": 2.6633083283140775, "grad_norm": 0.10105810314416885, "learning_rate": 6.235043601703509e-06, "loss": 0.1831, "step": 48640 }, { "epoch": 2.663582105897169, "grad_norm": 0.08982012420892715, "learning_rate": 6.229973636179275e-06, "loss": 0.1798, "step": 48645 }, { "epoch": 2.6638558834802604, "grad_norm": 0.09482884407043457, "learning_rate": 6.22490367065504e-06, "loss": 0.1779, "step": 48650 }, { "epoch": 2.664129661063352, "grad_norm": 0.08958758413791656, "learning_rate": 6.2198337051308055e-06, "loss": 0.1797, "step": 48655 }, { "epoch": 2.6644034386464437, "grad_norm": 0.09708064794540405, "learning_rate": 6.214763739606571e-06, "loss": 0.1792, "step": 48660 }, { "epoch": 2.664677216229535, "grad_norm": 0.08931900560855865, "learning_rate": 6.209693774082337e-06, "loss": 0.1777, "step": 48665 }, { "epoch": 2.6649509938126266, "grad_norm": 0.0990951657295227, "learning_rate": 6.204623808558102e-06, "loss": 0.1808, "step": 48670 }, { "epoch": 2.6652247713957182, "grad_norm": 0.09299208223819733, "learning_rate": 6.199553843033867e-06, "loss": 0.1893, "step": 48675 }, { "epoch": 2.6654985489788094, "grad_norm": 0.08678191155195236, "learning_rate": 6.194483877509633e-06, "loss": 0.1877, "step": 48680 }, { "epoch": 2.665772326561901, "grad_norm": 0.0889151394367218, "learning_rate": 6.189413911985398e-06, "loss": 0.175, "step": 48685 }, { "epoch": 2.6660461041449928, "grad_norm": 0.08777280151844025, "learning_rate": 6.184343946461165e-06, "loss": 0.1819, "step": 48690 }, { "epoch": 2.666319881728084, "grad_norm": 0.10345175862312317, "learning_rate": 6.17927398093693e-06, "loss": 0.1858, "step": 48695 }, { "epoch": 2.6665936593111756, "grad_norm": 0.10195128619670868, "learning_rate": 6.174204015412696e-06, "loss": 0.1827, "step": 48700 }, { "epoch": 2.6668674368942673, "grad_norm": 0.102491594851017, "learning_rate": 6.169134049888461e-06, "loss": 0.1761, "step": 48705 }, { "epoch": 2.6671412144773585, "grad_norm": 0.08649169653654099, "learning_rate": 6.1640640843642264e-06, "loss": 0.1721, "step": 48710 }, { "epoch": 2.66741499206045, "grad_norm": 0.10391531139612198, "learning_rate": 6.158994118839992e-06, "loss": 0.183, "step": 48715 }, { "epoch": 2.667688769643542, "grad_norm": 0.09154010564088821, "learning_rate": 6.153924153315758e-06, "loss": 0.1806, "step": 48720 }, { "epoch": 2.667962547226633, "grad_norm": 0.09984391182661057, "learning_rate": 6.148854187791523e-06, "loss": 0.1854, "step": 48725 }, { "epoch": 2.6682363248097247, "grad_norm": 0.08775731921195984, "learning_rate": 6.143784222267289e-06, "loss": 0.1817, "step": 48730 }, { "epoch": 2.668510102392816, "grad_norm": 0.0926501676440239, "learning_rate": 6.138714256743054e-06, "loss": 0.1843, "step": 48735 }, { "epoch": 2.6687838799759076, "grad_norm": 0.09917483478784561, "learning_rate": 6.13364429121882e-06, "loss": 0.1799, "step": 48740 }, { "epoch": 2.669057657558999, "grad_norm": 0.09263613075017929, "learning_rate": 6.128574325694586e-06, "loss": 0.1891, "step": 48745 }, { "epoch": 2.6693314351420905, "grad_norm": 0.10541191697120667, "learning_rate": 6.1235043601703515e-06, "loss": 0.1767, "step": 48750 }, { "epoch": 2.669605212725182, "grad_norm": 0.11507982015609741, "learning_rate": 6.1184343946461165e-06, "loss": 0.1902, "step": 48755 }, { "epoch": 2.6698789903082734, "grad_norm": 0.1058090403676033, "learning_rate": 6.113364429121882e-06, "loss": 0.1807, "step": 48760 }, { "epoch": 2.670152767891365, "grad_norm": 0.09846662729978561, "learning_rate": 6.108294463597647e-06, "loss": 0.1806, "step": 48765 }, { "epoch": 2.6704265454744567, "grad_norm": 0.09053736180067062, "learning_rate": 6.103224498073413e-06, "loss": 0.177, "step": 48770 }, { "epoch": 2.670700323057548, "grad_norm": 0.10332062095403671, "learning_rate": 6.098154532549179e-06, "loss": 0.1781, "step": 48775 }, { "epoch": 2.6709741006406396, "grad_norm": 0.0955054834485054, "learning_rate": 6.093084567024945e-06, "loss": 0.1802, "step": 48780 }, { "epoch": 2.671247878223731, "grad_norm": 0.10074817389249802, "learning_rate": 6.08801460150071e-06, "loss": 0.1756, "step": 48785 }, { "epoch": 2.6715216558068224, "grad_norm": 0.09649283438920975, "learning_rate": 6.082944635976476e-06, "loss": 0.1861, "step": 48790 }, { "epoch": 2.671795433389914, "grad_norm": 0.09821640700101852, "learning_rate": 6.077874670452241e-06, "loss": 0.1783, "step": 48795 }, { "epoch": 2.6720692109730058, "grad_norm": 0.09470070898532867, "learning_rate": 6.0728047049280066e-06, "loss": 0.1817, "step": 48800 }, { "epoch": 2.672342988556097, "grad_norm": 0.09927106648683548, "learning_rate": 6.067734739403772e-06, "loss": 0.1773, "step": 48805 }, { "epoch": 2.6726167661391886, "grad_norm": 0.09063943475484848, "learning_rate": 6.062664773879538e-06, "loss": 0.1798, "step": 48810 }, { "epoch": 2.67289054372228, "grad_norm": 0.0870489776134491, "learning_rate": 6.057594808355303e-06, "loss": 0.1852, "step": 48815 }, { "epoch": 2.6731643213053715, "grad_norm": 0.0967269092798233, "learning_rate": 6.052524842831069e-06, "loss": 0.1855, "step": 48820 }, { "epoch": 2.673438098888463, "grad_norm": 0.09200746566057205, "learning_rate": 6.047454877306834e-06, "loss": 0.1823, "step": 48825 }, { "epoch": 2.6737118764715544, "grad_norm": 0.09917489439249039, "learning_rate": 6.0423849117826e-06, "loss": 0.1798, "step": 48830 }, { "epoch": 2.673985654054646, "grad_norm": 0.09659059345722198, "learning_rate": 6.037314946258366e-06, "loss": 0.1825, "step": 48835 }, { "epoch": 2.6742594316377373, "grad_norm": 0.09494118392467499, "learning_rate": 6.032244980734132e-06, "loss": 0.1784, "step": 48840 }, { "epoch": 2.674533209220829, "grad_norm": 0.09064314514398575, "learning_rate": 6.027175015209897e-06, "loss": 0.1874, "step": 48845 }, { "epoch": 2.6748069868039206, "grad_norm": 0.09193592518568039, "learning_rate": 6.0221050496856625e-06, "loss": 0.1848, "step": 48850 }, { "epoch": 2.675080764387012, "grad_norm": 0.0930003821849823, "learning_rate": 6.0170350841614275e-06, "loss": 0.1849, "step": 48855 }, { "epoch": 2.6753545419701035, "grad_norm": 0.09054552018642426, "learning_rate": 6.011965118637194e-06, "loss": 0.1785, "step": 48860 }, { "epoch": 2.675628319553195, "grad_norm": 0.09098213911056519, "learning_rate": 6.006895153112959e-06, "loss": 0.183, "step": 48865 }, { "epoch": 2.6759020971362864, "grad_norm": 0.11856414377689362, "learning_rate": 6.001825187588725e-06, "loss": 0.1819, "step": 48870 }, { "epoch": 2.676175874719378, "grad_norm": 0.09531564265489578, "learning_rate": 5.99675522206449e-06, "loss": 0.1772, "step": 48875 }, { "epoch": 2.6764496523024697, "grad_norm": 0.0969085693359375, "learning_rate": 5.991685256540256e-06, "loss": 0.1917, "step": 48880 }, { "epoch": 2.676723429885561, "grad_norm": 0.09560254961252213, "learning_rate": 5.986615291016021e-06, "loss": 0.1778, "step": 48885 }, { "epoch": 2.6769972074686526, "grad_norm": 0.09906221926212311, "learning_rate": 5.9815453254917876e-06, "loss": 0.1883, "step": 48890 }, { "epoch": 2.677270985051744, "grad_norm": 0.10770661383867264, "learning_rate": 5.9764753599675526e-06, "loss": 0.1863, "step": 48895 }, { "epoch": 2.6775447626348354, "grad_norm": 0.09640399366617203, "learning_rate": 5.971405394443318e-06, "loss": 0.1797, "step": 48900 }, { "epoch": 2.677818540217927, "grad_norm": 0.09691363573074341, "learning_rate": 5.966335428919083e-06, "loss": 0.1797, "step": 48905 }, { "epoch": 2.6780923178010183, "grad_norm": 0.10253148525953293, "learning_rate": 5.961265463394849e-06, "loss": 0.1916, "step": 48910 }, { "epoch": 2.67836609538411, "grad_norm": 0.08981861919164658, "learning_rate": 5.956195497870615e-06, "loss": 0.1794, "step": 48915 }, { "epoch": 2.678639872967201, "grad_norm": 0.09828905016183853, "learning_rate": 5.951125532346381e-06, "loss": 0.176, "step": 48920 }, { "epoch": 2.678913650550293, "grad_norm": 0.10354184359312057, "learning_rate": 5.946055566822146e-06, "loss": 0.1856, "step": 48925 }, { "epoch": 2.6791874281333845, "grad_norm": 0.09344127774238586, "learning_rate": 5.940985601297912e-06, "loss": 0.1809, "step": 48930 }, { "epoch": 2.6794612057164757, "grad_norm": 0.0887991338968277, "learning_rate": 5.935915635773677e-06, "loss": 0.1832, "step": 48935 }, { "epoch": 2.6797349832995674, "grad_norm": 0.10221398621797562, "learning_rate": 5.930845670249443e-06, "loss": 0.1807, "step": 48940 }, { "epoch": 2.680008760882659, "grad_norm": 0.10821566730737686, "learning_rate": 5.9257757047252085e-06, "loss": 0.1866, "step": 48945 }, { "epoch": 2.6802825384657503, "grad_norm": 0.09136158227920532, "learning_rate": 5.920705739200974e-06, "loss": 0.1865, "step": 48950 }, { "epoch": 2.680556316048842, "grad_norm": 0.09004244953393936, "learning_rate": 5.915635773676739e-06, "loss": 0.1844, "step": 48955 }, { "epoch": 2.6808300936319336, "grad_norm": 0.09594520926475525, "learning_rate": 5.910565808152505e-06, "loss": 0.1765, "step": 48960 }, { "epoch": 2.681103871215025, "grad_norm": 0.09147562086582184, "learning_rate": 5.90549584262827e-06, "loss": 0.1848, "step": 48965 }, { "epoch": 2.6813776487981165, "grad_norm": 0.0973961353302002, "learning_rate": 5.900425877104036e-06, "loss": 0.183, "step": 48970 }, { "epoch": 2.681651426381208, "grad_norm": 0.1105758398771286, "learning_rate": 5.895355911579802e-06, "loss": 0.1805, "step": 48975 }, { "epoch": 2.6819252039642993, "grad_norm": 0.09075808525085449, "learning_rate": 5.890285946055567e-06, "loss": 0.1797, "step": 48980 }, { "epoch": 2.682198981547391, "grad_norm": 0.0951288565993309, "learning_rate": 5.885215980531333e-06, "loss": 0.1827, "step": 48985 }, { "epoch": 2.6824727591304827, "grad_norm": 0.10091294348239899, "learning_rate": 5.880146015007098e-06, "loss": 0.1804, "step": 48990 }, { "epoch": 2.682746536713574, "grad_norm": 0.09026466310024261, "learning_rate": 5.8750760494828635e-06, "loss": 0.184, "step": 48995 }, { "epoch": 2.6830203142966655, "grad_norm": 0.08966510742902756, "learning_rate": 5.870006083958629e-06, "loss": 0.1853, "step": 49000 }, { "epoch": 2.6832940918797568, "grad_norm": 0.09787974506616592, "learning_rate": 5.864936118434395e-06, "loss": 0.1821, "step": 49005 }, { "epoch": 2.6835678694628484, "grad_norm": 0.09959384053945541, "learning_rate": 5.85986615291016e-06, "loss": 0.1863, "step": 49010 }, { "epoch": 2.6838416470459396, "grad_norm": 0.10342150181531906, "learning_rate": 5.854796187385926e-06, "loss": 0.1841, "step": 49015 }, { "epoch": 2.6841154246290313, "grad_norm": 0.08674698323011398, "learning_rate": 5.849726221861691e-06, "loss": 0.1856, "step": 49020 }, { "epoch": 2.684389202212123, "grad_norm": 0.09011852741241455, "learning_rate": 5.844656256337457e-06, "loss": 0.179, "step": 49025 }, { "epoch": 2.684662979795214, "grad_norm": 0.09246059507131577, "learning_rate": 5.839586290813223e-06, "loss": 0.1852, "step": 49030 }, { "epoch": 2.684936757378306, "grad_norm": 0.09017031639814377, "learning_rate": 5.834516325288989e-06, "loss": 0.1696, "step": 49035 }, { "epoch": 2.6852105349613975, "grad_norm": 0.09751258045434952, "learning_rate": 5.829446359764754e-06, "loss": 0.1809, "step": 49040 }, { "epoch": 2.6854843125444887, "grad_norm": 0.09236558526754379, "learning_rate": 5.8243763942405195e-06, "loss": 0.1811, "step": 49045 }, { "epoch": 2.6857580901275804, "grad_norm": 0.08717288076877594, "learning_rate": 5.8193064287162845e-06, "loss": 0.1822, "step": 49050 }, { "epoch": 2.686031867710672, "grad_norm": 0.09992717206478119, "learning_rate": 5.81423646319205e-06, "loss": 0.1841, "step": 49055 }, { "epoch": 2.6863056452937633, "grad_norm": 0.08889249712228775, "learning_rate": 5.809166497667816e-06, "loss": 0.1736, "step": 49060 }, { "epoch": 2.686579422876855, "grad_norm": 0.10035250335931778, "learning_rate": 5.804096532143582e-06, "loss": 0.1835, "step": 49065 }, { "epoch": 2.6868532004599466, "grad_norm": 0.08745463937520981, "learning_rate": 5.799026566619347e-06, "loss": 0.1773, "step": 49070 }, { "epoch": 2.687126978043038, "grad_norm": 0.09657759964466095, "learning_rate": 5.793956601095113e-06, "loss": 0.187, "step": 49075 }, { "epoch": 2.6874007556261295, "grad_norm": 0.08539728075265884, "learning_rate": 5.788886635570878e-06, "loss": 0.1785, "step": 49080 }, { "epoch": 2.6876745332092207, "grad_norm": 0.09491100907325745, "learning_rate": 5.783816670046644e-06, "loss": 0.1847, "step": 49085 }, { "epoch": 2.6879483107923123, "grad_norm": 0.08836134523153305, "learning_rate": 5.7787467045224095e-06, "loss": 0.1752, "step": 49090 }, { "epoch": 2.6882220883754036, "grad_norm": 0.11626458168029785, "learning_rate": 5.773676738998175e-06, "loss": 0.1892, "step": 49095 }, { "epoch": 2.688495865958495, "grad_norm": 0.09162653237581253, "learning_rate": 5.76860677347394e-06, "loss": 0.1737, "step": 49100 }, { "epoch": 2.688769643541587, "grad_norm": 0.08993268758058548, "learning_rate": 5.763536807949706e-06, "loss": 0.1777, "step": 49105 }, { "epoch": 2.689043421124678, "grad_norm": 0.10493454337120056, "learning_rate": 5.758466842425471e-06, "loss": 0.1861, "step": 49110 }, { "epoch": 2.6893171987077698, "grad_norm": 0.090053491294384, "learning_rate": 5.753396876901238e-06, "loss": 0.1841, "step": 49115 }, { "epoch": 2.6895909762908614, "grad_norm": 0.10000601410865784, "learning_rate": 5.748326911377003e-06, "loss": 0.18, "step": 49120 }, { "epoch": 2.6898647538739526, "grad_norm": 0.09411105513572693, "learning_rate": 5.743256945852769e-06, "loss": 0.1805, "step": 49125 }, { "epoch": 2.6901385314570443, "grad_norm": 0.08929065614938736, "learning_rate": 5.738186980328534e-06, "loss": 0.1767, "step": 49130 }, { "epoch": 2.690412309040136, "grad_norm": 0.0951511487364769, "learning_rate": 5.7331170148043e-06, "loss": 0.1855, "step": 49135 }, { "epoch": 2.690686086623227, "grad_norm": 0.09442657977342606, "learning_rate": 5.7280470492800654e-06, "loss": 0.183, "step": 49140 }, { "epoch": 2.690959864206319, "grad_norm": 0.09992250800132751, "learning_rate": 5.722977083755831e-06, "loss": 0.1823, "step": 49145 }, { "epoch": 2.6912336417894105, "grad_norm": 0.10532175749540329, "learning_rate": 5.717907118231596e-06, "loss": 0.1797, "step": 49150 }, { "epoch": 2.6915074193725017, "grad_norm": 0.10388809442520142, "learning_rate": 5.712837152707362e-06, "loss": 0.1868, "step": 49155 }, { "epoch": 2.6917811969555934, "grad_norm": 0.11317560821771622, "learning_rate": 5.707767187183127e-06, "loss": 0.1833, "step": 49160 }, { "epoch": 2.692054974538685, "grad_norm": 0.09366534650325775, "learning_rate": 5.702697221658893e-06, "loss": 0.179, "step": 49165 }, { "epoch": 2.6923287521217762, "grad_norm": 0.0933021754026413, "learning_rate": 5.697627256134659e-06, "loss": 0.1859, "step": 49170 }, { "epoch": 2.692602529704868, "grad_norm": 0.0954156294465065, "learning_rate": 5.692557290610425e-06, "loss": 0.1822, "step": 49175 }, { "epoch": 2.692876307287959, "grad_norm": 0.09635802358388901, "learning_rate": 5.68748732508619e-06, "loss": 0.1809, "step": 49180 }, { "epoch": 2.693150084871051, "grad_norm": 0.0854443609714508, "learning_rate": 5.6824173595619555e-06, "loss": 0.1829, "step": 49185 }, { "epoch": 2.693423862454142, "grad_norm": 0.08755166083574295, "learning_rate": 5.6773473940377205e-06, "loss": 0.1831, "step": 49190 }, { "epoch": 2.6936976400372337, "grad_norm": 0.09661828726530075, "learning_rate": 5.672277428513486e-06, "loss": 0.1889, "step": 49195 }, { "epoch": 2.6939714176203253, "grad_norm": 0.08920478820800781, "learning_rate": 5.667207462989252e-06, "loss": 0.1822, "step": 49200 }, { "epoch": 2.6942451952034165, "grad_norm": 0.09622068703174591, "learning_rate": 5.662137497465018e-06, "loss": 0.1893, "step": 49205 }, { "epoch": 2.694518972786508, "grad_norm": 0.09381278604269028, "learning_rate": 5.657067531940783e-06, "loss": 0.1791, "step": 49210 }, { "epoch": 2.6947927503696, "grad_norm": 0.09753496944904327, "learning_rate": 5.651997566416549e-06, "loss": 0.1894, "step": 49215 }, { "epoch": 2.695066527952691, "grad_norm": 0.09011074900627136, "learning_rate": 5.646927600892314e-06, "loss": 0.1828, "step": 49220 }, { "epoch": 2.6953403055357827, "grad_norm": 0.09132150560617447, "learning_rate": 5.64185763536808e-06, "loss": 0.1771, "step": 49225 }, { "epoch": 2.6956140831188744, "grad_norm": 0.09663451462984085, "learning_rate": 5.636787669843846e-06, "loss": 0.1842, "step": 49230 }, { "epoch": 2.6958878607019656, "grad_norm": 0.09391491860151291, "learning_rate": 5.6317177043196114e-06, "loss": 0.1822, "step": 49235 }, { "epoch": 2.6961616382850573, "grad_norm": 0.10373669862747192, "learning_rate": 5.6266477387953764e-06, "loss": 0.1795, "step": 49240 }, { "epoch": 2.696435415868149, "grad_norm": 0.09470470994710922, "learning_rate": 5.621577773271142e-06, "loss": 0.187, "step": 49245 }, { "epoch": 2.69670919345124, "grad_norm": 0.10073594748973846, "learning_rate": 5.616507807746907e-06, "loss": 0.1861, "step": 49250 }, { "epoch": 2.696982971034332, "grad_norm": 0.09281178563833237, "learning_rate": 5.611437842222673e-06, "loss": 0.1786, "step": 49255 }, { "epoch": 2.697256748617423, "grad_norm": 0.10244832187891006, "learning_rate": 5.606367876698439e-06, "loss": 0.1833, "step": 49260 }, { "epoch": 2.6975305262005147, "grad_norm": 0.0915362685918808, "learning_rate": 5.601297911174204e-06, "loss": 0.1838, "step": 49265 }, { "epoch": 2.6978043037836064, "grad_norm": 0.09835559874773026, "learning_rate": 5.59622794564997e-06, "loss": 0.1843, "step": 49270 }, { "epoch": 2.6980780813666976, "grad_norm": 0.09710376709699631, "learning_rate": 5.591157980125735e-06, "loss": 0.1888, "step": 49275 }, { "epoch": 2.6983518589497892, "grad_norm": 0.09692056477069855, "learning_rate": 5.586088014601501e-06, "loss": 0.1821, "step": 49280 }, { "epoch": 2.6986256365328805, "grad_norm": 0.09630437940359116, "learning_rate": 5.5810180490772665e-06, "loss": 0.1874, "step": 49285 }, { "epoch": 2.698899414115972, "grad_norm": 0.10865971446037292, "learning_rate": 5.575948083553032e-06, "loss": 0.192, "step": 49290 }, { "epoch": 2.699173191699064, "grad_norm": 0.09408024698495865, "learning_rate": 5.570878118028797e-06, "loss": 0.1724, "step": 49295 }, { "epoch": 2.699446969282155, "grad_norm": 0.0984758585691452, "learning_rate": 5.565808152504563e-06, "loss": 0.1848, "step": 49300 }, { "epoch": 2.6997207468652467, "grad_norm": 0.08821655064821243, "learning_rate": 5.560738186980328e-06, "loss": 0.1803, "step": 49305 }, { "epoch": 2.6999945244483383, "grad_norm": 0.09683924168348312, "learning_rate": 5.555668221456094e-06, "loss": 0.1839, "step": 49310 }, { "epoch": 2.7002683020314295, "grad_norm": 0.0957951620221138, "learning_rate": 5.55059825593186e-06, "loss": 0.1773, "step": 49315 }, { "epoch": 2.700542079614521, "grad_norm": 0.09852196276187897, "learning_rate": 5.545528290407626e-06, "loss": 0.1823, "step": 49320 }, { "epoch": 2.700815857197613, "grad_norm": 0.09853345155715942, "learning_rate": 5.540458324883391e-06, "loss": 0.1887, "step": 49325 }, { "epoch": 2.701089634780704, "grad_norm": 0.09134823083877563, "learning_rate": 5.5353883593591566e-06, "loss": 0.1837, "step": 49330 }, { "epoch": 2.7013634123637957, "grad_norm": 0.089989572763443, "learning_rate": 5.5303183938349216e-06, "loss": 0.1837, "step": 49335 }, { "epoch": 2.7016371899468874, "grad_norm": 0.09814134985208511, "learning_rate": 5.525248428310687e-06, "loss": 0.1915, "step": 49340 }, { "epoch": 2.7019109675299786, "grad_norm": 0.10578478127717972, "learning_rate": 5.520178462786453e-06, "loss": 0.1947, "step": 49345 }, { "epoch": 2.7021847451130703, "grad_norm": 0.08604199439287186, "learning_rate": 5.515108497262219e-06, "loss": 0.1783, "step": 49350 }, { "epoch": 2.7024585226961615, "grad_norm": 0.10301865637302399, "learning_rate": 5.510038531737984e-06, "loss": 0.18, "step": 49355 }, { "epoch": 2.702732300279253, "grad_norm": 0.09733190387487411, "learning_rate": 5.50496856621375e-06, "loss": 0.1925, "step": 49360 }, { "epoch": 2.7030060778623444, "grad_norm": 0.09946054965257645, "learning_rate": 5.499898600689515e-06, "loss": 0.1817, "step": 49365 }, { "epoch": 2.703279855445436, "grad_norm": 0.09258197247982025, "learning_rate": 5.494828635165282e-06, "loss": 0.1859, "step": 49370 }, { "epoch": 2.7035536330285277, "grad_norm": 0.09614533931016922, "learning_rate": 5.489758669641047e-06, "loss": 0.1775, "step": 49375 }, { "epoch": 2.703827410611619, "grad_norm": 0.0946960598230362, "learning_rate": 5.4846887041168125e-06, "loss": 0.1826, "step": 49380 }, { "epoch": 2.7041011881947106, "grad_norm": 0.09385617077350616, "learning_rate": 5.4796187385925775e-06, "loss": 0.1758, "step": 49385 }, { "epoch": 2.7043749657778022, "grad_norm": 0.08523794263601303, "learning_rate": 5.474548773068343e-06, "loss": 0.1799, "step": 49390 }, { "epoch": 2.7046487433608934, "grad_norm": 0.09658539295196533, "learning_rate": 5.469478807544109e-06, "loss": 0.184, "step": 49395 }, { "epoch": 2.704922520943985, "grad_norm": 0.11058758199214935, "learning_rate": 5.464408842019875e-06, "loss": 0.1832, "step": 49400 }, { "epoch": 2.7051962985270768, "grad_norm": 0.09079475700855255, "learning_rate": 5.45933887649564e-06, "loss": 0.186, "step": 49405 }, { "epoch": 2.705470076110168, "grad_norm": 0.08957991749048233, "learning_rate": 5.454268910971406e-06, "loss": 0.1789, "step": 49410 }, { "epoch": 2.7057438536932596, "grad_norm": 0.09185028076171875, "learning_rate": 5.449198945447171e-06, "loss": 0.179, "step": 49415 }, { "epoch": 2.7060176312763513, "grad_norm": 0.0954262837767601, "learning_rate": 5.444128979922937e-06, "loss": 0.1845, "step": 49420 }, { "epoch": 2.7062914088594425, "grad_norm": 0.08531749248504639, "learning_rate": 5.4390590143987026e-06, "loss": 0.1814, "step": 49425 }, { "epoch": 2.706565186442534, "grad_norm": 0.08922997862100601, "learning_rate": 5.433989048874468e-06, "loss": 0.1792, "step": 49430 }, { "epoch": 2.706838964025626, "grad_norm": 0.09193549305200577, "learning_rate": 5.428919083350233e-06, "loss": 0.1847, "step": 49435 }, { "epoch": 2.707112741608717, "grad_norm": 0.09984514117240906, "learning_rate": 5.423849117825999e-06, "loss": 0.1758, "step": 49440 }, { "epoch": 2.7073865191918087, "grad_norm": 0.09983792155981064, "learning_rate": 5.418779152301764e-06, "loss": 0.1802, "step": 49445 }, { "epoch": 2.7076602967749, "grad_norm": 0.09762804955244064, "learning_rate": 5.41370918677753e-06, "loss": 0.1822, "step": 49450 }, { "epoch": 2.7079340743579916, "grad_norm": 0.09343910217285156, "learning_rate": 5.408639221253296e-06, "loss": 0.1741, "step": 49455 }, { "epoch": 2.708207851941083, "grad_norm": 0.10344338417053223, "learning_rate": 5.403569255729062e-06, "loss": 0.1865, "step": 49460 }, { "epoch": 2.7084816295241745, "grad_norm": 0.10878469049930573, "learning_rate": 5.398499290204827e-06, "loss": 0.1862, "step": 49465 }, { "epoch": 2.708755407107266, "grad_norm": 0.10086226463317871, "learning_rate": 5.393429324680593e-06, "loss": 0.1855, "step": 49470 }, { "epoch": 2.7090291846903574, "grad_norm": 0.10390086472034454, "learning_rate": 5.388359359156358e-06, "loss": 0.1832, "step": 49475 }, { "epoch": 2.709302962273449, "grad_norm": 0.09362921118736267, "learning_rate": 5.3832893936321235e-06, "loss": 0.1804, "step": 49480 }, { "epoch": 2.7095767398565407, "grad_norm": 0.09507600218057632, "learning_rate": 5.378219428107889e-06, "loss": 0.1911, "step": 49485 }, { "epoch": 2.709850517439632, "grad_norm": 0.10787501931190491, "learning_rate": 5.373149462583655e-06, "loss": 0.1845, "step": 49490 }, { "epoch": 2.7101242950227236, "grad_norm": 0.08877107501029968, "learning_rate": 5.36807949705942e-06, "loss": 0.1783, "step": 49495 }, { "epoch": 2.7103980726058152, "grad_norm": 0.08739618957042694, "learning_rate": 5.363009531535186e-06, "loss": 0.1754, "step": 49500 }, { "epoch": 2.7106718501889064, "grad_norm": 0.09455980360507965, "learning_rate": 5.357939566010951e-06, "loss": 0.1855, "step": 49505 }, { "epoch": 2.710945627771998, "grad_norm": 0.0895785242319107, "learning_rate": 5.352869600486717e-06, "loss": 0.1829, "step": 49510 }, { "epoch": 2.7112194053550898, "grad_norm": 0.0881040096282959, "learning_rate": 5.347799634962483e-06, "loss": 0.1881, "step": 49515 }, { "epoch": 2.711493182938181, "grad_norm": 0.09128666669130325, "learning_rate": 5.3427296694382485e-06, "loss": 0.1835, "step": 49520 }, { "epoch": 2.7117669605212726, "grad_norm": 0.0926421657204628, "learning_rate": 5.3376597039140135e-06, "loss": 0.1805, "step": 49525 }, { "epoch": 2.712040738104364, "grad_norm": 0.08479703217744827, "learning_rate": 5.332589738389779e-06, "loss": 0.1749, "step": 49530 }, { "epoch": 2.7123145156874555, "grad_norm": 0.08911754935979843, "learning_rate": 5.327519772865544e-06, "loss": 0.1835, "step": 49535 }, { "epoch": 2.7125882932705467, "grad_norm": 0.09136445075273514, "learning_rate": 5.32244980734131e-06, "loss": 0.184, "step": 49540 }, { "epoch": 2.7128620708536384, "grad_norm": 0.09990300238132477, "learning_rate": 5.317379841817076e-06, "loss": 0.1838, "step": 49545 }, { "epoch": 2.71313584843673, "grad_norm": 0.09465944766998291, "learning_rate": 5.312309876292842e-06, "loss": 0.1806, "step": 49550 }, { "epoch": 2.7134096260198213, "grad_norm": 0.09659934788942337, "learning_rate": 5.307239910768607e-06, "loss": 0.1899, "step": 49555 }, { "epoch": 2.713683403602913, "grad_norm": 0.089541956782341, "learning_rate": 5.302169945244373e-06, "loss": 0.1743, "step": 49560 }, { "epoch": 2.7139571811860046, "grad_norm": 0.09957068413496017, "learning_rate": 5.297099979720138e-06, "loss": 0.1763, "step": 49565 }, { "epoch": 2.714230958769096, "grad_norm": 0.1056477352976799, "learning_rate": 5.292030014195904e-06, "loss": 0.1818, "step": 49570 }, { "epoch": 2.7145047363521875, "grad_norm": 0.10046660900115967, "learning_rate": 5.2869600486716695e-06, "loss": 0.1777, "step": 49575 }, { "epoch": 2.714778513935279, "grad_norm": 0.09013497829437256, "learning_rate": 5.2818900831474345e-06, "loss": 0.1841, "step": 49580 }, { "epoch": 2.7150522915183704, "grad_norm": 0.08847040683031082, "learning_rate": 5.2768201176232e-06, "loss": 0.175, "step": 49585 }, { "epoch": 2.715326069101462, "grad_norm": 0.08232585340738297, "learning_rate": 5.271750152098965e-06, "loss": 0.1757, "step": 49590 }, { "epoch": 2.7155998466845537, "grad_norm": 0.09317715466022491, "learning_rate": 5.266680186574732e-06, "loss": 0.1777, "step": 49595 }, { "epoch": 2.715873624267645, "grad_norm": 0.09894649684429169, "learning_rate": 5.261610221050497e-06, "loss": 0.1847, "step": 49600 }, { "epoch": 2.7161474018507366, "grad_norm": 0.098291777074337, "learning_rate": 5.256540255526263e-06, "loss": 0.1821, "step": 49605 }, { "epoch": 2.716421179433828, "grad_norm": 0.09359509497880936, "learning_rate": 5.251470290002028e-06, "loss": 0.1774, "step": 49610 }, { "epoch": 2.7166949570169194, "grad_norm": 0.08866629004478455, "learning_rate": 5.246400324477794e-06, "loss": 0.1773, "step": 49615 }, { "epoch": 2.716968734600011, "grad_norm": 0.09053482115268707, "learning_rate": 5.241330358953559e-06, "loss": 0.1795, "step": 49620 }, { "epoch": 2.7172425121831023, "grad_norm": 0.08345090597867966, "learning_rate": 5.236260393429325e-06, "loss": 0.1815, "step": 49625 }, { "epoch": 2.717516289766194, "grad_norm": 0.09060332179069519, "learning_rate": 5.23119042790509e-06, "loss": 0.1796, "step": 49630 }, { "epoch": 2.717790067349285, "grad_norm": 0.0928298830986023, "learning_rate": 5.226120462380856e-06, "loss": 0.1811, "step": 49635 }, { "epoch": 2.718063844932377, "grad_norm": 0.09085782617330551, "learning_rate": 5.221050496856621e-06, "loss": 0.1766, "step": 49640 }, { "epoch": 2.7183376225154685, "grad_norm": 0.08589840680360794, "learning_rate": 5.215980531332387e-06, "loss": 0.183, "step": 49645 }, { "epoch": 2.7186114000985597, "grad_norm": 0.08919862657785416, "learning_rate": 5.210910565808153e-06, "loss": 0.1866, "step": 49650 }, { "epoch": 2.7188851776816514, "grad_norm": 0.09873691946268082, "learning_rate": 5.205840600283919e-06, "loss": 0.1823, "step": 49655 }, { "epoch": 2.719158955264743, "grad_norm": 0.09496629238128662, "learning_rate": 5.200770634759684e-06, "loss": 0.1775, "step": 49660 }, { "epoch": 2.7194327328478343, "grad_norm": 0.09344103932380676, "learning_rate": 5.19570066923545e-06, "loss": 0.1853, "step": 49665 }, { "epoch": 2.719706510430926, "grad_norm": 0.09273798763751984, "learning_rate": 5.190630703711215e-06, "loss": 0.1776, "step": 49670 }, { "epoch": 2.7199802880140176, "grad_norm": 0.10179005563259125, "learning_rate": 5.1855607381869804e-06, "loss": 0.181, "step": 49675 }, { "epoch": 2.720254065597109, "grad_norm": 0.09558051079511642, "learning_rate": 5.180490772662746e-06, "loss": 0.1821, "step": 49680 }, { "epoch": 2.7205278431802005, "grad_norm": 0.11762674897909164, "learning_rate": 5.175420807138512e-06, "loss": 0.1891, "step": 49685 }, { "epoch": 2.720801620763292, "grad_norm": 0.08819041401147842, "learning_rate": 5.170350841614277e-06, "loss": 0.1798, "step": 49690 }, { "epoch": 2.7210753983463833, "grad_norm": 0.10614030063152313, "learning_rate": 5.165280876090043e-06, "loss": 0.1872, "step": 49695 }, { "epoch": 2.721349175929475, "grad_norm": 0.10054337233304977, "learning_rate": 5.160210910565808e-06, "loss": 0.1811, "step": 49700 }, { "epoch": 2.7216229535125667, "grad_norm": 0.08896833658218384, "learning_rate": 5.155140945041574e-06, "loss": 0.1766, "step": 49705 }, { "epoch": 2.721896731095658, "grad_norm": 0.09428287297487259, "learning_rate": 5.15007097951734e-06, "loss": 0.1753, "step": 49710 }, { "epoch": 2.7221705086787495, "grad_norm": 0.09470700472593307, "learning_rate": 5.1450010139931055e-06, "loss": 0.1852, "step": 49715 }, { "epoch": 2.7224442862618408, "grad_norm": 0.08940168470144272, "learning_rate": 5.1399310484688705e-06, "loss": 0.1838, "step": 49720 }, { "epoch": 2.7227180638449324, "grad_norm": 0.09175238013267517, "learning_rate": 5.134861082944636e-06, "loss": 0.1828, "step": 49725 }, { "epoch": 2.7229918414280236, "grad_norm": 0.09456909447908401, "learning_rate": 5.129791117420401e-06, "loss": 0.1799, "step": 49730 }, { "epoch": 2.7232656190111153, "grad_norm": 0.09114469587802887, "learning_rate": 5.124721151896167e-06, "loss": 0.177, "step": 49735 }, { "epoch": 2.723539396594207, "grad_norm": 0.09262018650770187, "learning_rate": 5.119651186371933e-06, "loss": 0.1757, "step": 49740 }, { "epoch": 2.723813174177298, "grad_norm": 0.09913777559995651, "learning_rate": 5.114581220847699e-06, "loss": 0.1864, "step": 49745 }, { "epoch": 2.72408695176039, "grad_norm": 0.09119755029678345, "learning_rate": 5.109511255323464e-06, "loss": 0.1785, "step": 49750 }, { "epoch": 2.7243607293434815, "grad_norm": 0.09531997889280319, "learning_rate": 5.10444128979923e-06, "loss": 0.1821, "step": 49755 }, { "epoch": 2.7246345069265727, "grad_norm": 0.09248374402523041, "learning_rate": 5.099371324274995e-06, "loss": 0.181, "step": 49760 }, { "epoch": 2.7249082845096644, "grad_norm": 0.0939522311091423, "learning_rate": 5.094301358750761e-06, "loss": 0.1862, "step": 49765 }, { "epoch": 2.725182062092756, "grad_norm": 0.09406710416078568, "learning_rate": 5.0892313932265264e-06, "loss": 0.1802, "step": 49770 }, { "epoch": 2.7254558396758473, "grad_norm": 0.0879061371088028, "learning_rate": 5.084161427702292e-06, "loss": 0.1877, "step": 49775 }, { "epoch": 2.725729617258939, "grad_norm": 0.0921650379896164, "learning_rate": 5.079091462178057e-06, "loss": 0.1757, "step": 49780 }, { "epoch": 2.7260033948420306, "grad_norm": 0.10205592215061188, "learning_rate": 5.074021496653823e-06, "loss": 0.18, "step": 49785 }, { "epoch": 2.726277172425122, "grad_norm": 0.09783602505922318, "learning_rate": 5.068951531129588e-06, "loss": 0.1881, "step": 49790 }, { "epoch": 2.7265509500082135, "grad_norm": 0.09421932697296143, "learning_rate": 5.063881565605355e-06, "loss": 0.1776, "step": 49795 }, { "epoch": 2.7268247275913047, "grad_norm": 0.10569552332162857, "learning_rate": 5.05881160008112e-06, "loss": 0.1887, "step": 49800 }, { "epoch": 2.7270985051743963, "grad_norm": 0.09423598647117615, "learning_rate": 5.053741634556886e-06, "loss": 0.1839, "step": 49805 }, { "epoch": 2.7273722827574876, "grad_norm": 0.09398394078016281, "learning_rate": 5.048671669032651e-06, "loss": 0.1753, "step": 49810 }, { "epoch": 2.727646060340579, "grad_norm": 0.09407871216535568, "learning_rate": 5.0436017035084165e-06, "loss": 0.181, "step": 49815 }, { "epoch": 2.727919837923671, "grad_norm": 0.09580874443054199, "learning_rate": 5.0385317379841815e-06, "loss": 0.1812, "step": 49820 }, { "epoch": 2.728193615506762, "grad_norm": 0.08785001188516617, "learning_rate": 5.033461772459948e-06, "loss": 0.1864, "step": 49825 }, { "epoch": 2.7284673930898538, "grad_norm": 0.10908710956573486, "learning_rate": 5.028391806935713e-06, "loss": 0.1811, "step": 49830 }, { "epoch": 2.7287411706729454, "grad_norm": 0.09175720065832138, "learning_rate": 5.023321841411479e-06, "loss": 0.1797, "step": 49835 }, { "epoch": 2.7290149482560366, "grad_norm": 0.09922812134027481, "learning_rate": 5.018251875887244e-06, "loss": 0.1776, "step": 49840 }, { "epoch": 2.7292887258391283, "grad_norm": 0.0939883142709732, "learning_rate": 5.01318191036301e-06, "loss": 0.1912, "step": 49845 }, { "epoch": 2.72956250342222, "grad_norm": 0.096755750477314, "learning_rate": 5.008111944838776e-06, "loss": 0.1785, "step": 49850 }, { "epoch": 2.729836281005311, "grad_norm": 0.10133156180381775, "learning_rate": 5.003041979314542e-06, "loss": 0.1871, "step": 49855 }, { "epoch": 2.730110058588403, "grad_norm": 0.1037106066942215, "learning_rate": 4.997972013790307e-06, "loss": 0.1917, "step": 49860 }, { "epoch": 2.7303838361714945, "grad_norm": 0.09566222876310349, "learning_rate": 4.992902048266072e-06, "loss": 0.1828, "step": 49865 }, { "epoch": 2.7306576137545857, "grad_norm": 0.0946439728140831, "learning_rate": 4.987832082741837e-06, "loss": 0.1767, "step": 49870 }, { "epoch": 2.7309313913376774, "grad_norm": 0.10217539966106415, "learning_rate": 4.982762117217602e-06, "loss": 0.1805, "step": 49875 }, { "epoch": 2.731205168920769, "grad_norm": 0.08545596152544022, "learning_rate": 4.977692151693369e-06, "loss": 0.1769, "step": 49880 }, { "epoch": 2.7314789465038603, "grad_norm": 0.09056055545806885, "learning_rate": 4.972622186169134e-06, "loss": 0.1747, "step": 49885 }, { "epoch": 2.731752724086952, "grad_norm": 0.0868987962603569, "learning_rate": 4.9675522206449e-06, "loss": 0.1763, "step": 49890 }, { "epoch": 2.732026501670043, "grad_norm": 0.08884193748235703, "learning_rate": 4.962482255120665e-06, "loss": 0.1862, "step": 49895 }, { "epoch": 2.732300279253135, "grad_norm": 0.08761347085237503, "learning_rate": 4.957412289596431e-06, "loss": 0.1846, "step": 49900 }, { "epoch": 2.732574056836226, "grad_norm": 0.096354641020298, "learning_rate": 4.952342324072197e-06, "loss": 0.1903, "step": 49905 }, { "epoch": 2.7328478344193177, "grad_norm": 0.0934673622250557, "learning_rate": 4.9472723585479625e-06, "loss": 0.1865, "step": 49910 }, { "epoch": 2.7331216120024093, "grad_norm": 0.09185750782489777, "learning_rate": 4.9422023930237275e-06, "loss": 0.1838, "step": 49915 }, { "epoch": 2.7333953895855005, "grad_norm": 0.09093736112117767, "learning_rate": 4.937132427499493e-06, "loss": 0.1761, "step": 49920 }, { "epoch": 2.733669167168592, "grad_norm": 0.08645737171173096, "learning_rate": 4.932062461975258e-06, "loss": 0.1806, "step": 49925 }, { "epoch": 2.733942944751684, "grad_norm": 0.0917811393737793, "learning_rate": 4.926992496451024e-06, "loss": 0.1821, "step": 49930 }, { "epoch": 2.734216722334775, "grad_norm": 0.09625635296106339, "learning_rate": 4.92192253092679e-06, "loss": 0.1923, "step": 49935 }, { "epoch": 2.7344904999178667, "grad_norm": 0.08880844712257385, "learning_rate": 4.916852565402556e-06, "loss": 0.1854, "step": 49940 }, { "epoch": 2.7347642775009584, "grad_norm": 0.09389562159776688, "learning_rate": 4.911782599878321e-06, "loss": 0.1864, "step": 49945 }, { "epoch": 2.7350380550840496, "grad_norm": 0.09216655045747757, "learning_rate": 4.906712634354087e-06, "loss": 0.1793, "step": 49950 }, { "epoch": 2.7353118326671413, "grad_norm": 0.08977457135915756, "learning_rate": 4.901642668829852e-06, "loss": 0.1786, "step": 49955 }, { "epoch": 2.735585610250233, "grad_norm": 0.09608913958072662, "learning_rate": 4.8965727033056176e-06, "loss": 0.173, "step": 49960 }, { "epoch": 2.735859387833324, "grad_norm": 0.09222769737243652, "learning_rate": 4.891502737781383e-06, "loss": 0.1872, "step": 49965 }, { "epoch": 2.736133165416416, "grad_norm": 0.09684109687805176, "learning_rate": 4.886432772257149e-06, "loss": 0.1765, "step": 49970 }, { "epoch": 2.736406942999507, "grad_norm": 0.0976523756980896, "learning_rate": 4.881362806732914e-06, "loss": 0.182, "step": 49975 }, { "epoch": 2.7366807205825987, "grad_norm": 0.10668130218982697, "learning_rate": 4.87629284120868e-06, "loss": 0.1925, "step": 49980 }, { "epoch": 2.73695449816569, "grad_norm": 0.09731018543243408, "learning_rate": 4.871222875684445e-06, "loss": 0.1754, "step": 49985 }, { "epoch": 2.7372282757487816, "grad_norm": 0.10019193589687347, "learning_rate": 4.866152910160211e-06, "loss": 0.1825, "step": 49990 }, { "epoch": 2.7375020533318732, "grad_norm": 0.08932335674762726, "learning_rate": 4.861082944635977e-06, "loss": 0.1787, "step": 49995 }, { "epoch": 2.7377758309149645, "grad_norm": 0.10077902674674988, "learning_rate": 4.856012979111743e-06, "loss": 0.1833, "step": 50000 }, { "epoch": 2.738049608498056, "grad_norm": 0.09665265679359436, "learning_rate": 4.850943013587508e-06, "loss": 0.1865, "step": 50005 }, { "epoch": 2.738323386081148, "grad_norm": 0.09158289432525635, "learning_rate": 4.8458730480632735e-06, "loss": 0.1836, "step": 50010 }, { "epoch": 2.738597163664239, "grad_norm": 0.09231897443532944, "learning_rate": 4.8408030825390385e-06, "loss": 0.1838, "step": 50015 }, { "epoch": 2.7388709412473307, "grad_norm": 0.08796840161085129, "learning_rate": 4.835733117014804e-06, "loss": 0.18, "step": 50020 }, { "epoch": 2.7391447188304223, "grad_norm": 0.09322642534971237, "learning_rate": 4.83066315149057e-06, "loss": 0.1819, "step": 50025 }, { "epoch": 2.7394184964135135, "grad_norm": 0.09889840334653854, "learning_rate": 4.825593185966336e-06, "loss": 0.1818, "step": 50030 }, { "epoch": 2.739692273996605, "grad_norm": 0.09325285255908966, "learning_rate": 4.820523220442101e-06, "loss": 0.1757, "step": 50035 }, { "epoch": 2.739966051579697, "grad_norm": 0.09613536298274994, "learning_rate": 4.815453254917867e-06, "loss": 0.1805, "step": 50040 }, { "epoch": 2.740239829162788, "grad_norm": 0.08632995188236237, "learning_rate": 4.810383289393632e-06, "loss": 0.1865, "step": 50045 }, { "epoch": 2.7405136067458797, "grad_norm": 0.09401235729455948, "learning_rate": 4.8053133238693986e-06, "loss": 0.1835, "step": 50050 }, { "epoch": 2.7407873843289714, "grad_norm": 0.09607704728841782, "learning_rate": 4.8002433583451636e-06, "loss": 0.179, "step": 50055 }, { "epoch": 2.7410611619120626, "grad_norm": 0.09317230433225632, "learning_rate": 4.795173392820929e-06, "loss": 0.1762, "step": 50060 }, { "epoch": 2.7413349394951543, "grad_norm": 0.09481315314769745, "learning_rate": 4.790103427296694e-06, "loss": 0.1751, "step": 50065 }, { "epoch": 2.7416087170782455, "grad_norm": 0.10415005683898926, "learning_rate": 4.78503346177246e-06, "loss": 0.1809, "step": 50070 }, { "epoch": 2.741882494661337, "grad_norm": 0.0979888066649437, "learning_rate": 4.779963496248225e-06, "loss": 0.1826, "step": 50075 }, { "epoch": 2.7421562722444284, "grad_norm": 0.08840196579694748, "learning_rate": 4.774893530723992e-06, "loss": 0.1826, "step": 50080 }, { "epoch": 2.74243004982752, "grad_norm": 0.08949868381023407, "learning_rate": 4.769823565199757e-06, "loss": 0.1788, "step": 50085 }, { "epoch": 2.7427038274106117, "grad_norm": 0.0929105281829834, "learning_rate": 4.764753599675523e-06, "loss": 0.1916, "step": 50090 }, { "epoch": 2.742977604993703, "grad_norm": 0.0912904441356659, "learning_rate": 4.759683634151288e-06, "loss": 0.1743, "step": 50095 }, { "epoch": 2.7432513825767946, "grad_norm": 0.08645537495613098, "learning_rate": 4.754613668627054e-06, "loss": 0.1809, "step": 50100 }, { "epoch": 2.7435251601598862, "grad_norm": 0.09548765420913696, "learning_rate": 4.7495437031028195e-06, "loss": 0.1785, "step": 50105 }, { "epoch": 2.7437989377429775, "grad_norm": 0.09833913296461105, "learning_rate": 4.744473737578585e-06, "loss": 0.1793, "step": 50110 }, { "epoch": 2.744072715326069, "grad_norm": 0.08760207146406174, "learning_rate": 4.73940377205435e-06, "loss": 0.1833, "step": 50115 }, { "epoch": 2.7443464929091608, "grad_norm": 0.0941125676035881, "learning_rate": 4.734333806530116e-06, "loss": 0.1863, "step": 50120 }, { "epoch": 2.744620270492252, "grad_norm": 0.08459503203630447, "learning_rate": 4.729263841005881e-06, "loss": 0.1819, "step": 50125 }, { "epoch": 2.7448940480753437, "grad_norm": 0.09286930412054062, "learning_rate": 4.724193875481647e-06, "loss": 0.1785, "step": 50130 }, { "epoch": 2.7451678256584353, "grad_norm": 0.09689490497112274, "learning_rate": 4.719123909957413e-06, "loss": 0.1805, "step": 50135 }, { "epoch": 2.7454416032415265, "grad_norm": 0.08867283165454865, "learning_rate": 4.714053944433179e-06, "loss": 0.18, "step": 50140 }, { "epoch": 2.745715380824618, "grad_norm": 0.09065933525562286, "learning_rate": 4.708983978908944e-06, "loss": 0.1803, "step": 50145 }, { "epoch": 2.74598915840771, "grad_norm": 0.09780942648649216, "learning_rate": 4.7039140133847095e-06, "loss": 0.1875, "step": 50150 }, { "epoch": 2.746262935990801, "grad_norm": 0.09804482758045197, "learning_rate": 4.6988440478604745e-06, "loss": 0.1912, "step": 50155 }, { "epoch": 2.7465367135738927, "grad_norm": 0.0964183583855629, "learning_rate": 4.69377408233624e-06, "loss": 0.1843, "step": 50160 }, { "epoch": 2.746810491156984, "grad_norm": 0.09573853015899658, "learning_rate": 4.688704116812006e-06, "loss": 0.1846, "step": 50165 }, { "epoch": 2.7470842687400756, "grad_norm": 0.09360948950052261, "learning_rate": 4.683634151287771e-06, "loss": 0.1767, "step": 50170 }, { "epoch": 2.747358046323167, "grad_norm": 0.08769068121910095, "learning_rate": 4.678564185763537e-06, "loss": 0.1807, "step": 50175 }, { "epoch": 2.7476318239062585, "grad_norm": 0.09188032895326614, "learning_rate": 4.673494220239302e-06, "loss": 0.1781, "step": 50180 }, { "epoch": 2.74790560148935, "grad_norm": 0.09811954200267792, "learning_rate": 4.668424254715068e-06, "loss": 0.1787, "step": 50185 }, { "epoch": 2.7481793790724414, "grad_norm": 0.09674910455942154, "learning_rate": 4.663354289190834e-06, "loss": 0.1771, "step": 50190 }, { "epoch": 2.748453156655533, "grad_norm": 0.0917278528213501, "learning_rate": 4.6582843236666e-06, "loss": 0.1849, "step": 50195 }, { "epoch": 2.7487269342386247, "grad_norm": 0.09498779475688934, "learning_rate": 4.653214358142365e-06, "loss": 0.1818, "step": 50200 }, { "epoch": 2.749000711821716, "grad_norm": 0.10373298823833466, "learning_rate": 4.6481443926181305e-06, "loss": 0.1908, "step": 50205 }, { "epoch": 2.7492744894048076, "grad_norm": 0.08617441356182098, "learning_rate": 4.6430744270938955e-06, "loss": 0.1822, "step": 50210 }, { "epoch": 2.7495482669878992, "grad_norm": 0.09543239325284958, "learning_rate": 4.638004461569661e-06, "loss": 0.1854, "step": 50215 }, { "epoch": 2.7498220445709904, "grad_norm": 0.09449759125709534, "learning_rate": 4.632934496045427e-06, "loss": 0.1785, "step": 50220 }, { "epoch": 2.750095822154082, "grad_norm": 0.09585832059383392, "learning_rate": 4.627864530521193e-06, "loss": 0.188, "step": 50225 }, { "epoch": 2.7503695997371738, "grad_norm": 0.10388205200433731, "learning_rate": 4.622794564996958e-06, "loss": 0.1851, "step": 50230 }, { "epoch": 2.750643377320265, "grad_norm": 0.10129453986883163, "learning_rate": 4.617724599472724e-06, "loss": 0.1906, "step": 50235 }, { "epoch": 2.7509171549033566, "grad_norm": 0.09435371309518814, "learning_rate": 4.612654633948489e-06, "loss": 0.1784, "step": 50240 }, { "epoch": 2.751190932486448, "grad_norm": 0.1079178899526596, "learning_rate": 4.607584668424255e-06, "loss": 0.1828, "step": 50245 }, { "epoch": 2.7514647100695395, "grad_norm": 0.09240218997001648, "learning_rate": 4.6025147029000205e-06, "loss": 0.1848, "step": 50250 }, { "epoch": 2.7517384876526307, "grad_norm": 0.10234464704990387, "learning_rate": 4.597444737375786e-06, "loss": 0.1817, "step": 50255 }, { "epoch": 2.7520122652357224, "grad_norm": 0.09922618418931961, "learning_rate": 4.592374771851551e-06, "loss": 0.1826, "step": 50260 }, { "epoch": 2.752286042818814, "grad_norm": 0.10836080461740494, "learning_rate": 4.587304806327317e-06, "loss": 0.184, "step": 50265 }, { "epoch": 2.7525598204019053, "grad_norm": 0.09937732666730881, "learning_rate": 4.582234840803082e-06, "loss": 0.1727, "step": 50270 }, { "epoch": 2.752833597984997, "grad_norm": 0.10033296048641205, "learning_rate": 4.577164875278848e-06, "loss": 0.1904, "step": 50275 }, { "epoch": 2.7531073755680886, "grad_norm": 0.09532087296247482, "learning_rate": 4.572094909754614e-06, "loss": 0.1767, "step": 50280 }, { "epoch": 2.75338115315118, "grad_norm": 0.10658375173807144, "learning_rate": 4.56702494423038e-06, "loss": 0.1812, "step": 50285 }, { "epoch": 2.7536549307342715, "grad_norm": 0.09511031955480576, "learning_rate": 4.561954978706145e-06, "loss": 0.1779, "step": 50290 }, { "epoch": 2.753928708317363, "grad_norm": 0.0957450270652771, "learning_rate": 4.556885013181911e-06, "loss": 0.177, "step": 50295 }, { "epoch": 2.7542024859004544, "grad_norm": 0.09304745495319366, "learning_rate": 4.551815047657676e-06, "loss": 0.1809, "step": 50300 }, { "epoch": 2.754476263483546, "grad_norm": 0.09773077815771103, "learning_rate": 4.546745082133442e-06, "loss": 0.1763, "step": 50305 }, { "epoch": 2.7547500410666377, "grad_norm": 0.09719407558441162, "learning_rate": 4.541675116609207e-06, "loss": 0.1807, "step": 50310 }, { "epoch": 2.755023818649729, "grad_norm": 0.0915011316537857, "learning_rate": 4.536605151084973e-06, "loss": 0.1829, "step": 50315 }, { "epoch": 2.7552975962328206, "grad_norm": 0.08968224376440048, "learning_rate": 4.531535185560738e-06, "loss": 0.1795, "step": 50320 }, { "epoch": 2.755571373815912, "grad_norm": 0.0973755493760109, "learning_rate": 4.526465220036504e-06, "loss": 0.1864, "step": 50325 }, { "epoch": 2.7558451513990034, "grad_norm": 0.0964251235127449, "learning_rate": 4.521395254512269e-06, "loss": 0.1902, "step": 50330 }, { "epoch": 2.756118928982095, "grad_norm": 0.09141772985458374, "learning_rate": 4.516325288988036e-06, "loss": 0.1853, "step": 50335 }, { "epoch": 2.7563927065651863, "grad_norm": 0.09300693869590759, "learning_rate": 4.511255323463801e-06, "loss": 0.1851, "step": 50340 }, { "epoch": 2.756666484148278, "grad_norm": 0.0916554182767868, "learning_rate": 4.5061853579395665e-06, "loss": 0.1837, "step": 50345 }, { "epoch": 2.756940261731369, "grad_norm": 0.09449172765016556, "learning_rate": 4.5011153924153315e-06, "loss": 0.1799, "step": 50350 }, { "epoch": 2.757214039314461, "grad_norm": 0.09130416065454483, "learning_rate": 4.496045426891097e-06, "loss": 0.1783, "step": 50355 }, { "epoch": 2.7574878168975525, "grad_norm": 0.08389032632112503, "learning_rate": 4.490975461366863e-06, "loss": 0.1753, "step": 50360 }, { "epoch": 2.7577615944806437, "grad_norm": 0.08944348245859146, "learning_rate": 4.485905495842629e-06, "loss": 0.1753, "step": 50365 }, { "epoch": 2.7580353720637354, "grad_norm": 0.09483306109905243, "learning_rate": 4.480835530318394e-06, "loss": 0.1824, "step": 50370 }, { "epoch": 2.758309149646827, "grad_norm": 0.09555499255657196, "learning_rate": 4.47576556479416e-06, "loss": 0.1813, "step": 50375 }, { "epoch": 2.7585829272299183, "grad_norm": 0.09926855564117432, "learning_rate": 4.470695599269925e-06, "loss": 0.1757, "step": 50380 }, { "epoch": 2.75885670481301, "grad_norm": 0.09259912371635437, "learning_rate": 4.465625633745691e-06, "loss": 0.1826, "step": 50385 }, { "epoch": 2.7591304823961016, "grad_norm": 0.10149117559194565, "learning_rate": 4.460555668221457e-06, "loss": 0.1825, "step": 50390 }, { "epoch": 2.759404259979193, "grad_norm": 0.0981244295835495, "learning_rate": 4.4554857026972224e-06, "loss": 0.1798, "step": 50395 }, { "epoch": 2.7596780375622845, "grad_norm": 0.1053582951426506, "learning_rate": 4.4504157371729874e-06, "loss": 0.1887, "step": 50400 }, { "epoch": 2.759951815145376, "grad_norm": 0.09824352711439133, "learning_rate": 4.445345771648753e-06, "loss": 0.1817, "step": 50405 }, { "epoch": 2.7602255927284673, "grad_norm": 0.08652278780937195, "learning_rate": 4.440275806124518e-06, "loss": 0.1861, "step": 50410 }, { "epoch": 2.760499370311559, "grad_norm": 0.08881501108407974, "learning_rate": 4.435205840600284e-06, "loss": 0.1781, "step": 50415 }, { "epoch": 2.7607731478946502, "grad_norm": 0.08976202458143234, "learning_rate": 4.43013587507605e-06, "loss": 0.1803, "step": 50420 }, { "epoch": 2.761046925477742, "grad_norm": 0.10218316316604614, "learning_rate": 4.425065909551816e-06, "loss": 0.1807, "step": 50425 }, { "epoch": 2.761320703060833, "grad_norm": 0.09747964888811111, "learning_rate": 4.419995944027581e-06, "loss": 0.1797, "step": 50430 }, { "epoch": 2.7615944806439248, "grad_norm": 0.0932713970541954, "learning_rate": 4.414925978503347e-06, "loss": 0.1797, "step": 50435 }, { "epoch": 2.7618682582270164, "grad_norm": 0.1047636866569519, "learning_rate": 4.409856012979112e-06, "loss": 0.1838, "step": 50440 }, { "epoch": 2.7621420358101076, "grad_norm": 0.09190868586301804, "learning_rate": 4.4047860474548775e-06, "loss": 0.1777, "step": 50445 }, { "epoch": 2.7624158133931993, "grad_norm": 0.09320556372404099, "learning_rate": 4.399716081930643e-06, "loss": 0.1866, "step": 50450 }, { "epoch": 2.762689590976291, "grad_norm": 0.10571218281984329, "learning_rate": 4.394646116406408e-06, "loss": 0.1903, "step": 50455 }, { "epoch": 2.762963368559382, "grad_norm": 0.09634103626012802, "learning_rate": 4.389576150882174e-06, "loss": 0.1931, "step": 50460 }, { "epoch": 2.763237146142474, "grad_norm": 0.09526430815458298, "learning_rate": 4.384506185357939e-06, "loss": 0.1788, "step": 50465 }, { "epoch": 2.7635109237255655, "grad_norm": 0.09596721827983856, "learning_rate": 4.379436219833705e-06, "loss": 0.1849, "step": 50470 }, { "epoch": 2.7637847013086567, "grad_norm": 0.0951542779803276, "learning_rate": 4.374366254309471e-06, "loss": 0.1778, "step": 50475 }, { "epoch": 2.7640584788917484, "grad_norm": 0.09295088052749634, "learning_rate": 4.369296288785237e-06, "loss": 0.1791, "step": 50480 }, { "epoch": 2.76433225647484, "grad_norm": 0.09028249233961105, "learning_rate": 4.364226323261002e-06, "loss": 0.1785, "step": 50485 }, { "epoch": 2.7646060340579313, "grad_norm": 0.0981873944401741, "learning_rate": 4.3591563577367676e-06, "loss": 0.1805, "step": 50490 }, { "epoch": 2.764879811641023, "grad_norm": 0.08723241090774536, "learning_rate": 4.3540863922125326e-06, "loss": 0.1837, "step": 50495 }, { "epoch": 2.7651535892241146, "grad_norm": 0.09396741539239883, "learning_rate": 4.349016426688298e-06, "loss": 0.1845, "step": 50500 }, { "epoch": 2.765427366807206, "grad_norm": 0.0888381078839302, "learning_rate": 4.343946461164064e-06, "loss": 0.1837, "step": 50505 }, { "epoch": 2.7657011443902975, "grad_norm": 0.09237950295209885, "learning_rate": 4.33887649563983e-06, "loss": 0.1849, "step": 50510 }, { "epoch": 2.7659749219733887, "grad_norm": 0.09293142706155777, "learning_rate": 4.333806530115595e-06, "loss": 0.189, "step": 50515 }, { "epoch": 2.7662486995564803, "grad_norm": 0.0927068442106247, "learning_rate": 4.328736564591361e-06, "loss": 0.183, "step": 50520 }, { "epoch": 2.7665224771395716, "grad_norm": 0.0984201580286026, "learning_rate": 4.323666599067126e-06, "loss": 0.1874, "step": 50525 }, { "epoch": 2.766796254722663, "grad_norm": 0.08878903090953827, "learning_rate": 4.318596633542892e-06, "loss": 0.1801, "step": 50530 }, { "epoch": 2.767070032305755, "grad_norm": 0.09413215517997742, "learning_rate": 4.313526668018658e-06, "loss": 0.1863, "step": 50535 }, { "epoch": 2.767343809888846, "grad_norm": 0.09420125186443329, "learning_rate": 4.3084567024944235e-06, "loss": 0.1906, "step": 50540 }, { "epoch": 2.7676175874719378, "grad_norm": 0.08784577250480652, "learning_rate": 4.3033867369701885e-06, "loss": 0.1824, "step": 50545 }, { "epoch": 2.7678913650550294, "grad_norm": 0.09517782181501389, "learning_rate": 4.298316771445954e-06, "loss": 0.1765, "step": 50550 }, { "epoch": 2.7681651426381206, "grad_norm": 0.0876067504286766, "learning_rate": 4.293246805921719e-06, "loss": 0.1835, "step": 50555 }, { "epoch": 2.7684389202212123, "grad_norm": 0.0891968384385109, "learning_rate": 4.288176840397486e-06, "loss": 0.182, "step": 50560 }, { "epoch": 2.768712697804304, "grad_norm": 0.09123297035694122, "learning_rate": 4.283106874873251e-06, "loss": 0.1885, "step": 50565 }, { "epoch": 2.768986475387395, "grad_norm": 0.09420972317457199, "learning_rate": 4.278036909349017e-06, "loss": 0.1796, "step": 50570 }, { "epoch": 2.769260252970487, "grad_norm": 0.08532235771417618, "learning_rate": 4.272966943824782e-06, "loss": 0.1774, "step": 50575 }, { "epoch": 2.7695340305535785, "grad_norm": 0.08709954470396042, "learning_rate": 4.267896978300548e-06, "loss": 0.191, "step": 50580 }, { "epoch": 2.7698078081366697, "grad_norm": 0.09747623652219772, "learning_rate": 4.2628270127763136e-06, "loss": 0.1863, "step": 50585 }, { "epoch": 2.7700815857197614, "grad_norm": 0.09019115567207336, "learning_rate": 4.257757047252079e-06, "loss": 0.1729, "step": 50590 }, { "epoch": 2.770355363302853, "grad_norm": 0.09704504162073135, "learning_rate": 4.252687081727844e-06, "loss": 0.1848, "step": 50595 }, { "epoch": 2.7706291408859443, "grad_norm": 0.08848444372415543, "learning_rate": 4.24761711620361e-06, "loss": 0.176, "step": 50600 }, { "epoch": 2.770902918469036, "grad_norm": 0.0857621505856514, "learning_rate": 4.242547150679375e-06, "loss": 0.1727, "step": 50605 }, { "epoch": 2.771176696052127, "grad_norm": 0.10217911005020142, "learning_rate": 4.237477185155141e-06, "loss": 0.1779, "step": 50610 }, { "epoch": 2.771450473635219, "grad_norm": 0.0872671902179718, "learning_rate": 4.232407219630907e-06, "loss": 0.1877, "step": 50615 }, { "epoch": 2.77172425121831, "grad_norm": 0.09688480943441391, "learning_rate": 4.227337254106673e-06, "loss": 0.1807, "step": 50620 }, { "epoch": 2.7719980288014017, "grad_norm": 0.09688080102205276, "learning_rate": 4.222267288582438e-06, "loss": 0.1817, "step": 50625 }, { "epoch": 2.7722718063844933, "grad_norm": 0.0904851108789444, "learning_rate": 4.217197323058204e-06, "loss": 0.1867, "step": 50630 }, { "epoch": 2.7725455839675845, "grad_norm": 0.09236098825931549, "learning_rate": 4.212127357533969e-06, "loss": 0.1844, "step": 50635 }, { "epoch": 2.772819361550676, "grad_norm": 0.10172387957572937, "learning_rate": 4.2070573920097345e-06, "loss": 0.1819, "step": 50640 }, { "epoch": 2.773093139133768, "grad_norm": 0.09371648728847504, "learning_rate": 4.2019874264855e-06, "loss": 0.1809, "step": 50645 }, { "epoch": 2.773366916716859, "grad_norm": 0.09206301718950272, "learning_rate": 4.196917460961266e-06, "loss": 0.1859, "step": 50650 }, { "epoch": 2.7736406942999507, "grad_norm": 0.09589219838380814, "learning_rate": 4.191847495437031e-06, "loss": 0.181, "step": 50655 }, { "epoch": 2.7739144718830424, "grad_norm": 0.08983349800109863, "learning_rate": 4.186777529912797e-06, "loss": 0.1819, "step": 50660 }, { "epoch": 2.7741882494661336, "grad_norm": 0.08723460882902145, "learning_rate": 4.181707564388562e-06, "loss": 0.1728, "step": 50665 }, { "epoch": 2.7744620270492253, "grad_norm": 0.09225497394800186, "learning_rate": 4.176637598864328e-06, "loss": 0.1828, "step": 50670 }, { "epoch": 2.774735804632317, "grad_norm": 0.10142207145690918, "learning_rate": 4.171567633340094e-06, "loss": 0.1805, "step": 50675 }, { "epoch": 2.775009582215408, "grad_norm": 0.09786440432071686, "learning_rate": 4.1664976678158595e-06, "loss": 0.1874, "step": 50680 }, { "epoch": 2.7752833597985, "grad_norm": 0.09083009511232376, "learning_rate": 4.1614277022916245e-06, "loss": 0.1832, "step": 50685 }, { "epoch": 2.775557137381591, "grad_norm": 0.09301512688398361, "learning_rate": 4.15635773676739e-06, "loss": 0.1773, "step": 50690 }, { "epoch": 2.7758309149646827, "grad_norm": 0.08773154020309448, "learning_rate": 4.151287771243155e-06, "loss": 0.1838, "step": 50695 }, { "epoch": 2.776104692547774, "grad_norm": 0.0953356996178627, "learning_rate": 4.146217805718921e-06, "loss": 0.1812, "step": 50700 }, { "epoch": 2.7763784701308656, "grad_norm": 0.08458562940359116, "learning_rate": 4.141147840194687e-06, "loss": 0.17, "step": 50705 }, { "epoch": 2.7766522477139572, "grad_norm": 0.09494567662477493, "learning_rate": 4.136077874670453e-06, "loss": 0.1824, "step": 50710 }, { "epoch": 2.7769260252970485, "grad_norm": 0.09115130454301834, "learning_rate": 4.131007909146218e-06, "loss": 0.1768, "step": 50715 }, { "epoch": 2.77719980288014, "grad_norm": 0.08729047328233719, "learning_rate": 4.125937943621984e-06, "loss": 0.1796, "step": 50720 }, { "epoch": 2.777473580463232, "grad_norm": 0.08909337967634201, "learning_rate": 4.120867978097749e-06, "loss": 0.1858, "step": 50725 }, { "epoch": 2.777747358046323, "grad_norm": 0.0855112373828888, "learning_rate": 4.115798012573515e-06, "loss": 0.1788, "step": 50730 }, { "epoch": 2.7780211356294147, "grad_norm": 0.08842393010854721, "learning_rate": 4.1107280470492805e-06, "loss": 0.1807, "step": 50735 }, { "epoch": 2.7782949132125063, "grad_norm": 0.08865133672952652, "learning_rate": 4.105658081525046e-06, "loss": 0.185, "step": 50740 }, { "epoch": 2.7785686907955975, "grad_norm": 0.08990421891212463, "learning_rate": 4.100588116000811e-06, "loss": 0.18, "step": 50745 }, { "epoch": 2.778842468378689, "grad_norm": 0.0987238958477974, "learning_rate": 4.095518150476576e-06, "loss": 0.1852, "step": 50750 }, { "epoch": 2.779116245961781, "grad_norm": 0.08969394117593765, "learning_rate": 4.090448184952342e-06, "loss": 0.1809, "step": 50755 }, { "epoch": 2.779390023544872, "grad_norm": 0.0989401638507843, "learning_rate": 4.085378219428108e-06, "loss": 0.176, "step": 50760 }, { "epoch": 2.7796638011279637, "grad_norm": 0.09024431556463242, "learning_rate": 4.080308253903874e-06, "loss": 0.1791, "step": 50765 }, { "epoch": 2.7799375787110554, "grad_norm": 0.09274569898843765, "learning_rate": 4.075238288379639e-06, "loss": 0.1801, "step": 50770 }, { "epoch": 2.7802113562941466, "grad_norm": 0.10155027359724045, "learning_rate": 4.070168322855405e-06, "loss": 0.1845, "step": 50775 }, { "epoch": 2.7804851338772383, "grad_norm": 0.10108941793441772, "learning_rate": 4.06509835733117e-06, "loss": 0.1859, "step": 50780 }, { "epoch": 2.7807589114603295, "grad_norm": 0.09367140382528305, "learning_rate": 4.060028391806936e-06, "loss": 0.1895, "step": 50785 }, { "epoch": 2.781032689043421, "grad_norm": 0.09980256855487823, "learning_rate": 4.054958426282701e-06, "loss": 0.1895, "step": 50790 }, { "epoch": 2.7813064666265124, "grad_norm": 0.0836942121386528, "learning_rate": 4.049888460758467e-06, "loss": 0.1866, "step": 50795 }, { "epoch": 2.781580244209604, "grad_norm": 0.095290407538414, "learning_rate": 4.044818495234232e-06, "loss": 0.1868, "step": 50800 }, { "epoch": 2.7818540217926957, "grad_norm": 0.09872283786535263, "learning_rate": 4.039748529709998e-06, "loss": 0.1871, "step": 50805 }, { "epoch": 2.782127799375787, "grad_norm": 0.09357403218746185, "learning_rate": 4.034678564185763e-06, "loss": 0.1856, "step": 50810 }, { "epoch": 2.7824015769588786, "grad_norm": 0.0881555899977684, "learning_rate": 4.02960859866153e-06, "loss": 0.1797, "step": 50815 }, { "epoch": 2.7826753545419702, "grad_norm": 0.09571436047554016, "learning_rate": 4.024538633137295e-06, "loss": 0.1883, "step": 50820 }, { "epoch": 2.7829491321250615, "grad_norm": 0.09199180454015732, "learning_rate": 4.019468667613061e-06, "loss": 0.1813, "step": 50825 }, { "epoch": 2.783222909708153, "grad_norm": 0.10214419662952423, "learning_rate": 4.014398702088826e-06, "loss": 0.1835, "step": 50830 }, { "epoch": 2.7834966872912448, "grad_norm": 0.08875669538974762, "learning_rate": 4.0093287365645914e-06, "loss": 0.1857, "step": 50835 }, { "epoch": 2.783770464874336, "grad_norm": 0.08362184464931488, "learning_rate": 4.004258771040357e-06, "loss": 0.179, "step": 50840 }, { "epoch": 2.7840442424574277, "grad_norm": 0.08559469133615494, "learning_rate": 3.999188805516123e-06, "loss": 0.1766, "step": 50845 }, { "epoch": 2.7843180200405193, "grad_norm": 0.09321741759777069, "learning_rate": 3.994118839991888e-06, "loss": 0.1753, "step": 50850 }, { "epoch": 2.7845917976236105, "grad_norm": 0.08511520177125931, "learning_rate": 3.989048874467654e-06, "loss": 0.1822, "step": 50855 }, { "epoch": 2.784865575206702, "grad_norm": 0.0964505597949028, "learning_rate": 3.983978908943419e-06, "loss": 0.1881, "step": 50860 }, { "epoch": 2.7851393527897934, "grad_norm": 0.08624092489480972, "learning_rate": 3.978908943419185e-06, "loss": 0.1748, "step": 50865 }, { "epoch": 2.785413130372885, "grad_norm": 0.09245485812425613, "learning_rate": 3.973838977894951e-06, "loss": 0.1812, "step": 50870 }, { "epoch": 2.7856869079559763, "grad_norm": 0.10542774945497513, "learning_rate": 3.9687690123707165e-06, "loss": 0.1843, "step": 50875 }, { "epoch": 2.785960685539068, "grad_norm": 0.09133646637201309, "learning_rate": 3.9636990468464815e-06, "loss": 0.1825, "step": 50880 }, { "epoch": 2.7862344631221596, "grad_norm": 0.09362486749887466, "learning_rate": 3.958629081322247e-06, "loss": 0.1723, "step": 50885 }, { "epoch": 2.786508240705251, "grad_norm": 0.09435766935348511, "learning_rate": 3.953559115798012e-06, "loss": 0.174, "step": 50890 }, { "epoch": 2.7867820182883425, "grad_norm": 0.09280598908662796, "learning_rate": 3.948489150273778e-06, "loss": 0.1888, "step": 50895 }, { "epoch": 2.787055795871434, "grad_norm": 0.08592863380908966, "learning_rate": 3.943419184749544e-06, "loss": 0.1818, "step": 50900 }, { "epoch": 2.7873295734545254, "grad_norm": 0.09499003738164902, "learning_rate": 3.93834921922531e-06, "loss": 0.1865, "step": 50905 }, { "epoch": 2.787603351037617, "grad_norm": 0.10034438222646713, "learning_rate": 3.933279253701075e-06, "loss": 0.1822, "step": 50910 }, { "epoch": 2.7878771286207087, "grad_norm": 0.09763854742050171, "learning_rate": 3.928209288176841e-06, "loss": 0.1834, "step": 50915 }, { "epoch": 2.7881509062038, "grad_norm": 0.09376706928014755, "learning_rate": 3.923139322652606e-06, "loss": 0.1888, "step": 50920 }, { "epoch": 2.7884246837868916, "grad_norm": 0.09084678441286087, "learning_rate": 3.918069357128372e-06, "loss": 0.1746, "step": 50925 }, { "epoch": 2.7886984613699832, "grad_norm": 0.09030196070671082, "learning_rate": 3.9129993916041374e-06, "loss": 0.1843, "step": 50930 }, { "epoch": 2.7889722389530744, "grad_norm": 0.09375392645597458, "learning_rate": 3.907929426079903e-06, "loss": 0.1783, "step": 50935 }, { "epoch": 2.789246016536166, "grad_norm": 0.08703791350126266, "learning_rate": 3.902859460555668e-06, "loss": 0.177, "step": 50940 }, { "epoch": 2.7895197941192578, "grad_norm": 0.09060519188642502, "learning_rate": 3.897789495031434e-06, "loss": 0.178, "step": 50945 }, { "epoch": 2.789793571702349, "grad_norm": 0.09076283872127533, "learning_rate": 3.892719529507199e-06, "loss": 0.1806, "step": 50950 }, { "epoch": 2.7900673492854406, "grad_norm": 0.08977628499269485, "learning_rate": 3.887649563982965e-06, "loss": 0.1862, "step": 50955 }, { "epoch": 2.790341126868532, "grad_norm": 0.0913747027516365, "learning_rate": 3.882579598458731e-06, "loss": 0.1803, "step": 50960 }, { "epoch": 2.7906149044516235, "grad_norm": 0.0940917506814003, "learning_rate": 3.877509632934497e-06, "loss": 0.1832, "step": 50965 }, { "epoch": 2.7908886820347147, "grad_norm": 0.09250848740339279, "learning_rate": 3.872439667410262e-06, "loss": 0.1857, "step": 50970 }, { "epoch": 2.7911624596178064, "grad_norm": 0.10251617431640625, "learning_rate": 3.8673697018860275e-06, "loss": 0.1798, "step": 50975 }, { "epoch": 2.791436237200898, "grad_norm": 0.08586765825748444, "learning_rate": 3.8622997363617925e-06, "loss": 0.172, "step": 50980 }, { "epoch": 2.7917100147839893, "grad_norm": 0.09142697602510452, "learning_rate": 3.857229770837558e-06, "loss": 0.1808, "step": 50985 }, { "epoch": 2.791983792367081, "grad_norm": 0.09039755165576935, "learning_rate": 3.852159805313324e-06, "loss": 0.1819, "step": 50990 }, { "epoch": 2.7922575699501726, "grad_norm": 0.10336750745773315, "learning_rate": 3.84708983978909e-06, "loss": 0.184, "step": 50995 }, { "epoch": 2.792531347533264, "grad_norm": 0.09525056183338165, "learning_rate": 3.842019874264855e-06, "loss": 0.1764, "step": 51000 }, { "epoch": 2.7928051251163555, "grad_norm": 0.09524437040090561, "learning_rate": 3.836949908740621e-06, "loss": 0.1765, "step": 51005 }, { "epoch": 2.793078902699447, "grad_norm": 0.09266812354326248, "learning_rate": 3.831879943216386e-06, "loss": 0.1875, "step": 51010 }, { "epoch": 2.7933526802825384, "grad_norm": 0.095641128718853, "learning_rate": 3.8268099776921526e-06, "loss": 0.1833, "step": 51015 }, { "epoch": 2.79362645786563, "grad_norm": 0.08493030816316605, "learning_rate": 3.8217400121679176e-06, "loss": 0.1782, "step": 51020 }, { "epoch": 2.7939002354487217, "grad_norm": 0.09221483767032623, "learning_rate": 3.816670046643683e-06, "loss": 0.1822, "step": 51025 }, { "epoch": 2.794174013031813, "grad_norm": 0.09429081529378891, "learning_rate": 3.8116000811194484e-06, "loss": 0.1799, "step": 51030 }, { "epoch": 2.7944477906149046, "grad_norm": 0.10175007581710815, "learning_rate": 3.8065301155952143e-06, "loss": 0.1833, "step": 51035 }, { "epoch": 2.794721568197996, "grad_norm": 0.08939743787050247, "learning_rate": 3.8014601500709797e-06, "loss": 0.1803, "step": 51040 }, { "epoch": 2.7949953457810874, "grad_norm": 0.09433013945817947, "learning_rate": 3.7963901845467455e-06, "loss": 0.1833, "step": 51045 }, { "epoch": 2.795269123364179, "grad_norm": 0.09025397896766663, "learning_rate": 3.791320219022511e-06, "loss": 0.1878, "step": 51050 }, { "epoch": 2.7955429009472703, "grad_norm": 0.09670963883399963, "learning_rate": 3.786250253498276e-06, "loss": 0.1818, "step": 51055 }, { "epoch": 2.795816678530362, "grad_norm": 0.09655892103910446, "learning_rate": 3.781180287974042e-06, "loss": 0.186, "step": 51060 }, { "epoch": 2.796090456113453, "grad_norm": 0.0873555839061737, "learning_rate": 3.7761103224498072e-06, "loss": 0.1817, "step": 51065 }, { "epoch": 2.796364233696545, "grad_norm": 0.10414846986532211, "learning_rate": 3.771040356925573e-06, "loss": 0.1902, "step": 51070 }, { "epoch": 2.7966380112796365, "grad_norm": 0.09118585288524628, "learning_rate": 3.7659703914013385e-06, "loss": 0.1823, "step": 51075 }, { "epoch": 2.7969117888627277, "grad_norm": 0.08978941291570663, "learning_rate": 3.7609004258771043e-06, "loss": 0.1841, "step": 51080 }, { "epoch": 2.7971855664458194, "grad_norm": 0.09086903184652328, "learning_rate": 3.7558304603528693e-06, "loss": 0.181, "step": 51085 }, { "epoch": 2.797459344028911, "grad_norm": 0.09834444522857666, "learning_rate": 3.7507604948286356e-06, "loss": 0.1819, "step": 51090 }, { "epoch": 2.7977331216120023, "grad_norm": 0.09816185384988785, "learning_rate": 3.7456905293044006e-06, "loss": 0.1825, "step": 51095 }, { "epoch": 2.798006899195094, "grad_norm": 0.10121259838342667, "learning_rate": 3.7406205637801664e-06, "loss": 0.1794, "step": 51100 }, { "epoch": 2.7982806767781856, "grad_norm": 0.09334911406040192, "learning_rate": 3.735550598255932e-06, "loss": 0.1827, "step": 51105 }, { "epoch": 2.798554454361277, "grad_norm": 0.08834698051214218, "learning_rate": 3.7304806327316977e-06, "loss": 0.1873, "step": 51110 }, { "epoch": 2.7988282319443685, "grad_norm": 0.08480972051620483, "learning_rate": 3.7254106672074627e-06, "loss": 0.1841, "step": 51115 }, { "epoch": 2.79910200952746, "grad_norm": 0.09246430546045303, "learning_rate": 3.720340701683229e-06, "loss": 0.1828, "step": 51120 }, { "epoch": 2.7993757871105514, "grad_norm": 0.09678006172180176, "learning_rate": 3.715270736158994e-06, "loss": 0.1752, "step": 51125 }, { "epoch": 2.799649564693643, "grad_norm": 0.09659474343061447, "learning_rate": 3.71020077063476e-06, "loss": 0.1846, "step": 51130 }, { "epoch": 2.7999233422767342, "grad_norm": 0.09179534763097763, "learning_rate": 3.7051308051105252e-06, "loss": 0.1857, "step": 51135 }, { "epoch": 2.800197119859826, "grad_norm": 0.09004160761833191, "learning_rate": 3.700060839586291e-06, "loss": 0.18, "step": 51140 }, { "epoch": 2.800470897442917, "grad_norm": 0.09760928153991699, "learning_rate": 3.6949908740620565e-06, "loss": 0.195, "step": 51145 }, { "epoch": 2.8007446750260088, "grad_norm": 0.0954500064253807, "learning_rate": 3.6899209085378224e-06, "loss": 0.1787, "step": 51150 }, { "epoch": 2.8010184526091004, "grad_norm": 0.08187130838632584, "learning_rate": 3.6848509430135874e-06, "loss": 0.1804, "step": 51155 }, { "epoch": 2.8012922301921916, "grad_norm": 0.09576839208602905, "learning_rate": 3.679780977489353e-06, "loss": 0.1765, "step": 51160 }, { "epoch": 2.8015660077752833, "grad_norm": 0.08801966905593872, "learning_rate": 3.6747110119651186e-06, "loss": 0.1766, "step": 51165 }, { "epoch": 2.801839785358375, "grad_norm": 0.09021971374750137, "learning_rate": 3.6696410464408845e-06, "loss": 0.1835, "step": 51170 }, { "epoch": 2.802113562941466, "grad_norm": 0.09734828770160675, "learning_rate": 3.66457108091665e-06, "loss": 0.1885, "step": 51175 }, { "epoch": 2.802387340524558, "grad_norm": 0.09440995752811432, "learning_rate": 3.6595011153924157e-06, "loss": 0.1845, "step": 51180 }, { "epoch": 2.8026611181076495, "grad_norm": 0.08649592101573944, "learning_rate": 3.6544311498681807e-06, "loss": 0.1731, "step": 51185 }, { "epoch": 2.8029348956907407, "grad_norm": 0.09362418949604034, "learning_rate": 3.649361184343947e-06, "loss": 0.1859, "step": 51190 }, { "epoch": 2.8032086732738324, "grad_norm": 0.09372212737798691, "learning_rate": 3.644291218819712e-06, "loss": 0.1808, "step": 51195 }, { "epoch": 2.803482450856924, "grad_norm": 0.08831187337636948, "learning_rate": 3.639221253295478e-06, "loss": 0.1779, "step": 51200 }, { "epoch": 2.8037562284400153, "grad_norm": 0.08559732139110565, "learning_rate": 3.6341512877712433e-06, "loss": 0.1735, "step": 51205 }, { "epoch": 2.804030006023107, "grad_norm": 0.09248816967010498, "learning_rate": 3.629081322247009e-06, "loss": 0.1847, "step": 51210 }, { "epoch": 2.8043037836061986, "grad_norm": 0.09146340936422348, "learning_rate": 3.624011356722774e-06, "loss": 0.1919, "step": 51215 }, { "epoch": 2.80457756118929, "grad_norm": 0.08881772309541702, "learning_rate": 3.6189413911985404e-06, "loss": 0.1829, "step": 51220 }, { "epoch": 2.8048513387723815, "grad_norm": 0.09848751872777939, "learning_rate": 3.6138714256743054e-06, "loss": 0.1795, "step": 51225 }, { "epoch": 2.8051251163554727, "grad_norm": 0.08784729242324829, "learning_rate": 3.6088014601500712e-06, "loss": 0.1812, "step": 51230 }, { "epoch": 2.8053988939385643, "grad_norm": 0.09403838217258453, "learning_rate": 3.6037314946258367e-06, "loss": 0.1852, "step": 51235 }, { "epoch": 2.8056726715216556, "grad_norm": 0.08609877526760101, "learning_rate": 3.5986615291016025e-06, "loss": 0.1813, "step": 51240 }, { "epoch": 2.805946449104747, "grad_norm": 0.10422152280807495, "learning_rate": 3.593591563577368e-06, "loss": 0.1839, "step": 51245 }, { "epoch": 2.806220226687839, "grad_norm": 0.10594277083873749, "learning_rate": 3.5885215980531338e-06, "loss": 0.1835, "step": 51250 }, { "epoch": 2.80649400427093, "grad_norm": 0.08922597020864487, "learning_rate": 3.5834516325288988e-06, "loss": 0.1822, "step": 51255 }, { "epoch": 2.8067677818540218, "grad_norm": 0.08883851021528244, "learning_rate": 3.5783816670046646e-06, "loss": 0.1781, "step": 51260 }, { "epoch": 2.8070415594371134, "grad_norm": 0.0936814546585083, "learning_rate": 3.57331170148043e-06, "loss": 0.1784, "step": 51265 }, { "epoch": 2.8073153370202046, "grad_norm": 0.09978991746902466, "learning_rate": 3.568241735956196e-06, "loss": 0.192, "step": 51270 }, { "epoch": 2.8075891146032963, "grad_norm": 0.08731047064065933, "learning_rate": 3.5631717704319613e-06, "loss": 0.1793, "step": 51275 }, { "epoch": 2.807862892186388, "grad_norm": 0.08813221752643585, "learning_rate": 3.558101804907727e-06, "loss": 0.1851, "step": 51280 }, { "epoch": 2.808136669769479, "grad_norm": 0.10003520548343658, "learning_rate": 3.553031839383492e-06, "loss": 0.1865, "step": 51285 }, { "epoch": 2.808410447352571, "grad_norm": 0.09846232831478119, "learning_rate": 3.5479618738592584e-06, "loss": 0.1945, "step": 51290 }, { "epoch": 2.8086842249356625, "grad_norm": 0.08879119902849197, "learning_rate": 3.5428919083350234e-06, "loss": 0.1726, "step": 51295 }, { "epoch": 2.8089580025187537, "grad_norm": 0.09647071361541748, "learning_rate": 3.5378219428107893e-06, "loss": 0.1856, "step": 51300 }, { "epoch": 2.8092317801018454, "grad_norm": 0.09053123742341995, "learning_rate": 3.5327519772865547e-06, "loss": 0.1761, "step": 51305 }, { "epoch": 2.8095055576849366, "grad_norm": 0.0950455293059349, "learning_rate": 3.5276820117623205e-06, "loss": 0.1759, "step": 51310 }, { "epoch": 2.8097793352680283, "grad_norm": 0.09540088474750519, "learning_rate": 3.5226120462380855e-06, "loss": 0.1769, "step": 51315 }, { "epoch": 2.81005311285112, "grad_norm": 0.09396479278802872, "learning_rate": 3.517542080713852e-06, "loss": 0.1777, "step": 51320 }, { "epoch": 2.810326890434211, "grad_norm": 0.0909937396645546, "learning_rate": 3.512472115189617e-06, "loss": 0.1702, "step": 51325 }, { "epoch": 2.810600668017303, "grad_norm": 0.09839697927236557, "learning_rate": 3.5074021496653826e-06, "loss": 0.1805, "step": 51330 }, { "epoch": 2.810874445600394, "grad_norm": 0.09089569002389908, "learning_rate": 3.502332184141148e-06, "loss": 0.1781, "step": 51335 }, { "epoch": 2.8111482231834857, "grad_norm": 0.10528115183115005, "learning_rate": 3.497262218616914e-06, "loss": 0.1878, "step": 51340 }, { "epoch": 2.8114220007665773, "grad_norm": 0.0977761447429657, "learning_rate": 3.4921922530926793e-06, "loss": 0.1836, "step": 51345 }, { "epoch": 2.8116957783496686, "grad_norm": 0.10278509557247162, "learning_rate": 3.4871222875684443e-06, "loss": 0.182, "step": 51350 }, { "epoch": 2.81196955593276, "grad_norm": 0.09115023165941238, "learning_rate": 3.48205232204421e-06, "loss": 0.1822, "step": 51355 }, { "epoch": 2.812243333515852, "grad_norm": 0.08376172930002213, "learning_rate": 3.4769823565199756e-06, "loss": 0.1816, "step": 51360 }, { "epoch": 2.812517111098943, "grad_norm": 0.09242353588342667, "learning_rate": 3.4719123909957414e-06, "loss": 0.1864, "step": 51365 }, { "epoch": 2.8127908886820348, "grad_norm": 0.09901877492666245, "learning_rate": 3.4668424254715064e-06, "loss": 0.1808, "step": 51370 }, { "epoch": 2.8130646662651264, "grad_norm": 0.09165740013122559, "learning_rate": 3.4617724599472727e-06, "loss": 0.1819, "step": 51375 }, { "epoch": 2.8133384438482176, "grad_norm": 0.09009827673435211, "learning_rate": 3.4567024944230377e-06, "loss": 0.1827, "step": 51380 }, { "epoch": 2.8136122214313093, "grad_norm": 0.09393152594566345, "learning_rate": 3.4516325288988036e-06, "loss": 0.1829, "step": 51385 }, { "epoch": 2.813885999014401, "grad_norm": 0.08714066445827484, "learning_rate": 3.446562563374569e-06, "loss": 0.1877, "step": 51390 }, { "epoch": 2.814159776597492, "grad_norm": 0.08461889624595642, "learning_rate": 3.441492597850335e-06, "loss": 0.1754, "step": 51395 }, { "epoch": 2.814433554180584, "grad_norm": 0.099190853536129, "learning_rate": 3.4364226323261002e-06, "loss": 0.1864, "step": 51400 }, { "epoch": 2.814707331763675, "grad_norm": 0.09579495340585709, "learning_rate": 3.431352666801866e-06, "loss": 0.1898, "step": 51405 }, { "epoch": 2.8149811093467667, "grad_norm": 0.09330158680677414, "learning_rate": 3.426282701277631e-06, "loss": 0.1799, "step": 51410 }, { "epoch": 2.815254886929858, "grad_norm": 0.09192760288715363, "learning_rate": 3.421212735753397e-06, "loss": 0.1835, "step": 51415 }, { "epoch": 2.8155286645129496, "grad_norm": 0.08751940727233887, "learning_rate": 3.4161427702291624e-06, "loss": 0.1863, "step": 51420 }, { "epoch": 2.8158024420960412, "grad_norm": 0.09548372030258179, "learning_rate": 3.411072804704928e-06, "loss": 0.1829, "step": 51425 }, { "epoch": 2.8160762196791325, "grad_norm": 0.09089960902929306, "learning_rate": 3.4060028391806936e-06, "loss": 0.1752, "step": 51430 }, { "epoch": 2.816349997262224, "grad_norm": 0.08594291657209396, "learning_rate": 3.4009328736564595e-06, "loss": 0.1819, "step": 51435 }, { "epoch": 2.816623774845316, "grad_norm": 0.08465485274791718, "learning_rate": 3.3958629081322245e-06, "loss": 0.1778, "step": 51440 }, { "epoch": 2.816897552428407, "grad_norm": 0.08934856206178665, "learning_rate": 3.3907929426079907e-06, "loss": 0.1785, "step": 51445 }, { "epoch": 2.8171713300114987, "grad_norm": 0.09068185091018677, "learning_rate": 3.3857229770837557e-06, "loss": 0.1689, "step": 51450 }, { "epoch": 2.8174451075945903, "grad_norm": 0.0880298912525177, "learning_rate": 3.3806530115595216e-06, "loss": 0.1891, "step": 51455 }, { "epoch": 2.8177188851776815, "grad_norm": 0.09375445544719696, "learning_rate": 3.375583046035287e-06, "loss": 0.1742, "step": 51460 }, { "epoch": 2.817992662760773, "grad_norm": 0.09106491506099701, "learning_rate": 3.370513080511053e-06, "loss": 0.1853, "step": 51465 }, { "epoch": 2.818266440343865, "grad_norm": 0.09927698969841003, "learning_rate": 3.365443114986818e-06, "loss": 0.175, "step": 51470 }, { "epoch": 2.818540217926956, "grad_norm": 0.09634945541620255, "learning_rate": 3.360373149462584e-06, "loss": 0.1787, "step": 51475 }, { "epoch": 2.8188139955100477, "grad_norm": 0.08973083645105362, "learning_rate": 3.355303183938349e-06, "loss": 0.1791, "step": 51480 }, { "epoch": 2.8190877730931394, "grad_norm": 0.09031898528337479, "learning_rate": 3.350233218414115e-06, "loss": 0.1769, "step": 51485 }, { "epoch": 2.8193615506762306, "grad_norm": 0.08835200220346451, "learning_rate": 3.3451632528898804e-06, "loss": 0.1814, "step": 51490 }, { "epoch": 2.8196353282593223, "grad_norm": 0.09097406268119812, "learning_rate": 3.3400932873656462e-06, "loss": 0.1851, "step": 51495 }, { "epoch": 2.8199091058424135, "grad_norm": 0.08113918453454971, "learning_rate": 3.3350233218414117e-06, "loss": 0.1837, "step": 51500 }, { "epoch": 2.820182883425505, "grad_norm": 0.09264032542705536, "learning_rate": 3.3299533563171775e-06, "loss": 0.1868, "step": 51505 }, { "epoch": 2.8204566610085964, "grad_norm": 0.09502049535512924, "learning_rate": 3.3248833907929425e-06, "loss": 0.182, "step": 51510 }, { "epoch": 2.820730438591688, "grad_norm": 0.10488001257181168, "learning_rate": 3.3198134252687083e-06, "loss": 0.1807, "step": 51515 }, { "epoch": 2.8210042161747797, "grad_norm": 0.0985398218035698, "learning_rate": 3.3147434597444738e-06, "loss": 0.1818, "step": 51520 }, { "epoch": 2.821277993757871, "grad_norm": 0.09666291624307632, "learning_rate": 3.3096734942202396e-06, "loss": 0.1829, "step": 51525 }, { "epoch": 2.8215517713409626, "grad_norm": 0.09415778517723083, "learning_rate": 3.304603528696005e-06, "loss": 0.1871, "step": 51530 }, { "epoch": 2.8218255489240542, "grad_norm": 0.10355210304260254, "learning_rate": 3.299533563171771e-06, "loss": 0.1768, "step": 51535 }, { "epoch": 2.8220993265071455, "grad_norm": 0.09266059100627899, "learning_rate": 3.294463597647536e-06, "loss": 0.1886, "step": 51540 }, { "epoch": 2.822373104090237, "grad_norm": 0.08999542891979218, "learning_rate": 3.289393632123302e-06, "loss": 0.179, "step": 51545 }, { "epoch": 2.822646881673329, "grad_norm": 0.10270025581121445, "learning_rate": 3.284323666599067e-06, "loss": 0.1846, "step": 51550 }, { "epoch": 2.82292065925642, "grad_norm": 0.09081479907035828, "learning_rate": 3.279253701074833e-06, "loss": 0.1818, "step": 51555 }, { "epoch": 2.8231944368395117, "grad_norm": 0.08916500955820084, "learning_rate": 3.2741837355505984e-06, "loss": 0.1822, "step": 51560 }, { "epoch": 2.8234682144226033, "grad_norm": 0.10354647785425186, "learning_rate": 3.2691137700263643e-06, "loss": 0.196, "step": 51565 }, { "epoch": 2.8237419920056945, "grad_norm": 0.086274154484272, "learning_rate": 3.2640438045021293e-06, "loss": 0.1735, "step": 51570 }, { "epoch": 2.824015769588786, "grad_norm": 0.08930410444736481, "learning_rate": 3.2589738389778955e-06, "loss": 0.1893, "step": 51575 }, { "epoch": 2.8242895471718774, "grad_norm": 0.08776260167360306, "learning_rate": 3.2539038734536605e-06, "loss": 0.1769, "step": 51580 }, { "epoch": 2.824563324754969, "grad_norm": 0.08699213713407516, "learning_rate": 3.2488339079294264e-06, "loss": 0.1886, "step": 51585 }, { "epoch": 2.8248371023380603, "grad_norm": 0.10034043341875076, "learning_rate": 3.243763942405192e-06, "loss": 0.1815, "step": 51590 }, { "epoch": 2.825110879921152, "grad_norm": 0.09109284728765488, "learning_rate": 3.2386939768809576e-06, "loss": 0.1758, "step": 51595 }, { "epoch": 2.8253846575042436, "grad_norm": 0.09685733169317245, "learning_rate": 3.233624011356723e-06, "loss": 0.1792, "step": 51600 }, { "epoch": 2.825658435087335, "grad_norm": 0.09045379608869553, "learning_rate": 3.228554045832489e-06, "loss": 0.1755, "step": 51605 }, { "epoch": 2.8259322126704265, "grad_norm": 0.09957130998373032, "learning_rate": 3.223484080308254e-06, "loss": 0.1807, "step": 51610 }, { "epoch": 2.826205990253518, "grad_norm": 0.09237003326416016, "learning_rate": 3.2184141147840198e-06, "loss": 0.1836, "step": 51615 }, { "epoch": 2.8264797678366094, "grad_norm": 0.09453530609607697, "learning_rate": 3.213344149259785e-06, "loss": 0.1862, "step": 51620 }, { "epoch": 2.826753545419701, "grad_norm": 0.09247712045907974, "learning_rate": 3.208274183735551e-06, "loss": 0.1773, "step": 51625 }, { "epoch": 2.8270273230027927, "grad_norm": 0.09876121580600739, "learning_rate": 3.2032042182113164e-06, "loss": 0.195, "step": 51630 }, { "epoch": 2.827301100585884, "grad_norm": 0.09419174492359161, "learning_rate": 3.1981342526870823e-06, "loss": 0.1893, "step": 51635 }, { "epoch": 2.8275748781689756, "grad_norm": 0.08441734313964844, "learning_rate": 3.1930642871628473e-06, "loss": 0.1749, "step": 51640 }, { "epoch": 2.8278486557520672, "grad_norm": 0.09232839196920395, "learning_rate": 3.1879943216386127e-06, "loss": 0.1829, "step": 51645 }, { "epoch": 2.8281224333351584, "grad_norm": 0.08640632778406143, "learning_rate": 3.1829243561143786e-06, "loss": 0.1798, "step": 51650 }, { "epoch": 2.82839621091825, "grad_norm": 0.09265244752168655, "learning_rate": 3.177854390590144e-06, "loss": 0.1842, "step": 51655 }, { "epoch": 2.8286699885013418, "grad_norm": 0.08924967050552368, "learning_rate": 3.17278442506591e-06, "loss": 0.1798, "step": 51660 }, { "epoch": 2.828943766084433, "grad_norm": 0.09102700650691986, "learning_rate": 3.167714459541675e-06, "loss": 0.1846, "step": 51665 }, { "epoch": 2.8292175436675246, "grad_norm": 0.0904824361205101, "learning_rate": 3.1626444940174407e-06, "loss": 0.1771, "step": 51670 }, { "epoch": 2.829491321250616, "grad_norm": 0.09007079899311066, "learning_rate": 3.157574528493206e-06, "loss": 0.1888, "step": 51675 }, { "epoch": 2.8297650988337075, "grad_norm": 0.09116882085800171, "learning_rate": 3.152504562968972e-06, "loss": 0.1825, "step": 51680 }, { "epoch": 2.8300388764167987, "grad_norm": 0.09635221213102341, "learning_rate": 3.1474345974447374e-06, "loss": 0.1951, "step": 51685 }, { "epoch": 2.8303126539998904, "grad_norm": 0.11359426379203796, "learning_rate": 3.142364631920503e-06, "loss": 0.1929, "step": 51690 }, { "epoch": 2.830586431582982, "grad_norm": 0.08840261399745941, "learning_rate": 3.137294666396268e-06, "loss": 0.1804, "step": 51695 }, { "epoch": 2.8308602091660733, "grad_norm": 0.10028444975614548, "learning_rate": 3.1322247008720345e-06, "loss": 0.1904, "step": 51700 }, { "epoch": 2.831133986749165, "grad_norm": 0.0886317566037178, "learning_rate": 3.1271547353477995e-06, "loss": 0.1857, "step": 51705 }, { "epoch": 2.8314077643322566, "grad_norm": 0.09709473699331284, "learning_rate": 3.1220847698235653e-06, "loss": 0.1878, "step": 51710 }, { "epoch": 2.831681541915348, "grad_norm": 0.09794687479734421, "learning_rate": 3.117014804299331e-06, "loss": 0.1841, "step": 51715 }, { "epoch": 2.8319553194984395, "grad_norm": 0.09857811033725739, "learning_rate": 3.1119448387750966e-06, "loss": 0.1756, "step": 51720 }, { "epoch": 2.832229097081531, "grad_norm": 0.08999276906251907, "learning_rate": 3.106874873250862e-06, "loss": 0.1787, "step": 51725 }, { "epoch": 2.8325028746646224, "grad_norm": 0.0890597477555275, "learning_rate": 3.101804907726628e-06, "loss": 0.179, "step": 51730 }, { "epoch": 2.832776652247714, "grad_norm": 0.09215107560157776, "learning_rate": 3.0967349422023933e-06, "loss": 0.1808, "step": 51735 }, { "epoch": 2.8330504298308057, "grad_norm": 0.08620011061429977, "learning_rate": 3.0916649766781587e-06, "loss": 0.1821, "step": 51740 }, { "epoch": 2.833324207413897, "grad_norm": 0.08878780901432037, "learning_rate": 3.0865950111539245e-06, "loss": 0.186, "step": 51745 }, { "epoch": 2.8335979849969886, "grad_norm": 0.08928626030683517, "learning_rate": 3.08152504562969e-06, "loss": 0.1866, "step": 51750 }, { "epoch": 2.83387176258008, "grad_norm": 0.09781992435455322, "learning_rate": 3.0764550801054554e-06, "loss": 0.1905, "step": 51755 }, { "epoch": 2.8341455401631714, "grad_norm": 0.0948372334241867, "learning_rate": 3.0713851145812212e-06, "loss": 0.1786, "step": 51760 }, { "epoch": 2.834419317746263, "grad_norm": 0.09536448866128922, "learning_rate": 3.0663151490569867e-06, "loss": 0.1803, "step": 51765 }, { "epoch": 2.8346930953293543, "grad_norm": 0.09961837530136108, "learning_rate": 3.061245183532752e-06, "loss": 0.1798, "step": 51770 }, { "epoch": 2.834966872912446, "grad_norm": 0.0874309167265892, "learning_rate": 3.056175218008518e-06, "loss": 0.1786, "step": 51775 }, { "epoch": 2.835240650495537, "grad_norm": 0.09530740976333618, "learning_rate": 3.0511052524842833e-06, "loss": 0.1835, "step": 51780 }, { "epoch": 2.835514428078629, "grad_norm": 0.08802131563425064, "learning_rate": 3.046035286960049e-06, "loss": 0.1723, "step": 51785 }, { "epoch": 2.8357882056617205, "grad_norm": 0.09728436917066574, "learning_rate": 3.040965321435814e-06, "loss": 0.1878, "step": 51790 }, { "epoch": 2.8360619832448117, "grad_norm": 0.08860167115926743, "learning_rate": 3.0358953559115796e-06, "loss": 0.1808, "step": 51795 }, { "epoch": 2.8363357608279034, "grad_norm": 0.08938198536634445, "learning_rate": 3.0308253903873455e-06, "loss": 0.1845, "step": 51800 }, { "epoch": 2.836609538410995, "grad_norm": 0.10111996531486511, "learning_rate": 3.025755424863111e-06, "loss": 0.184, "step": 51805 }, { "epoch": 2.8368833159940863, "grad_norm": 0.09338008612394333, "learning_rate": 3.0206854593388763e-06, "loss": 0.1798, "step": 51810 }, { "epoch": 2.837157093577178, "grad_norm": 0.10099077969789505, "learning_rate": 3.015615493814642e-06, "loss": 0.1791, "step": 51815 }, { "epoch": 2.8374308711602696, "grad_norm": 0.0948689728975296, "learning_rate": 3.0105455282904076e-06, "loss": 0.1751, "step": 51820 }, { "epoch": 2.837704648743361, "grad_norm": 0.08873743563890457, "learning_rate": 3.005475562766173e-06, "loss": 0.1732, "step": 51825 }, { "epoch": 2.8379784263264525, "grad_norm": 0.09694090485572815, "learning_rate": 3.000405597241939e-06, "loss": 0.178, "step": 51830 }, { "epoch": 2.838252203909544, "grad_norm": 0.10222246497869492, "learning_rate": 2.9953356317177043e-06, "loss": 0.1785, "step": 51835 }, { "epoch": 2.8385259814926354, "grad_norm": 0.08070819079875946, "learning_rate": 2.99026566619347e-06, "loss": 0.1821, "step": 51840 }, { "epoch": 2.838799759075727, "grad_norm": 0.10541746765375137, "learning_rate": 2.9851957006692355e-06, "loss": 0.1807, "step": 51845 }, { "epoch": 2.8390735366588182, "grad_norm": 0.09111533313989639, "learning_rate": 2.980125735145001e-06, "loss": 0.1808, "step": 51850 }, { "epoch": 2.83934731424191, "grad_norm": 0.10011913627386093, "learning_rate": 2.975055769620767e-06, "loss": 0.1907, "step": 51855 }, { "epoch": 2.839621091825001, "grad_norm": 0.09798705577850342, "learning_rate": 2.9699858040965322e-06, "loss": 0.183, "step": 51860 }, { "epoch": 2.8398948694080928, "grad_norm": 0.09661097079515457, "learning_rate": 2.9649158385722976e-06, "loss": 0.1849, "step": 51865 }, { "epoch": 2.8401686469911844, "grad_norm": 0.0962621420621872, "learning_rate": 2.9598458730480635e-06, "loss": 0.1811, "step": 51870 }, { "epoch": 2.8404424245742756, "grad_norm": 0.08620280027389526, "learning_rate": 2.954775907523829e-06, "loss": 0.1802, "step": 51875 }, { "epoch": 2.8407162021573673, "grad_norm": 0.09299731999635696, "learning_rate": 2.9497059419995943e-06, "loss": 0.1833, "step": 51880 }, { "epoch": 2.840989979740459, "grad_norm": 0.08709299564361572, "learning_rate": 2.94463597647536e-06, "loss": 0.1775, "step": 51885 }, { "epoch": 2.84126375732355, "grad_norm": 0.08825711160898209, "learning_rate": 2.9395660109511256e-06, "loss": 0.1797, "step": 51890 }, { "epoch": 2.841537534906642, "grad_norm": 0.0984610840678215, "learning_rate": 2.934496045426891e-06, "loss": 0.1797, "step": 51895 }, { "epoch": 2.8418113124897335, "grad_norm": 0.09145036339759827, "learning_rate": 2.929426079902657e-06, "loss": 0.1768, "step": 51900 }, { "epoch": 2.8420850900728247, "grad_norm": 0.08962634205818176, "learning_rate": 2.9243561143784223e-06, "loss": 0.1812, "step": 51905 }, { "epoch": 2.8423588676559164, "grad_norm": 0.09091074019670486, "learning_rate": 2.9192861488541877e-06, "loss": 0.1874, "step": 51910 }, { "epoch": 2.842632645239008, "grad_norm": 0.1057412177324295, "learning_rate": 2.9142161833299536e-06, "loss": 0.1812, "step": 51915 }, { "epoch": 2.8429064228220993, "grad_norm": 0.10631974041461945, "learning_rate": 2.909146217805719e-06, "loss": 0.181, "step": 51920 }, { "epoch": 2.843180200405191, "grad_norm": 0.08649417012929916, "learning_rate": 2.9040762522814844e-06, "loss": 0.1871, "step": 51925 }, { "epoch": 2.8434539779882826, "grad_norm": 0.09243721514940262, "learning_rate": 2.8990062867572503e-06, "loss": 0.1786, "step": 51930 }, { "epoch": 2.843727755571374, "grad_norm": 0.10453345626592636, "learning_rate": 2.8939363212330157e-06, "loss": 0.1907, "step": 51935 }, { "epoch": 2.8440015331544655, "grad_norm": 0.08742813020944595, "learning_rate": 2.8888663557087815e-06, "loss": 0.1837, "step": 51940 }, { "epoch": 2.8442753107375567, "grad_norm": 0.0887717604637146, "learning_rate": 2.883796390184547e-06, "loss": 0.1786, "step": 51945 }, { "epoch": 2.8445490883206483, "grad_norm": 0.08756112307310104, "learning_rate": 2.8787264246603124e-06, "loss": 0.1782, "step": 51950 }, { "epoch": 2.8448228659037396, "grad_norm": 0.10029737651348114, "learning_rate": 2.873656459136078e-06, "loss": 0.185, "step": 51955 }, { "epoch": 2.8450966434868312, "grad_norm": 0.08691168576478958, "learning_rate": 2.8685864936118436e-06, "loss": 0.176, "step": 51960 }, { "epoch": 2.845370421069923, "grad_norm": 0.08440343290567398, "learning_rate": 2.863516528087609e-06, "loss": 0.1844, "step": 51965 }, { "epoch": 2.845644198653014, "grad_norm": 0.09594795107841492, "learning_rate": 2.858446562563375e-06, "loss": 0.18, "step": 51970 }, { "epoch": 2.8459179762361058, "grad_norm": 0.08972199261188507, "learning_rate": 2.8533765970391403e-06, "loss": 0.1823, "step": 51975 }, { "epoch": 2.8461917538191974, "grad_norm": 0.08599305897951126, "learning_rate": 2.8483066315149057e-06, "loss": 0.1742, "step": 51980 }, { "epoch": 2.8464655314022886, "grad_norm": 0.08691269159317017, "learning_rate": 2.8432366659906716e-06, "loss": 0.1759, "step": 51985 }, { "epoch": 2.8467393089853803, "grad_norm": 0.09464240819215775, "learning_rate": 2.838166700466437e-06, "loss": 0.1846, "step": 51990 }, { "epoch": 2.847013086568472, "grad_norm": 0.09002570062875748, "learning_rate": 2.8330967349422024e-06, "loss": 0.1797, "step": 51995 }, { "epoch": 2.847286864151563, "grad_norm": 0.08878853917121887, "learning_rate": 2.8280267694179683e-06, "loss": 0.1827, "step": 52000 }, { "epoch": 2.847560641734655, "grad_norm": 0.08849068731069565, "learning_rate": 2.8229568038937337e-06, "loss": 0.1823, "step": 52005 }, { "epoch": 2.8478344193177465, "grad_norm": 0.09463267773389816, "learning_rate": 2.817886838369499e-06, "loss": 0.1842, "step": 52010 }, { "epoch": 2.8481081969008377, "grad_norm": 0.09672576934099197, "learning_rate": 2.812816872845265e-06, "loss": 0.187, "step": 52015 }, { "epoch": 2.8483819744839294, "grad_norm": 0.09050990641117096, "learning_rate": 2.8077469073210304e-06, "loss": 0.188, "step": 52020 }, { "epoch": 2.8486557520670206, "grad_norm": 0.09233810752630234, "learning_rate": 2.802676941796796e-06, "loss": 0.1802, "step": 52025 }, { "epoch": 2.8489295296501123, "grad_norm": 0.0963488295674324, "learning_rate": 2.7976069762725617e-06, "loss": 0.1808, "step": 52030 }, { "epoch": 2.8492033072332035, "grad_norm": 0.09361805021762848, "learning_rate": 2.792537010748327e-06, "loss": 0.1836, "step": 52035 }, { "epoch": 2.849477084816295, "grad_norm": 0.09637733548879623, "learning_rate": 2.787467045224093e-06, "loss": 0.1876, "step": 52040 }, { "epoch": 2.849750862399387, "grad_norm": 0.09401021897792816, "learning_rate": 2.7823970796998584e-06, "loss": 0.1793, "step": 52045 }, { "epoch": 2.850024639982478, "grad_norm": 0.0856599435210228, "learning_rate": 2.7773271141756238e-06, "loss": 0.1823, "step": 52050 }, { "epoch": 2.8502984175655697, "grad_norm": 0.09856032580137253, "learning_rate": 2.7722571486513896e-06, "loss": 0.1848, "step": 52055 }, { "epoch": 2.8505721951486613, "grad_norm": 0.09448349475860596, "learning_rate": 2.767187183127155e-06, "loss": 0.1874, "step": 52060 }, { "epoch": 2.8508459727317526, "grad_norm": 0.08387456089258194, "learning_rate": 2.7621172176029205e-06, "loss": 0.1734, "step": 52065 }, { "epoch": 2.851119750314844, "grad_norm": 0.09082537889480591, "learning_rate": 2.7570472520786863e-06, "loss": 0.1791, "step": 52070 }, { "epoch": 2.851393527897936, "grad_norm": 0.09589435905218124, "learning_rate": 2.7519772865544517e-06, "loss": 0.1905, "step": 52075 }, { "epoch": 2.851667305481027, "grad_norm": 0.09335926920175552, "learning_rate": 2.746907321030217e-06, "loss": 0.1825, "step": 52080 }, { "epoch": 2.8519410830641188, "grad_norm": 0.08178763836622238, "learning_rate": 2.741837355505983e-06, "loss": 0.1722, "step": 52085 }, { "epoch": 2.8522148606472104, "grad_norm": 0.08825697749853134, "learning_rate": 2.736767389981748e-06, "loss": 0.1746, "step": 52090 }, { "epoch": 2.8524886382303016, "grad_norm": 0.08450434356927872, "learning_rate": 2.731697424457514e-06, "loss": 0.1775, "step": 52095 }, { "epoch": 2.8527624158133933, "grad_norm": 0.08889928460121155, "learning_rate": 2.7266274589332793e-06, "loss": 0.1867, "step": 52100 }, { "epoch": 2.853036193396485, "grad_norm": 0.09284082055091858, "learning_rate": 2.7215574934090447e-06, "loss": 0.1819, "step": 52105 }, { "epoch": 2.853309970979576, "grad_norm": 0.10027925670146942, "learning_rate": 2.7164875278848105e-06, "loss": 0.1892, "step": 52110 }, { "epoch": 2.853583748562668, "grad_norm": 0.09630711376667023, "learning_rate": 2.711417562360576e-06, "loss": 0.186, "step": 52115 }, { "epoch": 2.853857526145759, "grad_norm": 0.09325740486383438, "learning_rate": 2.7063475968363414e-06, "loss": 0.1835, "step": 52120 }, { "epoch": 2.8541313037288507, "grad_norm": 0.09994319081306458, "learning_rate": 2.7012776313121072e-06, "loss": 0.1939, "step": 52125 }, { "epoch": 2.854405081311942, "grad_norm": 0.0962565466761589, "learning_rate": 2.6962076657878726e-06, "loss": 0.1838, "step": 52130 }, { "epoch": 2.8546788588950336, "grad_norm": 0.0895405262708664, "learning_rate": 2.691137700263638e-06, "loss": 0.1823, "step": 52135 }, { "epoch": 2.8549526364781252, "grad_norm": 0.09202209860086441, "learning_rate": 2.686067734739404e-06, "loss": 0.1843, "step": 52140 }, { "epoch": 2.8552264140612165, "grad_norm": 0.09260261058807373, "learning_rate": 2.6809977692151693e-06, "loss": 0.1811, "step": 52145 }, { "epoch": 2.855500191644308, "grad_norm": 0.09149954468011856, "learning_rate": 2.6759278036909348e-06, "loss": 0.1779, "step": 52150 }, { "epoch": 2.8557739692274, "grad_norm": 0.0958453118801117, "learning_rate": 2.6708578381667006e-06, "loss": 0.1872, "step": 52155 }, { "epoch": 2.856047746810491, "grad_norm": 0.10066787153482437, "learning_rate": 2.665787872642466e-06, "loss": 0.1892, "step": 52160 }, { "epoch": 2.8563215243935827, "grad_norm": 0.08995667099952698, "learning_rate": 2.6607179071182314e-06, "loss": 0.1866, "step": 52165 }, { "epoch": 2.8565953019766743, "grad_norm": 0.09644750505685806, "learning_rate": 2.6556479415939973e-06, "loss": 0.1793, "step": 52170 }, { "epoch": 2.8568690795597655, "grad_norm": 0.08508589118719101, "learning_rate": 2.6505779760697627e-06, "loss": 0.184, "step": 52175 }, { "epoch": 2.857142857142857, "grad_norm": 0.09478505700826645, "learning_rate": 2.6455080105455286e-06, "loss": 0.1824, "step": 52180 }, { "epoch": 2.857416634725949, "grad_norm": 0.09050440043210983, "learning_rate": 2.640438045021294e-06, "loss": 0.175, "step": 52185 }, { "epoch": 2.85769041230904, "grad_norm": 0.0946003645658493, "learning_rate": 2.6353680794970594e-06, "loss": 0.1823, "step": 52190 }, { "epoch": 2.8579641898921317, "grad_norm": 0.09658978134393692, "learning_rate": 2.6302981139728253e-06, "loss": 0.1793, "step": 52195 }, { "epoch": 2.858237967475223, "grad_norm": 0.09034227579832077, "learning_rate": 2.6252281484485907e-06, "loss": 0.1817, "step": 52200 }, { "epoch": 2.8585117450583146, "grad_norm": 0.08733978867530823, "learning_rate": 2.620158182924356e-06, "loss": 0.1794, "step": 52205 }, { "epoch": 2.8587855226414063, "grad_norm": 0.09231304377317429, "learning_rate": 2.615088217400122e-06, "loss": 0.1838, "step": 52210 }, { "epoch": 2.8590593002244975, "grad_norm": 0.08713670074939728, "learning_rate": 2.6100182518758874e-06, "loss": 0.179, "step": 52215 }, { "epoch": 2.859333077807589, "grad_norm": 0.0890103429555893, "learning_rate": 2.6049482863516528e-06, "loss": 0.1782, "step": 52220 }, { "epoch": 2.8596068553906804, "grad_norm": 0.10546539723873138, "learning_rate": 2.5998783208274186e-06, "loss": 0.1898, "step": 52225 }, { "epoch": 2.859880632973772, "grad_norm": 0.09237728267908096, "learning_rate": 2.594808355303184e-06, "loss": 0.1796, "step": 52230 }, { "epoch": 2.8601544105568637, "grad_norm": 0.09298158437013626, "learning_rate": 2.5897383897789495e-06, "loss": 0.1798, "step": 52235 }, { "epoch": 2.860428188139955, "grad_norm": 0.08703912049531937, "learning_rate": 2.5846684242547153e-06, "loss": 0.1786, "step": 52240 }, { "epoch": 2.8607019657230466, "grad_norm": 0.08468843251466751, "learning_rate": 2.5795984587304807e-06, "loss": 0.1794, "step": 52245 }, { "epoch": 2.8609757433061382, "grad_norm": 0.08614062517881393, "learning_rate": 2.574528493206246e-06, "loss": 0.1842, "step": 52250 }, { "epoch": 2.8612495208892295, "grad_norm": 0.0927758663892746, "learning_rate": 2.569458527682012e-06, "loss": 0.1856, "step": 52255 }, { "epoch": 2.861523298472321, "grad_norm": 0.09841896593570709, "learning_rate": 2.5643885621577774e-06, "loss": 0.1837, "step": 52260 }, { "epoch": 2.861797076055413, "grad_norm": 0.11074692755937576, "learning_rate": 2.559318596633543e-06, "loss": 0.1829, "step": 52265 }, { "epoch": 2.862070853638504, "grad_norm": 0.10653448849916458, "learning_rate": 2.5542486311093087e-06, "loss": 0.188, "step": 52270 }, { "epoch": 2.8623446312215957, "grad_norm": 0.0929490253329277, "learning_rate": 2.549178665585074e-06, "loss": 0.1793, "step": 52275 }, { "epoch": 2.8626184088046873, "grad_norm": 0.08875323832035065, "learning_rate": 2.54410870006084e-06, "loss": 0.1858, "step": 52280 }, { "epoch": 2.8628921863877785, "grad_norm": 0.08997608721256256, "learning_rate": 2.5390387345366054e-06, "loss": 0.1753, "step": 52285 }, { "epoch": 2.86316596397087, "grad_norm": 0.0849958285689354, "learning_rate": 2.533968769012371e-06, "loss": 0.1857, "step": 52290 }, { "epoch": 2.8634397415539614, "grad_norm": 0.09713956713676453, "learning_rate": 2.5288988034881367e-06, "loss": 0.182, "step": 52295 }, { "epoch": 2.863713519137053, "grad_norm": 0.09173107147216797, "learning_rate": 2.523828837963902e-06, "loss": 0.1756, "step": 52300 }, { "epoch": 2.8639872967201443, "grad_norm": 0.09652387350797653, "learning_rate": 2.5187588724396675e-06, "loss": 0.1862, "step": 52305 }, { "epoch": 2.864261074303236, "grad_norm": 0.0865548849105835, "learning_rate": 2.5136889069154334e-06, "loss": 0.1795, "step": 52310 }, { "epoch": 2.8645348518863276, "grad_norm": 0.08698484301567078, "learning_rate": 2.5086189413911988e-06, "loss": 0.1765, "step": 52315 }, { "epoch": 2.864808629469419, "grad_norm": 0.08993245661258698, "learning_rate": 2.503548975866964e-06, "loss": 0.1899, "step": 52320 }, { "epoch": 2.8650824070525105, "grad_norm": 0.09636033326387405, "learning_rate": 2.49847901034273e-06, "loss": 0.1839, "step": 52325 }, { "epoch": 2.865356184635602, "grad_norm": 0.08801580220460892, "learning_rate": 2.4934090448184955e-06, "loss": 0.1827, "step": 52330 }, { "epoch": 2.8656299622186934, "grad_norm": 0.08924929797649384, "learning_rate": 2.488339079294261e-06, "loss": 0.1827, "step": 52335 }, { "epoch": 2.865903739801785, "grad_norm": 0.08973953872919083, "learning_rate": 2.4832691137700267e-06, "loss": 0.1797, "step": 52340 }, { "epoch": 2.8661775173848767, "grad_norm": 0.09359356760978699, "learning_rate": 2.478199148245792e-06, "loss": 0.1836, "step": 52345 }, { "epoch": 2.866451294967968, "grad_norm": 0.09268512576818466, "learning_rate": 2.4731291827215576e-06, "loss": 0.1814, "step": 52350 }, { "epoch": 2.8667250725510596, "grad_norm": 0.09612184017896652, "learning_rate": 2.4680592171973234e-06, "loss": 0.1836, "step": 52355 }, { "epoch": 2.8669988501341512, "grad_norm": 0.09682870656251907, "learning_rate": 2.462989251673089e-06, "loss": 0.1811, "step": 52360 }, { "epoch": 2.8672726277172425, "grad_norm": 0.09612485021352768, "learning_rate": 2.4579192861488543e-06, "loss": 0.1828, "step": 52365 }, { "epoch": 2.867546405300334, "grad_norm": 0.09229735285043716, "learning_rate": 2.45284932062462e-06, "loss": 0.1859, "step": 52370 }, { "epoch": 2.8678201828834258, "grad_norm": 0.09033776074647903, "learning_rate": 2.4477793551003855e-06, "loss": 0.1812, "step": 52375 }, { "epoch": 2.868093960466517, "grad_norm": 0.08701317012310028, "learning_rate": 2.442709389576151e-06, "loss": 0.1812, "step": 52380 }, { "epoch": 2.8683677380496087, "grad_norm": 0.09043332934379578, "learning_rate": 2.4376394240519164e-06, "loss": 0.1836, "step": 52385 }, { "epoch": 2.8686415156327, "grad_norm": 0.09409549087285995, "learning_rate": 2.432569458527682e-06, "loss": 0.1793, "step": 52390 }, { "epoch": 2.8689152932157915, "grad_norm": 0.10528870671987534, "learning_rate": 2.4274994930034476e-06, "loss": 0.1946, "step": 52395 }, { "epoch": 2.8691890707988827, "grad_norm": 0.09291156381368637, "learning_rate": 2.422429527479213e-06, "loss": 0.181, "step": 52400 }, { "epoch": 2.8694628483819744, "grad_norm": 0.08974070101976395, "learning_rate": 2.4173595619549785e-06, "loss": 0.1815, "step": 52405 }, { "epoch": 2.869736625965066, "grad_norm": 0.08398036658763885, "learning_rate": 2.4122895964307443e-06, "loss": 0.177, "step": 52410 }, { "epoch": 2.8700104035481573, "grad_norm": 0.08733697235584259, "learning_rate": 2.4072196309065098e-06, "loss": 0.1777, "step": 52415 }, { "epoch": 2.870284181131249, "grad_norm": 0.08211895823478699, "learning_rate": 2.402149665382275e-06, "loss": 0.1742, "step": 52420 }, { "epoch": 2.8705579587143406, "grad_norm": 0.08450369536876678, "learning_rate": 2.397079699858041e-06, "loss": 0.1821, "step": 52425 }, { "epoch": 2.870831736297432, "grad_norm": 0.08905524015426636, "learning_rate": 2.3920097343338065e-06, "loss": 0.1841, "step": 52430 }, { "epoch": 2.8711055138805235, "grad_norm": 0.093740314245224, "learning_rate": 2.3869397688095723e-06, "loss": 0.1861, "step": 52435 }, { "epoch": 2.871379291463615, "grad_norm": 0.08828474581241608, "learning_rate": 2.3818698032853377e-06, "loss": 0.1804, "step": 52440 }, { "epoch": 2.8716530690467064, "grad_norm": 0.091520294547081, "learning_rate": 2.376799837761103e-06, "loss": 0.1762, "step": 52445 }, { "epoch": 2.871926846629798, "grad_norm": 0.0929904505610466, "learning_rate": 2.371729872236869e-06, "loss": 0.1907, "step": 52450 }, { "epoch": 2.8722006242128897, "grad_norm": 0.09146390855312347, "learning_rate": 2.3666599067126344e-06, "loss": 0.1883, "step": 52455 }, { "epoch": 2.872474401795981, "grad_norm": 0.08842026442289352, "learning_rate": 2.3615899411884e-06, "loss": 0.1742, "step": 52460 }, { "epoch": 2.8727481793790726, "grad_norm": 0.0901043713092804, "learning_rate": 2.3565199756641657e-06, "loss": 0.1768, "step": 52465 }, { "epoch": 2.873021956962164, "grad_norm": 0.08562386780977249, "learning_rate": 2.351450010139931e-06, "loss": 0.1714, "step": 52470 }, { "epoch": 2.8732957345452554, "grad_norm": 0.09736383706331253, "learning_rate": 2.3463800446156965e-06, "loss": 0.1853, "step": 52475 }, { "epoch": 2.8735695121283467, "grad_norm": 0.10039664804935455, "learning_rate": 2.3413100790914624e-06, "loss": 0.1877, "step": 52480 }, { "epoch": 2.8738432897114383, "grad_norm": 0.09729737788438797, "learning_rate": 2.336240113567228e-06, "loss": 0.1874, "step": 52485 }, { "epoch": 2.87411706729453, "grad_norm": 0.09362247586250305, "learning_rate": 2.3311701480429932e-06, "loss": 0.1867, "step": 52490 }, { "epoch": 2.874390844877621, "grad_norm": 0.09122419357299805, "learning_rate": 2.326100182518759e-06, "loss": 0.1709, "step": 52495 }, { "epoch": 2.874664622460713, "grad_norm": 0.08771850913763046, "learning_rate": 2.3210302169945245e-06, "loss": 0.1865, "step": 52500 }, { "epoch": 2.8749384000438045, "grad_norm": 0.08712667971849442, "learning_rate": 2.31596025147029e-06, "loss": 0.1723, "step": 52505 }, { "epoch": 2.8752121776268957, "grad_norm": 0.08960601687431335, "learning_rate": 2.3108902859460557e-06, "loss": 0.1728, "step": 52510 }, { "epoch": 2.8754859552099874, "grad_norm": 0.09444937855005264, "learning_rate": 2.305820320421821e-06, "loss": 0.1808, "step": 52515 }, { "epoch": 2.875759732793079, "grad_norm": 0.10896121710538864, "learning_rate": 2.3007503548975866e-06, "loss": 0.1868, "step": 52520 }, { "epoch": 2.8760335103761703, "grad_norm": 0.0823562890291214, "learning_rate": 2.2956803893733524e-06, "loss": 0.1722, "step": 52525 }, { "epoch": 2.876307287959262, "grad_norm": 0.09186495095491409, "learning_rate": 2.290610423849118e-06, "loss": 0.1731, "step": 52530 }, { "epoch": 2.8765810655423536, "grad_norm": 0.08522714674472809, "learning_rate": 2.2855404583248837e-06, "loss": 0.1766, "step": 52535 }, { "epoch": 2.876854843125445, "grad_norm": 0.0898188129067421, "learning_rate": 2.280470492800649e-06, "loss": 0.1783, "step": 52540 }, { "epoch": 2.8771286207085365, "grad_norm": 0.10105308890342712, "learning_rate": 2.2754005272764146e-06, "loss": 0.1878, "step": 52545 }, { "epoch": 2.877402398291628, "grad_norm": 0.09908083081245422, "learning_rate": 2.2703305617521804e-06, "loss": 0.1843, "step": 52550 }, { "epoch": 2.8776761758747194, "grad_norm": 0.09642329812049866, "learning_rate": 2.265260596227946e-06, "loss": 0.1794, "step": 52555 }, { "epoch": 2.877949953457811, "grad_norm": 0.08878295868635178, "learning_rate": 2.2601906307037112e-06, "loss": 0.1774, "step": 52560 }, { "epoch": 2.8782237310409022, "grad_norm": 0.08830161392688751, "learning_rate": 2.255120665179477e-06, "loss": 0.181, "step": 52565 }, { "epoch": 2.878497508623994, "grad_norm": 0.0905810222029686, "learning_rate": 2.2500506996552425e-06, "loss": 0.1804, "step": 52570 }, { "epoch": 2.878771286207085, "grad_norm": 0.09829438477754593, "learning_rate": 2.244980734131008e-06, "loss": 0.1811, "step": 52575 }, { "epoch": 2.8790450637901768, "grad_norm": 0.08798836916685104, "learning_rate": 2.2399107686067738e-06, "loss": 0.1879, "step": 52580 }, { "epoch": 2.8793188413732684, "grad_norm": 0.08784405142068863, "learning_rate": 2.234840803082539e-06, "loss": 0.1847, "step": 52585 }, { "epoch": 2.8795926189563597, "grad_norm": 0.09552767872810364, "learning_rate": 2.2297708375583046e-06, "loss": 0.188, "step": 52590 }, { "epoch": 2.8798663965394513, "grad_norm": 0.09383562952280045, "learning_rate": 2.2247008720340705e-06, "loss": 0.1842, "step": 52595 }, { "epoch": 2.880140174122543, "grad_norm": 0.09099137783050537, "learning_rate": 2.219630906509836e-06, "loss": 0.1889, "step": 52600 }, { "epoch": 2.880413951705634, "grad_norm": 0.09595891088247299, "learning_rate": 2.2145609409856013e-06, "loss": 0.182, "step": 52605 }, { "epoch": 2.880687729288726, "grad_norm": 0.08724454045295715, "learning_rate": 2.209490975461367e-06, "loss": 0.1775, "step": 52610 }, { "epoch": 2.8809615068718175, "grad_norm": 0.0889233946800232, "learning_rate": 2.2044210099371326e-06, "loss": 0.1777, "step": 52615 }, { "epoch": 2.8812352844549087, "grad_norm": 0.09020543098449707, "learning_rate": 2.199351044412898e-06, "loss": 0.1871, "step": 52620 }, { "epoch": 2.8815090620380004, "grad_norm": 0.09310516715049744, "learning_rate": 2.194281078888664e-06, "loss": 0.1792, "step": 52625 }, { "epoch": 2.881782839621092, "grad_norm": 0.09569055587053299, "learning_rate": 2.1892111133644293e-06, "loss": 0.1835, "step": 52630 }, { "epoch": 2.8820566172041833, "grad_norm": 0.08786802738904953, "learning_rate": 2.184141147840195e-06, "loss": 0.1784, "step": 52635 }, { "epoch": 2.882330394787275, "grad_norm": 0.08346498012542725, "learning_rate": 2.1790711823159605e-06, "loss": 0.1849, "step": 52640 }, { "epoch": 2.8826041723703666, "grad_norm": 0.08321152627468109, "learning_rate": 2.174001216791726e-06, "loss": 0.1804, "step": 52645 }, { "epoch": 2.882877949953458, "grad_norm": 0.0945521667599678, "learning_rate": 2.168931251267492e-06, "loss": 0.1786, "step": 52650 }, { "epoch": 2.8831517275365495, "grad_norm": 0.09997756779193878, "learning_rate": 2.1638612857432572e-06, "loss": 0.1844, "step": 52655 }, { "epoch": 2.8834255051196407, "grad_norm": 0.09079108387231827, "learning_rate": 2.1587913202190226e-06, "loss": 0.1857, "step": 52660 }, { "epoch": 2.8836992827027323, "grad_norm": 0.09123577922582626, "learning_rate": 2.1537213546947885e-06, "loss": 0.1962, "step": 52665 }, { "epoch": 2.8839730602858236, "grad_norm": 0.09046367555856705, "learning_rate": 2.148651389170554e-06, "loss": 0.1871, "step": 52670 }, { "epoch": 2.8842468378689152, "grad_norm": 0.09840891510248184, "learning_rate": 2.1435814236463193e-06, "loss": 0.1869, "step": 52675 }, { "epoch": 2.884520615452007, "grad_norm": 0.09085867553949356, "learning_rate": 2.1385114581220848e-06, "loss": 0.1792, "step": 52680 }, { "epoch": 2.884794393035098, "grad_norm": 0.08859410881996155, "learning_rate": 2.13344149259785e-06, "loss": 0.1784, "step": 52685 }, { "epoch": 2.8850681706181898, "grad_norm": 0.08389144390821457, "learning_rate": 2.128371527073616e-06, "loss": 0.1827, "step": 52690 }, { "epoch": 2.8853419482012814, "grad_norm": 0.08625427633523941, "learning_rate": 2.1233015615493815e-06, "loss": 0.1799, "step": 52695 }, { "epoch": 2.8856157257843726, "grad_norm": 0.0937497690320015, "learning_rate": 2.118231596025147e-06, "loss": 0.1814, "step": 52700 }, { "epoch": 2.8858895033674643, "grad_norm": 0.09068305045366287, "learning_rate": 2.1131616305009127e-06, "loss": 0.1796, "step": 52705 }, { "epoch": 2.886163280950556, "grad_norm": 0.09135740995407104, "learning_rate": 2.108091664976678e-06, "loss": 0.181, "step": 52710 }, { "epoch": 2.886437058533647, "grad_norm": 0.0945921242237091, "learning_rate": 2.1030216994524436e-06, "loss": 0.1834, "step": 52715 }, { "epoch": 2.886710836116739, "grad_norm": 0.08765450119972229, "learning_rate": 2.0979517339282094e-06, "loss": 0.1795, "step": 52720 }, { "epoch": 2.8869846136998305, "grad_norm": 0.0879223421216011, "learning_rate": 2.092881768403975e-06, "loss": 0.1883, "step": 52725 }, { "epoch": 2.8872583912829217, "grad_norm": 0.09882401674985886, "learning_rate": 2.0878118028797403e-06, "loss": 0.1808, "step": 52730 }, { "epoch": 2.8875321688660134, "grad_norm": 0.09023382514715195, "learning_rate": 2.082741837355506e-06, "loss": 0.1868, "step": 52735 }, { "epoch": 2.8878059464491046, "grad_norm": 0.08524719625711441, "learning_rate": 2.0776718718312715e-06, "loss": 0.1808, "step": 52740 }, { "epoch": 2.8880797240321963, "grad_norm": 0.09075888991355896, "learning_rate": 2.072601906307037e-06, "loss": 0.1758, "step": 52745 }, { "epoch": 2.8883535016152875, "grad_norm": 0.09026042371988297, "learning_rate": 2.067531940782803e-06, "loss": 0.1825, "step": 52750 }, { "epoch": 2.888627279198379, "grad_norm": 0.08955129235982895, "learning_rate": 2.0624619752585682e-06, "loss": 0.1844, "step": 52755 }, { "epoch": 2.888901056781471, "grad_norm": 0.09376107156276703, "learning_rate": 2.0573920097343336e-06, "loss": 0.1852, "step": 52760 }, { "epoch": 2.889174834364562, "grad_norm": 0.09374780207872391, "learning_rate": 2.0523220442100995e-06, "loss": 0.1829, "step": 52765 }, { "epoch": 2.8894486119476537, "grad_norm": 0.0911276787519455, "learning_rate": 2.047252078685865e-06, "loss": 0.1811, "step": 52770 }, { "epoch": 2.8897223895307453, "grad_norm": 0.10461738705635071, "learning_rate": 2.0421821131616303e-06, "loss": 0.1842, "step": 52775 }, { "epoch": 2.8899961671138366, "grad_norm": 0.09449523687362671, "learning_rate": 2.037112147637396e-06, "loss": 0.1878, "step": 52780 }, { "epoch": 2.890269944696928, "grad_norm": 0.0967281311750412, "learning_rate": 2.0320421821131616e-06, "loss": 0.1843, "step": 52785 }, { "epoch": 2.89054372228002, "grad_norm": 0.08313001692295074, "learning_rate": 2.0269722165889274e-06, "loss": 0.1793, "step": 52790 }, { "epoch": 2.890817499863111, "grad_norm": 0.08730199933052063, "learning_rate": 2.021902251064693e-06, "loss": 0.176, "step": 52795 }, { "epoch": 2.8910912774462028, "grad_norm": 0.0870756134390831, "learning_rate": 2.0168322855404583e-06, "loss": 0.1779, "step": 52800 }, { "epoch": 2.8913650550292944, "grad_norm": 0.0963754802942276, "learning_rate": 2.011762320016224e-06, "loss": 0.1804, "step": 52805 }, { "epoch": 2.8916388326123856, "grad_norm": 0.09687957167625427, "learning_rate": 2.0066923544919896e-06, "loss": 0.1797, "step": 52810 }, { "epoch": 2.8919126101954773, "grad_norm": 0.0902869924902916, "learning_rate": 2.001622388967755e-06, "loss": 0.1768, "step": 52815 }, { "epoch": 2.892186387778569, "grad_norm": 0.09606415778398514, "learning_rate": 1.996552423443521e-06, "loss": 0.1792, "step": 52820 }, { "epoch": 2.89246016536166, "grad_norm": 0.09011028707027435, "learning_rate": 1.9914824579192862e-06, "loss": 0.1876, "step": 52825 }, { "epoch": 2.892733942944752, "grad_norm": 0.09067905694246292, "learning_rate": 1.9864124923950517e-06, "loss": 0.1811, "step": 52830 }, { "epoch": 2.893007720527843, "grad_norm": 0.09267527610063553, "learning_rate": 1.9813425268708175e-06, "loss": 0.1856, "step": 52835 }, { "epoch": 2.8932814981109347, "grad_norm": 0.09958026558160782, "learning_rate": 1.976272561346583e-06, "loss": 0.1848, "step": 52840 }, { "epoch": 2.893555275694026, "grad_norm": 0.0935821458697319, "learning_rate": 1.9712025958223484e-06, "loss": 0.1784, "step": 52845 }, { "epoch": 2.8938290532771176, "grad_norm": 0.08546003699302673, "learning_rate": 1.966132630298114e-06, "loss": 0.1765, "step": 52850 }, { "epoch": 2.8941028308602093, "grad_norm": 0.08541879057884216, "learning_rate": 1.9610626647738796e-06, "loss": 0.1719, "step": 52855 }, { "epoch": 2.8943766084433005, "grad_norm": 0.09060737490653992, "learning_rate": 1.955992699249645e-06, "loss": 0.1744, "step": 52860 }, { "epoch": 2.894650386026392, "grad_norm": 0.09332843869924545, "learning_rate": 1.950922733725411e-06, "loss": 0.1864, "step": 52865 }, { "epoch": 2.894924163609484, "grad_norm": 0.0969558134675026, "learning_rate": 1.9458527682011763e-06, "loss": 0.1778, "step": 52870 }, { "epoch": 2.895197941192575, "grad_norm": 0.09668700397014618, "learning_rate": 1.9407828026769417e-06, "loss": 0.1823, "step": 52875 }, { "epoch": 2.8954717187756667, "grad_norm": 0.09609217941761017, "learning_rate": 1.9357128371527076e-06, "loss": 0.1924, "step": 52880 }, { "epoch": 2.8957454963587583, "grad_norm": 0.10114110261201859, "learning_rate": 1.930642871628473e-06, "loss": 0.1848, "step": 52885 }, { "epoch": 2.8960192739418495, "grad_norm": 0.08706095069646835, "learning_rate": 1.925572906104239e-06, "loss": 0.1789, "step": 52890 }, { "epoch": 2.896293051524941, "grad_norm": 0.09405224770307541, "learning_rate": 1.9205029405800043e-06, "loss": 0.182, "step": 52895 }, { "epoch": 2.896566829108033, "grad_norm": 0.08696393668651581, "learning_rate": 1.9154329750557697e-06, "loss": 0.1847, "step": 52900 }, { "epoch": 2.896840606691124, "grad_norm": 0.09206467121839523, "learning_rate": 1.9103630095315355e-06, "loss": 0.1844, "step": 52905 }, { "epoch": 2.8971143842742157, "grad_norm": 0.08535502851009369, "learning_rate": 1.905293044007301e-06, "loss": 0.1819, "step": 52910 }, { "epoch": 2.897388161857307, "grad_norm": 0.09197527915239334, "learning_rate": 1.9002230784830666e-06, "loss": 0.1789, "step": 52915 }, { "epoch": 2.8976619394403986, "grad_norm": 0.08938895910978317, "learning_rate": 1.895153112958832e-06, "loss": 0.1801, "step": 52920 }, { "epoch": 2.89793571702349, "grad_norm": 0.08828438818454742, "learning_rate": 1.8900831474345977e-06, "loss": 0.1812, "step": 52925 }, { "epoch": 2.8982094946065815, "grad_norm": 0.09190420806407928, "learning_rate": 1.8850131819103633e-06, "loss": 0.1788, "step": 52930 }, { "epoch": 2.898483272189673, "grad_norm": 0.09950774908065796, "learning_rate": 1.8799432163861287e-06, "loss": 0.1854, "step": 52935 }, { "epoch": 2.8987570497727644, "grad_norm": 0.08513873815536499, "learning_rate": 1.8748732508618943e-06, "loss": 0.1805, "step": 52940 }, { "epoch": 2.899030827355856, "grad_norm": 0.10023266822099686, "learning_rate": 1.86980328533766e-06, "loss": 0.1789, "step": 52945 }, { "epoch": 2.8993046049389477, "grad_norm": 0.0886920765042305, "learning_rate": 1.8647333198134254e-06, "loss": 0.1826, "step": 52950 }, { "epoch": 2.899578382522039, "grad_norm": 0.0898464098572731, "learning_rate": 1.859663354289191e-06, "loss": 0.1753, "step": 52955 }, { "epoch": 2.8998521601051306, "grad_norm": 0.08872844278812408, "learning_rate": 1.8545933887649567e-06, "loss": 0.1821, "step": 52960 }, { "epoch": 2.9001259376882222, "grad_norm": 0.0932890772819519, "learning_rate": 1.8495234232407223e-06, "loss": 0.188, "step": 52965 }, { "epoch": 2.9003997152713135, "grad_norm": 0.0957929864525795, "learning_rate": 1.8444534577164877e-06, "loss": 0.1884, "step": 52970 }, { "epoch": 2.900673492854405, "grad_norm": 0.09947895258665085, "learning_rate": 1.8393834921922534e-06, "loss": 0.1908, "step": 52975 }, { "epoch": 2.900947270437497, "grad_norm": 0.09502539038658142, "learning_rate": 1.8343135266680186e-06, "loss": 0.1851, "step": 52980 }, { "epoch": 2.901221048020588, "grad_norm": 0.10009295493364334, "learning_rate": 1.8292435611437842e-06, "loss": 0.1814, "step": 52985 }, { "epoch": 2.9014948256036797, "grad_norm": 0.09843229502439499, "learning_rate": 1.8241735956195496e-06, "loss": 0.1804, "step": 52990 }, { "epoch": 2.9017686031867713, "grad_norm": 0.08713499456644058, "learning_rate": 1.8191036300953153e-06, "loss": 0.1803, "step": 52995 }, { "epoch": 2.9020423807698625, "grad_norm": 0.08446590602397919, "learning_rate": 1.8140336645710809e-06, "loss": 0.1782, "step": 53000 }, { "epoch": 2.902316158352954, "grad_norm": 0.08584107458591461, "learning_rate": 1.8089636990468465e-06, "loss": 0.1759, "step": 53005 }, { "epoch": 2.9025899359360454, "grad_norm": 0.09563316404819489, "learning_rate": 1.803893733522612e-06, "loss": 0.1851, "step": 53010 }, { "epoch": 2.902863713519137, "grad_norm": 0.08624657243490219, "learning_rate": 1.7988237679983776e-06, "loss": 0.1738, "step": 53015 }, { "epoch": 2.9031374911022283, "grad_norm": 0.08539428561925888, "learning_rate": 1.7937538024741432e-06, "loss": 0.1765, "step": 53020 }, { "epoch": 2.90341126868532, "grad_norm": 0.09307657927274704, "learning_rate": 1.7886838369499086e-06, "loss": 0.1835, "step": 53025 }, { "epoch": 2.9036850462684116, "grad_norm": 0.08718118816614151, "learning_rate": 1.7836138714256743e-06, "loss": 0.1816, "step": 53030 }, { "epoch": 2.903958823851503, "grad_norm": 0.08735039085149765, "learning_rate": 1.77854390590144e-06, "loss": 0.1795, "step": 53035 }, { "epoch": 2.9042326014345945, "grad_norm": 0.09962323307991028, "learning_rate": 1.7734739403772053e-06, "loss": 0.1784, "step": 53040 }, { "epoch": 2.904506379017686, "grad_norm": 0.10218208283185959, "learning_rate": 1.768403974852971e-06, "loss": 0.184, "step": 53045 }, { "epoch": 2.9047801566007774, "grad_norm": 0.08819730579853058, "learning_rate": 1.7633340093287366e-06, "loss": 0.18, "step": 53050 }, { "epoch": 2.905053934183869, "grad_norm": 0.09205596894025803, "learning_rate": 1.7582640438045022e-06, "loss": 0.1869, "step": 53055 }, { "epoch": 2.9053277117669607, "grad_norm": 0.09410339593887329, "learning_rate": 1.7531940782802677e-06, "loss": 0.185, "step": 53060 }, { "epoch": 2.905601489350052, "grad_norm": 0.09601424634456635, "learning_rate": 1.7481241127560333e-06, "loss": 0.1858, "step": 53065 }, { "epoch": 2.9058752669331436, "grad_norm": 0.09769885241985321, "learning_rate": 1.743054147231799e-06, "loss": 0.1801, "step": 53070 }, { "epoch": 2.9061490445162352, "grad_norm": 0.09135479480028152, "learning_rate": 1.7379841817075643e-06, "loss": 0.1854, "step": 53075 }, { "epoch": 2.9064228220993265, "grad_norm": 0.09418962895870209, "learning_rate": 1.73291421618333e-06, "loss": 0.1767, "step": 53080 }, { "epoch": 2.906696599682418, "grad_norm": 0.08859564363956451, "learning_rate": 1.7278442506590956e-06, "loss": 0.1785, "step": 53085 }, { "epoch": 2.9069703772655098, "grad_norm": 0.08614657819271088, "learning_rate": 1.722774285134861e-06, "loss": 0.1781, "step": 53090 }, { "epoch": 2.907244154848601, "grad_norm": 0.09907166659832001, "learning_rate": 1.7177043196106267e-06, "loss": 0.1905, "step": 53095 }, { "epoch": 2.9075179324316927, "grad_norm": 0.08249020576477051, "learning_rate": 1.7126343540863923e-06, "loss": 0.1729, "step": 53100 }, { "epoch": 2.907791710014784, "grad_norm": 0.095197394490242, "learning_rate": 1.707564388562158e-06, "loss": 0.175, "step": 53105 }, { "epoch": 2.9080654875978755, "grad_norm": 0.09055926650762558, "learning_rate": 1.7024944230379234e-06, "loss": 0.1752, "step": 53110 }, { "epoch": 2.9083392651809667, "grad_norm": 0.08735096454620361, "learning_rate": 1.697424457513689e-06, "loss": 0.1777, "step": 53115 }, { "epoch": 2.9086130427640584, "grad_norm": 0.08714316040277481, "learning_rate": 1.6923544919894546e-06, "loss": 0.1833, "step": 53120 }, { "epoch": 2.90888682034715, "grad_norm": 0.08785078674554825, "learning_rate": 1.68728452646522e-06, "loss": 0.1746, "step": 53125 }, { "epoch": 2.9091605979302413, "grad_norm": 0.09241078048944473, "learning_rate": 1.6822145609409857e-06, "loss": 0.1847, "step": 53130 }, { "epoch": 2.909434375513333, "grad_norm": 0.2541106939315796, "learning_rate": 1.6771445954167513e-06, "loss": 0.187, "step": 53135 }, { "epoch": 2.9097081530964246, "grad_norm": 0.08880683034658432, "learning_rate": 1.6720746298925167e-06, "loss": 0.1786, "step": 53140 }, { "epoch": 2.909981930679516, "grad_norm": 0.09121871739625931, "learning_rate": 1.6670046643682824e-06, "loss": 0.1844, "step": 53145 }, { "epoch": 2.9102557082626075, "grad_norm": 0.09099595993757248, "learning_rate": 1.661934698844048e-06, "loss": 0.1761, "step": 53150 }, { "epoch": 2.910529485845699, "grad_norm": 0.08857318013906479, "learning_rate": 1.6568647333198136e-06, "loss": 0.1771, "step": 53155 }, { "epoch": 2.9108032634287904, "grad_norm": 0.08317426592111588, "learning_rate": 1.651794767795579e-06, "loss": 0.1821, "step": 53160 }, { "epoch": 2.911077041011882, "grad_norm": 0.08838603645563126, "learning_rate": 1.6467248022713447e-06, "loss": 0.1851, "step": 53165 }, { "epoch": 2.9113508185949737, "grad_norm": 0.08933157473802567, "learning_rate": 1.6416548367471103e-06, "loss": 0.1779, "step": 53170 }, { "epoch": 2.911624596178065, "grad_norm": 0.09077789634466171, "learning_rate": 1.6365848712228758e-06, "loss": 0.1808, "step": 53175 }, { "epoch": 2.9118983737611566, "grad_norm": 0.09293367713689804, "learning_rate": 1.6315149056986414e-06, "loss": 0.1831, "step": 53180 }, { "epoch": 2.912172151344248, "grad_norm": 0.09496237337589264, "learning_rate": 1.626444940174407e-06, "loss": 0.1836, "step": 53185 }, { "epoch": 2.9124459289273394, "grad_norm": 0.08563809841871262, "learning_rate": 1.6213749746501724e-06, "loss": 0.183, "step": 53190 }, { "epoch": 2.9127197065104307, "grad_norm": 0.08557050675153732, "learning_rate": 1.616305009125938e-06, "loss": 0.1826, "step": 53195 }, { "epoch": 2.9129934840935223, "grad_norm": 0.09812398254871368, "learning_rate": 1.6112350436017037e-06, "loss": 0.1757, "step": 53200 }, { "epoch": 2.913267261676614, "grad_norm": 0.0942385122179985, "learning_rate": 1.6061650780774693e-06, "loss": 0.1764, "step": 53205 }, { "epoch": 2.913541039259705, "grad_norm": 0.09461536258459091, "learning_rate": 1.6010951125532348e-06, "loss": 0.1877, "step": 53210 }, { "epoch": 2.913814816842797, "grad_norm": 0.08844945579767227, "learning_rate": 1.5960251470290004e-06, "loss": 0.1845, "step": 53215 }, { "epoch": 2.9140885944258885, "grad_norm": 0.08624538034200668, "learning_rate": 1.590955181504766e-06, "loss": 0.1873, "step": 53220 }, { "epoch": 2.9143623720089797, "grad_norm": 0.0898645892739296, "learning_rate": 1.5858852159805315e-06, "loss": 0.1787, "step": 53225 }, { "epoch": 2.9146361495920714, "grad_norm": 0.0892699733376503, "learning_rate": 1.580815250456297e-06, "loss": 0.1721, "step": 53230 }, { "epoch": 2.914909927175163, "grad_norm": 0.09651833027601242, "learning_rate": 1.5757452849320627e-06, "loss": 0.1785, "step": 53235 }, { "epoch": 2.9151837047582543, "grad_norm": 0.08310740441083908, "learning_rate": 1.5706753194078281e-06, "loss": 0.1776, "step": 53240 }, { "epoch": 2.915457482341346, "grad_norm": 0.09492731839418411, "learning_rate": 1.5656053538835938e-06, "loss": 0.1807, "step": 53245 }, { "epoch": 2.9157312599244376, "grad_norm": 0.09209206700325012, "learning_rate": 1.5605353883593592e-06, "loss": 0.1819, "step": 53250 }, { "epoch": 2.916005037507529, "grad_norm": 0.08709580451250076, "learning_rate": 1.5554654228351248e-06, "loss": 0.1817, "step": 53255 }, { "epoch": 2.9162788150906205, "grad_norm": 0.09642193466424942, "learning_rate": 1.5503954573108903e-06, "loss": 0.1873, "step": 53260 }, { "epoch": 2.916552592673712, "grad_norm": 0.0867060199379921, "learning_rate": 1.5453254917866559e-06, "loss": 0.1804, "step": 53265 }, { "epoch": 2.9168263702568034, "grad_norm": 0.09128911793231964, "learning_rate": 1.5402555262624215e-06, "loss": 0.1895, "step": 53270 }, { "epoch": 2.917100147839895, "grad_norm": 0.08756810426712036, "learning_rate": 1.5351855607381872e-06, "loss": 0.1889, "step": 53275 }, { "epoch": 2.9173739254229862, "grad_norm": 0.08339186012744904, "learning_rate": 1.5301155952139526e-06, "loss": 0.1749, "step": 53280 }, { "epoch": 2.917647703006078, "grad_norm": 0.08616364747285843, "learning_rate": 1.5250456296897182e-06, "loss": 0.1767, "step": 53285 }, { "epoch": 2.917921480589169, "grad_norm": 0.09682966768741608, "learning_rate": 1.5199756641654839e-06, "loss": 0.1821, "step": 53290 }, { "epoch": 2.9181952581722608, "grad_norm": 0.08884051442146301, "learning_rate": 1.5149056986412493e-06, "loss": 0.183, "step": 53295 }, { "epoch": 2.9184690357553524, "grad_norm": 0.09183000028133392, "learning_rate": 1.509835733117015e-06, "loss": 0.178, "step": 53300 }, { "epoch": 2.9187428133384437, "grad_norm": 0.08679571002721786, "learning_rate": 1.5047657675927805e-06, "loss": 0.1822, "step": 53305 }, { "epoch": 2.9190165909215353, "grad_norm": 0.09041688591241837, "learning_rate": 1.499695802068546e-06, "loss": 0.1814, "step": 53310 }, { "epoch": 2.919290368504627, "grad_norm": 0.09099521487951279, "learning_rate": 1.4946258365443116e-06, "loss": 0.1871, "step": 53315 }, { "epoch": 2.919564146087718, "grad_norm": 0.09047438949346542, "learning_rate": 1.4895558710200772e-06, "loss": 0.1759, "step": 53320 }, { "epoch": 2.91983792367081, "grad_norm": 0.09337174147367477, "learning_rate": 1.4844859054958427e-06, "loss": 0.1838, "step": 53325 }, { "epoch": 2.9201117012539015, "grad_norm": 0.09371034801006317, "learning_rate": 1.4794159399716083e-06, "loss": 0.1899, "step": 53330 }, { "epoch": 2.9203854788369927, "grad_norm": 0.08874240517616272, "learning_rate": 1.474345974447374e-06, "loss": 0.1805, "step": 53335 }, { "epoch": 2.9206592564200844, "grad_norm": 0.08940988034009933, "learning_rate": 1.4692760089231396e-06, "loss": 0.1763, "step": 53340 }, { "epoch": 2.920933034003176, "grad_norm": 0.09388605505228043, "learning_rate": 1.464206043398905e-06, "loss": 0.1826, "step": 53345 }, { "epoch": 2.9212068115862673, "grad_norm": 0.08271997421979904, "learning_rate": 1.4591360778746704e-06, "loss": 0.1844, "step": 53350 }, { "epoch": 2.921480589169359, "grad_norm": 0.09285619854927063, "learning_rate": 1.454066112350436e-06, "loss": 0.1776, "step": 53355 }, { "epoch": 2.92175436675245, "grad_norm": 0.08780321478843689, "learning_rate": 1.4489961468262017e-06, "loss": 0.1779, "step": 53360 }, { "epoch": 2.922028144335542, "grad_norm": 0.08662170171737671, "learning_rate": 1.443926181301967e-06, "loss": 0.1856, "step": 53365 }, { "epoch": 2.922301921918633, "grad_norm": 0.10496655851602554, "learning_rate": 1.4388562157777327e-06, "loss": 0.1805, "step": 53370 }, { "epoch": 2.9225756995017247, "grad_norm": 0.08626947551965714, "learning_rate": 1.4337862502534984e-06, "loss": 0.1841, "step": 53375 }, { "epoch": 2.9228494770848163, "grad_norm": 0.10058242827653885, "learning_rate": 1.4287162847292638e-06, "loss": 0.1806, "step": 53380 }, { "epoch": 2.9231232546679076, "grad_norm": 0.08640053868293762, "learning_rate": 1.4236463192050294e-06, "loss": 0.1761, "step": 53385 }, { "epoch": 2.9233970322509992, "grad_norm": 0.09067633002996445, "learning_rate": 1.418576353680795e-06, "loss": 0.1763, "step": 53390 }, { "epoch": 2.923670809834091, "grad_norm": 0.08766811341047287, "learning_rate": 1.4135063881565605e-06, "loss": 0.1864, "step": 53395 }, { "epoch": 2.923944587417182, "grad_norm": 0.09605361521244049, "learning_rate": 1.408436422632326e-06, "loss": 0.1803, "step": 53400 }, { "epoch": 2.9242183650002738, "grad_norm": 0.09596236795186996, "learning_rate": 1.4033664571080917e-06, "loss": 0.1805, "step": 53405 }, { "epoch": 2.9244921425833654, "grad_norm": 0.0905129685997963, "learning_rate": 1.3982964915838574e-06, "loss": 0.1872, "step": 53410 }, { "epoch": 2.9247659201664566, "grad_norm": 0.0901559591293335, "learning_rate": 1.3932265260596228e-06, "loss": 0.1802, "step": 53415 }, { "epoch": 2.9250396977495483, "grad_norm": 0.0923495665192604, "learning_rate": 1.3881565605353884e-06, "loss": 0.1833, "step": 53420 }, { "epoch": 2.92531347533264, "grad_norm": 0.08548948913812637, "learning_rate": 1.383086595011154e-06, "loss": 0.1726, "step": 53425 }, { "epoch": 2.925587252915731, "grad_norm": 0.09006094187498093, "learning_rate": 1.3780166294869195e-06, "loss": 0.1801, "step": 53430 }, { "epoch": 2.925861030498823, "grad_norm": 0.09584397077560425, "learning_rate": 1.3729466639626851e-06, "loss": 0.176, "step": 53435 }, { "epoch": 2.9261348080819145, "grad_norm": 0.09307517856359482, "learning_rate": 1.3678766984384508e-06, "loss": 0.1787, "step": 53440 }, { "epoch": 2.9264085856650057, "grad_norm": 0.08707299083471298, "learning_rate": 1.3628067329142162e-06, "loss": 0.1836, "step": 53445 }, { "epoch": 2.9266823632480974, "grad_norm": 0.08790351450443268, "learning_rate": 1.3577367673899818e-06, "loss": 0.1811, "step": 53450 }, { "epoch": 2.9269561408311886, "grad_norm": 0.08977444469928741, "learning_rate": 1.3526668018657474e-06, "loss": 0.1864, "step": 53455 }, { "epoch": 2.9272299184142803, "grad_norm": 0.08676568418741226, "learning_rate": 1.347596836341513e-06, "loss": 0.1855, "step": 53460 }, { "epoch": 2.9275036959973715, "grad_norm": 0.09254872798919678, "learning_rate": 1.3425268708172785e-06, "loss": 0.181, "step": 53465 }, { "epoch": 2.927777473580463, "grad_norm": 0.08946077525615692, "learning_rate": 1.3374569052930441e-06, "loss": 0.1802, "step": 53470 }, { "epoch": 2.928051251163555, "grad_norm": 0.08934817463159561, "learning_rate": 1.3323869397688098e-06, "loss": 0.1815, "step": 53475 }, { "epoch": 2.928325028746646, "grad_norm": 0.08760299533605576, "learning_rate": 1.3273169742445752e-06, "loss": 0.1808, "step": 53480 }, { "epoch": 2.9285988063297377, "grad_norm": 0.09459080547094345, "learning_rate": 1.3222470087203408e-06, "loss": 0.182, "step": 53485 }, { "epoch": 2.9288725839128293, "grad_norm": 0.08728624135255814, "learning_rate": 1.3171770431961065e-06, "loss": 0.1825, "step": 53490 }, { "epoch": 2.9291463614959206, "grad_norm": 0.09472659975290298, "learning_rate": 1.3121070776718719e-06, "loss": 0.1818, "step": 53495 }, { "epoch": 2.929420139079012, "grad_norm": 0.08396592736244202, "learning_rate": 1.3070371121476373e-06, "loss": 0.1734, "step": 53500 }, { "epoch": 2.929693916662104, "grad_norm": 0.08627591282129288, "learning_rate": 1.301967146623403e-06, "loss": 0.1778, "step": 53505 }, { "epoch": 2.929967694245195, "grad_norm": 0.08952970057725906, "learning_rate": 1.2968971810991686e-06, "loss": 0.1816, "step": 53510 }, { "epoch": 2.9302414718282868, "grad_norm": 0.08743041008710861, "learning_rate": 1.291827215574934e-06, "loss": 0.1875, "step": 53515 }, { "epoch": 2.9305152494113784, "grad_norm": 0.08597039431333542, "learning_rate": 1.2867572500506996e-06, "loss": 0.1777, "step": 53520 }, { "epoch": 2.9307890269944696, "grad_norm": 0.09488545358181, "learning_rate": 1.2816872845264653e-06, "loss": 0.1768, "step": 53525 }, { "epoch": 2.9310628045775613, "grad_norm": 0.08862966299057007, "learning_rate": 1.276617319002231e-06, "loss": 0.1781, "step": 53530 }, { "epoch": 2.931336582160653, "grad_norm": 0.08521638810634613, "learning_rate": 1.2715473534779963e-06, "loss": 0.1733, "step": 53535 }, { "epoch": 2.931610359743744, "grad_norm": 0.09618817269802094, "learning_rate": 1.266477387953762e-06, "loss": 0.1881, "step": 53540 }, { "epoch": 2.931884137326836, "grad_norm": 0.10192476212978363, "learning_rate": 1.2614074224295276e-06, "loss": 0.1763, "step": 53545 }, { "epoch": 2.932157914909927, "grad_norm": 0.08344142138957977, "learning_rate": 1.256337456905293e-06, "loss": 0.1827, "step": 53550 }, { "epoch": 2.9324316924930187, "grad_norm": 0.09344634413719177, "learning_rate": 1.2512674913810586e-06, "loss": 0.1829, "step": 53555 }, { "epoch": 2.93270547007611, "grad_norm": 0.09476105868816376, "learning_rate": 1.2461975258568243e-06, "loss": 0.1813, "step": 53560 }, { "epoch": 2.9329792476592016, "grad_norm": 0.08838105201721191, "learning_rate": 1.2411275603325897e-06, "loss": 0.1793, "step": 53565 }, { "epoch": 2.9332530252422933, "grad_norm": 0.09431877732276917, "learning_rate": 1.2360575948083553e-06, "loss": 0.1882, "step": 53570 }, { "epoch": 2.9335268028253845, "grad_norm": 0.08022038638591766, "learning_rate": 1.230987629284121e-06, "loss": 0.1747, "step": 53575 }, { "epoch": 2.933800580408476, "grad_norm": 0.08568266034126282, "learning_rate": 1.2259176637598866e-06, "loss": 0.1779, "step": 53580 }, { "epoch": 2.934074357991568, "grad_norm": 0.09380032122135162, "learning_rate": 1.220847698235652e-06, "loss": 0.1767, "step": 53585 }, { "epoch": 2.934348135574659, "grad_norm": 0.08932863175868988, "learning_rate": 1.2157777327114177e-06, "loss": 0.1778, "step": 53590 }, { "epoch": 2.9346219131577507, "grad_norm": 0.08715803176164627, "learning_rate": 1.2107077671871833e-06, "loss": 0.1754, "step": 53595 }, { "epoch": 2.9348956907408423, "grad_norm": 0.09254731982946396, "learning_rate": 1.2056378016629487e-06, "loss": 0.1864, "step": 53600 }, { "epoch": 2.9351694683239336, "grad_norm": 0.09414857625961304, "learning_rate": 1.2005678361387143e-06, "loss": 0.1851, "step": 53605 }, { "epoch": 2.935443245907025, "grad_norm": 0.09320987015962601, "learning_rate": 1.19549787061448e-06, "loss": 0.1834, "step": 53610 }, { "epoch": 2.935717023490117, "grad_norm": 0.08331219106912613, "learning_rate": 1.1904279050902454e-06, "loss": 0.1756, "step": 53615 }, { "epoch": 2.935990801073208, "grad_norm": 0.09130015969276428, "learning_rate": 1.185357939566011e-06, "loss": 0.1798, "step": 53620 }, { "epoch": 2.9362645786562998, "grad_norm": 0.09121009707450867, "learning_rate": 1.1802879740417767e-06, "loss": 0.1792, "step": 53625 }, { "epoch": 2.936538356239391, "grad_norm": 0.09386872500181198, "learning_rate": 1.1752180085175423e-06, "loss": 0.1788, "step": 53630 }, { "epoch": 2.9368121338224826, "grad_norm": 0.08946442604064941, "learning_rate": 1.1701480429933077e-06, "loss": 0.1783, "step": 53635 }, { "epoch": 2.937085911405574, "grad_norm": 0.08490581065416336, "learning_rate": 1.1650780774690734e-06, "loss": 0.1776, "step": 53640 }, { "epoch": 2.9373596889886655, "grad_norm": 0.09771959483623505, "learning_rate": 1.160008111944839e-06, "loss": 0.187, "step": 53645 }, { "epoch": 2.937633466571757, "grad_norm": 0.08748829364776611, "learning_rate": 1.1549381464206044e-06, "loss": 0.1867, "step": 53650 }, { "epoch": 2.9379072441548484, "grad_norm": 0.09238851815462112, "learning_rate": 1.1498681808963698e-06, "loss": 0.1907, "step": 53655 }, { "epoch": 2.93818102173794, "grad_norm": 0.0948953852057457, "learning_rate": 1.1447982153721355e-06, "loss": 0.1791, "step": 53660 }, { "epoch": 2.9384547993210317, "grad_norm": 0.09388699382543564, "learning_rate": 1.1397282498479011e-06, "loss": 0.185, "step": 53665 }, { "epoch": 2.938728576904123, "grad_norm": 0.0880732536315918, "learning_rate": 1.1346582843236665e-06, "loss": 0.1811, "step": 53670 }, { "epoch": 2.9390023544872146, "grad_norm": 0.09123024344444275, "learning_rate": 1.1295883187994322e-06, "loss": 0.1786, "step": 53675 }, { "epoch": 2.9392761320703062, "grad_norm": 0.09087807685136795, "learning_rate": 1.1245183532751978e-06, "loss": 0.1767, "step": 53680 }, { "epoch": 2.9395499096533975, "grad_norm": 0.08579376339912415, "learning_rate": 1.1194483877509632e-06, "loss": 0.1783, "step": 53685 }, { "epoch": 2.939823687236489, "grad_norm": 0.09602297842502594, "learning_rate": 1.1143784222267289e-06, "loss": 0.1835, "step": 53690 }, { "epoch": 2.940097464819581, "grad_norm": 0.0868416577577591, "learning_rate": 1.1093084567024945e-06, "loss": 0.1776, "step": 53695 }, { "epoch": 2.940371242402672, "grad_norm": 0.08545935899019241, "learning_rate": 1.1042384911782601e-06, "loss": 0.1831, "step": 53700 }, { "epoch": 2.9406450199857637, "grad_norm": 0.08931956440210342, "learning_rate": 1.0991685256540255e-06, "loss": 0.1834, "step": 53705 }, { "epoch": 2.9409187975688553, "grad_norm": 0.08961878716945648, "learning_rate": 1.0940985601297912e-06, "loss": 0.1813, "step": 53710 }, { "epoch": 2.9411925751519465, "grad_norm": 0.09443351626396179, "learning_rate": 1.0890285946055568e-06, "loss": 0.1853, "step": 53715 }, { "epoch": 2.941466352735038, "grad_norm": 0.09300781041383743, "learning_rate": 1.0839586290813222e-06, "loss": 0.1804, "step": 53720 }, { "epoch": 2.9417401303181294, "grad_norm": 0.08540031313896179, "learning_rate": 1.0788886635570879e-06, "loss": 0.1813, "step": 53725 }, { "epoch": 2.942013907901221, "grad_norm": 0.09518591314554214, "learning_rate": 1.0738186980328535e-06, "loss": 0.1805, "step": 53730 }, { "epoch": 2.9422876854843123, "grad_norm": 0.08941631019115448, "learning_rate": 1.068748732508619e-06, "loss": 0.1853, "step": 53735 }, { "epoch": 2.942561463067404, "grad_norm": 0.08761440217494965, "learning_rate": 1.0636787669843846e-06, "loss": 0.1748, "step": 53740 }, { "epoch": 2.9428352406504956, "grad_norm": 0.09132640808820724, "learning_rate": 1.0586088014601502e-06, "loss": 0.1764, "step": 53745 }, { "epoch": 2.943109018233587, "grad_norm": 0.0865606889128685, "learning_rate": 1.0535388359359158e-06, "loss": 0.1751, "step": 53750 }, { "epoch": 2.9433827958166785, "grad_norm": 0.09163583815097809, "learning_rate": 1.0484688704116812e-06, "loss": 0.1812, "step": 53755 }, { "epoch": 2.94365657339977, "grad_norm": 0.08758872747421265, "learning_rate": 1.0433989048874469e-06, "loss": 0.1801, "step": 53760 }, { "epoch": 2.9439303509828614, "grad_norm": 0.09729063510894775, "learning_rate": 1.0383289393632125e-06, "loss": 0.1798, "step": 53765 }, { "epoch": 2.944204128565953, "grad_norm": 0.09337596595287323, "learning_rate": 1.033258973838978e-06, "loss": 0.1889, "step": 53770 }, { "epoch": 2.9444779061490447, "grad_norm": 0.08594387024641037, "learning_rate": 1.0281890083147436e-06, "loss": 0.1789, "step": 53775 }, { "epoch": 2.944751683732136, "grad_norm": 0.09902659058570862, "learning_rate": 1.0231190427905092e-06, "loss": 0.1797, "step": 53780 }, { "epoch": 2.9450254613152276, "grad_norm": 0.08825870603322983, "learning_rate": 1.0180490772662746e-06, "loss": 0.1774, "step": 53785 }, { "epoch": 2.9452992388983192, "grad_norm": 0.09072978794574738, "learning_rate": 1.0129791117420403e-06, "loss": 0.1812, "step": 53790 }, { "epoch": 2.9455730164814105, "grad_norm": 0.08751600235700607, "learning_rate": 1.0079091462178057e-06, "loss": 0.1871, "step": 53795 }, { "epoch": 2.945846794064502, "grad_norm": 0.0917249545454979, "learning_rate": 1.0028391806935713e-06, "loss": 0.1824, "step": 53800 }, { "epoch": 2.9461205716475933, "grad_norm": 0.08780904859304428, "learning_rate": 9.977692151693367e-07, "loss": 0.186, "step": 53805 }, { "epoch": 2.946394349230685, "grad_norm": 0.09629060328006744, "learning_rate": 9.926992496451024e-07, "loss": 0.1916, "step": 53810 }, { "epoch": 2.946668126813776, "grad_norm": 0.10137420147657394, "learning_rate": 9.87629284120868e-07, "loss": 0.1792, "step": 53815 }, { "epoch": 2.946941904396868, "grad_norm": 0.0880139172077179, "learning_rate": 9.825593185966334e-07, "loss": 0.174, "step": 53820 }, { "epoch": 2.9472156819799595, "grad_norm": 0.09141013771295547, "learning_rate": 9.77489353072399e-07, "loss": 0.1903, "step": 53825 }, { "epoch": 2.9474894595630508, "grad_norm": 0.08444656431674957, "learning_rate": 9.724193875481647e-07, "loss": 0.1823, "step": 53830 }, { "epoch": 2.9477632371461424, "grad_norm": 0.09021249413490295, "learning_rate": 9.673494220239303e-07, "loss": 0.175, "step": 53835 }, { "epoch": 2.948037014729234, "grad_norm": 0.09856949001550674, "learning_rate": 9.622794564996958e-07, "loss": 0.1852, "step": 53840 }, { "epoch": 2.9483107923123253, "grad_norm": 0.09106126427650452, "learning_rate": 9.572094909754614e-07, "loss": 0.1832, "step": 53845 }, { "epoch": 2.948584569895417, "grad_norm": 0.09498558193445206, "learning_rate": 9.521395254512269e-07, "loss": 0.1759, "step": 53850 }, { "epoch": 2.9488583474785086, "grad_norm": 0.0879998505115509, "learning_rate": 9.470695599269926e-07, "loss": 0.1797, "step": 53855 }, { "epoch": 2.9491321250616, "grad_norm": 0.08394987881183624, "learning_rate": 9.419995944027581e-07, "loss": 0.1793, "step": 53860 }, { "epoch": 2.9494059026446915, "grad_norm": 0.0871557742357254, "learning_rate": 9.369296288785237e-07, "loss": 0.1805, "step": 53865 }, { "epoch": 2.949679680227783, "grad_norm": 0.09030278772115707, "learning_rate": 9.318596633542892e-07, "loss": 0.18, "step": 53870 }, { "epoch": 2.9499534578108744, "grad_norm": 0.09034160524606705, "learning_rate": 9.267896978300548e-07, "loss": 0.1782, "step": 53875 }, { "epoch": 2.950227235393966, "grad_norm": 0.09065477550029755, "learning_rate": 9.217197323058204e-07, "loss": 0.1852, "step": 53880 }, { "epoch": 2.9505010129770577, "grad_norm": 0.09590662270784378, "learning_rate": 9.166497667815859e-07, "loss": 0.18, "step": 53885 }, { "epoch": 2.950774790560149, "grad_norm": 0.08776962012052536, "learning_rate": 9.115798012573516e-07, "loss": 0.1773, "step": 53890 }, { "epoch": 2.9510485681432406, "grad_norm": 0.08578508347272873, "learning_rate": 9.065098357331171e-07, "loss": 0.1796, "step": 53895 }, { "epoch": 2.951322345726332, "grad_norm": 0.088238924741745, "learning_rate": 9.014398702088826e-07, "loss": 0.1811, "step": 53900 }, { "epoch": 2.9515961233094234, "grad_norm": 0.09301630407571793, "learning_rate": 8.963699046846483e-07, "loss": 0.1821, "step": 53905 }, { "epoch": 2.9518699008925147, "grad_norm": 0.08901041746139526, "learning_rate": 8.912999391604138e-07, "loss": 0.1962, "step": 53910 }, { "epoch": 2.9521436784756063, "grad_norm": 0.08200997859239578, "learning_rate": 8.862299736361794e-07, "loss": 0.1813, "step": 53915 }, { "epoch": 2.952417456058698, "grad_norm": 0.08748216181993484, "learning_rate": 8.811600081119449e-07, "loss": 0.1865, "step": 53920 }, { "epoch": 2.952691233641789, "grad_norm": 0.09558232873678207, "learning_rate": 8.760900425877105e-07, "loss": 0.1883, "step": 53925 }, { "epoch": 2.952965011224881, "grad_norm": 0.08930252492427826, "learning_rate": 8.710200770634761e-07, "loss": 0.1856, "step": 53930 }, { "epoch": 2.9532387888079725, "grad_norm": 0.08553281426429749, "learning_rate": 8.659501115392416e-07, "loss": 0.1844, "step": 53935 }, { "epoch": 2.9535125663910637, "grad_norm": 0.09510974586009979, "learning_rate": 8.608801460150073e-07, "loss": 0.1882, "step": 53940 }, { "epoch": 2.9537863439741554, "grad_norm": 0.0836031436920166, "learning_rate": 8.558101804907726e-07, "loss": 0.1794, "step": 53945 }, { "epoch": 2.954060121557247, "grad_norm": 0.09921377152204514, "learning_rate": 8.507402149665382e-07, "loss": 0.1899, "step": 53950 }, { "epoch": 2.9543338991403383, "grad_norm": 0.09179332107305527, "learning_rate": 8.456702494423037e-07, "loss": 0.1787, "step": 53955 }, { "epoch": 2.95460767672343, "grad_norm": 0.08677412569522858, "learning_rate": 8.406002839180694e-07, "loss": 0.176, "step": 53960 }, { "epoch": 2.9548814543065216, "grad_norm": 0.08883187919855118, "learning_rate": 8.355303183938349e-07, "loss": 0.1842, "step": 53965 }, { "epoch": 2.955155231889613, "grad_norm": 0.08411241322755814, "learning_rate": 8.304603528696004e-07, "loss": 0.1883, "step": 53970 }, { "epoch": 2.9554290094727045, "grad_norm": 0.08926553279161453, "learning_rate": 8.253903873453661e-07, "loss": 0.1785, "step": 53975 }, { "epoch": 2.955702787055796, "grad_norm": 0.08039094507694244, "learning_rate": 8.203204218211316e-07, "loss": 0.1772, "step": 53980 }, { "epoch": 2.9559765646388874, "grad_norm": 0.09774436801671982, "learning_rate": 8.152504562968972e-07, "loss": 0.1859, "step": 53985 }, { "epoch": 2.956250342221979, "grad_norm": 0.08392572402954102, "learning_rate": 8.101804907726628e-07, "loss": 0.183, "step": 53990 }, { "epoch": 2.9565241198050702, "grad_norm": 0.07957211136817932, "learning_rate": 8.051105252484283e-07, "loss": 0.1835, "step": 53995 }, { "epoch": 2.956797897388162, "grad_norm": 0.09038010984659195, "learning_rate": 8.000405597241939e-07, "loss": 0.1802, "step": 54000 }, { "epoch": 2.957071674971253, "grad_norm": 0.09298940747976303, "learning_rate": 7.949705941999595e-07, "loss": 0.1824, "step": 54005 }, { "epoch": 2.9573454525543448, "grad_norm": 0.09111801534891129, "learning_rate": 7.899006286757251e-07, "loss": 0.1922, "step": 54010 }, { "epoch": 2.9576192301374364, "grad_norm": 0.08955593407154083, "learning_rate": 7.848306631514906e-07, "loss": 0.1808, "step": 54015 }, { "epoch": 2.9578930077205277, "grad_norm": 0.08767372369766235, "learning_rate": 7.797606976272561e-07, "loss": 0.1775, "step": 54020 }, { "epoch": 2.9581667853036193, "grad_norm": 0.08791406452655792, "learning_rate": 7.746907321030218e-07, "loss": 0.1807, "step": 54025 }, { "epoch": 2.958440562886711, "grad_norm": 0.09387857466936111, "learning_rate": 7.696207665787873e-07, "loss": 0.1822, "step": 54030 }, { "epoch": 2.958714340469802, "grad_norm": 0.0933678075671196, "learning_rate": 7.645508010545529e-07, "loss": 0.1829, "step": 54035 }, { "epoch": 2.958988118052894, "grad_norm": 0.09792423993349075, "learning_rate": 7.594808355303185e-07, "loss": 0.1899, "step": 54040 }, { "epoch": 2.9592618956359855, "grad_norm": 0.085598886013031, "learning_rate": 7.54410870006084e-07, "loss": 0.1877, "step": 54045 }, { "epoch": 2.9595356732190767, "grad_norm": 0.08662212640047073, "learning_rate": 7.493409044818496e-07, "loss": 0.1806, "step": 54050 }, { "epoch": 2.9598094508021684, "grad_norm": 0.08930448442697525, "learning_rate": 7.44270938957615e-07, "loss": 0.1838, "step": 54055 }, { "epoch": 2.96008322838526, "grad_norm": 0.08780109137296677, "learning_rate": 7.392009734333807e-07, "loss": 0.1769, "step": 54060 }, { "epoch": 2.9603570059683513, "grad_norm": 0.08573953807353973, "learning_rate": 7.341310079091462e-07, "loss": 0.1824, "step": 54065 }, { "epoch": 2.960630783551443, "grad_norm": 0.09570465981960297, "learning_rate": 7.290610423849117e-07, "loss": 0.1826, "step": 54070 }, { "epoch": 2.960904561134534, "grad_norm": 0.08934131264686584, "learning_rate": 7.239910768606774e-07, "loss": 0.182, "step": 54075 }, { "epoch": 2.961178338717626, "grad_norm": 0.09469465166330338, "learning_rate": 7.189211113364429e-07, "loss": 0.1879, "step": 54080 }, { "epoch": 2.961452116300717, "grad_norm": 0.09270944446325302, "learning_rate": 7.138511458122085e-07, "loss": 0.1859, "step": 54085 }, { "epoch": 2.9617258938838087, "grad_norm": 0.0941404402256012, "learning_rate": 7.087811802879741e-07, "loss": 0.1846, "step": 54090 }, { "epoch": 2.9619996714669004, "grad_norm": 0.09108526259660721, "learning_rate": 7.037112147637396e-07, "loss": 0.1837, "step": 54095 }, { "epoch": 2.9622734490499916, "grad_norm": 0.08934517204761505, "learning_rate": 6.986412492395052e-07, "loss": 0.1758, "step": 54100 }, { "epoch": 2.9625472266330832, "grad_norm": 0.08808207511901855, "learning_rate": 6.935712837152708e-07, "loss": 0.1841, "step": 54105 }, { "epoch": 2.962821004216175, "grad_norm": 0.0889681726694107, "learning_rate": 6.885013181910364e-07, "loss": 0.1805, "step": 54110 }, { "epoch": 2.963094781799266, "grad_norm": 0.09530049562454224, "learning_rate": 6.834313526668019e-07, "loss": 0.1821, "step": 54115 }, { "epoch": 2.9633685593823578, "grad_norm": 0.08686399459838867, "learning_rate": 6.783613871425674e-07, "loss": 0.188, "step": 54120 }, { "epoch": 2.9636423369654494, "grad_norm": 0.08216628432273865, "learning_rate": 6.732914216183331e-07, "loss": 0.1686, "step": 54125 }, { "epoch": 2.9639161145485406, "grad_norm": 0.09012740850448608, "learning_rate": 6.682214560940985e-07, "loss": 0.1881, "step": 54130 }, { "epoch": 2.9641898921316323, "grad_norm": 0.08575581759214401, "learning_rate": 6.631514905698641e-07, "loss": 0.1833, "step": 54135 }, { "epoch": 2.964463669714724, "grad_norm": 0.08985693752765656, "learning_rate": 6.580815250456297e-07, "loss": 0.1787, "step": 54140 }, { "epoch": 2.964737447297815, "grad_norm": 0.08929776400327682, "learning_rate": 6.530115595213953e-07, "loss": 0.1878, "step": 54145 }, { "epoch": 2.965011224880907, "grad_norm": 0.09194683283567429, "learning_rate": 6.479415939971608e-07, "loss": 0.1786, "step": 54150 }, { "epoch": 2.9652850024639985, "grad_norm": 0.08033282309770584, "learning_rate": 6.428716284729264e-07, "loss": 0.1787, "step": 54155 }, { "epoch": 2.9655587800470897, "grad_norm": 0.09983814507722855, "learning_rate": 6.37801662948692e-07, "loss": 0.1939, "step": 54160 }, { "epoch": 2.9658325576301814, "grad_norm": 0.08986450731754303, "learning_rate": 6.327316974244575e-07, "loss": 0.1717, "step": 54165 }, { "epoch": 2.9661063352132726, "grad_norm": 0.08318860828876495, "learning_rate": 6.276617319002232e-07, "loss": 0.1763, "step": 54170 }, { "epoch": 2.9663801127963643, "grad_norm": 0.09463782608509064, "learning_rate": 6.225917663759887e-07, "loss": 0.1811, "step": 54175 }, { "epoch": 2.9666538903794555, "grad_norm": 0.09042444080114365, "learning_rate": 6.175218008517542e-07, "loss": 0.1835, "step": 54180 }, { "epoch": 2.966927667962547, "grad_norm": 0.10052882134914398, "learning_rate": 6.124518353275198e-07, "loss": 0.1933, "step": 54185 }, { "epoch": 2.967201445545639, "grad_norm": 0.08233167976140976, "learning_rate": 6.073818698032854e-07, "loss": 0.1768, "step": 54190 }, { "epoch": 2.96747522312873, "grad_norm": 0.08426987379789352, "learning_rate": 6.02311904279051e-07, "loss": 0.1759, "step": 54195 }, { "epoch": 2.9677490007118217, "grad_norm": 0.08653777837753296, "learning_rate": 5.972419387548165e-07, "loss": 0.1757, "step": 54200 }, { "epoch": 2.9680227782949133, "grad_norm": 0.09247712790966034, "learning_rate": 5.921719732305821e-07, "loss": 0.1915, "step": 54205 }, { "epoch": 2.9682965558780046, "grad_norm": 0.08909090608358383, "learning_rate": 5.871020077063476e-07, "loss": 0.1837, "step": 54210 }, { "epoch": 2.9685703334610962, "grad_norm": 0.08474339544773102, "learning_rate": 5.820320421821131e-07, "loss": 0.1776, "step": 54215 }, { "epoch": 2.968844111044188, "grad_norm": 0.09182705730199814, "learning_rate": 5.769620766578788e-07, "loss": 0.1799, "step": 54220 }, { "epoch": 2.969117888627279, "grad_norm": 0.1013512909412384, "learning_rate": 5.718921111336443e-07, "loss": 0.179, "step": 54225 }, { "epoch": 2.9693916662103708, "grad_norm": 0.09201131016016006, "learning_rate": 5.668221456094099e-07, "loss": 0.1831, "step": 54230 }, { "epoch": 2.9696654437934624, "grad_norm": 0.08308616280555725, "learning_rate": 5.617521800851754e-07, "loss": 0.1806, "step": 54235 }, { "epoch": 2.9699392213765536, "grad_norm": 0.09386564791202545, "learning_rate": 5.56682214560941e-07, "loss": 0.182, "step": 54240 }, { "epoch": 2.9702129989596453, "grad_norm": 0.0921911671757698, "learning_rate": 5.516122490367066e-07, "loss": 0.1788, "step": 54245 }, { "epoch": 2.9704867765427365, "grad_norm": 0.09479733556509018, "learning_rate": 5.465422835124721e-07, "loss": 0.182, "step": 54250 }, { "epoch": 2.970760554125828, "grad_norm": 0.09008567780256271, "learning_rate": 5.414723179882378e-07, "loss": 0.1766, "step": 54255 }, { "epoch": 2.97103433170892, "grad_norm": 0.09740496426820755, "learning_rate": 5.364023524640033e-07, "loss": 0.189, "step": 54260 }, { "epoch": 2.971308109292011, "grad_norm": 0.09468494355678558, "learning_rate": 5.313323869397688e-07, "loss": 0.1902, "step": 54265 }, { "epoch": 2.9715818868751027, "grad_norm": 0.09169932454824448, "learning_rate": 5.262624214155345e-07, "loss": 0.1774, "step": 54270 }, { "epoch": 2.971855664458194, "grad_norm": 0.08865271508693695, "learning_rate": 5.211924558913e-07, "loss": 0.1826, "step": 54275 }, { "epoch": 2.9721294420412856, "grad_norm": 0.08672592788934708, "learning_rate": 5.161224903670655e-07, "loss": 0.1778, "step": 54280 }, { "epoch": 2.9724032196243773, "grad_norm": 0.09520889818668365, "learning_rate": 5.11052524842831e-07, "loss": 0.178, "step": 54285 }, { "epoch": 2.9726769972074685, "grad_norm": 0.086271733045578, "learning_rate": 5.059825593185967e-07, "loss": 0.1842, "step": 54290 }, { "epoch": 2.97295077479056, "grad_norm": 0.09385251998901367, "learning_rate": 5.009125937943622e-07, "loss": 0.1801, "step": 54295 }, { "epoch": 2.973224552373652, "grad_norm": 0.09225571155548096, "learning_rate": 4.958426282701277e-07, "loss": 0.1769, "step": 54300 }, { "epoch": 2.973498329956743, "grad_norm": 0.08384891599416733, "learning_rate": 4.907726627458934e-07, "loss": 0.1789, "step": 54305 }, { "epoch": 2.9737721075398347, "grad_norm": 0.0847787857055664, "learning_rate": 4.857026972216589e-07, "loss": 0.18, "step": 54310 }, { "epoch": 2.9740458851229263, "grad_norm": 0.08343850076198578, "learning_rate": 4.806327316974245e-07, "loss": 0.1755, "step": 54315 }, { "epoch": 2.9743196627060176, "grad_norm": 0.09037571400403976, "learning_rate": 4.7556276617319005e-07, "loss": 0.1766, "step": 54320 }, { "epoch": 2.974593440289109, "grad_norm": 0.09272503852844238, "learning_rate": 4.7049280064895563e-07, "loss": 0.1816, "step": 54325 }, { "epoch": 2.974867217872201, "grad_norm": 0.08283964544534683, "learning_rate": 4.654228351247212e-07, "loss": 0.1861, "step": 54330 }, { "epoch": 2.975140995455292, "grad_norm": 0.08834657073020935, "learning_rate": 4.6035286960048674e-07, "loss": 0.1803, "step": 54335 }, { "epoch": 2.9754147730383838, "grad_norm": 0.09536239504814148, "learning_rate": 4.552829040762523e-07, "loss": 0.1789, "step": 54340 }, { "epoch": 2.975688550621475, "grad_norm": 0.08841733634471893, "learning_rate": 4.502129385520179e-07, "loss": 0.1751, "step": 54345 }, { "epoch": 2.9759623282045666, "grad_norm": 0.09247768670320511, "learning_rate": 4.451429730277835e-07, "loss": 0.1885, "step": 54350 }, { "epoch": 2.976236105787658, "grad_norm": 0.08196444064378738, "learning_rate": 4.4007300750354896e-07, "loss": 0.1819, "step": 54355 }, { "epoch": 2.9765098833707495, "grad_norm": 0.08628219366073608, "learning_rate": 4.3500304197931454e-07, "loss": 0.1825, "step": 54360 }, { "epoch": 2.976783660953841, "grad_norm": 0.09959172457456589, "learning_rate": 4.299330764550801e-07, "loss": 0.1844, "step": 54365 }, { "epoch": 2.9770574385369324, "grad_norm": 0.08924159407615662, "learning_rate": 4.2486311093084565e-07, "loss": 0.1821, "step": 54370 }, { "epoch": 2.977331216120024, "grad_norm": 0.09257680177688599, "learning_rate": 4.1979314540661123e-07, "loss": 0.179, "step": 54375 }, { "epoch": 2.9776049937031157, "grad_norm": 0.08863035589456558, "learning_rate": 4.147231798823768e-07, "loss": 0.1734, "step": 54380 }, { "epoch": 2.977878771286207, "grad_norm": 0.08813381940126419, "learning_rate": 4.096532143581424e-07, "loss": 0.191, "step": 54385 }, { "epoch": 2.9781525488692986, "grad_norm": 0.07989227026700974, "learning_rate": 4.04583248833908e-07, "loss": 0.1848, "step": 54390 }, { "epoch": 2.9784263264523902, "grad_norm": 0.08107208460569382, "learning_rate": 3.995132833096735e-07, "loss": 0.1822, "step": 54395 }, { "epoch": 2.9787001040354815, "grad_norm": 0.08508867025375366, "learning_rate": 3.944433177854391e-07, "loss": 0.1738, "step": 54400 }, { "epoch": 2.978973881618573, "grad_norm": 0.09029850363731384, "learning_rate": 3.8937335226120467e-07, "loss": 0.1832, "step": 54405 }, { "epoch": 2.979247659201665, "grad_norm": 0.08731847256422043, "learning_rate": 3.843033867369702e-07, "loss": 0.176, "step": 54410 }, { "epoch": 2.979521436784756, "grad_norm": 0.09523683786392212, "learning_rate": 3.792334212127358e-07, "loss": 0.1849, "step": 54415 }, { "epoch": 2.9797952143678477, "grad_norm": 0.0885341539978981, "learning_rate": 3.741634556885013e-07, "loss": 0.1799, "step": 54420 }, { "epoch": 2.9800689919509393, "grad_norm": 0.09157688915729523, "learning_rate": 3.690934901642669e-07, "loss": 0.1809, "step": 54425 }, { "epoch": 2.9803427695340305, "grad_norm": 0.09134512394666672, "learning_rate": 3.6402352464003247e-07, "loss": 0.1862, "step": 54430 }, { "epoch": 2.980616547117122, "grad_norm": 0.08729575574398041, "learning_rate": 3.5895355911579805e-07, "loss": 0.179, "step": 54435 }, { "epoch": 2.9808903247002134, "grad_norm": 0.09546972811222076, "learning_rate": 3.5388359359156363e-07, "loss": 0.1807, "step": 54440 }, { "epoch": 2.981164102283305, "grad_norm": 0.08542777597904205, "learning_rate": 3.4881362806732916e-07, "loss": 0.1753, "step": 54445 }, { "epoch": 2.9814378798663963, "grad_norm": 0.08695703744888306, "learning_rate": 3.437436625430947e-07, "loss": 0.1801, "step": 54450 }, { "epoch": 2.981711657449488, "grad_norm": 0.09791044890880585, "learning_rate": 3.3867369701886027e-07, "loss": 0.1833, "step": 54455 }, { "epoch": 2.9819854350325796, "grad_norm": 0.09174667298793793, "learning_rate": 3.3360373149462585e-07, "loss": 0.1756, "step": 54460 }, { "epoch": 2.982259212615671, "grad_norm": 0.09820322692394257, "learning_rate": 3.2853376597039143e-07, "loss": 0.1758, "step": 54465 }, { "epoch": 2.9825329901987625, "grad_norm": 0.08935681730508804, "learning_rate": 3.23463800446157e-07, "loss": 0.1851, "step": 54470 }, { "epoch": 2.982806767781854, "grad_norm": 0.09146849066019058, "learning_rate": 3.1839383492192254e-07, "loss": 0.179, "step": 54475 }, { "epoch": 2.9830805453649454, "grad_norm": 0.08824165910482407, "learning_rate": 3.133238693976881e-07, "loss": 0.1835, "step": 54480 }, { "epoch": 2.983354322948037, "grad_norm": 0.08778588473796844, "learning_rate": 3.0825390387345365e-07, "loss": 0.1788, "step": 54485 }, { "epoch": 2.9836281005311287, "grad_norm": 0.0906936377286911, "learning_rate": 3.0318393834921923e-07, "loss": 0.1878, "step": 54490 }, { "epoch": 2.98390187811422, "grad_norm": 0.0933956578373909, "learning_rate": 2.981139728249848e-07, "loss": 0.1838, "step": 54495 }, { "epoch": 2.9841756556973116, "grad_norm": 0.08699512481689453, "learning_rate": 2.930440073007504e-07, "loss": 0.1867, "step": 54500 }, { "epoch": 2.9844494332804032, "grad_norm": 0.09009120613336563, "learning_rate": 2.879740417765159e-07, "loss": 0.18, "step": 54505 }, { "epoch": 2.9847232108634945, "grad_norm": 0.08565568923950195, "learning_rate": 2.829040762522815e-07, "loss": 0.1898, "step": 54510 }, { "epoch": 2.984996988446586, "grad_norm": 0.08454275876283646, "learning_rate": 2.778341107280471e-07, "loss": 0.182, "step": 54515 }, { "epoch": 2.9852707660296773, "grad_norm": 0.0862966775894165, "learning_rate": 2.727641452038126e-07, "loss": 0.182, "step": 54520 }, { "epoch": 2.985544543612769, "grad_norm": 0.08943589776754379, "learning_rate": 2.676941796795782e-07, "loss": 0.1812, "step": 54525 }, { "epoch": 2.98581832119586, "grad_norm": 0.09717711806297302, "learning_rate": 2.6262421415534377e-07, "loss": 0.1819, "step": 54530 }, { "epoch": 2.986092098778952, "grad_norm": 0.08292528986930847, "learning_rate": 2.575542486311093e-07, "loss": 0.179, "step": 54535 }, { "epoch": 2.9863658763620435, "grad_norm": 0.09234075248241425, "learning_rate": 2.524842831068749e-07, "loss": 0.1792, "step": 54540 }, { "epoch": 2.9866396539451348, "grad_norm": 0.09161681681871414, "learning_rate": 2.4741431758264046e-07, "loss": 0.1823, "step": 54545 }, { "epoch": 2.9869134315282264, "grad_norm": 0.08865795284509659, "learning_rate": 2.4234435205840604e-07, "loss": 0.1801, "step": 54550 }, { "epoch": 2.987187209111318, "grad_norm": 0.08517686277627945, "learning_rate": 2.372743865341716e-07, "loss": 0.183, "step": 54555 }, { "epoch": 2.9874609866944093, "grad_norm": 0.0880202129483223, "learning_rate": 2.3220442100993712e-07, "loss": 0.1846, "step": 54560 }, { "epoch": 2.987734764277501, "grad_norm": 0.09038610756397247, "learning_rate": 2.271344554857027e-07, "loss": 0.1871, "step": 54565 }, { "epoch": 2.9880085418605926, "grad_norm": 0.09057258069515228, "learning_rate": 2.2206448996146826e-07, "loss": 0.1873, "step": 54570 }, { "epoch": 2.988282319443684, "grad_norm": 0.09085996448993683, "learning_rate": 2.1699452443723384e-07, "loss": 0.1781, "step": 54575 }, { "epoch": 2.9885560970267755, "grad_norm": 0.09365160018205643, "learning_rate": 2.1192455891299942e-07, "loss": 0.1868, "step": 54580 }, { "epoch": 2.988829874609867, "grad_norm": 0.09399157017469406, "learning_rate": 2.0685459338876498e-07, "loss": 0.1828, "step": 54585 }, { "epoch": 2.9891036521929584, "grad_norm": 0.09329371154308319, "learning_rate": 2.0178462786453056e-07, "loss": 0.188, "step": 54590 }, { "epoch": 2.98937742977605, "grad_norm": 0.0868108943104744, "learning_rate": 1.9671466234029609e-07, "loss": 0.18, "step": 54595 }, { "epoch": 2.9896512073591417, "grad_norm": 0.08753424882888794, "learning_rate": 1.9164469681606167e-07, "loss": 0.1782, "step": 54600 }, { "epoch": 2.989924984942233, "grad_norm": 0.08431120961904526, "learning_rate": 1.8657473129182722e-07, "loss": 0.1747, "step": 54605 }, { "epoch": 2.9901987625253246, "grad_norm": 0.09664366394281387, "learning_rate": 1.8150476576759278e-07, "loss": 0.1814, "step": 54610 }, { "epoch": 2.990472540108416, "grad_norm": 0.08898057043552399, "learning_rate": 1.7643480024335836e-07, "loss": 0.1739, "step": 54615 }, { "epoch": 2.9907463176915074, "grad_norm": 0.0906321257352829, "learning_rate": 1.7136483471912394e-07, "loss": 0.1822, "step": 54620 }, { "epoch": 2.9910200952745987, "grad_norm": 0.087025485932827, "learning_rate": 1.6629486919488947e-07, "loss": 0.1861, "step": 54625 }, { "epoch": 2.9912938728576903, "grad_norm": 0.08724864572286606, "learning_rate": 1.6122490367065505e-07, "loss": 0.1787, "step": 54630 }, { "epoch": 2.991567650440782, "grad_norm": 0.08514271676540375, "learning_rate": 1.5615493814642063e-07, "loss": 0.1804, "step": 54635 }, { "epoch": 2.991841428023873, "grad_norm": 0.08587974309921265, "learning_rate": 1.5108497262218616e-07, "loss": 0.1827, "step": 54640 }, { "epoch": 2.992115205606965, "grad_norm": 0.08574249595403671, "learning_rate": 1.4601500709795174e-07, "loss": 0.184, "step": 54645 }, { "epoch": 2.9923889831900565, "grad_norm": 0.08400974422693253, "learning_rate": 1.4094504157371732e-07, "loss": 0.1807, "step": 54650 }, { "epoch": 2.9926627607731477, "grad_norm": 0.0907333716750145, "learning_rate": 1.3587507604948287e-07, "loss": 0.1838, "step": 54655 }, { "epoch": 2.9929365383562394, "grad_norm": 0.09483548253774643, "learning_rate": 1.3080511052524843e-07, "loss": 0.1867, "step": 54660 }, { "epoch": 2.993210315939331, "grad_norm": 0.0826382040977478, "learning_rate": 1.25735145001014e-07, "loss": 0.177, "step": 54665 }, { "epoch": 2.9934840935224223, "grad_norm": 0.08290977776050568, "learning_rate": 1.2066517947677956e-07, "loss": 0.1806, "step": 54670 }, { "epoch": 2.993757871105514, "grad_norm": 0.0995459109544754, "learning_rate": 1.1559521395254514e-07, "loss": 0.18, "step": 54675 }, { "epoch": 2.9940316486886056, "grad_norm": 0.08806990832090378, "learning_rate": 1.1052524842831069e-07, "loss": 0.1881, "step": 54680 }, { "epoch": 2.994305426271697, "grad_norm": 0.08961119502782822, "learning_rate": 1.0545528290407625e-07, "loss": 0.1825, "step": 54685 }, { "epoch": 2.9945792038547885, "grad_norm": 0.08641808480024338, "learning_rate": 1.0038531737984183e-07, "loss": 0.174, "step": 54690 }, { "epoch": 2.9948529814378797, "grad_norm": 0.09356093406677246, "learning_rate": 9.531535185560739e-08, "loss": 0.1806, "step": 54695 }, { "epoch": 2.9951267590209714, "grad_norm": 0.08729088306427002, "learning_rate": 9.024538633137294e-08, "loss": 0.1838, "step": 54700 }, { "epoch": 2.995400536604063, "grad_norm": 0.08560165017843246, "learning_rate": 8.517542080713851e-08, "loss": 0.1783, "step": 54705 }, { "epoch": 2.9956743141871542, "grad_norm": 0.08869665116071701, "learning_rate": 8.010545528290408e-08, "loss": 0.1788, "step": 54710 }, { "epoch": 2.995948091770246, "grad_norm": 0.09091205149888992, "learning_rate": 7.503548975866965e-08, "loss": 0.181, "step": 54715 }, { "epoch": 2.996221869353337, "grad_norm": 0.08760733902454376, "learning_rate": 6.99655242344352e-08, "loss": 0.1822, "step": 54720 }, { "epoch": 2.996495646936429, "grad_norm": 0.09314662963151932, "learning_rate": 6.489555871020078e-08, "loss": 0.1833, "step": 54725 }, { "epoch": 2.9967694245195204, "grad_norm": 0.08891893178224564, "learning_rate": 5.982559318596634e-08, "loss": 0.1823, "step": 54730 }, { "epoch": 2.9970432021026117, "grad_norm": 0.08880165964365005, "learning_rate": 5.4755627661731905e-08, "loss": 0.1807, "step": 54735 }, { "epoch": 2.9973169796857033, "grad_norm": 0.08430104702711105, "learning_rate": 4.9685662137497466e-08, "loss": 0.1733, "step": 54740 }, { "epoch": 2.997590757268795, "grad_norm": 0.0856064110994339, "learning_rate": 4.4615696613263034e-08, "loss": 0.1815, "step": 54745 }, { "epoch": 2.997864534851886, "grad_norm": 0.08769194036722183, "learning_rate": 3.9545731089028595e-08, "loss": 0.1812, "step": 54750 }, { "epoch": 2.998138312434978, "grad_norm": 0.08968344330787659, "learning_rate": 3.447576556479416e-08, "loss": 0.1824, "step": 54755 }, { "epoch": 2.9984120900180695, "grad_norm": 0.08839382976293564, "learning_rate": 2.9405800040559727e-08, "loss": 0.1777, "step": 54760 }, { "epoch": 2.9986858676011607, "grad_norm": 0.08084201067686081, "learning_rate": 2.4335834516325292e-08, "loss": 0.1754, "step": 54765 }, { "epoch": 2.9989596451842524, "grad_norm": 0.0879841074347496, "learning_rate": 1.9265868992090853e-08, "loss": 0.18, "step": 54770 }, { "epoch": 2.999233422767344, "grad_norm": 0.09228377044200897, "learning_rate": 1.4195903467856419e-08, "loss": 0.1911, "step": 54775 }, { "epoch": 2.9995072003504353, "grad_norm": 0.08768866956233978, "learning_rate": 9.125937943621984e-09, "loss": 0.1778, "step": 54780 }, { "epoch": 2.999780977933527, "grad_norm": 0.08501627296209335, "learning_rate": 4.055972419387549e-09, "loss": 0.1879, "step": 54785 }, { "epoch": 3.0, "step": 54789, "total_flos": 2.437260619231934e+21, "train_loss": 0.21978757351587389, "train_runtime": 735453.379, "train_samples_per_second": 2.384, "train_steps_per_second": 0.074 } ], "logging_steps": 5, "max_steps": 54789, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.437260619231934e+21, "train_batch_size": 1, "trial_name": null, "trial_params": null }