{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4771, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00020959966464053657, "grad_norm": 0.22152887284755707, "learning_rate": 0.0, "loss": 0.1548, "step": 1 }, { "epoch": 0.00041919932928107315, "grad_norm": 0.1861487776041031, "learning_rate": 6.944444444444444e-08, "loss": 0.1418, "step": 2 }, { "epoch": 0.0006287989939216097, "grad_norm": 0.20890986919403076, "learning_rate": 1.3888888888888888e-07, "loss": 0.1602, "step": 3 }, { "epoch": 0.0008383986585621463, "grad_norm": 0.20199677348136902, "learning_rate": 2.0833333333333333e-07, "loss": 0.1467, "step": 4 }, { "epoch": 0.0010479983232026828, "grad_norm": 0.18860988318920135, "learning_rate": 2.7777777777777776e-07, "loss": 0.1357, "step": 5 }, { "epoch": 0.0012575979878432194, "grad_norm": 0.1798183023929596, "learning_rate": 3.472222222222223e-07, "loss": 0.1343, "step": 6 }, { "epoch": 0.001467197652483756, "grad_norm": 0.12569160759449005, "learning_rate": 4.1666666666666667e-07, "loss": 0.149, "step": 7 }, { "epoch": 0.0016767973171242926, "grad_norm": 0.12502586841583252, "learning_rate": 4.861111111111112e-07, "loss": 0.1465, "step": 8 }, { "epoch": 0.0018863969817648292, "grad_norm": 0.09551655501127243, "learning_rate": 5.555555555555555e-07, "loss": 0.1407, "step": 9 }, { "epoch": 0.0020959966464053656, "grad_norm": 0.27150362730026245, "learning_rate": 6.25e-07, "loss": 0.141, "step": 10 }, { "epoch": 0.002305596311045902, "grad_norm": 0.2170093059539795, "learning_rate": 6.944444444444446e-07, "loss": 0.1529, "step": 11 }, { "epoch": 0.0025151959756864388, "grad_norm": 0.1994446963071823, "learning_rate": 7.63888888888889e-07, "loss": 0.1387, "step": 12 }, { "epoch": 0.0027247956403269754, "grad_norm": 0.16523687541484833, "learning_rate": 8.333333333333333e-07, "loss": 0.1401, "step": 13 }, { "epoch": 0.002934395304967512, "grad_norm": 0.22484736144542694, "learning_rate": 9.027777777777779e-07, "loss": 0.1438, "step": 14 }, { "epoch": 0.0031439949696080486, "grad_norm": 0.14437100291252136, "learning_rate": 9.722222222222224e-07, "loss": 0.1485, "step": 15 }, { "epoch": 0.003353594634248585, "grad_norm": 0.12035862356424332, "learning_rate": 1.0416666666666667e-06, "loss": 0.1463, "step": 16 }, { "epoch": 0.0035631942988891218, "grad_norm": 0.19523362815380096, "learning_rate": 1.111111111111111e-06, "loss": 0.1401, "step": 17 }, { "epoch": 0.0037727939635296584, "grad_norm": 0.2469567507505417, "learning_rate": 1.1805555555555556e-06, "loss": 0.1442, "step": 18 }, { "epoch": 0.0039823936281701946, "grad_norm": 0.21258766949176788, "learning_rate": 1.25e-06, "loss": 0.1348, "step": 19 }, { "epoch": 0.004191993292810731, "grad_norm": 0.13423168659210205, "learning_rate": 1.3194444444444446e-06, "loss": 0.1419, "step": 20 }, { "epoch": 0.004401592957451268, "grad_norm": 0.15541696548461914, "learning_rate": 1.3888888888888892e-06, "loss": 0.1387, "step": 21 }, { "epoch": 0.004611192622091804, "grad_norm": 0.1762888878583908, "learning_rate": 1.4583333333333335e-06, "loss": 0.1372, "step": 22 }, { "epoch": 0.004820792286732341, "grad_norm": 0.1527898907661438, "learning_rate": 1.527777777777778e-06, "loss": 0.1325, "step": 23 }, { "epoch": 0.0050303919513728776, "grad_norm": 0.11752472072839737, "learning_rate": 1.5972222222222221e-06, "loss": 0.1354, "step": 24 }, { "epoch": 0.005239991616013414, "grad_norm": 0.1533581018447876, "learning_rate": 1.6666666666666667e-06, "loss": 0.1342, "step": 25 }, { "epoch": 0.005449591280653951, "grad_norm": 0.14854849874973297, "learning_rate": 1.7361111111111112e-06, "loss": 0.1397, "step": 26 }, { "epoch": 0.005659190945294487, "grad_norm": 0.1156805008649826, "learning_rate": 1.8055555555555557e-06, "loss": 0.1357, "step": 27 }, { "epoch": 0.005868790609935024, "grad_norm": 0.11853114515542984, "learning_rate": 1.8750000000000003e-06, "loss": 0.1258, "step": 28 }, { "epoch": 0.0060783902745755606, "grad_norm": 0.14961375296115875, "learning_rate": 1.944444444444445e-06, "loss": 0.1292, "step": 29 }, { "epoch": 0.006287989939216097, "grad_norm": 0.10973469167947769, "learning_rate": 2.0138888888888893e-06, "loss": 0.1403, "step": 30 }, { "epoch": 0.006497589603856634, "grad_norm": 0.10710575431585312, "learning_rate": 2.0833333333333334e-06, "loss": 0.1435, "step": 31 }, { "epoch": 0.00670718926849717, "grad_norm": 0.12166962772607803, "learning_rate": 2.152777777777778e-06, "loss": 0.1351, "step": 32 }, { "epoch": 0.006916788933137707, "grad_norm": 0.10747282952070236, "learning_rate": 2.222222222222222e-06, "loss": 0.1308, "step": 33 }, { "epoch": 0.0071263885977782436, "grad_norm": 0.10978581756353378, "learning_rate": 2.2916666666666666e-06, "loss": 0.1303, "step": 34 }, { "epoch": 0.00733598826241878, "grad_norm": 0.1297036111354828, "learning_rate": 2.361111111111111e-06, "loss": 0.1341, "step": 35 }, { "epoch": 0.007545587927059317, "grad_norm": 0.10622533410787582, "learning_rate": 2.4305555555555557e-06, "loss": 0.1305, "step": 36 }, { "epoch": 0.007755187591699853, "grad_norm": 0.12965598702430725, "learning_rate": 2.5e-06, "loss": 0.1179, "step": 37 }, { "epoch": 0.007964787256340389, "grad_norm": 0.08929996192455292, "learning_rate": 2.5694444444444443e-06, "loss": 0.1246, "step": 38 }, { "epoch": 0.008174386920980926, "grad_norm": 0.1213381290435791, "learning_rate": 2.6388888888888893e-06, "loss": 0.135, "step": 39 }, { "epoch": 0.008383986585621462, "grad_norm": 0.13696981966495514, "learning_rate": 2.7083333333333334e-06, "loss": 0.1359, "step": 40 }, { "epoch": 0.008593586250261999, "grad_norm": 0.10106691718101501, "learning_rate": 2.7777777777777783e-06, "loss": 0.1308, "step": 41 }, { "epoch": 0.008803185914902536, "grad_norm": 0.16903014481067657, "learning_rate": 2.8472222222222224e-06, "loss": 0.1243, "step": 42 }, { "epoch": 0.009012785579543072, "grad_norm": 0.26333266496658325, "learning_rate": 2.916666666666667e-06, "loss": 0.1269, "step": 43 }, { "epoch": 0.009222385244183609, "grad_norm": 0.5060687065124512, "learning_rate": 2.986111111111111e-06, "loss": 0.1267, "step": 44 }, { "epoch": 0.009431984908824145, "grad_norm": 0.37180081009864807, "learning_rate": 3.055555555555556e-06, "loss": 0.1359, "step": 45 }, { "epoch": 0.009641584573464682, "grad_norm": 0.37329721450805664, "learning_rate": 3.125e-06, "loss": 0.1266, "step": 46 }, { "epoch": 0.009851184238105219, "grad_norm": 0.9444931745529175, "learning_rate": 3.1944444444444443e-06, "loss": 0.1265, "step": 47 }, { "epoch": 0.010060783902745755, "grad_norm": 0.3645467460155487, "learning_rate": 3.2638888888888892e-06, "loss": 0.1252, "step": 48 }, { "epoch": 0.010270383567386292, "grad_norm": 0.35541367530822754, "learning_rate": 3.3333333333333333e-06, "loss": 0.1315, "step": 49 }, { "epoch": 0.010479983232026828, "grad_norm": 0.26033562421798706, "learning_rate": 3.4027777777777783e-06, "loss": 0.1319, "step": 50 }, { "epoch": 0.010689582896667365, "grad_norm": 0.43073058128356934, "learning_rate": 3.4722222222222224e-06, "loss": 0.1278, "step": 51 }, { "epoch": 0.010899182561307902, "grad_norm": 0.3041709363460541, "learning_rate": 3.5416666666666673e-06, "loss": 0.1283, "step": 52 }, { "epoch": 0.011108782225948438, "grad_norm": 0.3577912747859955, "learning_rate": 3.6111111111111115e-06, "loss": 0.1314, "step": 53 }, { "epoch": 0.011318381890588975, "grad_norm": 0.25406020879745483, "learning_rate": 3.680555555555556e-06, "loss": 0.1303, "step": 54 }, { "epoch": 0.011527981555229511, "grad_norm": 0.32610198855400085, "learning_rate": 3.7500000000000005e-06, "loss": 0.1254, "step": 55 }, { "epoch": 0.011737581219870048, "grad_norm": 0.2049138844013214, "learning_rate": 3.819444444444444e-06, "loss": 0.1319, "step": 56 }, { "epoch": 0.011947180884510585, "grad_norm": 0.29326915740966797, "learning_rate": 3.88888888888889e-06, "loss": 0.1192, "step": 57 }, { "epoch": 0.012156780549151121, "grad_norm": 0.2198316603899002, "learning_rate": 3.958333333333333e-06, "loss": 0.1177, "step": 58 }, { "epoch": 0.012366380213791658, "grad_norm": 0.20488092303276062, "learning_rate": 4.027777777777779e-06, "loss": 0.1266, "step": 59 }, { "epoch": 0.012575979878432194, "grad_norm": 0.22538422048091888, "learning_rate": 4.097222222222222e-06, "loss": 0.1168, "step": 60 }, { "epoch": 0.012785579543072731, "grad_norm": 0.2462712526321411, "learning_rate": 4.166666666666667e-06, "loss": 0.1159, "step": 61 }, { "epoch": 0.012995179207713268, "grad_norm": 0.17663376033306122, "learning_rate": 4.236111111111111e-06, "loss": 0.1273, "step": 62 }, { "epoch": 0.013204778872353804, "grad_norm": 0.16204069554805756, "learning_rate": 4.305555555555556e-06, "loss": 0.1134, "step": 63 }, { "epoch": 0.01341437853699434, "grad_norm": 0.2351599931716919, "learning_rate": 4.3750000000000005e-06, "loss": 0.1272, "step": 64 }, { "epoch": 0.013623978201634877, "grad_norm": 0.22874589264392853, "learning_rate": 4.444444444444444e-06, "loss": 0.1286, "step": 65 }, { "epoch": 0.013833577866275414, "grad_norm": 0.25400885939598083, "learning_rate": 4.5138888888888895e-06, "loss": 0.1205, "step": 66 }, { "epoch": 0.01404317753091595, "grad_norm": 0.07518032938241959, "learning_rate": 4.583333333333333e-06, "loss": 0.1247, "step": 67 }, { "epoch": 0.014252777195556487, "grad_norm": 0.2462567836046219, "learning_rate": 4.652777777777779e-06, "loss": 0.1212, "step": 68 }, { "epoch": 0.014462376860197024, "grad_norm": 0.3761221468448639, "learning_rate": 4.722222222222222e-06, "loss": 0.1161, "step": 69 }, { "epoch": 0.01467197652483756, "grad_norm": 0.3552791476249695, "learning_rate": 4.791666666666668e-06, "loss": 0.1275, "step": 70 }, { "epoch": 0.014881576189478097, "grad_norm": 0.2262704074382782, "learning_rate": 4.861111111111111e-06, "loss": 0.1217, "step": 71 }, { "epoch": 0.015091175854118634, "grad_norm": 1.2746901512145996, "learning_rate": 4.930555555555556e-06, "loss": 0.1272, "step": 72 }, { "epoch": 0.01530077551875917, "grad_norm": 0.47996070981025696, "learning_rate": 5e-06, "loss": 0.1326, "step": 73 }, { "epoch": 0.015510375183399707, "grad_norm": 0.591677725315094, "learning_rate": 5.069444444444445e-06, "loss": 0.1359, "step": 74 }, { "epoch": 0.015719974848040243, "grad_norm": 0.4169331192970276, "learning_rate": 5.138888888888889e-06, "loss": 0.1353, "step": 75 }, { "epoch": 0.015929574512680778, "grad_norm": 0.2262561172246933, "learning_rate": 5.208333333333334e-06, "loss": 0.1318, "step": 76 }, { "epoch": 0.016139174177321317, "grad_norm": 0.42291682958602905, "learning_rate": 5.2777777777777785e-06, "loss": 0.1287, "step": 77 }, { "epoch": 0.01634877384196185, "grad_norm": 0.36955782771110535, "learning_rate": 5.347222222222222e-06, "loss": 0.1348, "step": 78 }, { "epoch": 0.01655837350660239, "grad_norm": 0.286143034696579, "learning_rate": 5.416666666666667e-06, "loss": 0.1284, "step": 79 }, { "epoch": 0.016767973171242925, "grad_norm": 0.3029952347278595, "learning_rate": 5.486111111111112e-06, "loss": 0.1357, "step": 80 }, { "epoch": 0.016977572835883463, "grad_norm": 0.24525821208953857, "learning_rate": 5.555555555555557e-06, "loss": 0.1263, "step": 81 }, { "epoch": 0.017187172500523998, "grad_norm": 0.3474200665950775, "learning_rate": 5.625e-06, "loss": 0.133, "step": 82 }, { "epoch": 0.017396772165164536, "grad_norm": 0.2455555945634842, "learning_rate": 5.694444444444445e-06, "loss": 0.1292, "step": 83 }, { "epoch": 0.01760637182980507, "grad_norm": 0.26876527070999146, "learning_rate": 5.7638888888888886e-06, "loss": 0.1327, "step": 84 }, { "epoch": 0.01781597149444561, "grad_norm": 0.27033689618110657, "learning_rate": 5.833333333333334e-06, "loss": 0.1282, "step": 85 }, { "epoch": 0.018025571159086144, "grad_norm": 0.19568723440170288, "learning_rate": 5.9027777777777785e-06, "loss": 0.1258, "step": 86 }, { "epoch": 0.018235170823726683, "grad_norm": 0.30962151288986206, "learning_rate": 5.972222222222222e-06, "loss": 0.1259, "step": 87 }, { "epoch": 0.018444770488367217, "grad_norm": 0.22522631287574768, "learning_rate": 6.041666666666667e-06, "loss": 0.1231, "step": 88 }, { "epoch": 0.018654370153007756, "grad_norm": 0.3179832398891449, "learning_rate": 6.111111111111112e-06, "loss": 0.118, "step": 89 }, { "epoch": 0.01886396981764829, "grad_norm": 0.34084218740463257, "learning_rate": 6.180555555555557e-06, "loss": 0.1219, "step": 90 }, { "epoch": 0.01907356948228883, "grad_norm": 0.32963189482688904, "learning_rate": 6.25e-06, "loss": 0.1245, "step": 91 }, { "epoch": 0.019283169146929364, "grad_norm": 0.45660048723220825, "learning_rate": 6.319444444444445e-06, "loss": 0.1265, "step": 92 }, { "epoch": 0.019492768811569902, "grad_norm": 0.6942883133888245, "learning_rate": 6.3888888888888885e-06, "loss": 0.1288, "step": 93 }, { "epoch": 0.019702368476210437, "grad_norm": 0.7902718782424927, "learning_rate": 6.458333333333334e-06, "loss": 0.129, "step": 94 }, { "epoch": 0.019911968140850975, "grad_norm": 0.4735928773880005, "learning_rate": 6.5277777777777784e-06, "loss": 0.1187, "step": 95 }, { "epoch": 0.02012156780549151, "grad_norm": 0.22121404111385345, "learning_rate": 6.597222222222223e-06, "loss": 0.1236, "step": 96 }, { "epoch": 0.02033116747013205, "grad_norm": 0.515511691570282, "learning_rate": 6.666666666666667e-06, "loss": 0.1235, "step": 97 }, { "epoch": 0.020540767134772583, "grad_norm": 0.4277538061141968, "learning_rate": 6.736111111111112e-06, "loss": 0.1242, "step": 98 }, { "epoch": 0.020750366799413122, "grad_norm": 0.17368359863758087, "learning_rate": 6.8055555555555566e-06, "loss": 0.1263, "step": 99 }, { "epoch": 0.020959966464053657, "grad_norm": 0.37945282459259033, "learning_rate": 6.875e-06, "loss": 0.1212, "step": 100 }, { "epoch": 0.021169566128694195, "grad_norm": 0.366905152797699, "learning_rate": 6.944444444444445e-06, "loss": 0.1252, "step": 101 }, { "epoch": 0.02137916579333473, "grad_norm": 0.17307527363300323, "learning_rate": 7.013888888888889e-06, "loss": 0.1171, "step": 102 }, { "epoch": 0.021588765457975268, "grad_norm": 0.374055415391922, "learning_rate": 7.083333333333335e-06, "loss": 0.1113, "step": 103 }, { "epoch": 0.021798365122615803, "grad_norm": 0.3751891255378723, "learning_rate": 7.152777777777778e-06, "loss": 0.1286, "step": 104 }, { "epoch": 0.02200796478725634, "grad_norm": 0.20103880763053894, "learning_rate": 7.222222222222223e-06, "loss": 0.1132, "step": 105 }, { "epoch": 0.022217564451896876, "grad_norm": 0.20718024671077728, "learning_rate": 7.291666666666667e-06, "loss": 0.1152, "step": 106 }, { "epoch": 0.022427164116537415, "grad_norm": 0.30478349328041077, "learning_rate": 7.361111111111112e-06, "loss": 0.1166, "step": 107 }, { "epoch": 0.02263676378117795, "grad_norm": 0.2688567042350769, "learning_rate": 7.4305555555555565e-06, "loss": 0.115, "step": 108 }, { "epoch": 0.022846363445818488, "grad_norm": 0.19921305775642395, "learning_rate": 7.500000000000001e-06, "loss": 0.1181, "step": 109 }, { "epoch": 0.023055963110459023, "grad_norm": 0.34289443492889404, "learning_rate": 7.569444444444445e-06, "loss": 0.1153, "step": 110 }, { "epoch": 0.02326556277509956, "grad_norm": 0.4600578546524048, "learning_rate": 7.638888888888888e-06, "loss": 0.1192, "step": 111 }, { "epoch": 0.023475162439740096, "grad_norm": 0.29753392934799194, "learning_rate": 7.708333333333334e-06, "loss": 0.1153, "step": 112 }, { "epoch": 0.023684762104380634, "grad_norm": 0.19611868262290955, "learning_rate": 7.77777777777778e-06, "loss": 0.1139, "step": 113 }, { "epoch": 0.02389436176902117, "grad_norm": 0.3326936960220337, "learning_rate": 7.847222222222223e-06, "loss": 0.1125, "step": 114 }, { "epoch": 0.024103961433661707, "grad_norm": 0.43471208214759827, "learning_rate": 7.916666666666667e-06, "loss": 0.1144, "step": 115 }, { "epoch": 0.024313561098302242, "grad_norm": 0.419888436794281, "learning_rate": 7.986111111111112e-06, "loss": 0.1153, "step": 116 }, { "epoch": 0.02452316076294278, "grad_norm": 0.1664547622203827, "learning_rate": 8.055555555555557e-06, "loss": 0.1098, "step": 117 }, { "epoch": 0.024732760427583315, "grad_norm": 0.23118549585342407, "learning_rate": 8.125000000000001e-06, "loss": 0.1162, "step": 118 }, { "epoch": 0.024942360092223854, "grad_norm": 0.33760204911231995, "learning_rate": 8.194444444444445e-06, "loss": 0.1115, "step": 119 }, { "epoch": 0.02515195975686439, "grad_norm": 0.25965481996536255, "learning_rate": 8.263888888888888e-06, "loss": 0.1102, "step": 120 }, { "epoch": 0.025361559421504927, "grad_norm": 0.2375420331954956, "learning_rate": 8.333333333333334e-06, "loss": 0.1186, "step": 121 }, { "epoch": 0.025571159086145462, "grad_norm": 0.23151437938213348, "learning_rate": 8.402777777777779e-06, "loss": 0.1088, "step": 122 }, { "epoch": 0.025780758750786, "grad_norm": 0.34959205985069275, "learning_rate": 8.472222222222223e-06, "loss": 0.1084, "step": 123 }, { "epoch": 0.025990358415426535, "grad_norm": 0.3837231695652008, "learning_rate": 8.541666666666666e-06, "loss": 0.1142, "step": 124 }, { "epoch": 0.026199958080067073, "grad_norm": 0.317340612411499, "learning_rate": 8.611111111111112e-06, "loss": 0.1201, "step": 125 }, { "epoch": 0.026409557744707608, "grad_norm": 0.3754269778728485, "learning_rate": 8.680555555555557e-06, "loss": 0.1122, "step": 126 }, { "epoch": 0.026619157409348147, "grad_norm": 0.39836880564689636, "learning_rate": 8.750000000000001e-06, "loss": 0.1105, "step": 127 }, { "epoch": 0.02682875707398868, "grad_norm": 0.458625465631485, "learning_rate": 8.819444444444445e-06, "loss": 0.1169, "step": 128 }, { "epoch": 0.02703835673862922, "grad_norm": 0.34019047021865845, "learning_rate": 8.888888888888888e-06, "loss": 0.1127, "step": 129 }, { "epoch": 0.027247956403269755, "grad_norm": 0.3323642313480377, "learning_rate": 8.958333333333334e-06, "loss": 0.1098, "step": 130 }, { "epoch": 0.027457556067910293, "grad_norm": 0.2268533557653427, "learning_rate": 9.027777777777779e-06, "loss": 0.1095, "step": 131 }, { "epoch": 0.027667155732550828, "grad_norm": 0.4628676176071167, "learning_rate": 9.097222222222223e-06, "loss": 0.1077, "step": 132 }, { "epoch": 0.027876755397191366, "grad_norm": 0.5547027587890625, "learning_rate": 9.166666666666666e-06, "loss": 0.1156, "step": 133 }, { "epoch": 0.0280863550618319, "grad_norm": 0.5809503197669983, "learning_rate": 9.236111111111112e-06, "loss": 0.1168, "step": 134 }, { "epoch": 0.028295954726472436, "grad_norm": 0.497976154088974, "learning_rate": 9.305555555555557e-06, "loss": 0.1161, "step": 135 }, { "epoch": 0.028505554391112974, "grad_norm": 0.67780601978302, "learning_rate": 9.375000000000001e-06, "loss": 0.1201, "step": 136 }, { "epoch": 0.02871515405575351, "grad_norm": 0.6441759467124939, "learning_rate": 9.444444444444445e-06, "loss": 0.1192, "step": 137 }, { "epoch": 0.028924753720394047, "grad_norm": 0.3711428642272949, "learning_rate": 9.51388888888889e-06, "loss": 0.1195, "step": 138 }, { "epoch": 0.029134353385034582, "grad_norm": 0.3569689393043518, "learning_rate": 9.583333333333335e-06, "loss": 0.1118, "step": 139 }, { "epoch": 0.02934395304967512, "grad_norm": 0.3563655912876129, "learning_rate": 9.652777777777779e-06, "loss": 0.1176, "step": 140 }, { "epoch": 0.029553552714315656, "grad_norm": 0.2493436187505722, "learning_rate": 9.722222222222223e-06, "loss": 0.1147, "step": 141 }, { "epoch": 0.029763152378956194, "grad_norm": 0.29483145475387573, "learning_rate": 9.791666666666666e-06, "loss": 0.1153, "step": 142 }, { "epoch": 0.02997275204359673, "grad_norm": 0.2700521647930145, "learning_rate": 9.861111111111112e-06, "loss": 0.1111, "step": 143 }, { "epoch": 0.030182351708237267, "grad_norm": 0.26881495118141174, "learning_rate": 9.930555555555557e-06, "loss": 0.1104, "step": 144 }, { "epoch": 0.030391951372877802, "grad_norm": 0.30803802609443665, "learning_rate": 1e-05, "loss": 0.1122, "step": 145 }, { "epoch": 0.03060155103751834, "grad_norm": 0.31767815351486206, "learning_rate": 9.99999884750052e-06, "loss": 0.1096, "step": 146 }, { "epoch": 0.030811150702158875, "grad_norm": 0.22586488723754883, "learning_rate": 9.99999539000261e-06, "loss": 0.1082, "step": 147 }, { "epoch": 0.031020750366799413, "grad_norm": 0.2010418325662613, "learning_rate": 9.999989627507863e-06, "loss": 0.1123, "step": 148 }, { "epoch": 0.03123035003143995, "grad_norm": 0.2666708528995514, "learning_rate": 9.999981560018935e-06, "loss": 0.1035, "step": 149 }, { "epoch": 0.03143994969608049, "grad_norm": 0.34085071086883545, "learning_rate": 9.999971187539547e-06, "loss": 0.1063, "step": 150 }, { "epoch": 0.03164954936072102, "grad_norm": 0.31965869665145874, "learning_rate": 9.999958510074482e-06, "loss": 0.1095, "step": 151 }, { "epoch": 0.031859149025361556, "grad_norm": 0.26403307914733887, "learning_rate": 9.99994352762958e-06, "loss": 0.1017, "step": 152 }, { "epoch": 0.0320687486900021, "grad_norm": 0.17258979380130768, "learning_rate": 9.999926240211752e-06, "loss": 0.1045, "step": 153 }, { "epoch": 0.03227834835464263, "grad_norm": 0.2114904820919037, "learning_rate": 9.999906647828966e-06, "loss": 0.1017, "step": 154 }, { "epoch": 0.03248794801928317, "grad_norm": 0.2613145112991333, "learning_rate": 9.999884750490255e-06, "loss": 0.1064, "step": 155 }, { "epoch": 0.0326975476839237, "grad_norm": 0.23586612939834595, "learning_rate": 9.99986054820571e-06, "loss": 0.1097, "step": 156 }, { "epoch": 0.032907147348564245, "grad_norm": 0.24500718712806702, "learning_rate": 9.999834040986491e-06, "loss": 0.102, "step": 157 }, { "epoch": 0.03311674701320478, "grad_norm": 0.36189910769462585, "learning_rate": 9.999805228844818e-06, "loss": 0.1048, "step": 158 }, { "epoch": 0.033326346677845314, "grad_norm": 0.43638429045677185, "learning_rate": 9.999774111793974e-06, "loss": 0.1153, "step": 159 }, { "epoch": 0.03353594634248585, "grad_norm": 0.3592098355293274, "learning_rate": 9.999740689848302e-06, "loss": 0.115, "step": 160 }, { "epoch": 0.03374554600712639, "grad_norm": 0.23697715997695923, "learning_rate": 9.999704963023213e-06, "loss": 0.0969, "step": 161 }, { "epoch": 0.033955145671766926, "grad_norm": 0.3014034628868103, "learning_rate": 9.999666931335172e-06, "loss": 0.102, "step": 162 }, { "epoch": 0.03416474533640746, "grad_norm": 0.35838088393211365, "learning_rate": 9.999626594801714e-06, "loss": 0.1007, "step": 163 }, { "epoch": 0.034374345001047996, "grad_norm": 0.2900119125843048, "learning_rate": 9.999583953441436e-06, "loss": 0.0993, "step": 164 }, { "epoch": 0.03458394466568854, "grad_norm": 0.21882066130638123, "learning_rate": 9.999539007273993e-06, "loss": 0.1039, "step": 165 }, { "epoch": 0.03479354433032907, "grad_norm": 0.1922779232263565, "learning_rate": 9.999491756320105e-06, "loss": 0.1049, "step": 166 }, { "epoch": 0.03500314399496961, "grad_norm": 0.2402738481760025, "learning_rate": 9.999442200601559e-06, "loss": 0.1003, "step": 167 }, { "epoch": 0.03521274365961014, "grad_norm": 0.25979745388031006, "learning_rate": 9.999390340141195e-06, "loss": 0.1041, "step": 168 }, { "epoch": 0.035422343324250684, "grad_norm": 0.2431039661169052, "learning_rate": 9.999336174962922e-06, "loss": 0.1008, "step": 169 }, { "epoch": 0.03563194298889122, "grad_norm": 0.22206492722034454, "learning_rate": 9.999279705091711e-06, "loss": 0.1052, "step": 170 }, { "epoch": 0.035841542653531754, "grad_norm": 0.2511206567287445, "learning_rate": 9.999220930553595e-06, "loss": 0.1107, "step": 171 }, { "epoch": 0.03605114231817229, "grad_norm": 0.3174304664134979, "learning_rate": 9.999159851375668e-06, "loss": 0.1022, "step": 172 }, { "epoch": 0.03626074198281283, "grad_norm": 0.4061136841773987, "learning_rate": 9.99909646758609e-06, "loss": 0.101, "step": 173 }, { "epoch": 0.036470341647453365, "grad_norm": 0.47826582193374634, "learning_rate": 9.999030779214076e-06, "loss": 0.1052, "step": 174 }, { "epoch": 0.0366799413120939, "grad_norm": 0.4733898639678955, "learning_rate": 9.998962786289912e-06, "loss": 0.1049, "step": 175 }, { "epoch": 0.036889540976734435, "grad_norm": 0.3916158974170685, "learning_rate": 9.998892488844942e-06, "loss": 0.1058, "step": 176 }, { "epoch": 0.03709914064137498, "grad_norm": 0.25847071409225464, "learning_rate": 9.998819886911574e-06, "loss": 0.0942, "step": 177 }, { "epoch": 0.03730874030601551, "grad_norm": 0.2088022381067276, "learning_rate": 9.998744980523276e-06, "loss": 0.1051, "step": 178 }, { "epoch": 0.037518339970656046, "grad_norm": 0.34360599517822266, "learning_rate": 9.99866776971458e-06, "loss": 0.0934, "step": 179 }, { "epoch": 0.03772793963529658, "grad_norm": 0.4142899513244629, "learning_rate": 9.99858825452108e-06, "loss": 0.0996, "step": 180 }, { "epoch": 0.03793753929993712, "grad_norm": 0.32127901911735535, "learning_rate": 9.998506434979434e-06, "loss": 0.0974, "step": 181 }, { "epoch": 0.03814713896457766, "grad_norm": 0.16188944876194, "learning_rate": 9.998422311127357e-06, "loss": 0.0959, "step": 182 }, { "epoch": 0.03835673862921819, "grad_norm": 0.20421625673770905, "learning_rate": 9.998335883003636e-06, "loss": 0.0971, "step": 183 }, { "epoch": 0.03856633829385873, "grad_norm": 0.29221752285957336, "learning_rate": 9.99824715064811e-06, "loss": 0.1037, "step": 184 }, { "epoch": 0.03877593795849927, "grad_norm": 0.3054715096950531, "learning_rate": 9.998156114101687e-06, "loss": 0.0977, "step": 185 }, { "epoch": 0.038985537623139804, "grad_norm": 0.22702643275260925, "learning_rate": 9.998062773406332e-06, "loss": 0.0999, "step": 186 }, { "epoch": 0.03919513728778034, "grad_norm": 0.1409185230731964, "learning_rate": 9.997967128605078e-06, "loss": 0.0935, "step": 187 }, { "epoch": 0.039404736952420874, "grad_norm": 0.21621251106262207, "learning_rate": 9.997869179742014e-06, "loss": 0.0987, "step": 188 }, { "epoch": 0.039614336617061416, "grad_norm": 0.3209380507469177, "learning_rate": 9.9977689268623e-06, "loss": 0.0975, "step": 189 }, { "epoch": 0.03982393628170195, "grad_norm": 0.3941796123981476, "learning_rate": 9.997666370012145e-06, "loss": 0.0999, "step": 190 }, { "epoch": 0.040033535946342486, "grad_norm": 0.40109798312187195, "learning_rate": 9.997561509238833e-06, "loss": 0.0977, "step": 191 }, { "epoch": 0.04024313561098302, "grad_norm": 0.31651192903518677, "learning_rate": 9.997454344590702e-06, "loss": 0.1025, "step": 192 }, { "epoch": 0.04045273527562356, "grad_norm": 0.20569577813148499, "learning_rate": 9.997344876117157e-06, "loss": 0.0967, "step": 193 }, { "epoch": 0.0406623349402641, "grad_norm": 0.2173171490430832, "learning_rate": 9.997233103868664e-06, "loss": 0.098, "step": 194 }, { "epoch": 0.04087193460490463, "grad_norm": 0.26545464992523193, "learning_rate": 9.997119027896745e-06, "loss": 0.0997, "step": 195 }, { "epoch": 0.04108153426954517, "grad_norm": 0.25196799635887146, "learning_rate": 9.997002648253994e-06, "loss": 0.0936, "step": 196 }, { "epoch": 0.04129113393418571, "grad_norm": 0.18679989874362946, "learning_rate": 9.99688396499406e-06, "loss": 0.0958, "step": 197 }, { "epoch": 0.041500733598826244, "grad_norm": 0.16168518364429474, "learning_rate": 9.996762978171657e-06, "loss": 0.0906, "step": 198 }, { "epoch": 0.04171033326346678, "grad_norm": 0.19161826372146606, "learning_rate": 9.996639687842558e-06, "loss": 0.0974, "step": 199 }, { "epoch": 0.04191993292810731, "grad_norm": 0.18548256158828735, "learning_rate": 9.9965140940636e-06, "loss": 0.0976, "step": 200 }, { "epoch": 0.042129532592747855, "grad_norm": 0.14844392240047455, "learning_rate": 9.996386196892683e-06, "loss": 0.0948, "step": 201 }, { "epoch": 0.04233913225738839, "grad_norm": 0.10816401243209839, "learning_rate": 9.996255996388767e-06, "loss": 0.0927, "step": 202 }, { "epoch": 0.042548731922028925, "grad_norm": 0.14506785571575165, "learning_rate": 9.996123492611875e-06, "loss": 0.1019, "step": 203 }, { "epoch": 0.04275833158666946, "grad_norm": 0.18897125124931335, "learning_rate": 9.99598868562309e-06, "loss": 0.1029, "step": 204 }, { "epoch": 0.042967931251309995, "grad_norm": 0.24414150416851044, "learning_rate": 9.99585157548456e-06, "loss": 0.0895, "step": 205 }, { "epoch": 0.043177530915950536, "grad_norm": 0.34987929463386536, "learning_rate": 9.995712162259489e-06, "loss": 0.0933, "step": 206 }, { "epoch": 0.04338713058059107, "grad_norm": 0.4623091518878937, "learning_rate": 9.995570446012152e-06, "loss": 0.106, "step": 207 }, { "epoch": 0.043596730245231606, "grad_norm": 0.42472946643829346, "learning_rate": 9.995426426807875e-06, "loss": 0.1011, "step": 208 }, { "epoch": 0.04380632990987214, "grad_norm": 0.20523960888385773, "learning_rate": 9.995280104713055e-06, "loss": 0.096, "step": 209 }, { "epoch": 0.04401592957451268, "grad_norm": 0.23212192952632904, "learning_rate": 9.995131479795142e-06, "loss": 0.0945, "step": 210 }, { "epoch": 0.04422552923915322, "grad_norm": 0.40809500217437744, "learning_rate": 9.994980552122655e-06, "loss": 0.0935, "step": 211 }, { "epoch": 0.04443512890379375, "grad_norm": 0.3801667094230652, "learning_rate": 9.99482732176517e-06, "loss": 0.0985, "step": 212 }, { "epoch": 0.04464472856843429, "grad_norm": 0.17014552652835846, "learning_rate": 9.994671788793328e-06, "loss": 0.0951, "step": 213 }, { "epoch": 0.04485432823307483, "grad_norm": 0.31415608525276184, "learning_rate": 9.99451395327883e-06, "loss": 0.0986, "step": 214 }, { "epoch": 0.045063927897715364, "grad_norm": 0.3655521273612976, "learning_rate": 9.994353815294438e-06, "loss": 0.0979, "step": 215 }, { "epoch": 0.0452735275623559, "grad_norm": 0.22098667919635773, "learning_rate": 9.99419137491397e-06, "loss": 0.0965, "step": 216 }, { "epoch": 0.045483127226996434, "grad_norm": 0.2817780673503876, "learning_rate": 9.99402663221232e-06, "loss": 0.0976, "step": 217 }, { "epoch": 0.045692726891636976, "grad_norm": 0.27700185775756836, "learning_rate": 9.993859587265429e-06, "loss": 0.1014, "step": 218 }, { "epoch": 0.04590232655627751, "grad_norm": 0.18445956707000732, "learning_rate": 9.993690240150305e-06, "loss": 0.0954, "step": 219 }, { "epoch": 0.046111926220918045, "grad_norm": 0.2870791554450989, "learning_rate": 9.993518590945017e-06, "loss": 0.0995, "step": 220 }, { "epoch": 0.04632152588555858, "grad_norm": 0.20852920413017273, "learning_rate": 9.993344639728694e-06, "loss": 0.0923, "step": 221 }, { "epoch": 0.04653112555019912, "grad_norm": 0.2063411921262741, "learning_rate": 9.993168386581533e-06, "loss": 0.0903, "step": 222 }, { "epoch": 0.04674072521483966, "grad_norm": 0.2189558446407318, "learning_rate": 9.992989831584781e-06, "loss": 0.0934, "step": 223 }, { "epoch": 0.04695032487948019, "grad_norm": 0.2275790423154831, "learning_rate": 9.992808974820755e-06, "loss": 0.0944, "step": 224 }, { "epoch": 0.04715992454412073, "grad_norm": 0.29324543476104736, "learning_rate": 9.992625816372828e-06, "loss": 0.0966, "step": 225 }, { "epoch": 0.04736952420876127, "grad_norm": 0.24611949920654297, "learning_rate": 9.992440356325437e-06, "loss": 0.0948, "step": 226 }, { "epoch": 0.0475791238734018, "grad_norm": 0.25625893473625183, "learning_rate": 9.992252594764079e-06, "loss": 0.0896, "step": 227 }, { "epoch": 0.04778872353804234, "grad_norm": 0.22875913977622986, "learning_rate": 9.99206253177531e-06, "loss": 0.0887, "step": 228 }, { "epoch": 0.04799832320268287, "grad_norm": 0.2700249254703522, "learning_rate": 9.991870167446751e-06, "loss": 0.0935, "step": 229 }, { "epoch": 0.048207922867323415, "grad_norm": 0.2741994857788086, "learning_rate": 9.991675501867083e-06, "loss": 0.0945, "step": 230 }, { "epoch": 0.04841752253196395, "grad_norm": 0.2627350389957428, "learning_rate": 9.991478535126045e-06, "loss": 0.0937, "step": 231 }, { "epoch": 0.048627122196604484, "grad_norm": 0.2349289357662201, "learning_rate": 9.99127926731444e-06, "loss": 0.091, "step": 232 }, { "epoch": 0.04883672186124502, "grad_norm": 0.19692082703113556, "learning_rate": 9.991077698524128e-06, "loss": 0.0925, "step": 233 }, { "epoch": 0.04904632152588556, "grad_norm": 0.21425145864486694, "learning_rate": 9.990873828848035e-06, "loss": 0.092, "step": 234 }, { "epoch": 0.049255921190526096, "grad_norm": 0.22980082035064697, "learning_rate": 9.990667658380145e-06, "loss": 0.0891, "step": 235 }, { "epoch": 0.04946552085516663, "grad_norm": 0.3201009929180145, "learning_rate": 9.990459187215498e-06, "loss": 0.0925, "step": 236 }, { "epoch": 0.049675120519807166, "grad_norm": 0.366510272026062, "learning_rate": 9.990248415450204e-06, "loss": 0.0991, "step": 237 }, { "epoch": 0.04988472018444771, "grad_norm": 0.3486904203891754, "learning_rate": 9.990035343181426e-06, "loss": 0.0938, "step": 238 }, { "epoch": 0.05009431984908824, "grad_norm": 0.2655981779098511, "learning_rate": 9.989819970507392e-06, "loss": 0.0972, "step": 239 }, { "epoch": 0.05030391951372878, "grad_norm": 0.19759038090705872, "learning_rate": 9.989602297527387e-06, "loss": 0.0894, "step": 240 }, { "epoch": 0.05051351917836931, "grad_norm": 0.2099776715040207, "learning_rate": 9.98938232434176e-06, "loss": 0.0906, "step": 241 }, { "epoch": 0.050723118843009854, "grad_norm": 0.22497335076332092, "learning_rate": 9.98916005105192e-06, "loss": 0.0953, "step": 242 }, { "epoch": 0.05093271850765039, "grad_norm": 0.19926096498966217, "learning_rate": 9.98893547776033e-06, "loss": 0.0873, "step": 243 }, { "epoch": 0.051142318172290924, "grad_norm": 0.19615811109542847, "learning_rate": 9.988708604570523e-06, "loss": 0.0868, "step": 244 }, { "epoch": 0.05135191783693146, "grad_norm": 0.2284836620092392, "learning_rate": 9.988479431587085e-06, "loss": 0.0891, "step": 245 }, { "epoch": 0.051561517501572, "grad_norm": 0.24722371995449066, "learning_rate": 9.988247958915665e-06, "loss": 0.091, "step": 246 }, { "epoch": 0.051771117166212535, "grad_norm": 0.2157488912343979, "learning_rate": 9.988014186662971e-06, "loss": 0.0939, "step": 247 }, { "epoch": 0.05198071683085307, "grad_norm": 0.19561699032783508, "learning_rate": 9.987778114936775e-06, "loss": 0.0886, "step": 248 }, { "epoch": 0.052190316495493605, "grad_norm": 0.20849882066249847, "learning_rate": 9.987539743845902e-06, "loss": 0.09, "step": 249 }, { "epoch": 0.05239991616013415, "grad_norm": 0.23204147815704346, "learning_rate": 9.987299073500245e-06, "loss": 0.0882, "step": 250 }, { "epoch": 0.05260951582477468, "grad_norm": 0.2583141028881073, "learning_rate": 9.98705610401075e-06, "loss": 0.0949, "step": 251 }, { "epoch": 0.052819115489415217, "grad_norm": 0.27133336663246155, "learning_rate": 9.986810835489426e-06, "loss": 0.0914, "step": 252 }, { "epoch": 0.05302871515405575, "grad_norm": 0.2617621123790741, "learning_rate": 9.986563268049345e-06, "loss": 0.0862, "step": 253 }, { "epoch": 0.05323831481869629, "grad_norm": 0.2514387369155884, "learning_rate": 9.98631340180463e-06, "loss": 0.093, "step": 254 }, { "epoch": 0.05344791448333683, "grad_norm": 0.22442054748535156, "learning_rate": 9.986061236870478e-06, "loss": 0.0963, "step": 255 }, { "epoch": 0.05365751414797736, "grad_norm": 0.16423439979553223, "learning_rate": 9.985806773363127e-06, "loss": 0.0916, "step": 256 }, { "epoch": 0.0538671138126179, "grad_norm": 0.19374483823776245, "learning_rate": 9.985550011399889e-06, "loss": 0.088, "step": 257 }, { "epoch": 0.05407671347725844, "grad_norm": 0.28776028752326965, "learning_rate": 9.985290951099134e-06, "loss": 0.0912, "step": 258 }, { "epoch": 0.054286313141898974, "grad_norm": 0.3693079948425293, "learning_rate": 9.985029592580284e-06, "loss": 0.0912, "step": 259 }, { "epoch": 0.05449591280653951, "grad_norm": 0.4039154052734375, "learning_rate": 9.984765935963826e-06, "loss": 0.0935, "step": 260 }, { "epoch": 0.054705512471180044, "grad_norm": 0.3492220640182495, "learning_rate": 9.98449998137131e-06, "loss": 0.0939, "step": 261 }, { "epoch": 0.054915112135820586, "grad_norm": 0.21770621836185455, "learning_rate": 9.984231728925338e-06, "loss": 0.0911, "step": 262 }, { "epoch": 0.05512471180046112, "grad_norm": 0.17481771111488342, "learning_rate": 9.983961178749573e-06, "loss": 0.0894, "step": 263 }, { "epoch": 0.055334311465101656, "grad_norm": 0.23308181762695312, "learning_rate": 9.98368833096874e-06, "loss": 0.0929, "step": 264 }, { "epoch": 0.05554391112974219, "grad_norm": 0.2207728773355484, "learning_rate": 9.983413185708622e-06, "loss": 0.0912, "step": 265 }, { "epoch": 0.05575351079438273, "grad_norm": 0.2060740441083908, "learning_rate": 9.98313574309606e-06, "loss": 0.0877, "step": 266 }, { "epoch": 0.05596311045902327, "grad_norm": 0.22956304252147675, "learning_rate": 9.982856003258954e-06, "loss": 0.0892, "step": 267 }, { "epoch": 0.0561727101236638, "grad_norm": 0.22290349006652832, "learning_rate": 9.982573966326268e-06, "loss": 0.0882, "step": 268 }, { "epoch": 0.05638230978830434, "grad_norm": 0.20171046257019043, "learning_rate": 9.982289632428017e-06, "loss": 0.0891, "step": 269 }, { "epoch": 0.05659190945294487, "grad_norm": 0.2069445252418518, "learning_rate": 9.982003001695282e-06, "loss": 0.0881, "step": 270 }, { "epoch": 0.056801509117585414, "grad_norm": 0.206475630402565, "learning_rate": 9.981714074260196e-06, "loss": 0.0902, "step": 271 }, { "epoch": 0.05701110878222595, "grad_norm": 0.19828340411186218, "learning_rate": 9.98142285025596e-06, "loss": 0.0878, "step": 272 }, { "epoch": 0.05722070844686648, "grad_norm": 0.18767406046390533, "learning_rate": 9.981129329816821e-06, "loss": 0.0905, "step": 273 }, { "epoch": 0.05743030811150702, "grad_norm": 0.1480463147163391, "learning_rate": 9.980833513078097e-06, "loss": 0.089, "step": 274 }, { "epoch": 0.05763990777614756, "grad_norm": 0.12146848440170288, "learning_rate": 9.980535400176158e-06, "loss": 0.0853, "step": 275 }, { "epoch": 0.057849507440788095, "grad_norm": 0.17080923914909363, "learning_rate": 9.980234991248434e-06, "loss": 0.0851, "step": 276 }, { "epoch": 0.05805910710542863, "grad_norm": 0.20135587453842163, "learning_rate": 9.979932286433414e-06, "loss": 0.0851, "step": 277 }, { "epoch": 0.058268706770069165, "grad_norm": 0.193786159157753, "learning_rate": 9.979627285870644e-06, "loss": 0.0844, "step": 278 }, { "epoch": 0.058478306434709706, "grad_norm": 0.17661581933498383, "learning_rate": 9.979319989700729e-06, "loss": 0.0899, "step": 279 }, { "epoch": 0.05868790609935024, "grad_norm": 0.1813191920518875, "learning_rate": 9.979010398065334e-06, "loss": 0.0852, "step": 280 }, { "epoch": 0.058897505763990776, "grad_norm": 0.20303796231746674, "learning_rate": 9.97869851110718e-06, "loss": 0.0912, "step": 281 }, { "epoch": 0.05910710542863131, "grad_norm": 0.1993362307548523, "learning_rate": 9.978384328970045e-06, "loss": 0.0903, "step": 282 }, { "epoch": 0.05931670509327185, "grad_norm": 0.18841809034347534, "learning_rate": 9.978067851798771e-06, "loss": 0.0918, "step": 283 }, { "epoch": 0.05952630475791239, "grad_norm": 0.22483417391777039, "learning_rate": 9.97774907973925e-06, "loss": 0.0929, "step": 284 }, { "epoch": 0.05973590442255292, "grad_norm": 0.3062545657157898, "learning_rate": 9.977428012938437e-06, "loss": 0.0889, "step": 285 }, { "epoch": 0.05994550408719346, "grad_norm": 0.3955790400505066, "learning_rate": 9.977104651544342e-06, "loss": 0.0917, "step": 286 }, { "epoch": 0.060155103751834, "grad_norm": 0.4466722309589386, "learning_rate": 9.97677899570604e-06, "loss": 0.091, "step": 287 }, { "epoch": 0.060364703416474534, "grad_norm": 0.3777875304222107, "learning_rate": 9.976451045573653e-06, "loss": 0.0872, "step": 288 }, { "epoch": 0.06057430308111507, "grad_norm": 0.21344004571437836, "learning_rate": 9.976120801298368e-06, "loss": 0.0874, "step": 289 }, { "epoch": 0.060783902745755604, "grad_norm": 0.2490052580833435, "learning_rate": 9.975788263032427e-06, "loss": 0.0873, "step": 290 }, { "epoch": 0.060993502410396146, "grad_norm": 0.3740801513195038, "learning_rate": 9.97545343092913e-06, "loss": 0.0905, "step": 291 }, { "epoch": 0.06120310207503668, "grad_norm": 0.356458842754364, "learning_rate": 9.975116305142836e-06, "loss": 0.0872, "step": 292 }, { "epoch": 0.061412701739677215, "grad_norm": 0.2755817472934723, "learning_rate": 9.974776885828958e-06, "loss": 0.0902, "step": 293 }, { "epoch": 0.06162230140431775, "grad_norm": 0.222996786236763, "learning_rate": 9.974435173143968e-06, "loss": 0.0848, "step": 294 }, { "epoch": 0.06183190106895829, "grad_norm": 0.24334965646266937, "learning_rate": 9.974091167245397e-06, "loss": 0.0915, "step": 295 }, { "epoch": 0.06204150073359883, "grad_norm": 0.25788915157318115, "learning_rate": 9.973744868291832e-06, "loss": 0.0894, "step": 296 }, { "epoch": 0.06225110039823936, "grad_norm": 0.22377200424671173, "learning_rate": 9.973396276442917e-06, "loss": 0.0896, "step": 297 }, { "epoch": 0.0624607000628799, "grad_norm": 0.23889151215553284, "learning_rate": 9.973045391859348e-06, "loss": 0.0859, "step": 298 }, { "epoch": 0.06267029972752043, "grad_norm": 0.18626217544078827, "learning_rate": 9.97269221470289e-06, "loss": 0.0922, "step": 299 }, { "epoch": 0.06287989939216097, "grad_norm": 0.14671076834201813, "learning_rate": 9.97233674513635e-06, "loss": 0.0922, "step": 300 }, { "epoch": 0.06308949905680152, "grad_norm": 0.21669505536556244, "learning_rate": 9.971978983323606e-06, "loss": 0.084, "step": 301 }, { "epoch": 0.06329909872144204, "grad_norm": 0.2512351870536804, "learning_rate": 9.971618929429584e-06, "loss": 0.0917, "step": 302 }, { "epoch": 0.06350869838608258, "grad_norm": 0.28498363494873047, "learning_rate": 9.971256583620268e-06, "loss": 0.0854, "step": 303 }, { "epoch": 0.06371829805072311, "grad_norm": 0.2733207643032074, "learning_rate": 9.970891946062698e-06, "loss": 0.0867, "step": 304 }, { "epoch": 0.06392789771536365, "grad_norm": 0.19943512976169586, "learning_rate": 9.970525016924974e-06, "loss": 0.0913, "step": 305 }, { "epoch": 0.0641374973800042, "grad_norm": 0.16102051734924316, "learning_rate": 9.97015579637625e-06, "loss": 0.0878, "step": 306 }, { "epoch": 0.06434709704464472, "grad_norm": 0.19645822048187256, "learning_rate": 9.969784284586736e-06, "loss": 0.0856, "step": 307 }, { "epoch": 0.06455669670928527, "grad_norm": 0.2531105875968933, "learning_rate": 9.9694104817277e-06, "loss": 0.0897, "step": 308 }, { "epoch": 0.06476629637392581, "grad_norm": 0.24819032847881317, "learning_rate": 9.969034387971463e-06, "loss": 0.0854, "step": 309 }, { "epoch": 0.06497589603856634, "grad_norm": 0.23713769018650055, "learning_rate": 9.968656003491407e-06, "loss": 0.0861, "step": 310 }, { "epoch": 0.06518549570320688, "grad_norm": 0.21117182075977325, "learning_rate": 9.968275328461964e-06, "loss": 0.085, "step": 311 }, { "epoch": 0.0653950953678474, "grad_norm": 0.16627727448940277, "learning_rate": 9.967892363058626e-06, "loss": 0.0854, "step": 312 }, { "epoch": 0.06560469503248795, "grad_norm": 0.22320255637168884, "learning_rate": 9.967507107457942e-06, "loss": 0.085, "step": 313 }, { "epoch": 0.06581429469712849, "grad_norm": 0.30917251110076904, "learning_rate": 9.967119561837513e-06, "loss": 0.0909, "step": 314 }, { "epoch": 0.06602389436176902, "grad_norm": 0.3397315740585327, "learning_rate": 9.966729726375997e-06, "loss": 0.0938, "step": 315 }, { "epoch": 0.06623349402640956, "grad_norm": 0.3161088228225708, "learning_rate": 9.96633760125311e-06, "loss": 0.0945, "step": 316 }, { "epoch": 0.0664430936910501, "grad_norm": 0.2754218876361847, "learning_rate": 9.965943186649619e-06, "loss": 0.088, "step": 317 }, { "epoch": 0.06665269335569063, "grad_norm": 0.2540167272090912, "learning_rate": 9.965546482747352e-06, "loss": 0.0835, "step": 318 }, { "epoch": 0.06686229302033117, "grad_norm": 0.24167752265930176, "learning_rate": 9.965147489729187e-06, "loss": 0.0872, "step": 319 }, { "epoch": 0.0670718926849717, "grad_norm": 0.19365692138671875, "learning_rate": 9.96474620777906e-06, "loss": 0.0865, "step": 320 }, { "epoch": 0.06728149234961224, "grad_norm": 0.13701936602592468, "learning_rate": 9.964342637081962e-06, "loss": 0.0913, "step": 321 }, { "epoch": 0.06749109201425278, "grad_norm": 0.2027350515127182, "learning_rate": 9.963936777823941e-06, "loss": 0.0852, "step": 322 }, { "epoch": 0.06770069167889331, "grad_norm": 0.26346316933631897, "learning_rate": 9.963528630192098e-06, "loss": 0.086, "step": 323 }, { "epoch": 0.06791029134353385, "grad_norm": 0.24423374235630035, "learning_rate": 9.963118194374585e-06, "loss": 0.0885, "step": 324 }, { "epoch": 0.0681198910081744, "grad_norm": 0.21973365545272827, "learning_rate": 9.962705470560616e-06, "loss": 0.0876, "step": 325 }, { "epoch": 0.06832949067281492, "grad_norm": 0.23790043592453003, "learning_rate": 9.962290458940456e-06, "loss": 0.0858, "step": 326 }, { "epoch": 0.06853909033745546, "grad_norm": 0.23737956583499908, "learning_rate": 9.961873159705426e-06, "loss": 0.0813, "step": 327 }, { "epoch": 0.06874869000209599, "grad_norm": 0.1772824376821518, "learning_rate": 9.961453573047898e-06, "loss": 0.0837, "step": 328 }, { "epoch": 0.06895828966673653, "grad_norm": 0.14443804323673248, "learning_rate": 9.961031699161305e-06, "loss": 0.0847, "step": 329 }, { "epoch": 0.06916788933137707, "grad_norm": 0.16732539236545563, "learning_rate": 9.960607538240129e-06, "loss": 0.0901, "step": 330 }, { "epoch": 0.0693774889960176, "grad_norm": 0.16969183087348938, "learning_rate": 9.960181090479908e-06, "loss": 0.0868, "step": 331 }, { "epoch": 0.06958708866065814, "grad_norm": 0.17372508347034454, "learning_rate": 9.959752356077234e-06, "loss": 0.085, "step": 332 }, { "epoch": 0.06979668832529869, "grad_norm": 0.18569670617580414, "learning_rate": 9.959321335229754e-06, "loss": 0.0894, "step": 333 }, { "epoch": 0.07000628798993921, "grad_norm": 0.2023860216140747, "learning_rate": 9.95888802813617e-06, "loss": 0.0892, "step": 334 }, { "epoch": 0.07021588765457976, "grad_norm": 0.20857451856136322, "learning_rate": 9.958452434996235e-06, "loss": 0.0826, "step": 335 }, { "epoch": 0.07042548731922028, "grad_norm": 0.23407702147960663, "learning_rate": 9.958014556010757e-06, "loss": 0.0795, "step": 336 }, { "epoch": 0.07063508698386083, "grad_norm": 0.3349778950214386, "learning_rate": 9.957574391381597e-06, "loss": 0.0886, "step": 337 }, { "epoch": 0.07084468664850137, "grad_norm": 0.36891162395477295, "learning_rate": 9.957131941311675e-06, "loss": 0.0923, "step": 338 }, { "epoch": 0.0710542863131419, "grad_norm": 0.2576613128185272, "learning_rate": 9.956687206004955e-06, "loss": 0.0866, "step": 339 }, { "epoch": 0.07126388597778244, "grad_norm": 0.2293458729982376, "learning_rate": 9.956240185666465e-06, "loss": 0.0864, "step": 340 }, { "epoch": 0.07147348564242297, "grad_norm": 0.266307532787323, "learning_rate": 9.955790880502278e-06, "loss": 0.0923, "step": 341 }, { "epoch": 0.07168308530706351, "grad_norm": 0.22257249057292938, "learning_rate": 9.955339290719525e-06, "loss": 0.0858, "step": 342 }, { "epoch": 0.07189268497170405, "grad_norm": 0.25660571455955505, "learning_rate": 9.954885416526388e-06, "loss": 0.09, "step": 343 }, { "epoch": 0.07210228463634458, "grad_norm": 0.24506278336048126, "learning_rate": 9.954429258132102e-06, "loss": 0.0849, "step": 344 }, { "epoch": 0.07231188430098512, "grad_norm": 0.1874029040336609, "learning_rate": 9.953970815746958e-06, "loss": 0.0879, "step": 345 }, { "epoch": 0.07252148396562566, "grad_norm": 0.2142496556043625, "learning_rate": 9.953510089582297e-06, "loss": 0.083, "step": 346 }, { "epoch": 0.07273108363026619, "grad_norm": 0.200823113322258, "learning_rate": 9.953047079850514e-06, "loss": 0.0836, "step": 347 }, { "epoch": 0.07294068329490673, "grad_norm": 0.22651934623718262, "learning_rate": 9.952581786765057e-06, "loss": 0.0872, "step": 348 }, { "epoch": 0.07315028295954726, "grad_norm": 0.2763821482658386, "learning_rate": 9.952114210540423e-06, "loss": 0.0924, "step": 349 }, { "epoch": 0.0733598826241878, "grad_norm": 0.26317474246025085, "learning_rate": 9.951644351392167e-06, "loss": 0.0901, "step": 350 }, { "epoch": 0.07356948228882834, "grad_norm": 0.22677338123321533, "learning_rate": 9.951172209536895e-06, "loss": 0.0854, "step": 351 }, { "epoch": 0.07377908195346887, "grad_norm": 0.23431210219860077, "learning_rate": 9.95069778519226e-06, "loss": 0.087, "step": 352 }, { "epoch": 0.07398868161810941, "grad_norm": 0.23959867656230927, "learning_rate": 9.950221078576977e-06, "loss": 0.0839, "step": 353 }, { "epoch": 0.07419828128274995, "grad_norm": 0.21534642577171326, "learning_rate": 9.949742089910805e-06, "loss": 0.0893, "step": 354 }, { "epoch": 0.07440788094739048, "grad_norm": 0.2186158299446106, "learning_rate": 9.949260819414557e-06, "loss": 0.0807, "step": 355 }, { "epoch": 0.07461748061203102, "grad_norm": 0.23446103930473328, "learning_rate": 9.948777267310099e-06, "loss": 0.0836, "step": 356 }, { "epoch": 0.07482708027667155, "grad_norm": 0.20559126138687134, "learning_rate": 9.948291433820348e-06, "loss": 0.0866, "step": 357 }, { "epoch": 0.07503667994131209, "grad_norm": 0.1898106038570404, "learning_rate": 9.947803319169275e-06, "loss": 0.0869, "step": 358 }, { "epoch": 0.07524627960595263, "grad_norm": 0.2029874622821808, "learning_rate": 9.9473129235819e-06, "loss": 0.0866, "step": 359 }, { "epoch": 0.07545587927059316, "grad_norm": 0.20538607239723206, "learning_rate": 9.946820247284295e-06, "loss": 0.0795, "step": 360 }, { "epoch": 0.0756654789352337, "grad_norm": 0.24543048441410065, "learning_rate": 9.946325290503583e-06, "loss": 0.079, "step": 361 }, { "epoch": 0.07587507859987425, "grad_norm": 0.2604290246963501, "learning_rate": 9.945828053467939e-06, "loss": 0.0871, "step": 362 }, { "epoch": 0.07608467826451477, "grad_norm": 0.249879851937294, "learning_rate": 9.945328536406588e-06, "loss": 0.0826, "step": 363 }, { "epoch": 0.07629427792915532, "grad_norm": 0.23733551800251007, "learning_rate": 9.944826739549812e-06, "loss": 0.0852, "step": 364 }, { "epoch": 0.07650387759379584, "grad_norm": 0.18714891374111176, "learning_rate": 9.944322663128936e-06, "loss": 0.0845, "step": 365 }, { "epoch": 0.07671347725843639, "grad_norm": 0.17555269598960876, "learning_rate": 9.943816307376337e-06, "loss": 0.0801, "step": 366 }, { "epoch": 0.07692307692307693, "grad_norm": 0.20943832397460938, "learning_rate": 9.943307672525448e-06, "loss": 0.0853, "step": 367 }, { "epoch": 0.07713267658771746, "grad_norm": 0.21346049010753632, "learning_rate": 9.94279675881075e-06, "loss": 0.0841, "step": 368 }, { "epoch": 0.077342276252358, "grad_norm": 0.2275484949350357, "learning_rate": 9.942283566467773e-06, "loss": 0.0872, "step": 369 }, { "epoch": 0.07755187591699854, "grad_norm": 0.21712201833724976, "learning_rate": 9.941768095733098e-06, "loss": 0.0833, "step": 370 }, { "epoch": 0.07776147558163907, "grad_norm": 0.19494830071926117, "learning_rate": 9.941250346844358e-06, "loss": 0.0855, "step": 371 }, { "epoch": 0.07797107524627961, "grad_norm": 0.19557230174541473, "learning_rate": 9.940730320040233e-06, "loss": 0.0827, "step": 372 }, { "epoch": 0.07818067491092014, "grad_norm": 0.1738850325345993, "learning_rate": 9.940208015560458e-06, "loss": 0.0811, "step": 373 }, { "epoch": 0.07839027457556068, "grad_norm": 0.19140446186065674, "learning_rate": 9.939683433645813e-06, "loss": 0.0806, "step": 374 }, { "epoch": 0.07859987424020122, "grad_norm": 0.23054589331150055, "learning_rate": 9.939156574538131e-06, "loss": 0.0817, "step": 375 }, { "epoch": 0.07880947390484175, "grad_norm": 0.25932809710502625, "learning_rate": 9.938627438480295e-06, "loss": 0.0801, "step": 376 }, { "epoch": 0.07901907356948229, "grad_norm": 0.24759088456630707, "learning_rate": 9.938096025716235e-06, "loss": 0.0894, "step": 377 }, { "epoch": 0.07922867323412283, "grad_norm": 0.15014545619487762, "learning_rate": 9.937562336490934e-06, "loss": 0.0855, "step": 378 }, { "epoch": 0.07943827289876336, "grad_norm": 0.10033906251192093, "learning_rate": 9.93702637105042e-06, "loss": 0.0854, "step": 379 }, { "epoch": 0.0796478725634039, "grad_norm": 0.1658346801996231, "learning_rate": 9.936488129641777e-06, "loss": 0.0812, "step": 380 }, { "epoch": 0.07985747222804443, "grad_norm": 0.2382059395313263, "learning_rate": 9.935947612513129e-06, "loss": 0.0819, "step": 381 }, { "epoch": 0.08006707189268497, "grad_norm": 0.2596674859523773, "learning_rate": 9.935404819913658e-06, "loss": 0.0849, "step": 382 }, { "epoch": 0.08027667155732551, "grad_norm": 0.19236969947814941, "learning_rate": 9.93485975209359e-06, "loss": 0.0805, "step": 383 }, { "epoch": 0.08048627122196604, "grad_norm": 0.12010635435581207, "learning_rate": 9.934312409304201e-06, "loss": 0.0858, "step": 384 }, { "epoch": 0.08069587088660658, "grad_norm": 0.1619727462530136, "learning_rate": 9.933762791797816e-06, "loss": 0.0829, "step": 385 }, { "epoch": 0.08090547055124712, "grad_norm": 0.256807804107666, "learning_rate": 9.93321089982781e-06, "loss": 0.0805, "step": 386 }, { "epoch": 0.08111507021588765, "grad_norm": 0.3097319006919861, "learning_rate": 9.932656733648602e-06, "loss": 0.0834, "step": 387 }, { "epoch": 0.0813246698805282, "grad_norm": 0.3272354006767273, "learning_rate": 9.932100293515667e-06, "loss": 0.0817, "step": 388 }, { "epoch": 0.08153426954516872, "grad_norm": 0.36321696639060974, "learning_rate": 9.931541579685519e-06, "loss": 0.0859, "step": 389 }, { "epoch": 0.08174386920980926, "grad_norm": 0.40557923913002014, "learning_rate": 9.930980592415728e-06, "loss": 0.0852, "step": 390 }, { "epoch": 0.0819534688744498, "grad_norm": 0.3595048785209656, "learning_rate": 9.93041733196491e-06, "loss": 0.0873, "step": 391 }, { "epoch": 0.08216306853909033, "grad_norm": 0.18180204927921295, "learning_rate": 9.929851798592723e-06, "loss": 0.0863, "step": 392 }, { "epoch": 0.08237266820373088, "grad_norm": 0.3407440781593323, "learning_rate": 9.929283992559882e-06, "loss": 0.0889, "step": 393 }, { "epoch": 0.08258226786837142, "grad_norm": 0.39048606157302856, "learning_rate": 9.928713914128146e-06, "loss": 0.0907, "step": 394 }, { "epoch": 0.08279186753301195, "grad_norm": 0.2129063457250595, "learning_rate": 9.928141563560316e-06, "loss": 0.0911, "step": 395 }, { "epoch": 0.08300146719765249, "grad_norm": 0.3368994891643524, "learning_rate": 9.92756694112025e-06, "loss": 0.09, "step": 396 }, { "epoch": 0.08321106686229301, "grad_norm": 0.3096039891242981, "learning_rate": 9.926990047072849e-06, "loss": 0.0918, "step": 397 }, { "epoch": 0.08342066652693356, "grad_norm": 0.21943970024585724, "learning_rate": 9.92641088168406e-06, "loss": 0.0892, "step": 398 }, { "epoch": 0.0836302661915741, "grad_norm": 0.23184233903884888, "learning_rate": 9.925829445220876e-06, "loss": 0.0848, "step": 399 }, { "epoch": 0.08383986585621463, "grad_norm": 0.22483260929584503, "learning_rate": 9.925245737951342e-06, "loss": 0.0806, "step": 400 }, { "epoch": 0.08404946552085517, "grad_norm": 0.20455621182918549, "learning_rate": 9.924659760144546e-06, "loss": 0.0873, "step": 401 }, { "epoch": 0.08425906518549571, "grad_norm": 0.19038546085357666, "learning_rate": 9.924071512070623e-06, "loss": 0.0832, "step": 402 }, { "epoch": 0.08446866485013624, "grad_norm": 0.1766170859336853, "learning_rate": 9.923480994000756e-06, "loss": 0.0841, "step": 403 }, { "epoch": 0.08467826451477678, "grad_norm": 0.14354343712329865, "learning_rate": 9.922888206207174e-06, "loss": 0.0889, "step": 404 }, { "epoch": 0.08488786417941731, "grad_norm": 0.17357495427131653, "learning_rate": 9.922293148963152e-06, "loss": 0.0795, "step": 405 }, { "epoch": 0.08509746384405785, "grad_norm": 0.16406401991844177, "learning_rate": 9.921695822543009e-06, "loss": 0.0837, "step": 406 }, { "epoch": 0.08530706350869839, "grad_norm": 0.16059234738349915, "learning_rate": 9.921096227222115e-06, "loss": 0.0823, "step": 407 }, { "epoch": 0.08551666317333892, "grad_norm": 0.16859528422355652, "learning_rate": 9.920494363276882e-06, "loss": 0.0842, "step": 408 }, { "epoch": 0.08572626283797946, "grad_norm": 0.11957108229398727, "learning_rate": 9.91989023098477e-06, "loss": 0.0817, "step": 409 }, { "epoch": 0.08593586250261999, "grad_norm": 0.1288835108280182, "learning_rate": 9.919283830624283e-06, "loss": 0.0845, "step": 410 }, { "epoch": 0.08614546216726053, "grad_norm": 0.14702066779136658, "learning_rate": 9.918675162474974e-06, "loss": 0.0813, "step": 411 }, { "epoch": 0.08635506183190107, "grad_norm": 0.13575831055641174, "learning_rate": 9.918064226817435e-06, "loss": 0.0801, "step": 412 }, { "epoch": 0.0865646614965416, "grad_norm": 0.16246341168880463, "learning_rate": 9.91745102393331e-06, "loss": 0.0843, "step": 413 }, { "epoch": 0.08677426116118214, "grad_norm": 0.1807781308889389, "learning_rate": 9.916835554105282e-06, "loss": 0.0809, "step": 414 }, { "epoch": 0.08698386082582268, "grad_norm": 0.18538668751716614, "learning_rate": 9.916217817617087e-06, "loss": 0.083, "step": 415 }, { "epoch": 0.08719346049046321, "grad_norm": 0.19886340200901031, "learning_rate": 9.915597814753498e-06, "loss": 0.078, "step": 416 }, { "epoch": 0.08740306015510375, "grad_norm": 0.20253928005695343, "learning_rate": 9.914975545800339e-06, "loss": 0.0817, "step": 417 }, { "epoch": 0.08761265981974428, "grad_norm": 0.1704034060239792, "learning_rate": 9.914351011044472e-06, "loss": 0.0832, "step": 418 }, { "epoch": 0.08782225948438482, "grad_norm": 0.14112436771392822, "learning_rate": 9.913724210773812e-06, "loss": 0.0819, "step": 419 }, { "epoch": 0.08803185914902537, "grad_norm": 0.11341799050569534, "learning_rate": 9.91309514527731e-06, "loss": 0.0804, "step": 420 }, { "epoch": 0.0882414588136659, "grad_norm": 0.12630800902843475, "learning_rate": 9.912463814844968e-06, "loss": 0.0773, "step": 421 }, { "epoch": 0.08845105847830644, "grad_norm": 0.1604209989309311, "learning_rate": 9.911830219767827e-06, "loss": 0.0821, "step": 422 }, { "epoch": 0.08866065814294698, "grad_norm": 0.1671822965145111, "learning_rate": 9.911194360337975e-06, "loss": 0.083, "step": 423 }, { "epoch": 0.0888702578075875, "grad_norm": 0.14921946823596954, "learning_rate": 9.910556236848543e-06, "loss": 0.0817, "step": 424 }, { "epoch": 0.08907985747222805, "grad_norm": 0.13964901864528656, "learning_rate": 9.909915849593705e-06, "loss": 0.0815, "step": 425 }, { "epoch": 0.08928945713686857, "grad_norm": 0.1505957692861557, "learning_rate": 9.909273198868682e-06, "loss": 0.0837, "step": 426 }, { "epoch": 0.08949905680150912, "grad_norm": 0.15062043070793152, "learning_rate": 9.908628284969732e-06, "loss": 0.08, "step": 427 }, { "epoch": 0.08970865646614966, "grad_norm": 0.15089496970176697, "learning_rate": 9.907981108194165e-06, "loss": 0.0811, "step": 428 }, { "epoch": 0.08991825613079019, "grad_norm": 0.1628749966621399, "learning_rate": 9.907331668840325e-06, "loss": 0.0797, "step": 429 }, { "epoch": 0.09012785579543073, "grad_norm": 0.1800432950258255, "learning_rate": 9.906679967207604e-06, "loss": 0.08, "step": 430 }, { "epoch": 0.09033745546007127, "grad_norm": 0.1852189302444458, "learning_rate": 9.906026003596438e-06, "loss": 0.0866, "step": 431 }, { "epoch": 0.0905470551247118, "grad_norm": 0.16221773624420166, "learning_rate": 9.905369778308304e-06, "loss": 0.0785, "step": 432 }, { "epoch": 0.09075665478935234, "grad_norm": 0.13077011704444885, "learning_rate": 9.904711291645721e-06, "loss": 0.0793, "step": 433 }, { "epoch": 0.09096625445399287, "grad_norm": 0.14190125465393066, "learning_rate": 9.904050543912252e-06, "loss": 0.0774, "step": 434 }, { "epoch": 0.09117585411863341, "grad_norm": 0.1918221116065979, "learning_rate": 9.9033875354125e-06, "loss": 0.086, "step": 435 }, { "epoch": 0.09138545378327395, "grad_norm": 0.24989114701747894, "learning_rate": 9.902722266452112e-06, "loss": 0.0751, "step": 436 }, { "epoch": 0.09159505344791448, "grad_norm": 0.2872444987297058, "learning_rate": 9.902054737337778e-06, "loss": 0.0794, "step": 437 }, { "epoch": 0.09180465311255502, "grad_norm": 0.26268768310546875, "learning_rate": 9.90138494837723e-06, "loss": 0.0783, "step": 438 }, { "epoch": 0.09201425277719556, "grad_norm": 0.19035717844963074, "learning_rate": 9.900712899879237e-06, "loss": 0.0777, "step": 439 }, { "epoch": 0.09222385244183609, "grad_norm": 0.14101530611515045, "learning_rate": 9.900038592153616e-06, "loss": 0.0783, "step": 440 }, { "epoch": 0.09243345210647663, "grad_norm": 0.21867899596691132, "learning_rate": 9.899362025511221e-06, "loss": 0.0799, "step": 441 }, { "epoch": 0.09264305177111716, "grad_norm": 0.3020075857639313, "learning_rate": 9.898683200263951e-06, "loss": 0.0903, "step": 442 }, { "epoch": 0.0928526514357577, "grad_norm": 0.29326069355010986, "learning_rate": 9.898002116724743e-06, "loss": 0.081, "step": 443 }, { "epoch": 0.09306225110039824, "grad_norm": 0.2977989912033081, "learning_rate": 9.897318775207576e-06, "loss": 0.0794, "step": 444 }, { "epoch": 0.09327185076503877, "grad_norm": 0.3779714107513428, "learning_rate": 9.89663317602747e-06, "loss": 0.0853, "step": 445 }, { "epoch": 0.09348145042967931, "grad_norm": 0.3647499680519104, "learning_rate": 9.895945319500488e-06, "loss": 0.0878, "step": 446 }, { "epoch": 0.09369105009431986, "grad_norm": 0.18845055997371674, "learning_rate": 9.895255205943732e-06, "loss": 0.0776, "step": 447 }, { "epoch": 0.09390064975896038, "grad_norm": 0.23743781447410583, "learning_rate": 9.894562835675343e-06, "loss": 0.0822, "step": 448 }, { "epoch": 0.09411024942360093, "grad_norm": 0.32447198033332825, "learning_rate": 9.893868209014502e-06, "loss": 0.0813, "step": 449 }, { "epoch": 0.09431984908824145, "grad_norm": 0.22521555423736572, "learning_rate": 9.893171326281433e-06, "loss": 0.0779, "step": 450 }, { "epoch": 0.094529448752882, "grad_norm": 0.2047656625509262, "learning_rate": 9.8924721877974e-06, "loss": 0.0787, "step": 451 }, { "epoch": 0.09473904841752254, "grad_norm": 0.241439089179039, "learning_rate": 9.891770793884703e-06, "loss": 0.0838, "step": 452 }, { "epoch": 0.09494864808216306, "grad_norm": 0.18499858677387238, "learning_rate": 9.891067144866687e-06, "loss": 0.0806, "step": 453 }, { "epoch": 0.0951582477468036, "grad_norm": 0.19206255674362183, "learning_rate": 9.890361241067734e-06, "loss": 0.084, "step": 454 }, { "epoch": 0.09536784741144415, "grad_norm": 0.21595366299152374, "learning_rate": 9.889653082813264e-06, "loss": 0.0796, "step": 455 }, { "epoch": 0.09557744707608468, "grad_norm": 0.19207443296909332, "learning_rate": 9.888942670429738e-06, "loss": 0.0791, "step": 456 }, { "epoch": 0.09578704674072522, "grad_norm": 0.16599828004837036, "learning_rate": 9.888230004244657e-06, "loss": 0.0806, "step": 457 }, { "epoch": 0.09599664640536575, "grad_norm": 0.1895645707845688, "learning_rate": 9.88751508458656e-06, "loss": 0.0784, "step": 458 }, { "epoch": 0.09620624607000629, "grad_norm": 0.18604148924350739, "learning_rate": 9.886797911785023e-06, "loss": 0.0858, "step": 459 }, { "epoch": 0.09641584573464683, "grad_norm": 0.12998424470424652, "learning_rate": 9.886078486170665e-06, "loss": 0.0792, "step": 460 }, { "epoch": 0.09662544539928736, "grad_norm": 0.16682370007038116, "learning_rate": 9.885356808075139e-06, "loss": 0.0798, "step": 461 }, { "epoch": 0.0968350450639279, "grad_norm": 0.17146462202072144, "learning_rate": 9.884632877831139e-06, "loss": 0.0797, "step": 462 }, { "epoch": 0.09704464472856844, "grad_norm": 0.11582599580287933, "learning_rate": 9.883906695772399e-06, "loss": 0.0812, "step": 463 }, { "epoch": 0.09725424439320897, "grad_norm": 0.1586909145116806, "learning_rate": 9.883178262233684e-06, "loss": 0.0817, "step": 464 }, { "epoch": 0.09746384405784951, "grad_norm": 0.1509367823600769, "learning_rate": 9.882447577550805e-06, "loss": 0.0845, "step": 465 }, { "epoch": 0.09767344372249004, "grad_norm": 0.1255314201116562, "learning_rate": 9.881714642060609e-06, "loss": 0.0789, "step": 466 }, { "epoch": 0.09788304338713058, "grad_norm": 0.17706221342086792, "learning_rate": 9.880979456100974e-06, "loss": 0.0821, "step": 467 }, { "epoch": 0.09809264305177112, "grad_norm": 0.23085959255695343, "learning_rate": 9.880242020010827e-06, "loss": 0.0801, "step": 468 }, { "epoch": 0.09830224271641165, "grad_norm": 0.354623407125473, "learning_rate": 9.87950233413012e-06, "loss": 0.0826, "step": 469 }, { "epoch": 0.09851184238105219, "grad_norm": 0.4140135943889618, "learning_rate": 9.87876039879985e-06, "loss": 0.0901, "step": 470 }, { "epoch": 0.09872144204569272, "grad_norm": 0.289201021194458, "learning_rate": 9.878016214362051e-06, "loss": 0.0836, "step": 471 }, { "epoch": 0.09893104171033326, "grad_norm": 0.16922371089458466, "learning_rate": 9.877269781159791e-06, "loss": 0.0786, "step": 472 }, { "epoch": 0.0991406413749738, "grad_norm": 0.2799324095249176, "learning_rate": 9.876521099537173e-06, "loss": 0.0827, "step": 473 }, { "epoch": 0.09935024103961433, "grad_norm": 0.3039519190788269, "learning_rate": 9.875770169839343e-06, "loss": 0.0852, "step": 474 }, { "epoch": 0.09955984070425487, "grad_norm": 0.19314952194690704, "learning_rate": 9.875016992412476e-06, "loss": 0.0777, "step": 475 }, { "epoch": 0.09976944036889542, "grad_norm": 0.1906445324420929, "learning_rate": 9.87426156760379e-06, "loss": 0.0788, "step": 476 }, { "epoch": 0.09997904003353594, "grad_norm": 0.19903278350830078, "learning_rate": 9.87350389576153e-06, "loss": 0.0836, "step": 477 }, { "epoch": 0.10018863969817648, "grad_norm": 0.2028076946735382, "learning_rate": 9.872743977234992e-06, "loss": 0.0797, "step": 478 }, { "epoch": 0.10039823936281701, "grad_norm": 0.1903727948665619, "learning_rate": 9.871981812374488e-06, "loss": 0.0791, "step": 479 }, { "epoch": 0.10060783902745755, "grad_norm": 0.16171634197235107, "learning_rate": 9.871217401531382e-06, "loss": 0.0819, "step": 480 }, { "epoch": 0.1008174386920981, "grad_norm": 0.18417572975158691, "learning_rate": 9.870450745058066e-06, "loss": 0.0836, "step": 481 }, { "epoch": 0.10102703835673862, "grad_norm": 0.18208323419094086, "learning_rate": 9.869681843307968e-06, "loss": 0.0834, "step": 482 }, { "epoch": 0.10123663802137917, "grad_norm": 0.17092318832874298, "learning_rate": 9.868910696635551e-06, "loss": 0.0791, "step": 483 }, { "epoch": 0.10144623768601971, "grad_norm": 0.13848985731601715, "learning_rate": 9.868137305396317e-06, "loss": 0.0766, "step": 484 }, { "epoch": 0.10165583735066024, "grad_norm": 0.17035254836082458, "learning_rate": 9.867361669946793e-06, "loss": 0.0812, "step": 485 }, { "epoch": 0.10186543701530078, "grad_norm": 0.1731637865304947, "learning_rate": 9.866583790644553e-06, "loss": 0.078, "step": 486 }, { "epoch": 0.1020750366799413, "grad_norm": 0.16337701678276062, "learning_rate": 9.865803667848195e-06, "loss": 0.0807, "step": 487 }, { "epoch": 0.10228463634458185, "grad_norm": 0.16541989147663116, "learning_rate": 9.865021301917358e-06, "loss": 0.0792, "step": 488 }, { "epoch": 0.10249423600922239, "grad_norm": 0.13967937231063843, "learning_rate": 9.864236693212709e-06, "loss": 0.0748, "step": 489 }, { "epoch": 0.10270383567386292, "grad_norm": 0.15016354620456696, "learning_rate": 9.863449842095955e-06, "loss": 0.0794, "step": 490 }, { "epoch": 0.10291343533850346, "grad_norm": 0.18421977758407593, "learning_rate": 9.862660748929835e-06, "loss": 0.0783, "step": 491 }, { "epoch": 0.103123035003144, "grad_norm": 0.21068471670150757, "learning_rate": 9.86186941407812e-06, "loss": 0.0785, "step": 492 }, { "epoch": 0.10333263466778453, "grad_norm": 0.23309852182865143, "learning_rate": 9.861075837905616e-06, "loss": 0.0783, "step": 493 }, { "epoch": 0.10354223433242507, "grad_norm": 0.23478467762470245, "learning_rate": 9.860280020778158e-06, "loss": 0.0806, "step": 494 }, { "epoch": 0.1037518339970656, "grad_norm": 0.19462421536445618, "learning_rate": 9.859481963062623e-06, "loss": 0.0747, "step": 495 }, { "epoch": 0.10396143366170614, "grad_norm": 0.15075665712356567, "learning_rate": 9.85868166512691e-06, "loss": 0.0748, "step": 496 }, { "epoch": 0.10417103332634668, "grad_norm": 0.13068079948425293, "learning_rate": 9.85787912733996e-06, "loss": 0.079, "step": 497 }, { "epoch": 0.10438063299098721, "grad_norm": 0.14314256608486176, "learning_rate": 9.85707435007174e-06, "loss": 0.0805, "step": 498 }, { "epoch": 0.10459023265562775, "grad_norm": 0.18346239626407623, "learning_rate": 9.856267333693255e-06, "loss": 0.0804, "step": 499 }, { "epoch": 0.1047998323202683, "grad_norm": 0.2016962468624115, "learning_rate": 9.855458078576537e-06, "loss": 0.077, "step": 500 }, { "epoch": 0.10500943198490882, "grad_norm": 0.18932455778121948, "learning_rate": 9.854646585094654e-06, "loss": 0.0811, "step": 501 }, { "epoch": 0.10521903164954936, "grad_norm": 0.1751541942358017, "learning_rate": 9.853832853621703e-06, "loss": 0.0776, "step": 502 }, { "epoch": 0.10542863131418989, "grad_norm": 0.18486347794532776, "learning_rate": 9.853016884532814e-06, "loss": 0.077, "step": 503 }, { "epoch": 0.10563823097883043, "grad_norm": 0.20413736999034882, "learning_rate": 9.85219867820415e-06, "loss": 0.0793, "step": 504 }, { "epoch": 0.10584783064347097, "grad_norm": 0.19140803813934326, "learning_rate": 9.851378235012905e-06, "loss": 0.0792, "step": 505 }, { "epoch": 0.1060574303081115, "grad_norm": 0.14764432609081268, "learning_rate": 9.8505555553373e-06, "loss": 0.0826, "step": 506 }, { "epoch": 0.10626702997275204, "grad_norm": 0.1288614273071289, "learning_rate": 9.84973063955659e-06, "loss": 0.0768, "step": 507 }, { "epoch": 0.10647662963739259, "grad_norm": 0.14826104044914246, "learning_rate": 9.848903488051065e-06, "loss": 0.0772, "step": 508 }, { "epoch": 0.10668622930203311, "grad_norm": 0.14957068860530853, "learning_rate": 9.848074101202037e-06, "loss": 0.0755, "step": 509 }, { "epoch": 0.10689582896667366, "grad_norm": 0.11332403123378754, "learning_rate": 9.847242479391857e-06, "loss": 0.0733, "step": 510 }, { "epoch": 0.10710542863131418, "grad_norm": 0.10584530979394913, "learning_rate": 9.8464086230039e-06, "loss": 0.0752, "step": 511 }, { "epoch": 0.10731502829595473, "grad_norm": 0.1642168015241623, "learning_rate": 9.845572532422576e-06, "loss": 0.0752, "step": 512 }, { "epoch": 0.10752462796059527, "grad_norm": 0.22667261958122253, "learning_rate": 9.844734208033318e-06, "loss": 0.0826, "step": 513 }, { "epoch": 0.1077342276252358, "grad_norm": 0.2658557593822479, "learning_rate": 9.843893650222599e-06, "loss": 0.0838, "step": 514 }, { "epoch": 0.10794382728987634, "grad_norm": 0.27352774143218994, "learning_rate": 9.843050859377913e-06, "loss": 0.0832, "step": 515 }, { "epoch": 0.10815342695451688, "grad_norm": 0.24916625022888184, "learning_rate": 9.842205835887785e-06, "loss": 0.0777, "step": 516 }, { "epoch": 0.10836302661915741, "grad_norm": 0.2406039983034134, "learning_rate": 9.841358580141775e-06, "loss": 0.0757, "step": 517 }, { "epoch": 0.10857262628379795, "grad_norm": 0.23990845680236816, "learning_rate": 9.840509092530465e-06, "loss": 0.0778, "step": 518 }, { "epoch": 0.10878222594843848, "grad_norm": 0.21896076202392578, "learning_rate": 9.839657373445468e-06, "loss": 0.0787, "step": 519 }, { "epoch": 0.10899182561307902, "grad_norm": 0.2108394056558609, "learning_rate": 9.838803423279428e-06, "loss": 0.0782, "step": 520 }, { "epoch": 0.10920142527771956, "grad_norm": 0.20889486372470856, "learning_rate": 9.837947242426013e-06, "loss": 0.078, "step": 521 }, { "epoch": 0.10941102494236009, "grad_norm": 0.20450422167778015, "learning_rate": 9.837088831279927e-06, "loss": 0.0837, "step": 522 }, { "epoch": 0.10962062460700063, "grad_norm": 0.19421492516994476, "learning_rate": 9.836228190236892e-06, "loss": 0.0744, "step": 523 }, { "epoch": 0.10983022427164117, "grad_norm": 0.19942732155323029, "learning_rate": 9.835365319693667e-06, "loss": 0.0774, "step": 524 }, { "epoch": 0.1100398239362817, "grad_norm": 0.2001711130142212, "learning_rate": 9.834500220048034e-06, "loss": 0.0787, "step": 525 }, { "epoch": 0.11024942360092224, "grad_norm": 0.1794813573360443, "learning_rate": 9.833632891698801e-06, "loss": 0.0814, "step": 526 }, { "epoch": 0.11045902326556277, "grad_norm": 0.16165196895599365, "learning_rate": 9.832763335045812e-06, "loss": 0.0761, "step": 527 }, { "epoch": 0.11066862293020331, "grad_norm": 0.18171708285808563, "learning_rate": 9.83189155048993e-06, "loss": 0.0809, "step": 528 }, { "epoch": 0.11087822259484385, "grad_norm": 0.1997329592704773, "learning_rate": 9.831017538433045e-06, "loss": 0.0764, "step": 529 }, { "epoch": 0.11108782225948438, "grad_norm": 0.19061146676540375, "learning_rate": 9.83014129927808e-06, "loss": 0.0783, "step": 530 }, { "epoch": 0.11129742192412492, "grad_norm": 0.1783018410205841, "learning_rate": 9.829262833428978e-06, "loss": 0.079, "step": 531 }, { "epoch": 0.11150702158876546, "grad_norm": 0.18371321260929108, "learning_rate": 9.828382141290713e-06, "loss": 0.0769, "step": 532 }, { "epoch": 0.11171662125340599, "grad_norm": 0.16765272617340088, "learning_rate": 9.827499223269285e-06, "loss": 0.0772, "step": 533 }, { "epoch": 0.11192622091804653, "grad_norm": 0.12363309413194656, "learning_rate": 9.826614079771716e-06, "loss": 0.0765, "step": 534 }, { "epoch": 0.11213582058268706, "grad_norm": 0.10953837633132935, "learning_rate": 9.82572671120606e-06, "loss": 0.0817, "step": 535 }, { "epoch": 0.1123454202473276, "grad_norm": 0.14569856226444244, "learning_rate": 9.824837117981392e-06, "loss": 0.0754, "step": 536 }, { "epoch": 0.11255501991196815, "grad_norm": 0.16665959358215332, "learning_rate": 9.823945300507815e-06, "loss": 0.0772, "step": 537 }, { "epoch": 0.11276461957660867, "grad_norm": 0.167064368724823, "learning_rate": 9.823051259196456e-06, "loss": 0.0795, "step": 538 }, { "epoch": 0.11297421924124922, "grad_norm": 0.1980743259191513, "learning_rate": 9.82215499445947e-06, "loss": 0.0751, "step": 539 }, { "epoch": 0.11318381890588974, "grad_norm": 0.2310371845960617, "learning_rate": 9.821256506710032e-06, "loss": 0.0757, "step": 540 }, { "epoch": 0.11339341857053029, "grad_norm": 0.2004477083683014, "learning_rate": 9.820355796362346e-06, "loss": 0.0774, "step": 541 }, { "epoch": 0.11360301823517083, "grad_norm": 0.16151970624923706, "learning_rate": 9.81945286383164e-06, "loss": 0.0766, "step": 542 }, { "epoch": 0.11381261789981136, "grad_norm": 0.2251511514186859, "learning_rate": 9.818547709534163e-06, "loss": 0.0783, "step": 543 }, { "epoch": 0.1140222175644519, "grad_norm": 0.2668745219707489, "learning_rate": 9.817640333887194e-06, "loss": 0.0803, "step": 544 }, { "epoch": 0.11423181722909244, "grad_norm": 0.24963510036468506, "learning_rate": 9.816730737309032e-06, "loss": 0.0782, "step": 545 }, { "epoch": 0.11444141689373297, "grad_norm": 0.2355589121580124, "learning_rate": 9.815818920219e-06, "loss": 0.0758, "step": 546 }, { "epoch": 0.11465101655837351, "grad_norm": 0.20662012696266174, "learning_rate": 9.814904883037445e-06, "loss": 0.0769, "step": 547 }, { "epoch": 0.11486061622301404, "grad_norm": 0.15815681219100952, "learning_rate": 9.813988626185742e-06, "loss": 0.0782, "step": 548 }, { "epoch": 0.11507021588765458, "grad_norm": 0.16817304491996765, "learning_rate": 9.81307015008628e-06, "loss": 0.0769, "step": 549 }, { "epoch": 0.11527981555229512, "grad_norm": 0.19368018209934235, "learning_rate": 9.81214945516248e-06, "loss": 0.0796, "step": 550 }, { "epoch": 0.11548941521693565, "grad_norm": 0.2023620903491974, "learning_rate": 9.81122654183878e-06, "loss": 0.0763, "step": 551 }, { "epoch": 0.11569901488157619, "grad_norm": 0.2068459391593933, "learning_rate": 9.810301410540643e-06, "loss": 0.074, "step": 552 }, { "epoch": 0.11590861454621673, "grad_norm": 0.1989372819662094, "learning_rate": 9.809374061694555e-06, "loss": 0.0758, "step": 553 }, { "epoch": 0.11611821421085726, "grad_norm": 0.17017100751399994, "learning_rate": 9.808444495728024e-06, "loss": 0.0743, "step": 554 }, { "epoch": 0.1163278138754978, "grad_norm": 0.16604629158973694, "learning_rate": 9.80751271306958e-06, "loss": 0.0794, "step": 555 }, { "epoch": 0.11653741354013833, "grad_norm": 0.1727730929851532, "learning_rate": 9.806578714148774e-06, "loss": 0.0776, "step": 556 }, { "epoch": 0.11674701320477887, "grad_norm": 0.18742415308952332, "learning_rate": 9.805642499396177e-06, "loss": 0.0755, "step": 557 }, { "epoch": 0.11695661286941941, "grad_norm": 0.22661146521568298, "learning_rate": 9.804704069243389e-06, "loss": 0.0803, "step": 558 }, { "epoch": 0.11716621253405994, "grad_norm": 0.25402432680130005, "learning_rate": 9.80376342412302e-06, "loss": 0.0761, "step": 559 }, { "epoch": 0.11737581219870048, "grad_norm": 0.2397623509168625, "learning_rate": 9.802820564468712e-06, "loss": 0.0736, "step": 560 }, { "epoch": 0.11758541186334102, "grad_norm": 0.20311333239078522, "learning_rate": 9.801875490715123e-06, "loss": 0.076, "step": 561 }, { "epoch": 0.11779501152798155, "grad_norm": 0.16478881239891052, "learning_rate": 9.800928203297927e-06, "loss": 0.0755, "step": 562 }, { "epoch": 0.1180046111926221, "grad_norm": 0.14853514730930328, "learning_rate": 9.79997870265383e-06, "loss": 0.0707, "step": 563 }, { "epoch": 0.11821421085726262, "grad_norm": 0.17599347233772278, "learning_rate": 9.799026989220543e-06, "loss": 0.0779, "step": 564 }, { "epoch": 0.11842381052190316, "grad_norm": 0.2129105031490326, "learning_rate": 9.798073063436815e-06, "loss": 0.0767, "step": 565 }, { "epoch": 0.1186334101865437, "grad_norm": 0.23926801979541779, "learning_rate": 9.7971169257424e-06, "loss": 0.0771, "step": 566 }, { "epoch": 0.11884300985118423, "grad_norm": 0.2424420565366745, "learning_rate": 9.79615857657808e-06, "loss": 0.0823, "step": 567 }, { "epoch": 0.11905260951582478, "grad_norm": 0.21663707494735718, "learning_rate": 9.795198016385651e-06, "loss": 0.0778, "step": 568 }, { "epoch": 0.11926220918046532, "grad_norm": 0.21969793736934662, "learning_rate": 9.794235245607933e-06, "loss": 0.0756, "step": 569 }, { "epoch": 0.11947180884510585, "grad_norm": 0.26351258158683777, "learning_rate": 9.793270264688763e-06, "loss": 0.0776, "step": 570 }, { "epoch": 0.11968140850974639, "grad_norm": 0.3025349974632263, "learning_rate": 9.792303074072995e-06, "loss": 0.0747, "step": 571 }, { "epoch": 0.11989100817438691, "grad_norm": 0.30396711826324463, "learning_rate": 9.791333674206507e-06, "loss": 0.0796, "step": 572 }, { "epoch": 0.12010060783902746, "grad_norm": 0.23939970135688782, "learning_rate": 9.790362065536189e-06, "loss": 0.0797, "step": 573 }, { "epoch": 0.120310207503668, "grad_norm": 0.1715303361415863, "learning_rate": 9.789388248509957e-06, "loss": 0.075, "step": 574 }, { "epoch": 0.12051980716830853, "grad_norm": 0.16927644610404968, "learning_rate": 9.788412223576734e-06, "loss": 0.0821, "step": 575 }, { "epoch": 0.12072940683294907, "grad_norm": 0.18329590559005737, "learning_rate": 9.787433991186472e-06, "loss": 0.0732, "step": 576 }, { "epoch": 0.12093900649758961, "grad_norm": 0.18802891671657562, "learning_rate": 9.786453551790133e-06, "loss": 0.0783, "step": 577 }, { "epoch": 0.12114860616223014, "grad_norm": 0.1802418977022171, "learning_rate": 9.785470905839703e-06, "loss": 0.0733, "step": 578 }, { "epoch": 0.12135820582687068, "grad_norm": 0.19172324240207672, "learning_rate": 9.784486053788179e-06, "loss": 0.0771, "step": 579 }, { "epoch": 0.12156780549151121, "grad_norm": 0.1751202493906021, "learning_rate": 9.783498996089577e-06, "loss": 0.0773, "step": 580 }, { "epoch": 0.12177740515615175, "grad_norm": 0.12133464962244034, "learning_rate": 9.782509733198932e-06, "loss": 0.0751, "step": 581 }, { "epoch": 0.12198700482079229, "grad_norm": 0.14133097231388092, "learning_rate": 9.781518265572294e-06, "loss": 0.0748, "step": 582 }, { "epoch": 0.12219660448543282, "grad_norm": 0.18668805062770844, "learning_rate": 9.780524593666727e-06, "loss": 0.0736, "step": 583 }, { "epoch": 0.12240620415007336, "grad_norm": 0.18670430779457092, "learning_rate": 9.779528717940317e-06, "loss": 0.0739, "step": 584 }, { "epoch": 0.1226158038147139, "grad_norm": 0.19386443495750427, "learning_rate": 9.77853063885216e-06, "loss": 0.079, "step": 585 }, { "epoch": 0.12282540347935443, "grad_norm": 0.19840072095394135, "learning_rate": 9.77753035686237e-06, "loss": 0.0821, "step": 586 }, { "epoch": 0.12303500314399497, "grad_norm": 0.2034991979598999, "learning_rate": 9.77652787243208e-06, "loss": 0.0744, "step": 587 }, { "epoch": 0.1232446028086355, "grad_norm": 0.22970953583717346, "learning_rate": 9.775523186023432e-06, "loss": 0.0718, "step": 588 }, { "epoch": 0.12345420247327604, "grad_norm": 0.20958104729652405, "learning_rate": 9.774516298099588e-06, "loss": 0.0769, "step": 589 }, { "epoch": 0.12366380213791658, "grad_norm": 0.1604912430047989, "learning_rate": 9.773507209124721e-06, "loss": 0.0713, "step": 590 }, { "epoch": 0.12387340180255711, "grad_norm": 0.16623252630233765, "learning_rate": 9.772495919564022e-06, "loss": 0.0728, "step": 591 }, { "epoch": 0.12408300146719765, "grad_norm": 0.20733430981636047, "learning_rate": 9.771482429883697e-06, "loss": 0.0747, "step": 592 }, { "epoch": 0.1242926011318382, "grad_norm": 0.2416054755449295, "learning_rate": 9.770466740550963e-06, "loss": 0.0795, "step": 593 }, { "epoch": 0.12450220079647872, "grad_norm": 0.21449747681617737, "learning_rate": 9.769448852034051e-06, "loss": 0.0782, "step": 594 }, { "epoch": 0.12471180046111927, "grad_norm": 0.15167087316513062, "learning_rate": 9.768428764802209e-06, "loss": 0.0818, "step": 595 }, { "epoch": 0.1249214001257598, "grad_norm": 0.15075545012950897, "learning_rate": 9.767406479325698e-06, "loss": 0.0739, "step": 596 }, { "epoch": 0.12513099979040035, "grad_norm": 0.1983119398355484, "learning_rate": 9.76638199607579e-06, "loss": 0.076, "step": 597 }, { "epoch": 0.12534059945504086, "grad_norm": 0.18619777262210846, "learning_rate": 9.765355315524772e-06, "loss": 0.0787, "step": 598 }, { "epoch": 0.1255501991196814, "grad_norm": 0.13842901587486267, "learning_rate": 9.764326438145944e-06, "loss": 0.078, "step": 599 }, { "epoch": 0.12575979878432195, "grad_norm": 0.16321855783462524, "learning_rate": 9.763295364413616e-06, "loss": 0.0754, "step": 600 }, { "epoch": 0.1259693984489625, "grad_norm": 0.18347276747226715, "learning_rate": 9.762262094803115e-06, "loss": 0.0757, "step": 601 }, { "epoch": 0.12617899811360303, "grad_norm": 0.17401129007339478, "learning_rate": 9.761226629790777e-06, "loss": 0.0757, "step": 602 }, { "epoch": 0.12638859777824354, "grad_norm": 0.19704696536064148, "learning_rate": 9.760188969853953e-06, "loss": 0.0786, "step": 603 }, { "epoch": 0.1265981974428841, "grad_norm": 0.21989691257476807, "learning_rate": 9.759149115471001e-06, "loss": 0.0737, "step": 604 }, { "epoch": 0.12680779710752463, "grad_norm": 0.19509382545948029, "learning_rate": 9.758107067121298e-06, "loss": 0.0775, "step": 605 }, { "epoch": 0.12701739677216517, "grad_norm": 0.16452129185199738, "learning_rate": 9.757062825285223e-06, "loss": 0.0751, "step": 606 }, { "epoch": 0.1272269964368057, "grad_norm": 0.16775386035442352, "learning_rate": 9.756016390444174e-06, "loss": 0.0785, "step": 607 }, { "epoch": 0.12743659610144623, "grad_norm": 0.1647682636976242, "learning_rate": 9.754967763080558e-06, "loss": 0.0779, "step": 608 }, { "epoch": 0.12764619576608677, "grad_norm": 0.15964777767658234, "learning_rate": 9.75391694367779e-06, "loss": 0.0757, "step": 609 }, { "epoch": 0.1278557954307273, "grad_norm": 0.1809694468975067, "learning_rate": 9.7528639327203e-06, "loss": 0.0745, "step": 610 }, { "epoch": 0.12806539509536785, "grad_norm": 0.21518062055110931, "learning_rate": 9.751808730693521e-06, "loss": 0.0721, "step": 611 }, { "epoch": 0.1282749947600084, "grad_norm": 0.23137721419334412, "learning_rate": 9.750751338083906e-06, "loss": 0.0773, "step": 612 }, { "epoch": 0.1284845944246489, "grad_norm": 0.25104039907455444, "learning_rate": 9.749691755378912e-06, "loss": 0.0789, "step": 613 }, { "epoch": 0.12869419408928945, "grad_norm": 0.3227115869522095, "learning_rate": 9.748629983067004e-06, "loss": 0.0814, "step": 614 }, { "epoch": 0.12890379375393, "grad_norm": 0.3414157032966614, "learning_rate": 9.747566021637662e-06, "loss": 0.0784, "step": 615 }, { "epoch": 0.12911339341857053, "grad_norm": 0.2576848566532135, "learning_rate": 9.746499871581368e-06, "loss": 0.0821, "step": 616 }, { "epoch": 0.12932299308321107, "grad_norm": 0.18533697724342346, "learning_rate": 9.74543153338962e-06, "loss": 0.0791, "step": 617 }, { "epoch": 0.12953259274785162, "grad_norm": 0.21570216119289398, "learning_rate": 9.744361007554922e-06, "loss": 0.0762, "step": 618 }, { "epoch": 0.12974219241249213, "grad_norm": 0.2508070170879364, "learning_rate": 9.743288294570784e-06, "loss": 0.0747, "step": 619 }, { "epoch": 0.12995179207713267, "grad_norm": 0.23686811327934265, "learning_rate": 9.742213394931726e-06, "loss": 0.0766, "step": 620 }, { "epoch": 0.1301613917417732, "grad_norm": 0.18374064564704895, "learning_rate": 9.741136309133279e-06, "loss": 0.0751, "step": 621 }, { "epoch": 0.13037099140641376, "grad_norm": 0.21081578731536865, "learning_rate": 9.74005703767198e-06, "loss": 0.0769, "step": 622 }, { "epoch": 0.1305805910710543, "grad_norm": 0.20336848497390747, "learning_rate": 9.738975581045368e-06, "loss": 0.0788, "step": 623 }, { "epoch": 0.1307901907356948, "grad_norm": 0.1740516871213913, "learning_rate": 9.737891939752e-06, "loss": 0.0744, "step": 624 }, { "epoch": 0.13099979040033535, "grad_norm": 0.19728244841098785, "learning_rate": 9.73680611429143e-06, "loss": 0.0737, "step": 625 }, { "epoch": 0.1312093900649759, "grad_norm": 0.19369123876094818, "learning_rate": 9.735718105164228e-06, "loss": 0.0759, "step": 626 }, { "epoch": 0.13141898972961644, "grad_norm": 0.1801711618900299, "learning_rate": 9.734627912871962e-06, "loss": 0.0759, "step": 627 }, { "epoch": 0.13162858939425698, "grad_norm": 0.1812150627374649, "learning_rate": 9.733535537917211e-06, "loss": 0.078, "step": 628 }, { "epoch": 0.1318381890588975, "grad_norm": 0.17281141877174377, "learning_rate": 9.732440980803561e-06, "loss": 0.0759, "step": 629 }, { "epoch": 0.13204778872353803, "grad_norm": 0.18802779912948608, "learning_rate": 9.7313442420356e-06, "loss": 0.075, "step": 630 }, { "epoch": 0.13225738838817858, "grad_norm": 0.17967858910560608, "learning_rate": 9.730245322118929e-06, "loss": 0.0737, "step": 631 }, { "epoch": 0.13246698805281912, "grad_norm": 0.15727940201759338, "learning_rate": 9.729144221560145e-06, "loss": 0.0728, "step": 632 }, { "epoch": 0.13267658771745966, "grad_norm": 0.1466912478208542, "learning_rate": 9.72804094086686e-06, "loss": 0.0765, "step": 633 }, { "epoch": 0.1328861873821002, "grad_norm": 0.14428047835826874, "learning_rate": 9.726935480547679e-06, "loss": 0.0759, "step": 634 }, { "epoch": 0.13309578704674072, "grad_norm": 0.14207249879837036, "learning_rate": 9.725827841112226e-06, "loss": 0.0763, "step": 635 }, { "epoch": 0.13330538671138126, "grad_norm": 0.1447642594575882, "learning_rate": 9.72471802307112e-06, "loss": 0.0738, "step": 636 }, { "epoch": 0.1335149863760218, "grad_norm": 0.15724752843379974, "learning_rate": 9.723606026935986e-06, "loss": 0.0746, "step": 637 }, { "epoch": 0.13372458604066234, "grad_norm": 0.13662230968475342, "learning_rate": 9.722491853219455e-06, "loss": 0.0735, "step": 638 }, { "epoch": 0.13393418570530288, "grad_norm": 0.14362376928329468, "learning_rate": 9.72137550243516e-06, "loss": 0.0738, "step": 639 }, { "epoch": 0.1341437853699434, "grad_norm": 0.18275748193264008, "learning_rate": 9.720256975097741e-06, "loss": 0.0806, "step": 640 }, { "epoch": 0.13435338503458394, "grad_norm": 0.17419646680355072, "learning_rate": 9.719136271722835e-06, "loss": 0.076, "step": 641 }, { "epoch": 0.13456298469922448, "grad_norm": 0.15311293303966522, "learning_rate": 9.718013392827087e-06, "loss": 0.0743, "step": 642 }, { "epoch": 0.13477258436386502, "grad_norm": 0.18459968268871307, "learning_rate": 9.716888338928147e-06, "loss": 0.0764, "step": 643 }, { "epoch": 0.13498218402850556, "grad_norm": 0.2108069807291031, "learning_rate": 9.715761110544663e-06, "loss": 0.0752, "step": 644 }, { "epoch": 0.13519178369314608, "grad_norm": 0.19579482078552246, "learning_rate": 9.714631708196287e-06, "loss": 0.0788, "step": 645 }, { "epoch": 0.13540138335778662, "grad_norm": 0.18873129785060883, "learning_rate": 9.71350013240367e-06, "loss": 0.0733, "step": 646 }, { "epoch": 0.13561098302242716, "grad_norm": 0.21143223345279694, "learning_rate": 9.712366383688474e-06, "loss": 0.0746, "step": 647 }, { "epoch": 0.1358205826870677, "grad_norm": 0.23884397745132446, "learning_rate": 9.71123046257335e-06, "loss": 0.0744, "step": 648 }, { "epoch": 0.13603018235170825, "grad_norm": 0.22789151966571808, "learning_rate": 9.710092369581966e-06, "loss": 0.0766, "step": 649 }, { "epoch": 0.1362397820163488, "grad_norm": 0.18522526323795319, "learning_rate": 9.708952105238975e-06, "loss": 0.0743, "step": 650 }, { "epoch": 0.1364493816809893, "grad_norm": 0.20992542803287506, "learning_rate": 9.707809670070043e-06, "loss": 0.073, "step": 651 }, { "epoch": 0.13665898134562984, "grad_norm": 0.1938726305961609, "learning_rate": 9.706665064601831e-06, "loss": 0.0729, "step": 652 }, { "epoch": 0.13686858101027038, "grad_norm": 0.12811291217803955, "learning_rate": 9.705518289362001e-06, "loss": 0.0764, "step": 653 }, { "epoch": 0.13707818067491093, "grad_norm": 0.19632869958877563, "learning_rate": 9.704369344879219e-06, "loss": 0.0765, "step": 654 }, { "epoch": 0.13728778033955147, "grad_norm": 0.21263957023620605, "learning_rate": 9.703218231683143e-06, "loss": 0.0765, "step": 655 }, { "epoch": 0.13749738000419198, "grad_norm": 0.18144431710243225, "learning_rate": 9.702064950304442e-06, "loss": 0.0739, "step": 656 }, { "epoch": 0.13770697966883252, "grad_norm": 0.21412257850170135, "learning_rate": 9.700909501274773e-06, "loss": 0.0771, "step": 657 }, { "epoch": 0.13791657933347307, "grad_norm": 0.2056640088558197, "learning_rate": 9.699751885126803e-06, "loss": 0.0746, "step": 658 }, { "epoch": 0.1381261789981136, "grad_norm": 0.17441654205322266, "learning_rate": 9.698592102394188e-06, "loss": 0.0806, "step": 659 }, { "epoch": 0.13833577866275415, "grad_norm": 0.18476596474647522, "learning_rate": 9.697430153611592e-06, "loss": 0.0764, "step": 660 }, { "epoch": 0.13854537832739466, "grad_norm": 0.15858349204063416, "learning_rate": 9.69626603931467e-06, "loss": 0.0748, "step": 661 }, { "epoch": 0.1387549779920352, "grad_norm": 0.14706814289093018, "learning_rate": 9.695099760040079e-06, "loss": 0.0737, "step": 662 }, { "epoch": 0.13896457765667575, "grad_norm": 0.18588264286518097, "learning_rate": 9.693931316325473e-06, "loss": 0.0706, "step": 663 }, { "epoch": 0.1391741773213163, "grad_norm": 0.18025395274162292, "learning_rate": 9.692760708709506e-06, "loss": 0.0763, "step": 664 }, { "epoch": 0.13938377698595683, "grad_norm": 0.18428079783916473, "learning_rate": 9.691587937731827e-06, "loss": 0.0772, "step": 665 }, { "epoch": 0.13959337665059737, "grad_norm": 0.20305953919887543, "learning_rate": 9.690413003933084e-06, "loss": 0.0742, "step": 666 }, { "epoch": 0.1398029763152379, "grad_norm": 0.21288353204727173, "learning_rate": 9.68923590785492e-06, "loss": 0.077, "step": 667 }, { "epoch": 0.14001257597987843, "grad_norm": 0.2299814224243164, "learning_rate": 9.688056650039976e-06, "loss": 0.0754, "step": 668 }, { "epoch": 0.14022217564451897, "grad_norm": 0.23318803310394287, "learning_rate": 9.68687523103189e-06, "loss": 0.0748, "step": 669 }, { "epoch": 0.1404317753091595, "grad_norm": 0.21237865090370178, "learning_rate": 9.685691651375297e-06, "loss": 0.0719, "step": 670 }, { "epoch": 0.14064137497380005, "grad_norm": 0.20599277317523956, "learning_rate": 9.684505911615825e-06, "loss": 0.0731, "step": 671 }, { "epoch": 0.14085097463844057, "grad_norm": 0.2014642357826233, "learning_rate": 9.683318012300103e-06, "loss": 0.0739, "step": 672 }, { "epoch": 0.1410605743030811, "grad_norm": 0.21525225043296814, "learning_rate": 9.682127953975748e-06, "loss": 0.0796, "step": 673 }, { "epoch": 0.14127017396772165, "grad_norm": 0.20060989260673523, "learning_rate": 9.68093573719138e-06, "loss": 0.0717, "step": 674 }, { "epoch": 0.1414797736323622, "grad_norm": 0.19899265468120575, "learning_rate": 9.679741362496608e-06, "loss": 0.0723, "step": 675 }, { "epoch": 0.14168937329700274, "grad_norm": 0.1738089621067047, "learning_rate": 9.678544830442041e-06, "loss": 0.0776, "step": 676 }, { "epoch": 0.14189897296164325, "grad_norm": 0.1539677530527115, "learning_rate": 9.677346141579277e-06, "loss": 0.0725, "step": 677 }, { "epoch": 0.1421085726262838, "grad_norm": 0.16590017080307007, "learning_rate": 9.676145296460917e-06, "loss": 0.0733, "step": 678 }, { "epoch": 0.14231817229092433, "grad_norm": 0.17394165694713593, "learning_rate": 9.674942295640544e-06, "loss": 0.0744, "step": 679 }, { "epoch": 0.14252777195556487, "grad_norm": 0.17285530269145966, "learning_rate": 9.673737139672746e-06, "loss": 0.0729, "step": 680 }, { "epoch": 0.14273737162020542, "grad_norm": 0.14739976823329926, "learning_rate": 9.672529829113095e-06, "loss": 0.073, "step": 681 }, { "epoch": 0.14294697128484593, "grad_norm": 0.1775324046611786, "learning_rate": 9.671320364518164e-06, "loss": 0.0745, "step": 682 }, { "epoch": 0.14315657094948647, "grad_norm": 0.1740763634443283, "learning_rate": 9.670108746445514e-06, "loss": 0.0734, "step": 683 }, { "epoch": 0.14336617061412701, "grad_norm": 0.14297354221343994, "learning_rate": 9.668894975453705e-06, "loss": 0.0763, "step": 684 }, { "epoch": 0.14357577027876756, "grad_norm": 0.1698678582906723, "learning_rate": 9.66767905210228e-06, "loss": 0.0727, "step": 685 }, { "epoch": 0.1437853699434081, "grad_norm": 0.18393908441066742, "learning_rate": 9.666460976951783e-06, "loss": 0.0755, "step": 686 }, { "epoch": 0.14399496960804864, "grad_norm": 0.16469605267047882, "learning_rate": 9.665240750563743e-06, "loss": 0.0718, "step": 687 }, { "epoch": 0.14420456927268915, "grad_norm": 0.15286237001419067, "learning_rate": 9.664018373500688e-06, "loss": 0.0774, "step": 688 }, { "epoch": 0.1444141689373297, "grad_norm": 0.15138964354991913, "learning_rate": 9.662793846326131e-06, "loss": 0.0738, "step": 689 }, { "epoch": 0.14462376860197024, "grad_norm": 0.13426071405410767, "learning_rate": 9.661567169604579e-06, "loss": 0.0732, "step": 690 }, { "epoch": 0.14483336826661078, "grad_norm": 0.14377839863300323, "learning_rate": 9.66033834390153e-06, "loss": 0.0743, "step": 691 }, { "epoch": 0.14504296793125132, "grad_norm": 0.14970825612545013, "learning_rate": 9.659107369783473e-06, "loss": 0.0719, "step": 692 }, { "epoch": 0.14525256759589183, "grad_norm": 0.12804554402828217, "learning_rate": 9.657874247817886e-06, "loss": 0.0715, "step": 693 }, { "epoch": 0.14546216726053238, "grad_norm": 0.13127097487449646, "learning_rate": 9.656638978573238e-06, "loss": 0.0761, "step": 694 }, { "epoch": 0.14567176692517292, "grad_norm": 0.16138702630996704, "learning_rate": 9.655401562618989e-06, "loss": 0.0727, "step": 695 }, { "epoch": 0.14588136658981346, "grad_norm": 0.16640619933605194, "learning_rate": 9.654162000525585e-06, "loss": 0.0704, "step": 696 }, { "epoch": 0.146090966254454, "grad_norm": 0.17615856230258942, "learning_rate": 9.652920292864467e-06, "loss": 0.0763, "step": 697 }, { "epoch": 0.14630056591909452, "grad_norm": 0.20064422488212585, "learning_rate": 9.65167644020806e-06, "loss": 0.0709, "step": 698 }, { "epoch": 0.14651016558373506, "grad_norm": 0.2409866899251938, "learning_rate": 9.650430443129781e-06, "loss": 0.0752, "step": 699 }, { "epoch": 0.1467197652483756, "grad_norm": 0.28271573781967163, "learning_rate": 9.649182302204034e-06, "loss": 0.0732, "step": 700 }, { "epoch": 0.14692936491301614, "grad_norm": 0.29788634181022644, "learning_rate": 9.64793201800621e-06, "loss": 0.0734, "step": 701 }, { "epoch": 0.14713896457765668, "grad_norm": 0.25497421622276306, "learning_rate": 9.64667959111269e-06, "loss": 0.0739, "step": 702 }, { "epoch": 0.14734856424229723, "grad_norm": 0.1792333573102951, "learning_rate": 9.645425022100847e-06, "loss": 0.0752, "step": 703 }, { "epoch": 0.14755816390693774, "grad_norm": 0.16406488418579102, "learning_rate": 9.644168311549032e-06, "loss": 0.0736, "step": 704 }, { "epoch": 0.14776776357157828, "grad_norm": 0.20219507813453674, "learning_rate": 9.64290946003659e-06, "loss": 0.0803, "step": 705 }, { "epoch": 0.14797736323621882, "grad_norm": 0.20861254632472992, "learning_rate": 9.641648468143852e-06, "loss": 0.0759, "step": 706 }, { "epoch": 0.14818696290085936, "grad_norm": 0.17586849629878998, "learning_rate": 9.640385336452135e-06, "loss": 0.0752, "step": 707 }, { "epoch": 0.1483965625654999, "grad_norm": 0.16276384890079498, "learning_rate": 9.639120065543738e-06, "loss": 0.0714, "step": 708 }, { "epoch": 0.14860616223014042, "grad_norm": 0.20247167348861694, "learning_rate": 9.637852656001957e-06, "loss": 0.0752, "step": 709 }, { "epoch": 0.14881576189478096, "grad_norm": 0.18190069496631622, "learning_rate": 9.636583108411066e-06, "loss": 0.0753, "step": 710 }, { "epoch": 0.1490253615594215, "grad_norm": 0.1566484421491623, "learning_rate": 9.635311423356324e-06, "loss": 0.072, "step": 711 }, { "epoch": 0.14923496122406205, "grad_norm": 0.1933658868074417, "learning_rate": 9.63403760142398e-06, "loss": 0.0746, "step": 712 }, { "epoch": 0.1494445608887026, "grad_norm": 0.1682639867067337, "learning_rate": 9.632761643201262e-06, "loss": 0.073, "step": 713 }, { "epoch": 0.1496541605533431, "grad_norm": 0.13671089708805084, "learning_rate": 9.63148354927639e-06, "loss": 0.0719, "step": 714 }, { "epoch": 0.14986376021798364, "grad_norm": 0.15228819847106934, "learning_rate": 9.630203320238564e-06, "loss": 0.0724, "step": 715 }, { "epoch": 0.15007335988262419, "grad_norm": 0.15035517513751984, "learning_rate": 9.628920956677969e-06, "loss": 0.0737, "step": 716 }, { "epoch": 0.15028295954726473, "grad_norm": 0.1775098294019699, "learning_rate": 9.627636459185774e-06, "loss": 0.0695, "step": 717 }, { "epoch": 0.15049255921190527, "grad_norm": 0.20350755751132965, "learning_rate": 9.626349828354133e-06, "loss": 0.0755, "step": 718 }, { "epoch": 0.1507021588765458, "grad_norm": 0.18517783284187317, "learning_rate": 9.625061064776183e-06, "loss": 0.0733, "step": 719 }, { "epoch": 0.15091175854118632, "grad_norm": 0.19070328772068024, "learning_rate": 9.623770169046042e-06, "loss": 0.0725, "step": 720 }, { "epoch": 0.15112135820582687, "grad_norm": 0.19574813544750214, "learning_rate": 9.622477141758813e-06, "loss": 0.0717, "step": 721 }, { "epoch": 0.1513309578704674, "grad_norm": 0.18914958834648132, "learning_rate": 9.621181983510582e-06, "loss": 0.0761, "step": 722 }, { "epoch": 0.15154055753510795, "grad_norm": 0.18750722706317902, "learning_rate": 9.619884694898417e-06, "loss": 0.0726, "step": 723 }, { "epoch": 0.1517501571997485, "grad_norm": 0.23439928889274597, "learning_rate": 9.618585276520367e-06, "loss": 0.0723, "step": 724 }, { "epoch": 0.151959756864389, "grad_norm": 0.27055707573890686, "learning_rate": 9.617283728975464e-06, "loss": 0.0748, "step": 725 }, { "epoch": 0.15216935652902955, "grad_norm": 0.20538926124572754, "learning_rate": 9.61598005286372e-06, "loss": 0.0721, "step": 726 }, { "epoch": 0.1523789561936701, "grad_norm": 0.15176212787628174, "learning_rate": 9.614674248786131e-06, "loss": 0.0724, "step": 727 }, { "epoch": 0.15258855585831063, "grad_norm": 0.18770189583301544, "learning_rate": 9.613366317344674e-06, "loss": 0.0714, "step": 728 }, { "epoch": 0.15279815552295117, "grad_norm": 0.14766095578670502, "learning_rate": 9.6120562591423e-06, "loss": 0.0722, "step": 729 }, { "epoch": 0.1530077551875917, "grad_norm": 0.1400596648454666, "learning_rate": 9.610744074782951e-06, "loss": 0.0731, "step": 730 }, { "epoch": 0.15321735485223223, "grad_norm": 0.16470056772232056, "learning_rate": 9.60942976487154e-06, "loss": 0.0709, "step": 731 }, { "epoch": 0.15342695451687277, "grad_norm": 0.1465228945016861, "learning_rate": 9.608113330013964e-06, "loss": 0.0711, "step": 732 }, { "epoch": 0.1536365541815133, "grad_norm": 0.14212250709533691, "learning_rate": 9.606794770817102e-06, "loss": 0.0772, "step": 733 }, { "epoch": 0.15384615384615385, "grad_norm": 0.12148991227149963, "learning_rate": 9.605474087888806e-06, "loss": 0.0738, "step": 734 }, { "epoch": 0.1540557535107944, "grad_norm": 0.1582231968641281, "learning_rate": 9.604151281837912e-06, "loss": 0.0723, "step": 735 }, { "epoch": 0.1542653531754349, "grad_norm": 0.1933947652578354, "learning_rate": 9.602826353274235e-06, "loss": 0.073, "step": 736 }, { "epoch": 0.15447495284007545, "grad_norm": 0.16174952685832977, "learning_rate": 9.601499302808565e-06, "loss": 0.0688, "step": 737 }, { "epoch": 0.154684552504716, "grad_norm": 0.14676834642887115, "learning_rate": 9.600170131052671e-06, "loss": 0.0712, "step": 738 }, { "epoch": 0.15489415216935654, "grad_norm": 0.1428784430027008, "learning_rate": 9.598838838619302e-06, "loss": 0.0703, "step": 739 }, { "epoch": 0.15510375183399708, "grad_norm": 0.1900653839111328, "learning_rate": 9.597505426122184e-06, "loss": 0.0687, "step": 740 }, { "epoch": 0.1553133514986376, "grad_norm": 0.2115175575017929, "learning_rate": 9.596169894176021e-06, "loss": 0.0719, "step": 741 }, { "epoch": 0.15552295116327813, "grad_norm": 0.16086673736572266, "learning_rate": 9.59483224339649e-06, "loss": 0.0721, "step": 742 }, { "epoch": 0.15573255082791868, "grad_norm": 0.16307683289051056, "learning_rate": 9.59349247440025e-06, "loss": 0.0734, "step": 743 }, { "epoch": 0.15594215049255922, "grad_norm": 0.17130297422409058, "learning_rate": 9.592150587804934e-06, "loss": 0.0731, "step": 744 }, { "epoch": 0.15615175015719976, "grad_norm": 0.1763344258069992, "learning_rate": 9.590806584229149e-06, "loss": 0.0758, "step": 745 }, { "epoch": 0.15636134982184027, "grad_norm": 0.20289908349514008, "learning_rate": 9.589460464292483e-06, "loss": 0.0694, "step": 746 }, { "epoch": 0.15657094948648081, "grad_norm": 0.2093002051115036, "learning_rate": 9.588112228615495e-06, "loss": 0.0719, "step": 747 }, { "epoch": 0.15678054915112136, "grad_norm": 0.19119895994663239, "learning_rate": 9.586761877819726e-06, "loss": 0.0685, "step": 748 }, { "epoch": 0.1569901488157619, "grad_norm": 0.17849349975585938, "learning_rate": 9.585409412527682e-06, "loss": 0.0719, "step": 749 }, { "epoch": 0.15719974848040244, "grad_norm": 0.21115368604660034, "learning_rate": 9.584054833362851e-06, "loss": 0.0738, "step": 750 }, { "epoch": 0.15740934814504295, "grad_norm": 0.19666269421577454, "learning_rate": 9.582698140949696e-06, "loss": 0.0716, "step": 751 }, { "epoch": 0.1576189478096835, "grad_norm": 0.1261245310306549, "learning_rate": 9.581339335913647e-06, "loss": 0.0695, "step": 752 }, { "epoch": 0.15782854747432404, "grad_norm": 0.16366009414196014, "learning_rate": 9.579978418881118e-06, "loss": 0.0715, "step": 753 }, { "epoch": 0.15803814713896458, "grad_norm": 0.21418072283267975, "learning_rate": 9.57861539047949e-06, "loss": 0.0721, "step": 754 }, { "epoch": 0.15824774680360512, "grad_norm": 0.16937729716300964, "learning_rate": 9.577250251337114e-06, "loss": 0.0725, "step": 755 }, { "epoch": 0.15845734646824566, "grad_norm": 0.1247527152299881, "learning_rate": 9.575883002083326e-06, "loss": 0.0726, "step": 756 }, { "epoch": 0.15866694613288618, "grad_norm": 0.1638677716255188, "learning_rate": 9.574513643348424e-06, "loss": 0.0717, "step": 757 }, { "epoch": 0.15887654579752672, "grad_norm": 0.19583426415920258, "learning_rate": 9.573142175763683e-06, "loss": 0.0752, "step": 758 }, { "epoch": 0.15908614546216726, "grad_norm": 0.19242113828659058, "learning_rate": 9.571768599961349e-06, "loss": 0.0718, "step": 759 }, { "epoch": 0.1592957451268078, "grad_norm": 0.18468959629535675, "learning_rate": 9.570392916574639e-06, "loss": 0.0712, "step": 760 }, { "epoch": 0.15950534479144834, "grad_norm": 0.18113423883914948, "learning_rate": 9.569015126237744e-06, "loss": 0.0728, "step": 761 }, { "epoch": 0.15971494445608886, "grad_norm": 0.15867562592029572, "learning_rate": 9.567635229585826e-06, "loss": 0.0714, "step": 762 }, { "epoch": 0.1599245441207294, "grad_norm": 0.14228221774101257, "learning_rate": 9.566253227255015e-06, "loss": 0.0751, "step": 763 }, { "epoch": 0.16013414378536994, "grad_norm": 0.14296479523181915, "learning_rate": 9.564869119882414e-06, "loss": 0.0713, "step": 764 }, { "epoch": 0.16034374345001048, "grad_norm": 0.15336044132709503, "learning_rate": 9.563482908106098e-06, "loss": 0.0734, "step": 765 }, { "epoch": 0.16055334311465103, "grad_norm": 0.16982528567314148, "learning_rate": 9.56209459256511e-06, "loss": 0.0748, "step": 766 }, { "epoch": 0.16076294277929154, "grad_norm": 0.20288409292697906, "learning_rate": 9.560704173899461e-06, "loss": 0.0717, "step": 767 }, { "epoch": 0.16097254244393208, "grad_norm": 0.24066397547721863, "learning_rate": 9.559311652750135e-06, "loss": 0.0727, "step": 768 }, { "epoch": 0.16118214210857262, "grad_norm": 0.24527938663959503, "learning_rate": 9.557917029759087e-06, "loss": 0.0755, "step": 769 }, { "epoch": 0.16139174177321317, "grad_norm": 0.18823891878128052, "learning_rate": 9.556520305569232e-06, "loss": 0.0698, "step": 770 }, { "epoch": 0.1616013414378537, "grad_norm": 0.1405959278345108, "learning_rate": 9.555121480824463e-06, "loss": 0.0722, "step": 771 }, { "epoch": 0.16181094110249425, "grad_norm": 0.1578516960144043, "learning_rate": 9.553720556169639e-06, "loss": 0.0736, "step": 772 }, { "epoch": 0.16202054076713476, "grad_norm": 0.15858958661556244, "learning_rate": 9.552317532250584e-06, "loss": 0.0727, "step": 773 }, { "epoch": 0.1622301404317753, "grad_norm": 0.1704108715057373, "learning_rate": 9.55091240971409e-06, "loss": 0.0697, "step": 774 }, { "epoch": 0.16243974009641585, "grad_norm": 0.18552088737487793, "learning_rate": 9.549505189207924e-06, "loss": 0.0715, "step": 775 }, { "epoch": 0.1626493397610564, "grad_norm": 0.14782559871673584, "learning_rate": 9.548095871380808e-06, "loss": 0.0746, "step": 776 }, { "epoch": 0.16285893942569693, "grad_norm": 0.13771621882915497, "learning_rate": 9.54668445688244e-06, "loss": 0.0711, "step": 777 }, { "epoch": 0.16306853909033744, "grad_norm": 0.15888865292072296, "learning_rate": 9.545270946363484e-06, "loss": 0.0713, "step": 778 }, { "epoch": 0.163278138754978, "grad_norm": 0.13176760077476501, "learning_rate": 9.543855340475565e-06, "loss": 0.0713, "step": 779 }, { "epoch": 0.16348773841961853, "grad_norm": 0.1236625537276268, "learning_rate": 9.542437639871279e-06, "loss": 0.0736, "step": 780 }, { "epoch": 0.16369733808425907, "grad_norm": 0.14519374072551727, "learning_rate": 9.541017845204182e-06, "loss": 0.0725, "step": 781 }, { "epoch": 0.1639069377488996, "grad_norm": 0.13058701157569885, "learning_rate": 9.539595957128803e-06, "loss": 0.0672, "step": 782 }, { "epoch": 0.16411653741354013, "grad_norm": 0.13305912911891937, "learning_rate": 9.53817197630063e-06, "loss": 0.0706, "step": 783 }, { "epoch": 0.16432613707818067, "grad_norm": 0.15352047979831696, "learning_rate": 9.53674590337612e-06, "loss": 0.0721, "step": 784 }, { "epoch": 0.1645357367428212, "grad_norm": 0.15192946791648865, "learning_rate": 9.535317739012689e-06, "loss": 0.075, "step": 785 }, { "epoch": 0.16474533640746175, "grad_norm": 0.1807214766740799, "learning_rate": 9.533887483868723e-06, "loss": 0.0691, "step": 786 }, { "epoch": 0.1649549360721023, "grad_norm": 0.22403304278850555, "learning_rate": 9.53245513860357e-06, "loss": 0.0723, "step": 787 }, { "epoch": 0.16516453573674283, "grad_norm": 0.22124335169792175, "learning_rate": 9.531020703877539e-06, "loss": 0.0738, "step": 788 }, { "epoch": 0.16537413540138335, "grad_norm": 0.197389617562294, "learning_rate": 9.529584180351902e-06, "loss": 0.0728, "step": 789 }, { "epoch": 0.1655837350660239, "grad_norm": 0.19445890188217163, "learning_rate": 9.528145568688902e-06, "loss": 0.0709, "step": 790 }, { "epoch": 0.16579333473066443, "grad_norm": 0.1834816336631775, "learning_rate": 9.526704869551736e-06, "loss": 0.0734, "step": 791 }, { "epoch": 0.16600293439530497, "grad_norm": 0.15776723623275757, "learning_rate": 9.525262083604562e-06, "loss": 0.0735, "step": 792 }, { "epoch": 0.16621253405994552, "grad_norm": 0.1593591719865799, "learning_rate": 9.523817211512511e-06, "loss": 0.0708, "step": 793 }, { "epoch": 0.16642213372458603, "grad_norm": 0.19884854555130005, "learning_rate": 9.522370253941664e-06, "loss": 0.0729, "step": 794 }, { "epoch": 0.16663173338922657, "grad_norm": 0.208924800157547, "learning_rate": 9.520921211559067e-06, "loss": 0.0696, "step": 795 }, { "epoch": 0.1668413330538671, "grad_norm": 0.1883474439382553, "learning_rate": 9.519470085032733e-06, "loss": 0.0718, "step": 796 }, { "epoch": 0.16705093271850766, "grad_norm": 0.14821593463420868, "learning_rate": 9.518016875031628e-06, "loss": 0.0723, "step": 797 }, { "epoch": 0.1672605323831482, "grad_norm": 0.12627242505550385, "learning_rate": 9.516561582225682e-06, "loss": 0.0725, "step": 798 }, { "epoch": 0.1674701320477887, "grad_norm": 0.18372437357902527, "learning_rate": 9.515104207285785e-06, "loss": 0.0732, "step": 799 }, { "epoch": 0.16767973171242925, "grad_norm": 0.20521710813045502, "learning_rate": 9.513644750883786e-06, "loss": 0.0708, "step": 800 }, { "epoch": 0.1678893313770698, "grad_norm": 0.1553443819284439, "learning_rate": 9.512183213692494e-06, "loss": 0.0744, "step": 801 }, { "epoch": 0.16809893104171034, "grad_norm": 0.1297149360179901, "learning_rate": 9.510719596385678e-06, "loss": 0.0729, "step": 802 }, { "epoch": 0.16830853070635088, "grad_norm": 0.17294561862945557, "learning_rate": 9.509253899638066e-06, "loss": 0.0716, "step": 803 }, { "epoch": 0.16851813037099142, "grad_norm": 0.1920863389968872, "learning_rate": 9.507786124125342e-06, "loss": 0.0718, "step": 804 }, { "epoch": 0.16872773003563193, "grad_norm": 0.17762252688407898, "learning_rate": 9.506316270524152e-06, "loss": 0.0725, "step": 805 }, { "epoch": 0.16893732970027248, "grad_norm": 0.1815175563097, "learning_rate": 9.504844339512096e-06, "loss": 0.0707, "step": 806 }, { "epoch": 0.16914692936491302, "grad_norm": 0.18983136117458344, "learning_rate": 9.503370331767736e-06, "loss": 0.0685, "step": 807 }, { "epoch": 0.16935652902955356, "grad_norm": 0.16265596449375153, "learning_rate": 9.50189424797059e-06, "loss": 0.0699, "step": 808 }, { "epoch": 0.1695661286941941, "grad_norm": 0.14986425638198853, "learning_rate": 9.500416088801128e-06, "loss": 0.0718, "step": 809 }, { "epoch": 0.16977572835883462, "grad_norm": 0.16179025173187256, "learning_rate": 9.498935854940785e-06, "loss": 0.0711, "step": 810 }, { "epoch": 0.16998532802347516, "grad_norm": 0.14040148258209229, "learning_rate": 9.497453547071948e-06, "loss": 0.0735, "step": 811 }, { "epoch": 0.1701949276881157, "grad_norm": 0.14858156442642212, "learning_rate": 9.495969165877959e-06, "loss": 0.0714, "step": 812 }, { "epoch": 0.17040452735275624, "grad_norm": 0.1716984659433365, "learning_rate": 9.494482712043119e-06, "loss": 0.0701, "step": 813 }, { "epoch": 0.17061412701739678, "grad_norm": 0.1606951206922531, "learning_rate": 9.492994186252681e-06, "loss": 0.0727, "step": 814 }, { "epoch": 0.1708237266820373, "grad_norm": 0.14578759670257568, "learning_rate": 9.491503589192859e-06, "loss": 0.0709, "step": 815 }, { "epoch": 0.17103332634667784, "grad_norm": 0.16695253551006317, "learning_rate": 9.490010921550814e-06, "loss": 0.0683, "step": 816 }, { "epoch": 0.17124292601131838, "grad_norm": 0.19223074615001678, "learning_rate": 9.488516184014667e-06, "loss": 0.0721, "step": 817 }, { "epoch": 0.17145252567595892, "grad_norm": 0.2144256979227066, "learning_rate": 9.48701937727349e-06, "loss": 0.0729, "step": 818 }, { "epoch": 0.17166212534059946, "grad_norm": 0.2188243865966797, "learning_rate": 9.485520502017314e-06, "loss": 0.0684, "step": 819 }, { "epoch": 0.17187172500523998, "grad_norm": 0.1678440421819687, "learning_rate": 9.48401955893712e-06, "loss": 0.0722, "step": 820 }, { "epoch": 0.17208132466988052, "grad_norm": 0.13523709774017334, "learning_rate": 9.482516548724836e-06, "loss": 0.0705, "step": 821 }, { "epoch": 0.17229092433452106, "grad_norm": 0.1449403315782547, "learning_rate": 9.481011472073359e-06, "loss": 0.0697, "step": 822 }, { "epoch": 0.1725005239991616, "grad_norm": 0.1349162459373474, "learning_rate": 9.47950432967652e-06, "loss": 0.0749, "step": 823 }, { "epoch": 0.17271012366380215, "grad_norm": 0.1460842341184616, "learning_rate": 9.477995122229117e-06, "loss": 0.0669, "step": 824 }, { "epoch": 0.1729197233284427, "grad_norm": 0.16330771148204803, "learning_rate": 9.476483850426895e-06, "loss": 0.0705, "step": 825 }, { "epoch": 0.1731293229930832, "grad_norm": 0.1619548201560974, "learning_rate": 9.474970514966545e-06, "loss": 0.0695, "step": 826 }, { "epoch": 0.17333892265772374, "grad_norm": 0.19208967685699463, "learning_rate": 9.473455116545718e-06, "loss": 0.0697, "step": 827 }, { "epoch": 0.17354852232236428, "grad_norm": 0.20909924805164337, "learning_rate": 9.471937655863011e-06, "loss": 0.0732, "step": 828 }, { "epoch": 0.17375812198700483, "grad_norm": 0.18117384612560272, "learning_rate": 9.470418133617973e-06, "loss": 0.0719, "step": 829 }, { "epoch": 0.17396772165164537, "grad_norm": 0.1741568148136139, "learning_rate": 9.468896550511106e-06, "loss": 0.0695, "step": 830 }, { "epoch": 0.17417732131628588, "grad_norm": 0.18191158771514893, "learning_rate": 9.467372907243858e-06, "loss": 0.0737, "step": 831 }, { "epoch": 0.17438692098092642, "grad_norm": 0.1610174924135208, "learning_rate": 9.465847204518626e-06, "loss": 0.069, "step": 832 }, { "epoch": 0.17459652064556697, "grad_norm": 0.1415458768606186, "learning_rate": 9.464319443038759e-06, "loss": 0.071, "step": 833 }, { "epoch": 0.1748061203102075, "grad_norm": 0.12450738251209259, "learning_rate": 9.462789623508559e-06, "loss": 0.0735, "step": 834 }, { "epoch": 0.17501571997484805, "grad_norm": 0.1258174180984497, "learning_rate": 9.461257746633267e-06, "loss": 0.0696, "step": 835 }, { "epoch": 0.17522531963948856, "grad_norm": 0.16757184267044067, "learning_rate": 9.459723813119081e-06, "loss": 0.0707, "step": 836 }, { "epoch": 0.1754349193041291, "grad_norm": 0.18003013730049133, "learning_rate": 9.458187823673145e-06, "loss": 0.0681, "step": 837 }, { "epoch": 0.17564451896876965, "grad_norm": 0.15927360951900482, "learning_rate": 9.456649779003548e-06, "loss": 0.0696, "step": 838 }, { "epoch": 0.1758541186334102, "grad_norm": 0.1543167531490326, "learning_rate": 9.45510967981933e-06, "loss": 0.0673, "step": 839 }, { "epoch": 0.17606371829805073, "grad_norm": 0.1319018304347992, "learning_rate": 9.453567526830471e-06, "loss": 0.072, "step": 840 }, { "epoch": 0.17627331796269127, "grad_norm": 0.1290387362241745, "learning_rate": 9.452023320747909e-06, "loss": 0.0718, "step": 841 }, { "epoch": 0.1764829176273318, "grad_norm": 0.1876518577337265, "learning_rate": 9.450477062283523e-06, "loss": 0.0717, "step": 842 }, { "epoch": 0.17669251729197233, "grad_norm": 0.2217532843351364, "learning_rate": 9.448928752150134e-06, "loss": 0.0691, "step": 843 }, { "epoch": 0.17690211695661287, "grad_norm": 0.2288810908794403, "learning_rate": 9.447378391061515e-06, "loss": 0.0718, "step": 844 }, { "epoch": 0.1771117166212534, "grad_norm": 0.23466289043426514, "learning_rate": 9.44582597973238e-06, "loss": 0.0716, "step": 845 }, { "epoch": 0.17732131628589395, "grad_norm": 0.20989495515823364, "learning_rate": 9.444271518878393e-06, "loss": 0.0726, "step": 846 }, { "epoch": 0.17753091595053447, "grad_norm": 0.1636846363544464, "learning_rate": 9.442715009216159e-06, "loss": 0.0689, "step": 847 }, { "epoch": 0.177740515615175, "grad_norm": 0.13839955627918243, "learning_rate": 9.441156451463228e-06, "loss": 0.0693, "step": 848 }, { "epoch": 0.17795011527981555, "grad_norm": 0.17058932781219482, "learning_rate": 9.439595846338097e-06, "loss": 0.0693, "step": 849 }, { "epoch": 0.1781597149444561, "grad_norm": 0.20214615762233734, "learning_rate": 9.4380331945602e-06, "loss": 0.0727, "step": 850 }, { "epoch": 0.17836931460909664, "grad_norm": 0.178322434425354, "learning_rate": 9.436468496849924e-06, "loss": 0.0766, "step": 851 }, { "epoch": 0.17857891427373715, "grad_norm": 0.14965812861919403, "learning_rate": 9.434901753928593e-06, "loss": 0.0709, "step": 852 }, { "epoch": 0.1787885139383777, "grad_norm": 0.16711200773715973, "learning_rate": 9.433332966518473e-06, "loss": 0.0673, "step": 853 }, { "epoch": 0.17899811360301823, "grad_norm": 0.17375777661800385, "learning_rate": 9.43176213534278e-06, "loss": 0.0738, "step": 854 }, { "epoch": 0.17920771326765877, "grad_norm": 0.1556476205587387, "learning_rate": 9.43018926112566e-06, "loss": 0.068, "step": 855 }, { "epoch": 0.17941731293229932, "grad_norm": 0.1631685495376587, "learning_rate": 9.42861434459221e-06, "loss": 0.0718, "step": 856 }, { "epoch": 0.17962691259693986, "grad_norm": 0.1664845198392868, "learning_rate": 9.42703738646847e-06, "loss": 0.0689, "step": 857 }, { "epoch": 0.17983651226158037, "grad_norm": 0.15820707380771637, "learning_rate": 9.425458387481412e-06, "loss": 0.072, "step": 858 }, { "epoch": 0.18004611192622091, "grad_norm": 0.15286661684513092, "learning_rate": 9.423877348358956e-06, "loss": 0.0743, "step": 859 }, { "epoch": 0.18025571159086146, "grad_norm": 0.14925047755241394, "learning_rate": 9.422294269829963e-06, "loss": 0.0665, "step": 860 }, { "epoch": 0.180465311255502, "grad_norm": 0.15763109922409058, "learning_rate": 9.420709152624232e-06, "loss": 0.0725, "step": 861 }, { "epoch": 0.18067491092014254, "grad_norm": 0.1531532257795334, "learning_rate": 9.419121997472497e-06, "loss": 0.0699, "step": 862 }, { "epoch": 0.18088451058478305, "grad_norm": 0.14012256264686584, "learning_rate": 9.41753280510644e-06, "loss": 0.0718, "step": 863 }, { "epoch": 0.1810941102494236, "grad_norm": 0.14636793732643127, "learning_rate": 9.415941576258679e-06, "loss": 0.0683, "step": 864 }, { "epoch": 0.18130370991406414, "grad_norm": 0.16276605427265167, "learning_rate": 9.414348311662766e-06, "loss": 0.0706, "step": 865 }, { "epoch": 0.18151330957870468, "grad_norm": 0.1377471536397934, "learning_rate": 9.4127530120532e-06, "loss": 0.0727, "step": 866 }, { "epoch": 0.18172290924334522, "grad_norm": 0.11346344649791718, "learning_rate": 9.41115567816541e-06, "loss": 0.0699, "step": 867 }, { "epoch": 0.18193250890798573, "grad_norm": 0.1656496673822403, "learning_rate": 9.40955631073577e-06, "loss": 0.0673, "step": 868 }, { "epoch": 0.18214210857262628, "grad_norm": 0.16461622714996338, "learning_rate": 9.407954910501586e-06, "loss": 0.0769, "step": 869 }, { "epoch": 0.18235170823726682, "grad_norm": 0.12896548211574554, "learning_rate": 9.406351478201105e-06, "loss": 0.0699, "step": 870 }, { "epoch": 0.18256130790190736, "grad_norm": 0.17316146194934845, "learning_rate": 9.404746014573508e-06, "loss": 0.0713, "step": 871 }, { "epoch": 0.1827709075665479, "grad_norm": 0.17203350365161896, "learning_rate": 9.403138520358911e-06, "loss": 0.0718, "step": 872 }, { "epoch": 0.18298050723118844, "grad_norm": 0.17705942690372467, "learning_rate": 9.401528996298375e-06, "loss": 0.074, "step": 873 }, { "epoch": 0.18319010689582896, "grad_norm": 0.2316497564315796, "learning_rate": 9.399917443133883e-06, "loss": 0.0689, "step": 874 }, { "epoch": 0.1833997065604695, "grad_norm": 0.23241883516311646, "learning_rate": 9.398303861608363e-06, "loss": 0.0707, "step": 875 }, { "epoch": 0.18360930622511004, "grad_norm": 0.21609675884246826, "learning_rate": 9.396688252465678e-06, "loss": 0.0739, "step": 876 }, { "epoch": 0.18381890588975058, "grad_norm": 0.21849660575389862, "learning_rate": 9.395070616450622e-06, "loss": 0.0729, "step": 877 }, { "epoch": 0.18402850555439113, "grad_norm": 0.17387786507606506, "learning_rate": 9.393450954308922e-06, "loss": 0.0715, "step": 878 }, { "epoch": 0.18423810521903164, "grad_norm": 0.1386607140302658, "learning_rate": 9.391829266787248e-06, "loss": 0.0708, "step": 879 }, { "epoch": 0.18444770488367218, "grad_norm": 0.14751608669757843, "learning_rate": 9.390205554633193e-06, "loss": 0.0714, "step": 880 }, { "epoch": 0.18465730454831272, "grad_norm": 0.14015577733516693, "learning_rate": 9.38857981859529e-06, "loss": 0.0699, "step": 881 }, { "epoch": 0.18486690421295326, "grad_norm": 0.13030049204826355, "learning_rate": 9.386952059423e-06, "loss": 0.0732, "step": 882 }, { "epoch": 0.1850765038775938, "grad_norm": 0.14583101868629456, "learning_rate": 9.385322277866724e-06, "loss": 0.0681, "step": 883 }, { "epoch": 0.18528610354223432, "grad_norm": 0.1531733274459839, "learning_rate": 9.38369047467779e-06, "loss": 0.0691, "step": 884 }, { "epoch": 0.18549570320687486, "grad_norm": 0.16215740144252777, "learning_rate": 9.382056650608454e-06, "loss": 0.0709, "step": 885 }, { "epoch": 0.1857053028715154, "grad_norm": 0.1694212257862091, "learning_rate": 9.380420806411914e-06, "loss": 0.0694, "step": 886 }, { "epoch": 0.18591490253615595, "grad_norm": 0.16096119582653046, "learning_rate": 9.378782942842292e-06, "loss": 0.0656, "step": 887 }, { "epoch": 0.1861245022007965, "grad_norm": 0.13555142283439636, "learning_rate": 9.377143060654645e-06, "loss": 0.0741, "step": 888 }, { "epoch": 0.186334101865437, "grad_norm": 0.11763742566108704, "learning_rate": 9.375501160604955e-06, "loss": 0.069, "step": 889 }, { "epoch": 0.18654370153007754, "grad_norm": 0.14128689467906952, "learning_rate": 9.373857243450138e-06, "loss": 0.0691, "step": 890 }, { "epoch": 0.18675330119471809, "grad_norm": 0.15519973635673523, "learning_rate": 9.37221130994804e-06, "loss": 0.0691, "step": 891 }, { "epoch": 0.18696290085935863, "grad_norm": 0.1528463065624237, "learning_rate": 9.370563360857437e-06, "loss": 0.0708, "step": 892 }, { "epoch": 0.18717250052399917, "grad_norm": 0.1430312991142273, "learning_rate": 9.368913396938031e-06, "loss": 0.0702, "step": 893 }, { "epoch": 0.1873821001886397, "grad_norm": 0.13003818690776825, "learning_rate": 9.367261418950459e-06, "loss": 0.0729, "step": 894 }, { "epoch": 0.18759169985328022, "grad_norm": 0.1460046023130417, "learning_rate": 9.365607427656277e-06, "loss": 0.0701, "step": 895 }, { "epoch": 0.18780129951792077, "grad_norm": 0.1653718799352646, "learning_rate": 9.363951423817982e-06, "loss": 0.0713, "step": 896 }, { "epoch": 0.1880108991825613, "grad_norm": 0.1808050125837326, "learning_rate": 9.362293408198983e-06, "loss": 0.0671, "step": 897 }, { "epoch": 0.18822049884720185, "grad_norm": 0.17613857984542847, "learning_rate": 9.360633381563631e-06, "loss": 0.0684, "step": 898 }, { "epoch": 0.1884300985118424, "grad_norm": 0.17661046981811523, "learning_rate": 9.358971344677197e-06, "loss": 0.068, "step": 899 }, { "epoch": 0.1886396981764829, "grad_norm": 0.205166295170784, "learning_rate": 9.357307298305875e-06, "loss": 0.0695, "step": 900 }, { "epoch": 0.18884929784112345, "grad_norm": 0.22776846587657928, "learning_rate": 9.355641243216798e-06, "loss": 0.0695, "step": 901 }, { "epoch": 0.189058897505764, "grad_norm": 0.21361425518989563, "learning_rate": 9.35397318017801e-06, "loss": 0.0736, "step": 902 }, { "epoch": 0.18926849717040453, "grad_norm": 0.18417485058307648, "learning_rate": 9.35230310995849e-06, "loss": 0.0719, "step": 903 }, { "epoch": 0.18947809683504507, "grad_norm": 0.17140980064868927, "learning_rate": 9.35063103332814e-06, "loss": 0.0692, "step": 904 }, { "epoch": 0.1896876964996856, "grad_norm": 0.17577317357063293, "learning_rate": 9.348956951057788e-06, "loss": 0.0702, "step": 905 }, { "epoch": 0.18989729616432613, "grad_norm": 0.18869447708129883, "learning_rate": 9.347280863919186e-06, "loss": 0.0682, "step": 906 }, { "epoch": 0.19010689582896667, "grad_norm": 0.15782123804092407, "learning_rate": 9.345602772685008e-06, "loss": 0.0674, "step": 907 }, { "epoch": 0.1903164954936072, "grad_norm": 0.15781988203525543, "learning_rate": 9.343922678128854e-06, "loss": 0.0703, "step": 908 }, { "epoch": 0.19052609515824775, "grad_norm": 0.17203202843666077, "learning_rate": 9.342240581025248e-06, "loss": 0.0672, "step": 909 }, { "epoch": 0.1907356948228883, "grad_norm": 0.16652663052082062, "learning_rate": 9.340556482149637e-06, "loss": 0.0671, "step": 910 }, { "epoch": 0.1909452944875288, "grad_norm": 0.15487754344940186, "learning_rate": 9.33887038227839e-06, "loss": 0.0699, "step": 911 }, { "epoch": 0.19115489415216935, "grad_norm": 0.13165274262428284, "learning_rate": 9.337182282188794e-06, "loss": 0.0666, "step": 912 }, { "epoch": 0.1913644938168099, "grad_norm": 0.1349601447582245, "learning_rate": 9.335492182659071e-06, "loss": 0.0664, "step": 913 }, { "epoch": 0.19157409348145044, "grad_norm": 0.1551382839679718, "learning_rate": 9.33380008446835e-06, "loss": 0.0677, "step": 914 }, { "epoch": 0.19178369314609098, "grad_norm": 0.16102422773838043, "learning_rate": 9.332105988396692e-06, "loss": 0.0693, "step": 915 }, { "epoch": 0.1919932928107315, "grad_norm": 0.17208023369312286, "learning_rate": 9.330409895225072e-06, "loss": 0.0704, "step": 916 }, { "epoch": 0.19220289247537203, "grad_norm": 0.16486892104148865, "learning_rate": 9.32871180573539e-06, "loss": 0.0695, "step": 917 }, { "epoch": 0.19241249214001258, "grad_norm": 0.15366514027118683, "learning_rate": 9.327011720710464e-06, "loss": 0.0688, "step": 918 }, { "epoch": 0.19262209180465312, "grad_norm": 0.17730504274368286, "learning_rate": 9.325309640934036e-06, "loss": 0.0678, "step": 919 }, { "epoch": 0.19283169146929366, "grad_norm": 0.2048606127500534, "learning_rate": 9.323605567190758e-06, "loss": 0.0696, "step": 920 }, { "epoch": 0.19304129113393417, "grad_norm": 0.1947096735239029, "learning_rate": 9.321899500266216e-06, "loss": 0.0694, "step": 921 }, { "epoch": 0.19325089079857471, "grad_norm": 0.15000185370445251, "learning_rate": 9.3201914409469e-06, "loss": 0.0675, "step": 922 }, { "epoch": 0.19346049046321526, "grad_norm": 0.15103282034397125, "learning_rate": 9.318481390020226e-06, "loss": 0.071, "step": 923 }, { "epoch": 0.1936700901278558, "grad_norm": 0.16864833235740662, "learning_rate": 9.31676934827453e-06, "loss": 0.0695, "step": 924 }, { "epoch": 0.19387968979249634, "grad_norm": 0.1579030156135559, "learning_rate": 9.31505531649906e-06, "loss": 0.0652, "step": 925 }, { "epoch": 0.19408928945713688, "grad_norm": 0.13766202330589294, "learning_rate": 9.313339295483985e-06, "loss": 0.071, "step": 926 }, { "epoch": 0.1942988891217774, "grad_norm": 0.1618116796016693, "learning_rate": 9.311621286020393e-06, "loss": 0.0692, "step": 927 }, { "epoch": 0.19450848878641794, "grad_norm": 0.15055081248283386, "learning_rate": 9.309901288900283e-06, "loss": 0.0675, "step": 928 }, { "epoch": 0.19471808845105848, "grad_norm": 0.13458986580371857, "learning_rate": 9.308179304916573e-06, "loss": 0.0717, "step": 929 }, { "epoch": 0.19492768811569902, "grad_norm": 0.13572897017002106, "learning_rate": 9.306455334863098e-06, "loss": 0.0708, "step": 930 }, { "epoch": 0.19513728778033956, "grad_norm": 0.13782450556755066, "learning_rate": 9.30472937953461e-06, "loss": 0.067, "step": 931 }, { "epoch": 0.19534688744498008, "grad_norm": 0.1412908285856247, "learning_rate": 9.303001439726772e-06, "loss": 0.0667, "step": 932 }, { "epoch": 0.19555648710962062, "grad_norm": 0.1327088475227356, "learning_rate": 9.301271516236162e-06, "loss": 0.0674, "step": 933 }, { "epoch": 0.19576608677426116, "grad_norm": 0.13067220151424408, "learning_rate": 9.299539609860278e-06, "loss": 0.0683, "step": 934 }, { "epoch": 0.1959756864389017, "grad_norm": 0.13276226818561554, "learning_rate": 9.297805721397525e-06, "loss": 0.0717, "step": 935 }, { "epoch": 0.19618528610354224, "grad_norm": 0.13292694091796875, "learning_rate": 9.29606985164723e-06, "loss": 0.0682, "step": 936 }, { "epoch": 0.19639488576818276, "grad_norm": 0.1614527702331543, "learning_rate": 9.294332001409625e-06, "loss": 0.0693, "step": 937 }, { "epoch": 0.1966044854328233, "grad_norm": 0.20869992673397064, "learning_rate": 9.292592171485858e-06, "loss": 0.0687, "step": 938 }, { "epoch": 0.19681408509746384, "grad_norm": 0.25103163719177246, "learning_rate": 9.290850362677993e-06, "loss": 0.0673, "step": 939 }, { "epoch": 0.19702368476210438, "grad_norm": 0.2684808671474457, "learning_rate": 9.289106575789001e-06, "loss": 0.0711, "step": 940 }, { "epoch": 0.19723328442674493, "grad_norm": 0.2292683720588684, "learning_rate": 9.28736081162277e-06, "loss": 0.075, "step": 941 }, { "epoch": 0.19744288409138544, "grad_norm": 0.148003488779068, "learning_rate": 9.285613070984094e-06, "loss": 0.0736, "step": 942 }, { "epoch": 0.19765248375602598, "grad_norm": 0.1527291238307953, "learning_rate": 9.283863354678683e-06, "loss": 0.0691, "step": 943 }, { "epoch": 0.19786208342066652, "grad_norm": 0.17190136015415192, "learning_rate": 9.282111663513156e-06, "loss": 0.0696, "step": 944 }, { "epoch": 0.19807168308530707, "grad_norm": 0.16085271537303925, "learning_rate": 9.280357998295041e-06, "loss": 0.0692, "step": 945 }, { "epoch": 0.1982812827499476, "grad_norm": 0.16574037075042725, "learning_rate": 9.278602359832778e-06, "loss": 0.0695, "step": 946 }, { "epoch": 0.19849088241458815, "grad_norm": 0.1433473825454712, "learning_rate": 9.276844748935715e-06, "loss": 0.066, "step": 947 }, { "epoch": 0.19870048207922866, "grad_norm": 0.15432019531726837, "learning_rate": 9.275085166414113e-06, "loss": 0.0716, "step": 948 }, { "epoch": 0.1989100817438692, "grad_norm": 0.155776709318161, "learning_rate": 9.273323613079135e-06, "loss": 0.0687, "step": 949 }, { "epoch": 0.19911968140850975, "grad_norm": 0.14493581652641296, "learning_rate": 9.27156008974286e-06, "loss": 0.0688, "step": 950 }, { "epoch": 0.1993292810731503, "grad_norm": 0.16887947916984558, "learning_rate": 9.269794597218271e-06, "loss": 0.0682, "step": 951 }, { "epoch": 0.19953888073779083, "grad_norm": 0.15968920290470123, "learning_rate": 9.26802713631926e-06, "loss": 0.0652, "step": 952 }, { "epoch": 0.19974848040243134, "grad_norm": 0.15079444646835327, "learning_rate": 9.266257707860625e-06, "loss": 0.0661, "step": 953 }, { "epoch": 0.19995808006707189, "grad_norm": 0.15379679203033447, "learning_rate": 9.264486312658073e-06, "loss": 0.0723, "step": 954 }, { "epoch": 0.20016767973171243, "grad_norm": 0.1347336769104004, "learning_rate": 9.262712951528217e-06, "loss": 0.0729, "step": 955 }, { "epoch": 0.20037727939635297, "grad_norm": 0.13610079884529114, "learning_rate": 9.260937625288576e-06, "loss": 0.0693, "step": 956 }, { "epoch": 0.2005868790609935, "grad_norm": 0.14802809059619904, "learning_rate": 9.259160334757575e-06, "loss": 0.0685, "step": 957 }, { "epoch": 0.20079647872563403, "grad_norm": 0.15134483575820923, "learning_rate": 9.257381080754544e-06, "loss": 0.0655, "step": 958 }, { "epoch": 0.20100607839027457, "grad_norm": 0.1678546667098999, "learning_rate": 9.255599864099718e-06, "loss": 0.0715, "step": 959 }, { "epoch": 0.2012156780549151, "grad_norm": 0.15980494022369385, "learning_rate": 9.25381668561424e-06, "loss": 0.0695, "step": 960 }, { "epoch": 0.20142527771955565, "grad_norm": 0.13359016180038452, "learning_rate": 9.252031546120153e-06, "loss": 0.0671, "step": 961 }, { "epoch": 0.2016348773841962, "grad_norm": 0.154948428273201, "learning_rate": 9.250244446440406e-06, "loss": 0.0676, "step": 962 }, { "epoch": 0.20184447704883673, "grad_norm": 0.16912510991096497, "learning_rate": 9.248455387398853e-06, "loss": 0.0687, "step": 963 }, { "epoch": 0.20205407671347725, "grad_norm": 0.1481633484363556, "learning_rate": 9.246664369820249e-06, "loss": 0.0696, "step": 964 }, { "epoch": 0.2022636763781178, "grad_norm": 0.1526942402124405, "learning_rate": 9.244871394530252e-06, "loss": 0.0669, "step": 965 }, { "epoch": 0.20247327604275833, "grad_norm": 0.1594848930835724, "learning_rate": 9.243076462355424e-06, "loss": 0.0695, "step": 966 }, { "epoch": 0.20268287570739887, "grad_norm": 0.15428997576236725, "learning_rate": 9.241279574123228e-06, "loss": 0.0676, "step": 967 }, { "epoch": 0.20289247537203942, "grad_norm": 0.15648357570171356, "learning_rate": 9.239480730662029e-06, "loss": 0.0701, "step": 968 }, { "epoch": 0.20310207503667993, "grad_norm": 0.14083035290241241, "learning_rate": 9.237679932801095e-06, "loss": 0.0707, "step": 969 }, { "epoch": 0.20331167470132047, "grad_norm": 0.11451932042837143, "learning_rate": 9.235877181370592e-06, "loss": 0.0687, "step": 970 }, { "epoch": 0.203521274365961, "grad_norm": 0.12343299388885498, "learning_rate": 9.234072477201588e-06, "loss": 0.0672, "step": 971 }, { "epoch": 0.20373087403060156, "grad_norm": 0.1318097859621048, "learning_rate": 9.23226582112605e-06, "loss": 0.0692, "step": 972 }, { "epoch": 0.2039404736952421, "grad_norm": 0.13317148387432098, "learning_rate": 9.23045721397685e-06, "loss": 0.0652, "step": 973 }, { "epoch": 0.2041500733598826, "grad_norm": 0.14441275596618652, "learning_rate": 9.228646656587751e-06, "loss": 0.0669, "step": 974 }, { "epoch": 0.20435967302452315, "grad_norm": 0.1513681560754776, "learning_rate": 9.226834149793422e-06, "loss": 0.0654, "step": 975 }, { "epoch": 0.2045692726891637, "grad_norm": 0.17051391303539276, "learning_rate": 9.225019694429429e-06, "loss": 0.0664, "step": 976 }, { "epoch": 0.20477887235380424, "grad_norm": 0.19565874338150024, "learning_rate": 9.223203291332234e-06, "loss": 0.0691, "step": 977 }, { "epoch": 0.20498847201844478, "grad_norm": 0.2343725860118866, "learning_rate": 9.2213849413392e-06, "loss": 0.069, "step": 978 }, { "epoch": 0.20519807168308532, "grad_norm": 0.27315953373908997, "learning_rate": 9.219564645288583e-06, "loss": 0.0675, "step": 979 }, { "epoch": 0.20540767134772583, "grad_norm": 0.2724561393260956, "learning_rate": 9.217742404019544e-06, "loss": 0.0702, "step": 980 }, { "epoch": 0.20561727101236638, "grad_norm": 0.23024438321590424, "learning_rate": 9.215918218372128e-06, "loss": 0.0723, "step": 981 }, { "epoch": 0.20582687067700692, "grad_norm": 0.15981607139110565, "learning_rate": 9.214092089187293e-06, "loss": 0.0696, "step": 982 }, { "epoch": 0.20603647034164746, "grad_norm": 0.165577694773674, "learning_rate": 9.212264017306878e-06, "loss": 0.0725, "step": 983 }, { "epoch": 0.206246070006288, "grad_norm": 0.19346004724502563, "learning_rate": 9.210434003573627e-06, "loss": 0.0689, "step": 984 }, { "epoch": 0.20645566967092852, "grad_norm": 0.17302066087722778, "learning_rate": 9.208602048831176e-06, "loss": 0.0695, "step": 985 }, { "epoch": 0.20666526933556906, "grad_norm": 0.17835290729999542, "learning_rate": 9.206768153924052e-06, "loss": 0.0684, "step": 986 }, { "epoch": 0.2068748690002096, "grad_norm": 0.1855718344449997, "learning_rate": 9.204932319697686e-06, "loss": 0.0694, "step": 987 }, { "epoch": 0.20708446866485014, "grad_norm": 0.16161462664604187, "learning_rate": 9.203094546998392e-06, "loss": 0.0689, "step": 988 }, { "epoch": 0.20729406832949068, "grad_norm": 0.1590941995382309, "learning_rate": 9.201254836673386e-06, "loss": 0.0676, "step": 989 }, { "epoch": 0.2075036679941312, "grad_norm": 0.17780111730098724, "learning_rate": 9.199413189570772e-06, "loss": 0.0699, "step": 990 }, { "epoch": 0.20771326765877174, "grad_norm": 0.15813374519348145, "learning_rate": 9.197569606539551e-06, "loss": 0.0678, "step": 991 }, { "epoch": 0.20792286732341228, "grad_norm": 0.14259178936481476, "learning_rate": 9.195724088429611e-06, "loss": 0.0682, "step": 992 }, { "epoch": 0.20813246698805282, "grad_norm": 0.15479755401611328, "learning_rate": 9.193876636091741e-06, "loss": 0.0703, "step": 993 }, { "epoch": 0.20834206665269336, "grad_norm": 0.16291916370391846, "learning_rate": 9.192027250377611e-06, "loss": 0.0714, "step": 994 }, { "epoch": 0.2085516663173339, "grad_norm": 0.16300472617149353, "learning_rate": 9.19017593213979e-06, "loss": 0.0686, "step": 995 }, { "epoch": 0.20876126598197442, "grad_norm": 0.16274508833885193, "learning_rate": 9.188322682231733e-06, "loss": 0.0674, "step": 996 }, { "epoch": 0.20897086564661496, "grad_norm": 0.18186375498771667, "learning_rate": 9.186467501507792e-06, "loss": 0.0696, "step": 997 }, { "epoch": 0.2091804653112555, "grad_norm": 0.20312577486038208, "learning_rate": 9.184610390823202e-06, "loss": 0.0659, "step": 998 }, { "epoch": 0.20939006497589605, "grad_norm": 0.21246276795864105, "learning_rate": 9.18275135103409e-06, "loss": 0.0714, "step": 999 }, { "epoch": 0.2095996646405366, "grad_norm": 0.19625847041606903, "learning_rate": 9.180890382997473e-06, "loss": 0.0662, "step": 1000 }, { "epoch": 0.2098092643051771, "grad_norm": 0.181460440158844, "learning_rate": 9.179027487571258e-06, "loss": 0.0662, "step": 1001 }, { "epoch": 0.21001886396981764, "grad_norm": 0.17908546328544617, "learning_rate": 9.177162665614242e-06, "loss": 0.0685, "step": 1002 }, { "epoch": 0.21022846363445818, "grad_norm": 0.19209085404872894, "learning_rate": 9.175295917986103e-06, "loss": 0.0682, "step": 1003 }, { "epoch": 0.21043806329909873, "grad_norm": 0.21326391398906708, "learning_rate": 9.173427245547414e-06, "loss": 0.0691, "step": 1004 }, { "epoch": 0.21064766296373927, "grad_norm": 0.19269311428070068, "learning_rate": 9.17155664915963e-06, "loss": 0.0727, "step": 1005 }, { "epoch": 0.21085726262837978, "grad_norm": 0.19682708382606506, "learning_rate": 9.169684129685099e-06, "loss": 0.0713, "step": 1006 }, { "epoch": 0.21106686229302032, "grad_norm": 0.18189750611782074, "learning_rate": 9.16780968798705e-06, "loss": 0.0714, "step": 1007 }, { "epoch": 0.21127646195766087, "grad_norm": 0.169154092669487, "learning_rate": 9.165933324929599e-06, "loss": 0.0702, "step": 1008 }, { "epoch": 0.2114860616223014, "grad_norm": 0.20186983048915863, "learning_rate": 9.164055041377754e-06, "loss": 0.0691, "step": 1009 }, { "epoch": 0.21169566128694195, "grad_norm": 0.19826248288154602, "learning_rate": 9.162174838197396e-06, "loss": 0.0679, "step": 1010 }, { "epoch": 0.21190526095158246, "grad_norm": 0.1933613270521164, "learning_rate": 9.160292716255303e-06, "loss": 0.0691, "step": 1011 }, { "epoch": 0.212114860616223, "grad_norm": 0.1532355546951294, "learning_rate": 9.158408676419133e-06, "loss": 0.0712, "step": 1012 }, { "epoch": 0.21232446028086355, "grad_norm": 0.13550697267055511, "learning_rate": 9.156522719557428e-06, "loss": 0.0688, "step": 1013 }, { "epoch": 0.2125340599455041, "grad_norm": 0.19453023374080658, "learning_rate": 9.15463484653961e-06, "loss": 0.0646, "step": 1014 }, { "epoch": 0.21274365961014463, "grad_norm": 0.16020941734313965, "learning_rate": 9.152745058235993e-06, "loss": 0.0692, "step": 1015 }, { "epoch": 0.21295325927478517, "grad_norm": 0.17123140394687653, "learning_rate": 9.150853355517765e-06, "loss": 0.0689, "step": 1016 }, { "epoch": 0.2131628589394257, "grad_norm": 0.17353491485118866, "learning_rate": 9.148959739257005e-06, "loss": 0.0687, "step": 1017 }, { "epoch": 0.21337245860406623, "grad_norm": 0.15080298483371735, "learning_rate": 9.147064210326664e-06, "loss": 0.0664, "step": 1018 }, { "epoch": 0.21358205826870677, "grad_norm": 0.1491314321756363, "learning_rate": 9.145166769600584e-06, "loss": 0.0697, "step": 1019 }, { "epoch": 0.2137916579333473, "grad_norm": 0.10465126484632492, "learning_rate": 9.143267417953486e-06, "loss": 0.068, "step": 1020 }, { "epoch": 0.21400125759798785, "grad_norm": 0.15090526640415192, "learning_rate": 9.141366156260967e-06, "loss": 0.0727, "step": 1021 }, { "epoch": 0.21421085726262837, "grad_norm": 0.11032428592443466, "learning_rate": 9.139462985399512e-06, "loss": 0.0682, "step": 1022 }, { "epoch": 0.2144204569272689, "grad_norm": 0.13925404846668243, "learning_rate": 9.137557906246479e-06, "loss": 0.0682, "step": 1023 }, { "epoch": 0.21463005659190945, "grad_norm": 0.15173637866973877, "learning_rate": 9.135650919680112e-06, "loss": 0.0653, "step": 1024 }, { "epoch": 0.21483965625655, "grad_norm": 0.1346423327922821, "learning_rate": 9.133742026579528e-06, "loss": 0.0679, "step": 1025 }, { "epoch": 0.21504925592119054, "grad_norm": 0.1535898894071579, "learning_rate": 9.13183122782473e-06, "loss": 0.0667, "step": 1026 }, { "epoch": 0.21525885558583105, "grad_norm": 0.14157144725322723, "learning_rate": 9.129918524296596e-06, "loss": 0.0707, "step": 1027 }, { "epoch": 0.2154684552504716, "grad_norm": 0.14217810332775116, "learning_rate": 9.128003916876878e-06, "loss": 0.0708, "step": 1028 }, { "epoch": 0.21567805491511213, "grad_norm": 0.1338115632534027, "learning_rate": 9.126087406448211e-06, "loss": 0.0648, "step": 1029 }, { "epoch": 0.21588765457975267, "grad_norm": 0.12991099059581757, "learning_rate": 9.124168993894107e-06, "loss": 0.0677, "step": 1030 }, { "epoch": 0.21609725424439322, "grad_norm": 0.1184862032532692, "learning_rate": 9.122248680098956e-06, "loss": 0.0673, "step": 1031 }, { "epoch": 0.21630685390903376, "grad_norm": 0.1306854635477066, "learning_rate": 9.120326465948016e-06, "loss": 0.068, "step": 1032 }, { "epoch": 0.21651645357367427, "grad_norm": 0.16064853966236115, "learning_rate": 9.118402352327433e-06, "loss": 0.0663, "step": 1033 }, { "epoch": 0.21672605323831481, "grad_norm": 0.1781821846961975, "learning_rate": 9.11647634012422e-06, "loss": 0.0683, "step": 1034 }, { "epoch": 0.21693565290295536, "grad_norm": 0.18965835869312286, "learning_rate": 9.11454843022627e-06, "loss": 0.0679, "step": 1035 }, { "epoch": 0.2171452525675959, "grad_norm": 0.19845952093601227, "learning_rate": 9.112618623522351e-06, "loss": 0.0663, "step": 1036 }, { "epoch": 0.21735485223223644, "grad_norm": 0.16511160135269165, "learning_rate": 9.110686920902097e-06, "loss": 0.0717, "step": 1037 }, { "epoch": 0.21756445189687695, "grad_norm": 0.1469397395849228, "learning_rate": 9.108753323256028e-06, "loss": 0.0662, "step": 1038 }, { "epoch": 0.2177740515615175, "grad_norm": 0.15417161583900452, "learning_rate": 9.106817831475529e-06, "loss": 0.0736, "step": 1039 }, { "epoch": 0.21798365122615804, "grad_norm": 0.10822763293981552, "learning_rate": 9.104880446452866e-06, "loss": 0.0666, "step": 1040 }, { "epoch": 0.21819325089079858, "grad_norm": 0.1328975260257721, "learning_rate": 9.102941169081167e-06, "loss": 0.0671, "step": 1041 }, { "epoch": 0.21840285055543912, "grad_norm": 0.15660911798477173, "learning_rate": 9.101000000254442e-06, "loss": 0.0679, "step": 1042 }, { "epoch": 0.21861245022007963, "grad_norm": 0.14654120802879333, "learning_rate": 9.09905694086757e-06, "loss": 0.0692, "step": 1043 }, { "epoch": 0.21882204988472018, "grad_norm": 0.15261763334274292, "learning_rate": 9.097111991816297e-06, "loss": 0.0677, "step": 1044 }, { "epoch": 0.21903164954936072, "grad_norm": 0.1555902063846588, "learning_rate": 9.095165153997249e-06, "loss": 0.0653, "step": 1045 }, { "epoch": 0.21924124921400126, "grad_norm": 0.1320633888244629, "learning_rate": 9.093216428307914e-06, "loss": 0.0683, "step": 1046 }, { "epoch": 0.2194508488786418, "grad_norm": 0.12523691356182098, "learning_rate": 9.091265815646658e-06, "loss": 0.0646, "step": 1047 }, { "epoch": 0.21966044854328234, "grad_norm": 0.14748716354370117, "learning_rate": 9.089313316912708e-06, "loss": 0.067, "step": 1048 }, { "epoch": 0.21987004820792286, "grad_norm": 0.13303905725479126, "learning_rate": 9.08735893300617e-06, "loss": 0.0677, "step": 1049 }, { "epoch": 0.2200796478725634, "grad_norm": 0.1327192634344101, "learning_rate": 9.085402664828013e-06, "loss": 0.072, "step": 1050 }, { "epoch": 0.22028924753720394, "grad_norm": 0.13275259733200073, "learning_rate": 9.083444513280076e-06, "loss": 0.0659, "step": 1051 }, { "epoch": 0.22049884720184448, "grad_norm": 0.11244450509548187, "learning_rate": 9.081484479265067e-06, "loss": 0.0719, "step": 1052 }, { "epoch": 0.22070844686648503, "grad_norm": 0.11435073614120483, "learning_rate": 9.07952256368656e-06, "loss": 0.0648, "step": 1053 }, { "epoch": 0.22091804653112554, "grad_norm": 0.1512385904788971, "learning_rate": 9.077558767448999e-06, "loss": 0.0677, "step": 1054 }, { "epoch": 0.22112764619576608, "grad_norm": 0.1435592770576477, "learning_rate": 9.075593091457692e-06, "loss": 0.064, "step": 1055 }, { "epoch": 0.22133724586040662, "grad_norm": 0.14917264878749847, "learning_rate": 9.073625536618819e-06, "loss": 0.0661, "step": 1056 }, { "epoch": 0.22154684552504716, "grad_norm": 0.17828944325447083, "learning_rate": 9.071656103839419e-06, "loss": 0.0683, "step": 1057 }, { "epoch": 0.2217564451896877, "grad_norm": 0.13577091693878174, "learning_rate": 9.069684794027401e-06, "loss": 0.0659, "step": 1058 }, { "epoch": 0.22196604485432822, "grad_norm": 0.1218876987695694, "learning_rate": 9.067711608091536e-06, "loss": 0.0655, "step": 1059 }, { "epoch": 0.22217564451896876, "grad_norm": 0.14669421315193176, "learning_rate": 9.065736546941467e-06, "loss": 0.0669, "step": 1060 }, { "epoch": 0.2223852441836093, "grad_norm": 0.12597538530826569, "learning_rate": 9.063759611487693e-06, "loss": 0.0673, "step": 1061 }, { "epoch": 0.22259484384824985, "grad_norm": 0.13924571871757507, "learning_rate": 9.061780802641582e-06, "loss": 0.0628, "step": 1062 }, { "epoch": 0.2228044435128904, "grad_norm": 0.14818163216114044, "learning_rate": 9.059800121315365e-06, "loss": 0.0656, "step": 1063 }, { "epoch": 0.22301404317753093, "grad_norm": 0.13732635974884033, "learning_rate": 9.057817568422135e-06, "loss": 0.0666, "step": 1064 }, { "epoch": 0.22322364284217144, "grad_norm": 0.1377662718296051, "learning_rate": 9.05583314487585e-06, "loss": 0.0695, "step": 1065 }, { "epoch": 0.22343324250681199, "grad_norm": 0.14314699172973633, "learning_rate": 9.053846851591328e-06, "loss": 0.0689, "step": 1066 }, { "epoch": 0.22364284217145253, "grad_norm": 0.16072338819503784, "learning_rate": 9.05185868948425e-06, "loss": 0.0672, "step": 1067 }, { "epoch": 0.22385244183609307, "grad_norm": 0.1533009558916092, "learning_rate": 9.049868659471156e-06, "loss": 0.0703, "step": 1068 }, { "epoch": 0.2240620415007336, "grad_norm": 0.16292600333690643, "learning_rate": 9.047876762469451e-06, "loss": 0.0701, "step": 1069 }, { "epoch": 0.22427164116537412, "grad_norm": 0.19941456615924835, "learning_rate": 9.0458829993974e-06, "loss": 0.0683, "step": 1070 }, { "epoch": 0.22448124083001467, "grad_norm": 0.22352056205272675, "learning_rate": 9.043887371174128e-06, "loss": 0.0661, "step": 1071 }, { "epoch": 0.2246908404946552, "grad_norm": 0.24914206564426422, "learning_rate": 9.041889878719617e-06, "loss": 0.0674, "step": 1072 }, { "epoch": 0.22490044015929575, "grad_norm": 0.21870644390583038, "learning_rate": 9.03989052295471e-06, "loss": 0.0669, "step": 1073 }, { "epoch": 0.2251100398239363, "grad_norm": 0.16291193664073944, "learning_rate": 9.037889304801112e-06, "loss": 0.0678, "step": 1074 }, { "epoch": 0.2253196394885768, "grad_norm": 0.12002480030059814, "learning_rate": 9.035886225181384e-06, "loss": 0.0663, "step": 1075 }, { "epoch": 0.22552923915321735, "grad_norm": 0.12656140327453613, "learning_rate": 9.033881285018945e-06, "loss": 0.0649, "step": 1076 }, { "epoch": 0.2257388388178579, "grad_norm": 0.17447209358215332, "learning_rate": 9.031874485238068e-06, "loss": 0.0653, "step": 1077 }, { "epoch": 0.22594843848249843, "grad_norm": 0.1942858248949051, "learning_rate": 9.029865826763895e-06, "loss": 0.068, "step": 1078 }, { "epoch": 0.22615803814713897, "grad_norm": 0.16547489166259766, "learning_rate": 9.027855310522411e-06, "loss": 0.0645, "step": 1079 }, { "epoch": 0.2263676378117795, "grad_norm": 0.1284940242767334, "learning_rate": 9.025842937440466e-06, "loss": 0.0648, "step": 1080 }, { "epoch": 0.22657723747642003, "grad_norm": 0.12198358029127121, "learning_rate": 9.023828708445762e-06, "loss": 0.0707, "step": 1081 }, { "epoch": 0.22678683714106057, "grad_norm": 0.13251115381717682, "learning_rate": 9.02181262446686e-06, "loss": 0.069, "step": 1082 }, { "epoch": 0.2269964368057011, "grad_norm": 0.13787177205085754, "learning_rate": 9.019794686433174e-06, "loss": 0.0696, "step": 1083 }, { "epoch": 0.22720603647034165, "grad_norm": 0.12242651730775833, "learning_rate": 9.017774895274971e-06, "loss": 0.066, "step": 1084 }, { "epoch": 0.2274156361349822, "grad_norm": 0.11319856345653534, "learning_rate": 9.015753251923378e-06, "loss": 0.0644, "step": 1085 }, { "epoch": 0.2276252357996227, "grad_norm": 0.11255019158124924, "learning_rate": 9.013729757310368e-06, "loss": 0.0681, "step": 1086 }, { "epoch": 0.22783483546426325, "grad_norm": 0.10680454969406128, "learning_rate": 9.011704412368776e-06, "loss": 0.0675, "step": 1087 }, { "epoch": 0.2280444351289038, "grad_norm": 0.10743577778339386, "learning_rate": 9.00967721803228e-06, "loss": 0.0701, "step": 1088 }, { "epoch": 0.22825403479354434, "grad_norm": 0.09966851025819778, "learning_rate": 9.007648175235421e-06, "loss": 0.0657, "step": 1089 }, { "epoch": 0.22846363445818488, "grad_norm": 0.10842377692461014, "learning_rate": 9.005617284913586e-06, "loss": 0.0646, "step": 1090 }, { "epoch": 0.2286732341228254, "grad_norm": 0.12540468573570251, "learning_rate": 9.003584548003015e-06, "loss": 0.0661, "step": 1091 }, { "epoch": 0.22888283378746593, "grad_norm": 0.13884596526622772, "learning_rate": 9.001549965440798e-06, "loss": 0.0704, "step": 1092 }, { "epoch": 0.22909243345210648, "grad_norm": 0.16879726946353912, "learning_rate": 8.99951353816488e-06, "loss": 0.0625, "step": 1093 }, { "epoch": 0.22930203311674702, "grad_norm": 0.19034424424171448, "learning_rate": 8.99747526711405e-06, "loss": 0.0672, "step": 1094 }, { "epoch": 0.22951163278138756, "grad_norm": 0.18537910282611847, "learning_rate": 8.995435153227951e-06, "loss": 0.0687, "step": 1095 }, { "epoch": 0.22972123244602807, "grad_norm": 0.15541553497314453, "learning_rate": 8.993393197447078e-06, "loss": 0.0692, "step": 1096 }, { "epoch": 0.22993083211066861, "grad_norm": 0.12911075353622437, "learning_rate": 8.991349400712772e-06, "loss": 0.0684, "step": 1097 }, { "epoch": 0.23014043177530916, "grad_norm": 0.12497539818286896, "learning_rate": 8.989303763967218e-06, "loss": 0.0694, "step": 1098 }, { "epoch": 0.2303500314399497, "grad_norm": 0.12200130522251129, "learning_rate": 8.98725628815346e-06, "loss": 0.0662, "step": 1099 }, { "epoch": 0.23055963110459024, "grad_norm": 0.14163267612457275, "learning_rate": 8.985206974215381e-06, "loss": 0.0688, "step": 1100 }, { "epoch": 0.23076923076923078, "grad_norm": 0.18513454496860504, "learning_rate": 8.983155823097713e-06, "loss": 0.0702, "step": 1101 }, { "epoch": 0.2309788304338713, "grad_norm": 0.20042511820793152, "learning_rate": 8.98110283574604e-06, "loss": 0.0672, "step": 1102 }, { "epoch": 0.23118843009851184, "grad_norm": 0.1693289577960968, "learning_rate": 8.979048013106786e-06, "loss": 0.0691, "step": 1103 }, { "epoch": 0.23139802976315238, "grad_norm": 0.1325998604297638, "learning_rate": 8.976991356127225e-06, "loss": 0.0681, "step": 1104 }, { "epoch": 0.23160762942779292, "grad_norm": 0.14559799432754517, "learning_rate": 8.974932865755472e-06, "loss": 0.069, "step": 1105 }, { "epoch": 0.23181722909243346, "grad_norm": 0.1867041438817978, "learning_rate": 8.972872542940496e-06, "loss": 0.0691, "step": 1106 }, { "epoch": 0.23202682875707398, "grad_norm": 0.18667063117027283, "learning_rate": 8.970810388632102e-06, "loss": 0.0693, "step": 1107 }, { "epoch": 0.23223642842171452, "grad_norm": 0.149042010307312, "learning_rate": 8.968746403780945e-06, "loss": 0.0651, "step": 1108 }, { "epoch": 0.23244602808635506, "grad_norm": 0.11694446206092834, "learning_rate": 8.96668058933852e-06, "loss": 0.0658, "step": 1109 }, { "epoch": 0.2326556277509956, "grad_norm": 0.12780682742595673, "learning_rate": 8.964612946257167e-06, "loss": 0.0652, "step": 1110 }, { "epoch": 0.23286522741563614, "grad_norm": 0.17143504321575165, "learning_rate": 8.962543475490068e-06, "loss": 0.0654, "step": 1111 }, { "epoch": 0.23307482708027666, "grad_norm": 0.1847982108592987, "learning_rate": 8.960472177991252e-06, "loss": 0.0667, "step": 1112 }, { "epoch": 0.2332844267449172, "grad_norm": 0.1703869104385376, "learning_rate": 8.958399054715583e-06, "loss": 0.0655, "step": 1113 }, { "epoch": 0.23349402640955774, "grad_norm": 0.14903445541858673, "learning_rate": 8.956324106618773e-06, "loss": 0.066, "step": 1114 }, { "epoch": 0.23370362607419828, "grad_norm": 0.13800345361232758, "learning_rate": 8.954247334657371e-06, "loss": 0.0662, "step": 1115 }, { "epoch": 0.23391322573883883, "grad_norm": 0.14202521741390228, "learning_rate": 8.952168739788769e-06, "loss": 0.0662, "step": 1116 }, { "epoch": 0.23412282540347937, "grad_norm": 0.1242147833108902, "learning_rate": 8.9500883229712e-06, "loss": 0.0659, "step": 1117 }, { "epoch": 0.23433242506811988, "grad_norm": 0.13212472200393677, "learning_rate": 8.948006085163735e-06, "loss": 0.0659, "step": 1118 }, { "epoch": 0.23454202473276042, "grad_norm": 0.13743017613887787, "learning_rate": 8.945922027326283e-06, "loss": 0.0653, "step": 1119 }, { "epoch": 0.23475162439740097, "grad_norm": 0.12877334654331207, "learning_rate": 8.943836150419596e-06, "loss": 0.0652, "step": 1120 }, { "epoch": 0.2349612240620415, "grad_norm": 0.13227277994155884, "learning_rate": 8.941748455405264e-06, "loss": 0.0677, "step": 1121 }, { "epoch": 0.23517082372668205, "grad_norm": 0.1250903308391571, "learning_rate": 8.939658943245712e-06, "loss": 0.0655, "step": 1122 }, { "epoch": 0.23538042339132256, "grad_norm": 0.10851810872554779, "learning_rate": 8.937567614904205e-06, "loss": 0.0631, "step": 1123 }, { "epoch": 0.2355900230559631, "grad_norm": 0.13291695713996887, "learning_rate": 8.935474471344848e-06, "loss": 0.0655, "step": 1124 }, { "epoch": 0.23579962272060365, "grad_norm": 0.16525736451148987, "learning_rate": 8.933379513532575e-06, "loss": 0.0637, "step": 1125 }, { "epoch": 0.2360092223852442, "grad_norm": 0.20342209935188293, "learning_rate": 8.931282742433163e-06, "loss": 0.066, "step": 1126 }, { "epoch": 0.23621882204988473, "grad_norm": 0.24097025394439697, "learning_rate": 8.929184159013225e-06, "loss": 0.0687, "step": 1127 }, { "epoch": 0.23642842171452524, "grad_norm": 0.23975655436515808, "learning_rate": 8.927083764240205e-06, "loss": 0.0708, "step": 1128 }, { "epoch": 0.23663802137916579, "grad_norm": 0.20488587021827698, "learning_rate": 8.924981559082386e-06, "loss": 0.0659, "step": 1129 }, { "epoch": 0.23684762104380633, "grad_norm": 0.16722434759140015, "learning_rate": 8.922877544508882e-06, "loss": 0.0665, "step": 1130 }, { "epoch": 0.23705722070844687, "grad_norm": 0.15572957694530487, "learning_rate": 8.920771721489646e-06, "loss": 0.0673, "step": 1131 }, { "epoch": 0.2372668203730874, "grad_norm": 0.13812050223350525, "learning_rate": 8.91866409099546e-06, "loss": 0.0702, "step": 1132 }, { "epoch": 0.23747642003772795, "grad_norm": 0.12852191925048828, "learning_rate": 8.916554653997943e-06, "loss": 0.0691, "step": 1133 }, { "epoch": 0.23768601970236847, "grad_norm": 0.14980794489383698, "learning_rate": 8.914443411469544e-06, "loss": 0.0705, "step": 1134 }, { "epoch": 0.237895619367009, "grad_norm": 0.15687119960784912, "learning_rate": 8.912330364383546e-06, "loss": 0.0716, "step": 1135 }, { "epoch": 0.23810521903164955, "grad_norm": 0.13968421518802643, "learning_rate": 8.91021551371406e-06, "loss": 0.0679, "step": 1136 }, { "epoch": 0.2383148186962901, "grad_norm": 0.1380040943622589, "learning_rate": 8.908098860436036e-06, "loss": 0.0624, "step": 1137 }, { "epoch": 0.23852441836093063, "grad_norm": 0.13776816427707672, "learning_rate": 8.90598040552525e-06, "loss": 0.0655, "step": 1138 }, { "epoch": 0.23873401802557115, "grad_norm": 0.11478232592344284, "learning_rate": 8.903860149958308e-06, "loss": 0.0681, "step": 1139 }, { "epoch": 0.2389436176902117, "grad_norm": 0.1092422679066658, "learning_rate": 8.901738094712648e-06, "loss": 0.0639, "step": 1140 }, { "epoch": 0.23915321735485223, "grad_norm": 0.11861050873994827, "learning_rate": 8.899614240766537e-06, "loss": 0.0646, "step": 1141 }, { "epoch": 0.23936281701949277, "grad_norm": 0.11883353441953659, "learning_rate": 8.89748858909907e-06, "loss": 0.0654, "step": 1142 }, { "epoch": 0.23957241668413332, "grad_norm": 0.11742465198040009, "learning_rate": 8.895361140690173e-06, "loss": 0.0655, "step": 1143 }, { "epoch": 0.23978201634877383, "grad_norm": 0.1311412900686264, "learning_rate": 8.8932318965206e-06, "loss": 0.0668, "step": 1144 }, { "epoch": 0.23999161601341437, "grad_norm": 0.16970883309841156, "learning_rate": 8.89110085757193e-06, "loss": 0.0681, "step": 1145 }, { "epoch": 0.2402012156780549, "grad_norm": 0.18959131836891174, "learning_rate": 8.888968024826575e-06, "loss": 0.0645, "step": 1146 }, { "epoch": 0.24041081534269546, "grad_norm": 0.17889679968357086, "learning_rate": 8.886833399267767e-06, "loss": 0.0666, "step": 1147 }, { "epoch": 0.240620415007336, "grad_norm": 0.1588187962770462, "learning_rate": 8.88469698187957e-06, "loss": 0.0651, "step": 1148 }, { "epoch": 0.2408300146719765, "grad_norm": 0.13770383596420288, "learning_rate": 8.88255877364687e-06, "loss": 0.0638, "step": 1149 }, { "epoch": 0.24103961433661705, "grad_norm": 0.12436515837907791, "learning_rate": 8.880418775555382e-06, "loss": 0.0687, "step": 1150 }, { "epoch": 0.2412492140012576, "grad_norm": 0.13518837094306946, "learning_rate": 8.878276988591645e-06, "loss": 0.071, "step": 1151 }, { "epoch": 0.24145881366589814, "grad_norm": 0.14451143145561218, "learning_rate": 8.876133413743023e-06, "loss": 0.0668, "step": 1152 }, { "epoch": 0.24166841333053868, "grad_norm": 0.13057959079742432, "learning_rate": 8.873988051997702e-06, "loss": 0.066, "step": 1153 }, { "epoch": 0.24187801299517922, "grad_norm": 0.11131347715854645, "learning_rate": 8.871840904344692e-06, "loss": 0.0641, "step": 1154 }, { "epoch": 0.24208761265981973, "grad_norm": 0.116900235414505, "learning_rate": 8.86969197177383e-06, "loss": 0.0639, "step": 1155 }, { "epoch": 0.24229721232446028, "grad_norm": 0.12555626034736633, "learning_rate": 8.867541255275774e-06, "loss": 0.0681, "step": 1156 }, { "epoch": 0.24250681198910082, "grad_norm": 0.12001737207174301, "learning_rate": 8.865388755842002e-06, "loss": 0.0634, "step": 1157 }, { "epoch": 0.24271641165374136, "grad_norm": 0.12198159843683243, "learning_rate": 8.863234474464817e-06, "loss": 0.0657, "step": 1158 }, { "epoch": 0.2429260113183819, "grad_norm": 0.14027060568332672, "learning_rate": 8.86107841213734e-06, "loss": 0.067, "step": 1159 }, { "epoch": 0.24313561098302242, "grad_norm": 0.1360728144645691, "learning_rate": 8.85892056985352e-06, "loss": 0.0653, "step": 1160 }, { "epoch": 0.24334521064766296, "grad_norm": 0.11649616807699203, "learning_rate": 8.856760948608117e-06, "loss": 0.0665, "step": 1161 }, { "epoch": 0.2435548103123035, "grad_norm": 0.11606436967849731, "learning_rate": 8.854599549396717e-06, "loss": 0.0644, "step": 1162 }, { "epoch": 0.24376440997694404, "grad_norm": 0.13204872608184814, "learning_rate": 8.852436373215727e-06, "loss": 0.0654, "step": 1163 }, { "epoch": 0.24397400964158458, "grad_norm": 0.1417747139930725, "learning_rate": 8.850271421062368e-06, "loss": 0.0639, "step": 1164 }, { "epoch": 0.2441836093062251, "grad_norm": 0.1318253129720688, "learning_rate": 8.848104693934683e-06, "loss": 0.065, "step": 1165 }, { "epoch": 0.24439320897086564, "grad_norm": 0.11403854191303253, "learning_rate": 8.845936192831536e-06, "loss": 0.065, "step": 1166 }, { "epoch": 0.24460280863550618, "grad_norm": 0.11362165957689285, "learning_rate": 8.8437659187526e-06, "loss": 0.063, "step": 1167 }, { "epoch": 0.24481240830014672, "grad_norm": 0.11161333322525024, "learning_rate": 8.841593872698377e-06, "loss": 0.0663, "step": 1168 }, { "epoch": 0.24502200796478726, "grad_norm": 0.11067195981740952, "learning_rate": 8.839420055670175e-06, "loss": 0.0646, "step": 1169 }, { "epoch": 0.2452316076294278, "grad_norm": 0.13394485414028168, "learning_rate": 8.837244468670126e-06, "loss": 0.0637, "step": 1170 }, { "epoch": 0.24544120729406832, "grad_norm": 0.1596195250749588, "learning_rate": 8.835067112701172e-06, "loss": 0.0679, "step": 1171 }, { "epoch": 0.24565080695870886, "grad_norm": 0.1561581790447235, "learning_rate": 8.832887988767076e-06, "loss": 0.066, "step": 1172 }, { "epoch": 0.2458604066233494, "grad_norm": 0.15718278288841248, "learning_rate": 8.830707097872413e-06, "loss": 0.0637, "step": 1173 }, { "epoch": 0.24607000628798995, "grad_norm": 0.18930961191654205, "learning_rate": 8.828524441022575e-06, "loss": 0.0649, "step": 1174 }, { "epoch": 0.2462796059526305, "grad_norm": 0.20854079723358154, "learning_rate": 8.826340019223765e-06, "loss": 0.0711, "step": 1175 }, { "epoch": 0.246489205617271, "grad_norm": 0.17393162846565247, "learning_rate": 8.824153833483001e-06, "loss": 0.0639, "step": 1176 }, { "epoch": 0.24669880528191154, "grad_norm": 0.12246709316968918, "learning_rate": 8.821965884808112e-06, "loss": 0.0641, "step": 1177 }, { "epoch": 0.24690840494655208, "grad_norm": 0.11515267938375473, "learning_rate": 8.819776174207746e-06, "loss": 0.0629, "step": 1178 }, { "epoch": 0.24711800461119263, "grad_norm": 0.14278475940227509, "learning_rate": 8.817584702691358e-06, "loss": 0.0679, "step": 1179 }, { "epoch": 0.24732760427583317, "grad_norm": 0.17082667350769043, "learning_rate": 8.815391471269212e-06, "loss": 0.0655, "step": 1180 }, { "epoch": 0.24753720394047368, "grad_norm": 0.17258518934249878, "learning_rate": 8.813196480952393e-06, "loss": 0.0674, "step": 1181 }, { "epoch": 0.24774680360511422, "grad_norm": 0.15490064024925232, "learning_rate": 8.810999732752788e-06, "loss": 0.0649, "step": 1182 }, { "epoch": 0.24795640326975477, "grad_norm": 0.1470610648393631, "learning_rate": 8.808801227683095e-06, "loss": 0.0647, "step": 1183 }, { "epoch": 0.2481660029343953, "grad_norm": 0.14642740786075592, "learning_rate": 8.80660096675683e-06, "loss": 0.066, "step": 1184 }, { "epoch": 0.24837560259903585, "grad_norm": 0.13396520912647247, "learning_rate": 8.80439895098831e-06, "loss": 0.0664, "step": 1185 }, { "epoch": 0.2485852022636764, "grad_norm": 0.12936848402023315, "learning_rate": 8.802195181392663e-06, "loss": 0.0659, "step": 1186 }, { "epoch": 0.2487948019283169, "grad_norm": 0.1439359188079834, "learning_rate": 8.799989658985828e-06, "loss": 0.0656, "step": 1187 }, { "epoch": 0.24900440159295745, "grad_norm": 0.14836929738521576, "learning_rate": 8.797782384784549e-06, "loss": 0.0661, "step": 1188 }, { "epoch": 0.249214001257598, "grad_norm": 0.12386839091777802, "learning_rate": 8.79557335980638e-06, "loss": 0.0694, "step": 1189 }, { "epoch": 0.24942360092223853, "grad_norm": 0.1206989586353302, "learning_rate": 8.793362585069677e-06, "loss": 0.0644, "step": 1190 }, { "epoch": 0.24963320058687907, "grad_norm": 0.13306036591529846, "learning_rate": 8.791150061593615e-06, "loss": 0.066, "step": 1191 }, { "epoch": 0.2498428002515196, "grad_norm": 0.12486235052347183, "learning_rate": 8.78893579039816e-06, "loss": 0.0662, "step": 1192 }, { "epoch": 0.25005239991616013, "grad_norm": 0.12596681714057922, "learning_rate": 8.786719772504092e-06, "loss": 0.0653, "step": 1193 }, { "epoch": 0.2502619995808007, "grad_norm": 0.1434643715620041, "learning_rate": 8.784502008932998e-06, "loss": 0.0651, "step": 1194 }, { "epoch": 0.2504715992454412, "grad_norm": 0.13905848562717438, "learning_rate": 8.782282500707262e-06, "loss": 0.0611, "step": 1195 }, { "epoch": 0.2506811989100817, "grad_norm": 0.14193584024906158, "learning_rate": 8.780061248850078e-06, "loss": 0.064, "step": 1196 }, { "epoch": 0.2508907985747223, "grad_norm": 0.15785372257232666, "learning_rate": 8.777838254385444e-06, "loss": 0.065, "step": 1197 }, { "epoch": 0.2511003982393628, "grad_norm": 0.16986316442489624, "learning_rate": 8.775613518338161e-06, "loss": 0.0641, "step": 1198 }, { "epoch": 0.2513099979040034, "grad_norm": 0.16595768928527832, "learning_rate": 8.773387041733829e-06, "loss": 0.0638, "step": 1199 }, { "epoch": 0.2515195975686439, "grad_norm": 0.15224842727184296, "learning_rate": 8.771158825598855e-06, "loss": 0.0669, "step": 1200 }, { "epoch": 0.2517291972332844, "grad_norm": 0.14295248687267303, "learning_rate": 8.768928870960447e-06, "loss": 0.0642, "step": 1201 }, { "epoch": 0.251938796897925, "grad_norm": 0.1518334299325943, "learning_rate": 8.766697178846611e-06, "loss": 0.0665, "step": 1202 }, { "epoch": 0.2521483965625655, "grad_norm": 0.15003719925880432, "learning_rate": 8.764463750286158e-06, "loss": 0.0665, "step": 1203 }, { "epoch": 0.25235799622720606, "grad_norm": 0.13841992616653442, "learning_rate": 8.762228586308697e-06, "loss": 0.0639, "step": 1204 }, { "epoch": 0.2525675958918466, "grad_norm": 0.13648587465286255, "learning_rate": 8.75999168794464e-06, "loss": 0.0656, "step": 1205 }, { "epoch": 0.2527771955564871, "grad_norm": 0.1389138102531433, "learning_rate": 8.757753056225197e-06, "loss": 0.0641, "step": 1206 }, { "epoch": 0.25298679522112766, "grad_norm": 0.15803200006484985, "learning_rate": 8.755512692182376e-06, "loss": 0.0641, "step": 1207 }, { "epoch": 0.2531963948857682, "grad_norm": 0.17170608043670654, "learning_rate": 8.753270596848982e-06, "loss": 0.0642, "step": 1208 }, { "epoch": 0.25340599455040874, "grad_norm": 0.1522701382637024, "learning_rate": 8.751026771258622e-06, "loss": 0.0679, "step": 1209 }, { "epoch": 0.25361559421504926, "grad_norm": 0.137386754155159, "learning_rate": 8.748781216445702e-06, "loss": 0.0632, "step": 1210 }, { "epoch": 0.25382519387968977, "grad_norm": 0.15736961364746094, "learning_rate": 8.746533933445418e-06, "loss": 0.0675, "step": 1211 }, { "epoch": 0.25403479354433034, "grad_norm": 0.15517348051071167, "learning_rate": 8.74428492329377e-06, "loss": 0.0628, "step": 1212 }, { "epoch": 0.25424439320897085, "grad_norm": 0.14377760887145996, "learning_rate": 8.74203418702755e-06, "loss": 0.0655, "step": 1213 }, { "epoch": 0.2544539928736114, "grad_norm": 0.13686007261276245, "learning_rate": 8.739781725684346e-06, "loss": 0.0634, "step": 1214 }, { "epoch": 0.25466359253825194, "grad_norm": 0.1474810093641281, "learning_rate": 8.737527540302543e-06, "loss": 0.0695, "step": 1215 }, { "epoch": 0.25487319220289245, "grad_norm": 0.16937799751758575, "learning_rate": 8.735271631921322e-06, "loss": 0.0691, "step": 1216 }, { "epoch": 0.255082791867533, "grad_norm": 0.1835523098707199, "learning_rate": 8.733014001580656e-06, "loss": 0.0642, "step": 1217 }, { "epoch": 0.25529239153217353, "grad_norm": 0.1800583302974701, "learning_rate": 8.730754650321307e-06, "loss": 0.0675, "step": 1218 }, { "epoch": 0.2555019911968141, "grad_norm": 0.14136351644992828, "learning_rate": 8.728493579184841e-06, "loss": 0.0669, "step": 1219 }, { "epoch": 0.2557115908614546, "grad_norm": 0.1205543577671051, "learning_rate": 8.72623078921361e-06, "loss": 0.0643, "step": 1220 }, { "epoch": 0.25592119052609513, "grad_norm": 0.13225074112415314, "learning_rate": 8.723966281450758e-06, "loss": 0.0618, "step": 1221 }, { "epoch": 0.2561307901907357, "grad_norm": 0.18301644921302795, "learning_rate": 8.721700056940224e-06, "loss": 0.0663, "step": 1222 }, { "epoch": 0.2563403898553762, "grad_norm": 0.1921045482158661, "learning_rate": 8.719432116726738e-06, "loss": 0.0687, "step": 1223 }, { "epoch": 0.2565499895200168, "grad_norm": 0.15844593942165375, "learning_rate": 8.717162461855817e-06, "loss": 0.0646, "step": 1224 }, { "epoch": 0.2567595891846573, "grad_norm": 0.16561459004878998, "learning_rate": 8.714891093373774e-06, "loss": 0.0662, "step": 1225 }, { "epoch": 0.2569691888492978, "grad_norm": 0.1608954221010208, "learning_rate": 8.712618012327709e-06, "loss": 0.0681, "step": 1226 }, { "epoch": 0.2571787885139384, "grad_norm": 0.1320895552635193, "learning_rate": 8.710343219765512e-06, "loss": 0.0708, "step": 1227 }, { "epoch": 0.2573883881785789, "grad_norm": 0.12631681561470032, "learning_rate": 8.70806671673586e-06, "loss": 0.065, "step": 1228 }, { "epoch": 0.25759798784321947, "grad_norm": 0.1299394816160202, "learning_rate": 8.705788504288222e-06, "loss": 0.0662, "step": 1229 }, { "epoch": 0.25780758750786, "grad_norm": 0.14543578028678894, "learning_rate": 8.703508583472855e-06, "loss": 0.0628, "step": 1230 }, { "epoch": 0.25801718717250055, "grad_norm": 0.1560242772102356, "learning_rate": 8.701226955340797e-06, "loss": 0.063, "step": 1231 }, { "epoch": 0.25822678683714106, "grad_norm": 0.15224745869636536, "learning_rate": 8.698943620943885e-06, "loss": 0.0642, "step": 1232 }, { "epoch": 0.2584363865017816, "grad_norm": 0.17773905396461487, "learning_rate": 8.696658581334728e-06, "loss": 0.0693, "step": 1233 }, { "epoch": 0.25864598616642215, "grad_norm": 0.18498320877552032, "learning_rate": 8.694371837566737e-06, "loss": 0.0649, "step": 1234 }, { "epoch": 0.25885558583106266, "grad_norm": 0.15482178330421448, "learning_rate": 8.692083390694095e-06, "loss": 0.0639, "step": 1235 }, { "epoch": 0.25906518549570323, "grad_norm": 0.1407817006111145, "learning_rate": 8.689793241771775e-06, "loss": 0.0639, "step": 1236 }, { "epoch": 0.25927478516034375, "grad_norm": 0.11920575052499771, "learning_rate": 8.68750139185554e-06, "loss": 0.0641, "step": 1237 }, { "epoch": 0.25948438482498426, "grad_norm": 0.12093566358089447, "learning_rate": 8.685207842001928e-06, "loss": 0.0656, "step": 1238 }, { "epoch": 0.25969398448962483, "grad_norm": 0.14436878263950348, "learning_rate": 8.682912593268265e-06, "loss": 0.0635, "step": 1239 }, { "epoch": 0.25990358415426534, "grad_norm": 0.17225278913974762, "learning_rate": 8.680615646712663e-06, "loss": 0.0664, "step": 1240 }, { "epoch": 0.2601131838189059, "grad_norm": 0.16033266484737396, "learning_rate": 8.678317003394013e-06, "loss": 0.0665, "step": 1241 }, { "epoch": 0.2603227834835464, "grad_norm": 0.13092049956321716, "learning_rate": 8.676016664371986e-06, "loss": 0.0654, "step": 1242 }, { "epoch": 0.26053238314818694, "grad_norm": 0.12013121694326401, "learning_rate": 8.673714630707043e-06, "loss": 0.0668, "step": 1243 }, { "epoch": 0.2607419828128275, "grad_norm": 0.10177928954362869, "learning_rate": 8.671410903460416e-06, "loss": 0.0633, "step": 1244 }, { "epoch": 0.260951582477468, "grad_norm": 0.11560986191034317, "learning_rate": 8.669105483694126e-06, "loss": 0.0653, "step": 1245 }, { "epoch": 0.2611611821421086, "grad_norm": 0.1309051811695099, "learning_rate": 8.666798372470971e-06, "loss": 0.0649, "step": 1246 }, { "epoch": 0.2613707818067491, "grad_norm": 0.13534289598464966, "learning_rate": 8.664489570854526e-06, "loss": 0.0646, "step": 1247 }, { "epoch": 0.2615803814713896, "grad_norm": 0.13940168917179108, "learning_rate": 8.662179079909152e-06, "loss": 0.0652, "step": 1248 }, { "epoch": 0.2617899811360302, "grad_norm": 0.16176725924015045, "learning_rate": 8.659866900699983e-06, "loss": 0.0652, "step": 1249 }, { "epoch": 0.2619995808006707, "grad_norm": 0.18724969029426575, "learning_rate": 8.657553034292932e-06, "loss": 0.0638, "step": 1250 }, { "epoch": 0.2622091804653113, "grad_norm": 0.198954775929451, "learning_rate": 8.65523748175469e-06, "loss": 0.0639, "step": 1251 }, { "epoch": 0.2624187801299518, "grad_norm": 0.17343983054161072, "learning_rate": 8.652920244152732e-06, "loss": 0.0675, "step": 1252 }, { "epoch": 0.2626283797945923, "grad_norm": 0.12653297185897827, "learning_rate": 8.650601322555299e-06, "loss": 0.0635, "step": 1253 }, { "epoch": 0.2628379794592329, "grad_norm": 0.16065514087677002, "learning_rate": 8.648280718031412e-06, "loss": 0.0663, "step": 1254 }, { "epoch": 0.2630475791238734, "grad_norm": 0.17681656777858734, "learning_rate": 8.645958431650875e-06, "loss": 0.0636, "step": 1255 }, { "epoch": 0.26325717878851396, "grad_norm": 0.13592669367790222, "learning_rate": 8.643634464484257e-06, "loss": 0.0646, "step": 1256 }, { "epoch": 0.26346677845315447, "grad_norm": 0.1315130740404129, "learning_rate": 8.641308817602908e-06, "loss": 0.0654, "step": 1257 }, { "epoch": 0.263676378117795, "grad_norm": 0.1273263692855835, "learning_rate": 8.63898149207895e-06, "loss": 0.0675, "step": 1258 }, { "epoch": 0.26388597778243555, "grad_norm": 0.10599275678396225, "learning_rate": 8.636652488985282e-06, "loss": 0.0647, "step": 1259 }, { "epoch": 0.26409557744707607, "grad_norm": 0.11827687919139862, "learning_rate": 8.634321809395569e-06, "loss": 0.0642, "step": 1260 }, { "epoch": 0.26430517711171664, "grad_norm": 0.14330358803272247, "learning_rate": 8.631989454384258e-06, "loss": 0.063, "step": 1261 }, { "epoch": 0.26451477677635715, "grad_norm": 0.13524498045444489, "learning_rate": 8.629655425026564e-06, "loss": 0.0629, "step": 1262 }, { "epoch": 0.2647243764409977, "grad_norm": 0.12704388797283173, "learning_rate": 8.627319722398471e-06, "loss": 0.062, "step": 1263 }, { "epoch": 0.26493397610563824, "grad_norm": 0.13736125826835632, "learning_rate": 8.624982347576741e-06, "loss": 0.0634, "step": 1264 }, { "epoch": 0.26514357577027875, "grad_norm": 0.12946844100952148, "learning_rate": 8.622643301638902e-06, "loss": 0.0662, "step": 1265 }, { "epoch": 0.2653531754349193, "grad_norm": 0.1286906898021698, "learning_rate": 8.620302585663252e-06, "loss": 0.0663, "step": 1266 }, { "epoch": 0.26556277509955983, "grad_norm": 0.12868361175060272, "learning_rate": 8.617960200728863e-06, "loss": 0.0593, "step": 1267 }, { "epoch": 0.2657723747642004, "grad_norm": 0.12836746871471405, "learning_rate": 8.615616147915573e-06, "loss": 0.0656, "step": 1268 }, { "epoch": 0.2659819744288409, "grad_norm": 0.14441126585006714, "learning_rate": 8.613270428303991e-06, "loss": 0.0686, "step": 1269 }, { "epoch": 0.26619157409348143, "grad_norm": 0.14020347595214844, "learning_rate": 8.61092304297549e-06, "loss": 0.0651, "step": 1270 }, { "epoch": 0.266401173758122, "grad_norm": 0.125614196062088, "learning_rate": 8.608573993012217e-06, "loss": 0.0661, "step": 1271 }, { "epoch": 0.2666107734227625, "grad_norm": 0.14085769653320312, "learning_rate": 8.606223279497081e-06, "loss": 0.0643, "step": 1272 }, { "epoch": 0.2668203730874031, "grad_norm": 0.14944879710674286, "learning_rate": 8.603870903513765e-06, "loss": 0.0641, "step": 1273 }, { "epoch": 0.2670299727520436, "grad_norm": 0.13971124589443207, "learning_rate": 8.601516866146711e-06, "loss": 0.0644, "step": 1274 }, { "epoch": 0.2672395724166841, "grad_norm": 0.1283096969127655, "learning_rate": 8.599161168481127e-06, "loss": 0.0622, "step": 1275 }, { "epoch": 0.2674491720813247, "grad_norm": 0.13809433579444885, "learning_rate": 8.596803811602994e-06, "loss": 0.0658, "step": 1276 }, { "epoch": 0.2676587717459652, "grad_norm": 0.13916701078414917, "learning_rate": 8.594444796599051e-06, "loss": 0.0636, "step": 1277 }, { "epoch": 0.26786837141060577, "grad_norm": 0.1267821043729782, "learning_rate": 8.592084124556803e-06, "loss": 0.0632, "step": 1278 }, { "epoch": 0.2680779710752463, "grad_norm": 0.14250284433364868, "learning_rate": 8.589721796564521e-06, "loss": 0.0617, "step": 1279 }, { "epoch": 0.2682875707398868, "grad_norm": 0.14109215140342712, "learning_rate": 8.587357813711234e-06, "loss": 0.0637, "step": 1280 }, { "epoch": 0.26849717040452736, "grad_norm": 0.14766204357147217, "learning_rate": 8.584992177086742e-06, "loss": 0.0653, "step": 1281 }, { "epoch": 0.2687067700691679, "grad_norm": 0.1617199033498764, "learning_rate": 8.5826248877816e-06, "loss": 0.0659, "step": 1282 }, { "epoch": 0.26891636973380845, "grad_norm": 0.17226214706897736, "learning_rate": 8.580255946887129e-06, "loss": 0.0618, "step": 1283 }, { "epoch": 0.26912596939844896, "grad_norm": 0.1840105652809143, "learning_rate": 8.577885355495412e-06, "loss": 0.0641, "step": 1284 }, { "epoch": 0.2693355690630895, "grad_norm": 0.1407589465379715, "learning_rate": 8.575513114699288e-06, "loss": 0.0621, "step": 1285 }, { "epoch": 0.26954516872773004, "grad_norm": 0.14098970592021942, "learning_rate": 8.57313922559236e-06, "loss": 0.0632, "step": 1286 }, { "epoch": 0.26975476839237056, "grad_norm": 0.14431780576705933, "learning_rate": 8.57076368926899e-06, "loss": 0.0636, "step": 1287 }, { "epoch": 0.26996436805701113, "grad_norm": 0.11286340653896332, "learning_rate": 8.568386506824304e-06, "loss": 0.0634, "step": 1288 }, { "epoch": 0.27017396772165164, "grad_norm": 0.13212832808494568, "learning_rate": 8.566007679354178e-06, "loss": 0.0621, "step": 1289 }, { "epoch": 0.27038356738629216, "grad_norm": 0.1318579912185669, "learning_rate": 8.563627207955255e-06, "loss": 0.0629, "step": 1290 }, { "epoch": 0.2705931670509327, "grad_norm": 0.12174471467733383, "learning_rate": 8.561245093724926e-06, "loss": 0.0654, "step": 1291 }, { "epoch": 0.27080276671557324, "grad_norm": 0.12569588422775269, "learning_rate": 8.558861337761349e-06, "loss": 0.0644, "step": 1292 }, { "epoch": 0.2710123663802138, "grad_norm": 0.09849688410758972, "learning_rate": 8.556475941163436e-06, "loss": 0.0645, "step": 1293 }, { "epoch": 0.2712219660448543, "grad_norm": 0.11899732053279877, "learning_rate": 8.554088905030852e-06, "loss": 0.0658, "step": 1294 }, { "epoch": 0.27143156570949484, "grad_norm": 0.12569987773895264, "learning_rate": 8.551700230464022e-06, "loss": 0.0658, "step": 1295 }, { "epoch": 0.2716411653741354, "grad_norm": 0.13572193682193756, "learning_rate": 8.549309918564122e-06, "loss": 0.0599, "step": 1296 }, { "epoch": 0.2718507650387759, "grad_norm": 0.16335296630859375, "learning_rate": 8.546917970433087e-06, "loss": 0.065, "step": 1297 }, { "epoch": 0.2720603647034165, "grad_norm": 0.1542273610830307, "learning_rate": 8.544524387173605e-06, "loss": 0.0633, "step": 1298 }, { "epoch": 0.272269964368057, "grad_norm": 0.18188713490962982, "learning_rate": 8.542129169889117e-06, "loss": 0.0615, "step": 1299 }, { "epoch": 0.2724795640326976, "grad_norm": 0.18146541714668274, "learning_rate": 8.539732319683817e-06, "loss": 0.0629, "step": 1300 }, { "epoch": 0.2726891636973381, "grad_norm": 0.16707837581634521, "learning_rate": 8.537333837662653e-06, "loss": 0.0646, "step": 1301 }, { "epoch": 0.2728987633619786, "grad_norm": 0.143534317612648, "learning_rate": 8.534933724931324e-06, "loss": 0.0642, "step": 1302 }, { "epoch": 0.27310836302661917, "grad_norm": 0.10924031585454941, "learning_rate": 8.532531982596284e-06, "loss": 0.0634, "step": 1303 }, { "epoch": 0.2733179626912597, "grad_norm": 0.11168647557497025, "learning_rate": 8.530128611764731e-06, "loss": 0.0634, "step": 1304 }, { "epoch": 0.27352756235590026, "grad_norm": 0.14308258891105652, "learning_rate": 8.527723613544623e-06, "loss": 0.0638, "step": 1305 }, { "epoch": 0.27373716202054077, "grad_norm": 0.18562361598014832, "learning_rate": 8.525316989044663e-06, "loss": 0.0666, "step": 1306 }, { "epoch": 0.2739467616851813, "grad_norm": 0.21448108553886414, "learning_rate": 8.5229087393743e-06, "loss": 0.0633, "step": 1307 }, { "epoch": 0.27415636134982185, "grad_norm": 0.20579898357391357, "learning_rate": 8.520498865643742e-06, "loss": 0.0624, "step": 1308 }, { "epoch": 0.27436596101446237, "grad_norm": 0.18763695657253265, "learning_rate": 8.518087368963938e-06, "loss": 0.066, "step": 1309 }, { "epoch": 0.27457556067910294, "grad_norm": 0.15348191559314728, "learning_rate": 8.515674250446588e-06, "loss": 0.0644, "step": 1310 }, { "epoch": 0.27478516034374345, "grad_norm": 0.11642424762248993, "learning_rate": 8.51325951120414e-06, "loss": 0.0639, "step": 1311 }, { "epoch": 0.27499476000838396, "grad_norm": 0.15585097670555115, "learning_rate": 8.510843152349786e-06, "loss": 0.0624, "step": 1312 }, { "epoch": 0.27520435967302453, "grad_norm": 0.18433383107185364, "learning_rate": 8.508425174997467e-06, "loss": 0.0648, "step": 1313 }, { "epoch": 0.27541395933766505, "grad_norm": 0.1758815348148346, "learning_rate": 8.506005580261872e-06, "loss": 0.0672, "step": 1314 }, { "epoch": 0.2756235590023056, "grad_norm": 0.11144900321960449, "learning_rate": 8.503584369258434e-06, "loss": 0.0606, "step": 1315 }, { "epoch": 0.27583315866694613, "grad_norm": 0.11539231985807419, "learning_rate": 8.501161543103327e-06, "loss": 0.0634, "step": 1316 }, { "epoch": 0.27604275833158665, "grad_norm": 0.15406975150108337, "learning_rate": 8.498737102913476e-06, "loss": 0.0628, "step": 1317 }, { "epoch": 0.2762523579962272, "grad_norm": 0.16817119717597961, "learning_rate": 8.496311049806549e-06, "loss": 0.0657, "step": 1318 }, { "epoch": 0.27646195766086773, "grad_norm": 0.1416136920452118, "learning_rate": 8.493883384900953e-06, "loss": 0.0644, "step": 1319 }, { "epoch": 0.2766715573255083, "grad_norm": 0.10792358964681625, "learning_rate": 8.491454109315844e-06, "loss": 0.0654, "step": 1320 }, { "epoch": 0.2768811569901488, "grad_norm": 0.1336362659931183, "learning_rate": 8.489023224171114e-06, "loss": 0.064, "step": 1321 }, { "epoch": 0.2770907566547893, "grad_norm": 0.15568262338638306, "learning_rate": 8.486590730587403e-06, "loss": 0.0648, "step": 1322 }, { "epoch": 0.2773003563194299, "grad_norm": 0.1655251383781433, "learning_rate": 8.48415662968609e-06, "loss": 0.0647, "step": 1323 }, { "epoch": 0.2775099559840704, "grad_norm": 0.1461930274963379, "learning_rate": 8.481720922589294e-06, "loss": 0.0631, "step": 1324 }, { "epoch": 0.277719555648711, "grad_norm": 0.13114267587661743, "learning_rate": 8.479283610419876e-06, "loss": 0.0632, "step": 1325 }, { "epoch": 0.2779291553133515, "grad_norm": 0.12551239132881165, "learning_rate": 8.476844694301437e-06, "loss": 0.0617, "step": 1326 }, { "epoch": 0.278138754977992, "grad_norm": 0.13701596856117249, "learning_rate": 8.474404175358315e-06, "loss": 0.063, "step": 1327 }, { "epoch": 0.2783483546426326, "grad_norm": 0.140949547290802, "learning_rate": 8.47196205471559e-06, "loss": 0.0644, "step": 1328 }, { "epoch": 0.2785579543072731, "grad_norm": 0.13582463562488556, "learning_rate": 8.469518333499079e-06, "loss": 0.064, "step": 1329 }, { "epoch": 0.27876755397191366, "grad_norm": 0.14434821903705597, "learning_rate": 8.467073012835338e-06, "loss": 0.0583, "step": 1330 }, { "epoch": 0.2789771536365542, "grad_norm": 0.14979779720306396, "learning_rate": 8.464626093851657e-06, "loss": 0.0643, "step": 1331 }, { "epoch": 0.27918675330119475, "grad_norm": 0.15899671614170074, "learning_rate": 8.462177577676066e-06, "loss": 0.0639, "step": 1332 }, { "epoch": 0.27939635296583526, "grad_norm": 0.16557057201862335, "learning_rate": 8.459727465437332e-06, "loss": 0.0674, "step": 1333 }, { "epoch": 0.2796059526304758, "grad_norm": 0.16371293365955353, "learning_rate": 8.457275758264956e-06, "loss": 0.0625, "step": 1334 }, { "epoch": 0.27981555229511634, "grad_norm": 0.16819259524345398, "learning_rate": 8.45482245728917e-06, "loss": 0.066, "step": 1335 }, { "epoch": 0.28002515195975686, "grad_norm": 0.1529543697834015, "learning_rate": 8.452367563640953e-06, "loss": 0.0608, "step": 1336 }, { "epoch": 0.2802347516243974, "grad_norm": 0.13105508685112, "learning_rate": 8.449911078452004e-06, "loss": 0.0615, "step": 1337 }, { "epoch": 0.28044435128903794, "grad_norm": 0.13537244498729706, "learning_rate": 8.447453002854763e-06, "loss": 0.0608, "step": 1338 }, { "epoch": 0.28065395095367845, "grad_norm": 0.13226556777954102, "learning_rate": 8.444993337982408e-06, "loss": 0.0627, "step": 1339 }, { "epoch": 0.280863550618319, "grad_norm": 0.11894568800926208, "learning_rate": 8.442532084968836e-06, "loss": 0.0631, "step": 1340 }, { "epoch": 0.28107315028295954, "grad_norm": 0.0991387814283371, "learning_rate": 8.44006924494869e-06, "loss": 0.0619, "step": 1341 }, { "epoch": 0.2812827499476001, "grad_norm": 0.11959309130907059, "learning_rate": 8.437604819057336e-06, "loss": 0.0658, "step": 1342 }, { "epoch": 0.2814923496122406, "grad_norm": 0.13812269270420074, "learning_rate": 8.435138808430873e-06, "loss": 0.0599, "step": 1343 }, { "epoch": 0.28170194927688114, "grad_norm": 0.15743722021579742, "learning_rate": 8.432671214206135e-06, "loss": 0.0643, "step": 1344 }, { "epoch": 0.2819115489415217, "grad_norm": 0.15632624924182892, "learning_rate": 8.43020203752068e-06, "loss": 0.0648, "step": 1345 }, { "epoch": 0.2821211486061622, "grad_norm": 0.1336655467748642, "learning_rate": 8.427731279512797e-06, "loss": 0.0648, "step": 1346 }, { "epoch": 0.2823307482708028, "grad_norm": 0.11537309736013412, "learning_rate": 8.425258941321508e-06, "loss": 0.0627, "step": 1347 }, { "epoch": 0.2825403479354433, "grad_norm": 0.12339965254068375, "learning_rate": 8.422785024086557e-06, "loss": 0.0608, "step": 1348 }, { "epoch": 0.2827499476000838, "grad_norm": 0.139318585395813, "learning_rate": 8.420309528948422e-06, "loss": 0.065, "step": 1349 }, { "epoch": 0.2829595472647244, "grad_norm": 0.12725292146205902, "learning_rate": 8.417832457048302e-06, "loss": 0.0621, "step": 1350 }, { "epoch": 0.2831691469293649, "grad_norm": 0.1099279448390007, "learning_rate": 8.415353809528133e-06, "loss": 0.0631, "step": 1351 }, { "epoch": 0.28337874659400547, "grad_norm": 0.11662383377552032, "learning_rate": 8.412873587530565e-06, "loss": 0.0633, "step": 1352 }, { "epoch": 0.283588346258646, "grad_norm": 0.13634788990020752, "learning_rate": 8.410391792198982e-06, "loss": 0.0673, "step": 1353 }, { "epoch": 0.2837979459232865, "grad_norm": 0.1415703296661377, "learning_rate": 8.407908424677493e-06, "loss": 0.065, "step": 1354 }, { "epoch": 0.28400754558792707, "grad_norm": 0.13151812553405762, "learning_rate": 8.405423486110926e-06, "loss": 0.0633, "step": 1355 }, { "epoch": 0.2842171452525676, "grad_norm": 0.1383858174085617, "learning_rate": 8.40293697764484e-06, "loss": 0.0613, "step": 1356 }, { "epoch": 0.28442674491720815, "grad_norm": 0.15346135199069977, "learning_rate": 8.400448900425515e-06, "loss": 0.0641, "step": 1357 }, { "epoch": 0.28463634458184867, "grad_norm": 0.14821362495422363, "learning_rate": 8.397959255599952e-06, "loss": 0.0655, "step": 1358 }, { "epoch": 0.2848459442464892, "grad_norm": 0.1301778107881546, "learning_rate": 8.395468044315878e-06, "loss": 0.063, "step": 1359 }, { "epoch": 0.28505554391112975, "grad_norm": 0.13555464148521423, "learning_rate": 8.392975267721742e-06, "loss": 0.0635, "step": 1360 }, { "epoch": 0.28526514357577026, "grad_norm": 0.1415833681821823, "learning_rate": 8.39048092696671e-06, "loss": 0.0608, "step": 1361 }, { "epoch": 0.28547474324041083, "grad_norm": 0.1284692883491516, "learning_rate": 8.387985023200677e-06, "loss": 0.063, "step": 1362 }, { "epoch": 0.28568434290505135, "grad_norm": 0.10931932926177979, "learning_rate": 8.385487557574253e-06, "loss": 0.0633, "step": 1363 }, { "epoch": 0.28589394256969186, "grad_norm": 0.11410527676343918, "learning_rate": 8.382988531238766e-06, "loss": 0.0621, "step": 1364 }, { "epoch": 0.28610354223433243, "grad_norm": 0.13259191811084747, "learning_rate": 8.380487945346269e-06, "loss": 0.062, "step": 1365 }, { "epoch": 0.28631314189897294, "grad_norm": 0.1383572220802307, "learning_rate": 8.377985801049533e-06, "loss": 0.0611, "step": 1366 }, { "epoch": 0.2865227415636135, "grad_norm": 0.13124366104602814, "learning_rate": 8.375482099502043e-06, "loss": 0.0654, "step": 1367 }, { "epoch": 0.28673234122825403, "grad_norm": 0.1339644193649292, "learning_rate": 8.372976841858007e-06, "loss": 0.0616, "step": 1368 }, { "epoch": 0.2869419408928946, "grad_norm": 0.14934468269348145, "learning_rate": 8.370470029272348e-06, "loss": 0.0616, "step": 1369 }, { "epoch": 0.2871515405575351, "grad_norm": 0.15344421565532684, "learning_rate": 8.367961662900704e-06, "loss": 0.0625, "step": 1370 }, { "epoch": 0.2873611402221756, "grad_norm": 0.14816948771476746, "learning_rate": 8.365451743899433e-06, "loss": 0.065, "step": 1371 }, { "epoch": 0.2875707398868162, "grad_norm": 0.16822992265224457, "learning_rate": 8.362940273425609e-06, "loss": 0.0622, "step": 1372 }, { "epoch": 0.2877803395514567, "grad_norm": 0.19093914330005646, "learning_rate": 8.360427252637015e-06, "loss": 0.0627, "step": 1373 }, { "epoch": 0.2879899392160973, "grad_norm": 0.17542891204357147, "learning_rate": 8.357912682692158e-06, "loss": 0.0645, "step": 1374 }, { "epoch": 0.2881995388807378, "grad_norm": 0.13324493169784546, "learning_rate": 8.355396564750251e-06, "loss": 0.0636, "step": 1375 }, { "epoch": 0.2884091385453783, "grad_norm": 0.11579709500074387, "learning_rate": 8.352878899971225e-06, "loss": 0.064, "step": 1376 }, { "epoch": 0.2886187382100189, "grad_norm": 0.1267646849155426, "learning_rate": 8.35035968951572e-06, "loss": 0.0654, "step": 1377 }, { "epoch": 0.2888283378746594, "grad_norm": 0.13641488552093506, "learning_rate": 8.347838934545097e-06, "loss": 0.064, "step": 1378 }, { "epoch": 0.28903793753929996, "grad_norm": 0.14326706528663635, "learning_rate": 8.34531663622142e-06, "loss": 0.0621, "step": 1379 }, { "epoch": 0.2892475372039405, "grad_norm": 0.14493903517723083, "learning_rate": 8.342792795707468e-06, "loss": 0.0651, "step": 1380 }, { "epoch": 0.289457136868581, "grad_norm": 0.1295025646686554, "learning_rate": 8.340267414166731e-06, "loss": 0.0628, "step": 1381 }, { "epoch": 0.28966673653322156, "grad_norm": 0.11359550803899765, "learning_rate": 8.337740492763412e-06, "loss": 0.0608, "step": 1382 }, { "epoch": 0.2898763361978621, "grad_norm": 0.1159205511212349, "learning_rate": 8.33521203266242e-06, "loss": 0.0629, "step": 1383 }, { "epoch": 0.29008593586250264, "grad_norm": 0.1216607317328453, "learning_rate": 8.33268203502937e-06, "loss": 0.0619, "step": 1384 }, { "epoch": 0.29029553552714316, "grad_norm": 0.12574604153633118, "learning_rate": 8.330150501030597e-06, "loss": 0.0611, "step": 1385 }, { "epoch": 0.29050513519178367, "grad_norm": 0.13100169599056244, "learning_rate": 8.327617431833132e-06, "loss": 0.06, "step": 1386 }, { "epoch": 0.29071473485642424, "grad_norm": 0.14436903595924377, "learning_rate": 8.325082828604724e-06, "loss": 0.0687, "step": 1387 }, { "epoch": 0.29092433452106475, "grad_norm": 0.150770902633667, "learning_rate": 8.322546692513822e-06, "loss": 0.0639, "step": 1388 }, { "epoch": 0.2911339341857053, "grad_norm": 0.13927258551120758, "learning_rate": 8.320009024729586e-06, "loss": 0.0638, "step": 1389 }, { "epoch": 0.29134353385034584, "grad_norm": 0.1205689087510109, "learning_rate": 8.317469826421877e-06, "loss": 0.0614, "step": 1390 }, { "epoch": 0.29155313351498635, "grad_norm": 0.12410090863704681, "learning_rate": 8.314929098761268e-06, "loss": 0.064, "step": 1391 }, { "epoch": 0.2917627331796269, "grad_norm": 0.1389668583869934, "learning_rate": 8.312386842919034e-06, "loss": 0.0623, "step": 1392 }, { "epoch": 0.29197233284426743, "grad_norm": 0.1345934271812439, "learning_rate": 8.309843060067152e-06, "loss": 0.0655, "step": 1393 }, { "epoch": 0.292181932508908, "grad_norm": 0.13228191435337067, "learning_rate": 8.307297751378309e-06, "loss": 0.0644, "step": 1394 }, { "epoch": 0.2923915321735485, "grad_norm": 0.1374116837978363, "learning_rate": 8.304750918025888e-06, "loss": 0.0639, "step": 1395 }, { "epoch": 0.29260113183818903, "grad_norm": 0.13251402974128723, "learning_rate": 8.30220256118398e-06, "loss": 0.067, "step": 1396 }, { "epoch": 0.2928107315028296, "grad_norm": 0.124875508248806, "learning_rate": 8.29965268202738e-06, "loss": 0.0629, "step": 1397 }, { "epoch": 0.2930203311674701, "grad_norm": 0.12808595597743988, "learning_rate": 8.297101281731576e-06, "loss": 0.0595, "step": 1398 }, { "epoch": 0.2932299308321107, "grad_norm": 0.13613183796405792, "learning_rate": 8.294548361472767e-06, "loss": 0.064, "step": 1399 }, { "epoch": 0.2934395304967512, "grad_norm": 0.1279173195362091, "learning_rate": 8.291993922427848e-06, "loss": 0.0584, "step": 1400 }, { "epoch": 0.29364913016139177, "grad_norm": 0.11164448410272598, "learning_rate": 8.289437965774414e-06, "loss": 0.063, "step": 1401 }, { "epoch": 0.2938587298260323, "grad_norm": 0.11260772496461868, "learning_rate": 8.286880492690761e-06, "loss": 0.0582, "step": 1402 }, { "epoch": 0.2940683294906728, "grad_norm": 0.12324314564466476, "learning_rate": 8.284321504355884e-06, "loss": 0.0635, "step": 1403 }, { "epoch": 0.29427792915531337, "grad_norm": 0.12230779230594635, "learning_rate": 8.281761001949474e-06, "loss": 0.063, "step": 1404 }, { "epoch": 0.2944875288199539, "grad_norm": 0.11499473452568054, "learning_rate": 8.279198986651925e-06, "loss": 0.0613, "step": 1405 }, { "epoch": 0.29469712848459445, "grad_norm": 0.12175990641117096, "learning_rate": 8.276635459644327e-06, "loss": 0.0623, "step": 1406 }, { "epoch": 0.29490672814923496, "grad_norm": 0.14893771708011627, "learning_rate": 8.274070422108458e-06, "loss": 0.0619, "step": 1407 }, { "epoch": 0.2951163278138755, "grad_norm": 0.17682506144046783, "learning_rate": 8.271503875226807e-06, "loss": 0.0652, "step": 1408 }, { "epoch": 0.29532592747851605, "grad_norm": 0.1828548014163971, "learning_rate": 8.268935820182549e-06, "loss": 0.0639, "step": 1409 }, { "epoch": 0.29553552714315656, "grad_norm": 0.16200965642929077, "learning_rate": 8.266366258159557e-06, "loss": 0.0657, "step": 1410 }, { "epoch": 0.29574512680779713, "grad_norm": 0.12557782232761383, "learning_rate": 8.263795190342398e-06, "loss": 0.062, "step": 1411 }, { "epoch": 0.29595472647243765, "grad_norm": 0.11403919011354446, "learning_rate": 8.261222617916335e-06, "loss": 0.064, "step": 1412 }, { "epoch": 0.29616432613707816, "grad_norm": 0.14523234963417053, "learning_rate": 8.258648542067322e-06, "loss": 0.0627, "step": 1413 }, { "epoch": 0.29637392580171873, "grad_norm": 0.16806814074516296, "learning_rate": 8.256072963982008e-06, "loss": 0.0636, "step": 1414 }, { "epoch": 0.29658352546635924, "grad_norm": 0.16369985044002533, "learning_rate": 8.253495884847735e-06, "loss": 0.0627, "step": 1415 }, { "epoch": 0.2967931251309998, "grad_norm": 0.14731934666633606, "learning_rate": 8.250917305852532e-06, "loss": 0.0622, "step": 1416 }, { "epoch": 0.2970027247956403, "grad_norm": 0.14016751945018768, "learning_rate": 8.248337228185128e-06, "loss": 0.0643, "step": 1417 }, { "epoch": 0.29721232446028084, "grad_norm": 0.12805722653865814, "learning_rate": 8.245755653034938e-06, "loss": 0.0641, "step": 1418 }, { "epoch": 0.2974219241249214, "grad_norm": 0.13014602661132812, "learning_rate": 8.243172581592066e-06, "loss": 0.0627, "step": 1419 }, { "epoch": 0.2976315237895619, "grad_norm": 0.12117176502943039, "learning_rate": 8.240588015047306e-06, "loss": 0.0617, "step": 1420 }, { "epoch": 0.2978411234542025, "grad_norm": 0.11306478828191757, "learning_rate": 8.238001954592143e-06, "loss": 0.0598, "step": 1421 }, { "epoch": 0.298050723118843, "grad_norm": 0.1199759840965271, "learning_rate": 8.235414401418754e-06, "loss": 0.0624, "step": 1422 }, { "epoch": 0.2982603227834835, "grad_norm": 0.12165199220180511, "learning_rate": 8.232825356719998e-06, "loss": 0.0645, "step": 1423 }, { "epoch": 0.2984699224481241, "grad_norm": 0.1166660338640213, "learning_rate": 8.230234821689423e-06, "loss": 0.0657, "step": 1424 }, { "epoch": 0.2986795221127646, "grad_norm": 0.11805491894483566, "learning_rate": 8.227642797521265e-06, "loss": 0.0615, "step": 1425 }, { "epoch": 0.2988891217774052, "grad_norm": 0.11944872885942459, "learning_rate": 8.22504928541045e-06, "loss": 0.062, "step": 1426 }, { "epoch": 0.2990987214420457, "grad_norm": 0.1310216188430786, "learning_rate": 8.222454286552583e-06, "loss": 0.0641, "step": 1427 }, { "epoch": 0.2993083211066862, "grad_norm": 0.15212152898311615, "learning_rate": 8.21985780214396e-06, "loss": 0.0626, "step": 1428 }, { "epoch": 0.2995179207713268, "grad_norm": 0.14432914555072784, "learning_rate": 8.217259833381559e-06, "loss": 0.0609, "step": 1429 }, { "epoch": 0.2997275204359673, "grad_norm": 0.11263640224933624, "learning_rate": 8.214660381463043e-06, "loss": 0.0616, "step": 1430 }, { "epoch": 0.29993712010060786, "grad_norm": 0.12339860200881958, "learning_rate": 8.212059447586758e-06, "loss": 0.0627, "step": 1431 }, { "epoch": 0.30014671976524837, "grad_norm": 0.15246935188770294, "learning_rate": 8.209457032951735e-06, "loss": 0.0607, "step": 1432 }, { "epoch": 0.3003563194298889, "grad_norm": 0.16209474205970764, "learning_rate": 8.206853138757687e-06, "loss": 0.0666, "step": 1433 }, { "epoch": 0.30056591909452945, "grad_norm": 0.15916895866394043, "learning_rate": 8.204247766205008e-06, "loss": 0.0616, "step": 1434 }, { "epoch": 0.30077551875916997, "grad_norm": 0.16150227189064026, "learning_rate": 8.201640916494776e-06, "loss": 0.0629, "step": 1435 }, { "epoch": 0.30098511842381054, "grad_norm": 0.15138226747512817, "learning_rate": 8.199032590828745e-06, "loss": 0.0625, "step": 1436 }, { "epoch": 0.30119471808845105, "grad_norm": 0.1358492225408554, "learning_rate": 8.196422790409352e-06, "loss": 0.0648, "step": 1437 }, { "epoch": 0.3014043177530916, "grad_norm": 0.11498319357633591, "learning_rate": 8.19381151643972e-06, "loss": 0.0634, "step": 1438 }, { "epoch": 0.30161391741773214, "grad_norm": 0.09225708991289139, "learning_rate": 8.191198770123643e-06, "loss": 0.0627, "step": 1439 }, { "epoch": 0.30182351708237265, "grad_norm": 0.10581572353839874, "learning_rate": 8.188584552665592e-06, "loss": 0.0615, "step": 1440 }, { "epoch": 0.3020331167470132, "grad_norm": 0.1425763964653015, "learning_rate": 8.185968865270729e-06, "loss": 0.0624, "step": 1441 }, { "epoch": 0.30224271641165373, "grad_norm": 0.14036968350410461, "learning_rate": 8.183351709144877e-06, "loss": 0.0617, "step": 1442 }, { "epoch": 0.3024523160762943, "grad_norm": 0.11378060281276703, "learning_rate": 8.18073308549455e-06, "loss": 0.0581, "step": 1443 }, { "epoch": 0.3026619157409348, "grad_norm": 0.12382522225379944, "learning_rate": 8.178112995526932e-06, "loss": 0.0589, "step": 1444 }, { "epoch": 0.30287151540557533, "grad_norm": 0.1571829468011856, "learning_rate": 8.17549144044988e-06, "loss": 0.0616, "step": 1445 }, { "epoch": 0.3030811150702159, "grad_norm": 0.1604911983013153, "learning_rate": 8.172868421471936e-06, "loss": 0.0628, "step": 1446 }, { "epoch": 0.3032907147348564, "grad_norm": 0.13987775146961212, "learning_rate": 8.17024393980231e-06, "loss": 0.0592, "step": 1447 }, { "epoch": 0.303500314399497, "grad_norm": 0.1492827981710434, "learning_rate": 8.167617996650885e-06, "loss": 0.0579, "step": 1448 }, { "epoch": 0.3037099140641375, "grad_norm": 0.14875125885009766, "learning_rate": 8.164990593228222e-06, "loss": 0.0636, "step": 1449 }, { "epoch": 0.303919513728778, "grad_norm": 0.12761233747005463, "learning_rate": 8.16236173074555e-06, "loss": 0.0606, "step": 1450 }, { "epoch": 0.3041291133934186, "grad_norm": 0.13052557408809662, "learning_rate": 8.15973141041478e-06, "loss": 0.0608, "step": 1451 }, { "epoch": 0.3043387130580591, "grad_norm": 0.14425970613956451, "learning_rate": 8.157099633448486e-06, "loss": 0.0626, "step": 1452 }, { "epoch": 0.30454831272269967, "grad_norm": 0.13814501464366913, "learning_rate": 8.154466401059916e-06, "loss": 0.0627, "step": 1453 }, { "epoch": 0.3047579123873402, "grad_norm": 0.160516157746315, "learning_rate": 8.15183171446299e-06, "loss": 0.0588, "step": 1454 }, { "epoch": 0.3049675120519807, "grad_norm": 0.19766630232334137, "learning_rate": 8.1491955748723e-06, "loss": 0.0662, "step": 1455 }, { "epoch": 0.30517711171662126, "grad_norm": 0.17199066281318665, "learning_rate": 8.146557983503103e-06, "loss": 0.0651, "step": 1456 }, { "epoch": 0.3053867113812618, "grad_norm": 0.13344770669937134, "learning_rate": 8.143918941571329e-06, "loss": 0.0614, "step": 1457 }, { "epoch": 0.30559631104590235, "grad_norm": 0.12617316842079163, "learning_rate": 8.141278450293576e-06, "loss": 0.0616, "step": 1458 }, { "epoch": 0.30580591071054286, "grad_norm": 0.12648488581180573, "learning_rate": 8.13863651088711e-06, "loss": 0.0619, "step": 1459 }, { "epoch": 0.3060155103751834, "grad_norm": 0.1468046009540558, "learning_rate": 8.135993124569865e-06, "loss": 0.0636, "step": 1460 }, { "epoch": 0.30622511003982394, "grad_norm": 0.16080905497074127, "learning_rate": 8.133348292560442e-06, "loss": 0.0615, "step": 1461 }, { "epoch": 0.30643470970446446, "grad_norm": 0.15127602219581604, "learning_rate": 8.130702016078105e-06, "loss": 0.0616, "step": 1462 }, { "epoch": 0.30664430936910503, "grad_norm": 0.12708480656147003, "learning_rate": 8.12805429634279e-06, "loss": 0.063, "step": 1463 }, { "epoch": 0.30685390903374554, "grad_norm": 0.12862573564052582, "learning_rate": 8.125405134575093e-06, "loss": 0.0598, "step": 1464 }, { "epoch": 0.30706350869838606, "grad_norm": 0.13088904321193695, "learning_rate": 8.122754531996278e-06, "loss": 0.0642, "step": 1465 }, { "epoch": 0.3072731083630266, "grad_norm": 0.1227186918258667, "learning_rate": 8.120102489828273e-06, "loss": 0.0607, "step": 1466 }, { "epoch": 0.30748270802766714, "grad_norm": 0.12489646673202515, "learning_rate": 8.117449009293668e-06, "loss": 0.058, "step": 1467 }, { "epoch": 0.3076923076923077, "grad_norm": 0.13636116683483124, "learning_rate": 8.114794091615718e-06, "loss": 0.0638, "step": 1468 }, { "epoch": 0.3079019073569482, "grad_norm": 0.15484750270843506, "learning_rate": 8.11213773801834e-06, "loss": 0.0628, "step": 1469 }, { "epoch": 0.3081115070215888, "grad_norm": 0.15130093693733215, "learning_rate": 8.109479949726109e-06, "loss": 0.0613, "step": 1470 }, { "epoch": 0.3083211066862293, "grad_norm": 0.13728772103786469, "learning_rate": 8.106820727964267e-06, "loss": 0.0608, "step": 1471 }, { "epoch": 0.3085307063508698, "grad_norm": 0.13158975541591644, "learning_rate": 8.104160073958716e-06, "loss": 0.0656, "step": 1472 }, { "epoch": 0.3087403060155104, "grad_norm": 0.14283715188503265, "learning_rate": 8.101497988936015e-06, "loss": 0.0615, "step": 1473 }, { "epoch": 0.3089499056801509, "grad_norm": 0.14496344327926636, "learning_rate": 8.098834474123385e-06, "loss": 0.0608, "step": 1474 }, { "epoch": 0.3091595053447915, "grad_norm": 0.11485762149095535, "learning_rate": 8.096169530748708e-06, "loss": 0.0628, "step": 1475 }, { "epoch": 0.309369105009432, "grad_norm": 0.10256731510162354, "learning_rate": 8.093503160040517e-06, "loss": 0.0632, "step": 1476 }, { "epoch": 0.3095787046740725, "grad_norm": 0.12671735882759094, "learning_rate": 8.090835363228016e-06, "loss": 0.063, "step": 1477 }, { "epoch": 0.30978830433871307, "grad_norm": 0.1311434656381607, "learning_rate": 8.088166141541052e-06, "loss": 0.0607, "step": 1478 }, { "epoch": 0.3099979040033536, "grad_norm": 0.12401635199785233, "learning_rate": 8.08549549621014e-06, "loss": 0.0606, "step": 1479 }, { "epoch": 0.31020750366799416, "grad_norm": 0.12939530611038208, "learning_rate": 8.082823428466442e-06, "loss": 0.0625, "step": 1480 }, { "epoch": 0.31041710333263467, "grad_norm": 0.14530347287654877, "learning_rate": 8.080149939541786e-06, "loss": 0.0619, "step": 1481 }, { "epoch": 0.3106267029972752, "grad_norm": 0.16024935245513916, "learning_rate": 8.077475030668647e-06, "loss": 0.057, "step": 1482 }, { "epoch": 0.31083630266191575, "grad_norm": 0.16392071545124054, "learning_rate": 8.074798703080158e-06, "loss": 0.0627, "step": 1483 }, { "epoch": 0.31104590232655627, "grad_norm": 0.16095848381519318, "learning_rate": 8.072120958010106e-06, "loss": 0.0598, "step": 1484 }, { "epoch": 0.31125550199119684, "grad_norm": 0.13986064493656158, "learning_rate": 8.069441796692932e-06, "loss": 0.0633, "step": 1485 }, { "epoch": 0.31146510165583735, "grad_norm": 0.11697458475828171, "learning_rate": 8.066761220363724e-06, "loss": 0.0608, "step": 1486 }, { "epoch": 0.31167470132047786, "grad_norm": 0.1427479237318039, "learning_rate": 8.064079230258233e-06, "loss": 0.0633, "step": 1487 }, { "epoch": 0.31188430098511843, "grad_norm": 0.1725568026304245, "learning_rate": 8.061395827612854e-06, "loss": 0.0622, "step": 1488 }, { "epoch": 0.31209390064975895, "grad_norm": 0.15745431184768677, "learning_rate": 8.058711013664633e-06, "loss": 0.0627, "step": 1489 }, { "epoch": 0.3123035003143995, "grad_norm": 0.13785965740680695, "learning_rate": 8.056024789651269e-06, "loss": 0.0605, "step": 1490 }, { "epoch": 0.31251309997904003, "grad_norm": 0.12854214012622833, "learning_rate": 8.053337156811112e-06, "loss": 0.061, "step": 1491 }, { "epoch": 0.31272269964368055, "grad_norm": 0.13071073591709137, "learning_rate": 8.050648116383162e-06, "loss": 0.0609, "step": 1492 }, { "epoch": 0.3129322993083211, "grad_norm": 0.13808673620224, "learning_rate": 8.047957669607062e-06, "loss": 0.0584, "step": 1493 }, { "epoch": 0.31314189897296163, "grad_norm": 0.1332629919052124, "learning_rate": 8.04526581772311e-06, "loss": 0.0627, "step": 1494 }, { "epoch": 0.3133514986376022, "grad_norm": 0.11996429413557053, "learning_rate": 8.042572561972249e-06, "loss": 0.0623, "step": 1495 }, { "epoch": 0.3135610983022427, "grad_norm": 0.1371801495552063, "learning_rate": 8.039877903596069e-06, "loss": 0.0623, "step": 1496 }, { "epoch": 0.3137706979668832, "grad_norm": 0.1538374274969101, "learning_rate": 8.037181843836807e-06, "loss": 0.0595, "step": 1497 }, { "epoch": 0.3139802976315238, "grad_norm": 0.09957927465438843, "learning_rate": 8.034484383937345e-06, "loss": 0.0629, "step": 1498 }, { "epoch": 0.3141898972961643, "grad_norm": 0.12047518789768219, "learning_rate": 8.031785525141214e-06, "loss": 0.0626, "step": 1499 }, { "epoch": 0.3143994969608049, "grad_norm": 0.17362119257450104, "learning_rate": 8.029085268692584e-06, "loss": 0.0599, "step": 1500 }, { "epoch": 0.3146090966254454, "grad_norm": 0.12966394424438477, "learning_rate": 8.026383615836273e-06, "loss": 0.0631, "step": 1501 }, { "epoch": 0.3148186962900859, "grad_norm": 0.16443343460559845, "learning_rate": 8.023680567817746e-06, "loss": 0.0623, "step": 1502 }, { "epoch": 0.3150282959547265, "grad_norm": 0.18140558898448944, "learning_rate": 8.020976125883105e-06, "loss": 0.0622, "step": 1503 }, { "epoch": 0.315237895619367, "grad_norm": 0.1121053621172905, "learning_rate": 8.018270291279098e-06, "loss": 0.0598, "step": 1504 }, { "epoch": 0.31544749528400756, "grad_norm": 0.1866942197084427, "learning_rate": 8.015563065253113e-06, "loss": 0.0623, "step": 1505 }, { "epoch": 0.3156570949486481, "grad_norm": 0.1441410928964615, "learning_rate": 8.01285444905318e-06, "loss": 0.0621, "step": 1506 }, { "epoch": 0.31586669461328865, "grad_norm": 0.13300150632858276, "learning_rate": 8.010144443927974e-06, "loss": 0.0604, "step": 1507 }, { "epoch": 0.31607629427792916, "grad_norm": 0.17902185022830963, "learning_rate": 8.007433051126805e-06, "loss": 0.0619, "step": 1508 }, { "epoch": 0.3162858939425697, "grad_norm": 0.12487073242664337, "learning_rate": 8.004720271899622e-06, "loss": 0.063, "step": 1509 }, { "epoch": 0.31649549360721024, "grad_norm": 0.14495615661144257, "learning_rate": 8.002006107497018e-06, "loss": 0.0617, "step": 1510 }, { "epoch": 0.31670509327185076, "grad_norm": 0.14461486041545868, "learning_rate": 7.999290559170222e-06, "loss": 0.0651, "step": 1511 }, { "epoch": 0.3169146929364913, "grad_norm": 0.16186927258968353, "learning_rate": 7.996573628171103e-06, "loss": 0.061, "step": 1512 }, { "epoch": 0.31712429260113184, "grad_norm": 0.20015843212604523, "learning_rate": 7.993855315752163e-06, "loss": 0.063, "step": 1513 }, { "epoch": 0.31733389226577235, "grad_norm": 0.1886589378118515, "learning_rate": 7.991135623166543e-06, "loss": 0.064, "step": 1514 }, { "epoch": 0.3175434919304129, "grad_norm": 0.15665331482887268, "learning_rate": 7.988414551668025e-06, "loss": 0.0644, "step": 1515 }, { "epoch": 0.31775309159505344, "grad_norm": 0.13003768026828766, "learning_rate": 7.985692102511018e-06, "loss": 0.0632, "step": 1516 }, { "epoch": 0.317962691259694, "grad_norm": 0.14635911583900452, "learning_rate": 7.982968276950568e-06, "loss": 0.0614, "step": 1517 }, { "epoch": 0.3181722909243345, "grad_norm": 0.13516655564308167, "learning_rate": 7.980243076242367e-06, "loss": 0.0647, "step": 1518 }, { "epoch": 0.31838189058897504, "grad_norm": 0.15535247325897217, "learning_rate": 7.977516501642725e-06, "loss": 0.0612, "step": 1519 }, { "epoch": 0.3185914902536156, "grad_norm": 0.16718165576457977, "learning_rate": 7.974788554408594e-06, "loss": 0.0611, "step": 1520 }, { "epoch": 0.3188010899182561, "grad_norm": 0.14838221669197083, "learning_rate": 7.97205923579756e-06, "loss": 0.0602, "step": 1521 }, { "epoch": 0.3190106895828967, "grad_norm": 0.15352647006511688, "learning_rate": 7.969328547067832e-06, "loss": 0.0625, "step": 1522 }, { "epoch": 0.3192202892475372, "grad_norm": 0.1140708476305008, "learning_rate": 7.966596489478261e-06, "loss": 0.059, "step": 1523 }, { "epoch": 0.3194298889121777, "grad_norm": 0.11593464761972427, "learning_rate": 7.963863064288326e-06, "loss": 0.0605, "step": 1524 }, { "epoch": 0.3196394885768183, "grad_norm": 0.11878301203250885, "learning_rate": 7.961128272758133e-06, "loss": 0.0617, "step": 1525 }, { "epoch": 0.3198490882414588, "grad_norm": 0.10293014347553253, "learning_rate": 7.958392116148424e-06, "loss": 0.0601, "step": 1526 }, { "epoch": 0.32005868790609937, "grad_norm": 0.12255340069532394, "learning_rate": 7.95565459572056e-06, "loss": 0.0616, "step": 1527 }, { "epoch": 0.3202682875707399, "grad_norm": 0.1113123744726181, "learning_rate": 7.952915712736545e-06, "loss": 0.0629, "step": 1528 }, { "epoch": 0.3204778872353804, "grad_norm": 0.11932878196239471, "learning_rate": 7.950175468458999e-06, "loss": 0.0632, "step": 1529 }, { "epoch": 0.32068748690002097, "grad_norm": 0.13146136701107025, "learning_rate": 7.947433864151173e-06, "loss": 0.0624, "step": 1530 }, { "epoch": 0.3208970865646615, "grad_norm": 0.11554694920778275, "learning_rate": 7.944690901076949e-06, "loss": 0.0591, "step": 1531 }, { "epoch": 0.32110668622930205, "grad_norm": 0.11757069826126099, "learning_rate": 7.94194658050083e-06, "loss": 0.0659, "step": 1532 }, { "epoch": 0.32131628589394257, "grad_norm": 0.12100815027952194, "learning_rate": 7.93920090368795e-06, "loss": 0.0579, "step": 1533 }, { "epoch": 0.3215258855585831, "grad_norm": 0.12287240475416183, "learning_rate": 7.936453871904065e-06, "loss": 0.0601, "step": 1534 }, { "epoch": 0.32173548522322365, "grad_norm": 0.12194748222827911, "learning_rate": 7.933705486415553e-06, "loss": 0.06, "step": 1535 }, { "epoch": 0.32194508488786416, "grad_norm": 0.11489801853895187, "learning_rate": 7.93095574848942e-06, "loss": 0.0594, "step": 1536 }, { "epoch": 0.32215468455250473, "grad_norm": 0.11319497227668762, "learning_rate": 7.928204659393297e-06, "loss": 0.06, "step": 1537 }, { "epoch": 0.32236428421714525, "grad_norm": 0.10197950154542923, "learning_rate": 7.925452220395436e-06, "loss": 0.064, "step": 1538 }, { "epoch": 0.3225738838817858, "grad_norm": 0.10284919291734695, "learning_rate": 7.922698432764709e-06, "loss": 0.0616, "step": 1539 }, { "epoch": 0.32278348354642633, "grad_norm": 0.10016728937625885, "learning_rate": 7.919943297770609e-06, "loss": 0.0588, "step": 1540 }, { "epoch": 0.32299308321106684, "grad_norm": 0.10762354731559753, "learning_rate": 7.917186816683256e-06, "loss": 0.0587, "step": 1541 }, { "epoch": 0.3232026828757074, "grad_norm": 0.13142406940460205, "learning_rate": 7.914428990773388e-06, "loss": 0.0594, "step": 1542 }, { "epoch": 0.32341228254034793, "grad_norm": 0.15556885302066803, "learning_rate": 7.91166982131236e-06, "loss": 0.0594, "step": 1543 }, { "epoch": 0.3236218822049885, "grad_norm": 0.18388265371322632, "learning_rate": 7.908909309572147e-06, "loss": 0.0606, "step": 1544 }, { "epoch": 0.323831481869629, "grad_norm": 0.21343399584293365, "learning_rate": 7.906147456825349e-06, "loss": 0.0634, "step": 1545 }, { "epoch": 0.3240410815342695, "grad_norm": 0.21992023289203644, "learning_rate": 7.903384264345177e-06, "loss": 0.0635, "step": 1546 }, { "epoch": 0.3242506811989101, "grad_norm": 0.16700875759124756, "learning_rate": 7.900619733405462e-06, "loss": 0.0641, "step": 1547 }, { "epoch": 0.3244602808635506, "grad_norm": 0.09242475032806396, "learning_rate": 7.897853865280652e-06, "loss": 0.0614, "step": 1548 }, { "epoch": 0.3246698805281912, "grad_norm": 0.12979759275913239, "learning_rate": 7.895086661245811e-06, "loss": 0.061, "step": 1549 }, { "epoch": 0.3248794801928317, "grad_norm": 0.19285419583320618, "learning_rate": 7.892318122576623e-06, "loss": 0.0602, "step": 1550 }, { "epoch": 0.3250890798574722, "grad_norm": 0.1928529143333435, "learning_rate": 7.889548250549379e-06, "loss": 0.0613, "step": 1551 }, { "epoch": 0.3252986795221128, "grad_norm": 0.13492649793624878, "learning_rate": 7.886777046440993e-06, "loss": 0.061, "step": 1552 }, { "epoch": 0.3255082791867533, "grad_norm": 0.1183929294347763, "learning_rate": 7.884004511528988e-06, "loss": 0.0637, "step": 1553 }, { "epoch": 0.32571787885139386, "grad_norm": 0.17243815958499908, "learning_rate": 7.881230647091502e-06, "loss": 0.0601, "step": 1554 }, { "epoch": 0.3259274785160344, "grad_norm": 0.1707419902086258, "learning_rate": 7.878455454407285e-06, "loss": 0.0625, "step": 1555 }, { "epoch": 0.3261370781806749, "grad_norm": 0.14786763489246368, "learning_rate": 7.875678934755704e-06, "loss": 0.0632, "step": 1556 }, { "epoch": 0.32634667784531546, "grad_norm": 0.1466890424489975, "learning_rate": 7.87290108941673e-06, "loss": 0.0613, "step": 1557 }, { "epoch": 0.326556277509956, "grad_norm": 0.10979172587394714, "learning_rate": 7.87012191967095e-06, "loss": 0.063, "step": 1558 }, { "epoch": 0.32676587717459654, "grad_norm": 0.11911406368017197, "learning_rate": 7.867341426799562e-06, "loss": 0.0567, "step": 1559 }, { "epoch": 0.32697547683923706, "grad_norm": 0.14850041270256042, "learning_rate": 7.864559612084372e-06, "loss": 0.0609, "step": 1560 }, { "epoch": 0.32718507650387757, "grad_norm": 0.13936297595500946, "learning_rate": 7.861776476807795e-06, "loss": 0.0613, "step": 1561 }, { "epoch": 0.32739467616851814, "grad_norm": 0.12747827172279358, "learning_rate": 7.858992022252859e-06, "loss": 0.0626, "step": 1562 }, { "epoch": 0.32760427583315865, "grad_norm": 0.11281277984380722, "learning_rate": 7.856206249703191e-06, "loss": 0.0612, "step": 1563 }, { "epoch": 0.3278138754977992, "grad_norm": 0.11081743985414505, "learning_rate": 7.853419160443038e-06, "loss": 0.0625, "step": 1564 }, { "epoch": 0.32802347516243974, "grad_norm": 0.10656673461198807, "learning_rate": 7.850630755757242e-06, "loss": 0.0625, "step": 1565 }, { "epoch": 0.32823307482708025, "grad_norm": 0.09804133325815201, "learning_rate": 7.847841036931263e-06, "loss": 0.0602, "step": 1566 }, { "epoch": 0.3284426744917208, "grad_norm": 0.10339375585317612, "learning_rate": 7.845050005251156e-06, "loss": 0.06, "step": 1567 }, { "epoch": 0.32865227415636133, "grad_norm": 0.09550289064645767, "learning_rate": 7.842257662003587e-06, "loss": 0.0571, "step": 1568 }, { "epoch": 0.3288618738210019, "grad_norm": 0.09345971792936325, "learning_rate": 7.839464008475825e-06, "loss": 0.0604, "step": 1569 }, { "epoch": 0.3290714734856424, "grad_norm": 0.10545200109481812, "learning_rate": 7.836669045955746e-06, "loss": 0.0598, "step": 1570 }, { "epoch": 0.32928107315028293, "grad_norm": 0.11077643185853958, "learning_rate": 7.83387277573183e-06, "loss": 0.0586, "step": 1571 }, { "epoch": 0.3294906728149235, "grad_norm": 0.12594513595104218, "learning_rate": 7.831075199093148e-06, "loss": 0.0598, "step": 1572 }, { "epoch": 0.329700272479564, "grad_norm": 0.13642063736915588, "learning_rate": 7.828276317329388e-06, "loss": 0.0601, "step": 1573 }, { "epoch": 0.3299098721442046, "grad_norm": 0.13808242976665497, "learning_rate": 7.825476131730836e-06, "loss": 0.0591, "step": 1574 }, { "epoch": 0.3301194718088451, "grad_norm": 0.14252924919128418, "learning_rate": 7.822674643588372e-06, "loss": 0.0589, "step": 1575 }, { "epoch": 0.33032907147348567, "grad_norm": 0.12825094163417816, "learning_rate": 7.819871854193484e-06, "loss": 0.0622, "step": 1576 }, { "epoch": 0.3305386711381262, "grad_norm": 0.10428497940301895, "learning_rate": 7.817067764838257e-06, "loss": 0.0583, "step": 1577 }, { "epoch": 0.3307482708027667, "grad_norm": 0.10690061748027802, "learning_rate": 7.814262376815375e-06, "loss": 0.0581, "step": 1578 }, { "epoch": 0.33095787046740727, "grad_norm": 0.11123620718717575, "learning_rate": 7.811455691418123e-06, "loss": 0.0607, "step": 1579 }, { "epoch": 0.3311674701320478, "grad_norm": 0.10212855786085129, "learning_rate": 7.80864770994038e-06, "loss": 0.0624, "step": 1580 }, { "epoch": 0.33137706979668835, "grad_norm": 0.09154222905635834, "learning_rate": 7.805838433676627e-06, "loss": 0.0604, "step": 1581 }, { "epoch": 0.33158666946132886, "grad_norm": 0.09208472818136215, "learning_rate": 7.803027863921939e-06, "loss": 0.06, "step": 1582 }, { "epoch": 0.3317962691259694, "grad_norm": 0.09515499323606491, "learning_rate": 7.800216001971988e-06, "loss": 0.0654, "step": 1583 }, { "epoch": 0.33200586879060995, "grad_norm": 0.09495844691991806, "learning_rate": 7.79740284912304e-06, "loss": 0.063, "step": 1584 }, { "epoch": 0.33221546845525046, "grad_norm": 0.09243495017290115, "learning_rate": 7.794588406671962e-06, "loss": 0.0622, "step": 1585 }, { "epoch": 0.33242506811989103, "grad_norm": 0.08537401258945465, "learning_rate": 7.791772675916207e-06, "loss": 0.0601, "step": 1586 }, { "epoch": 0.33263466778453155, "grad_norm": 0.08266492187976837, "learning_rate": 7.788955658153829e-06, "loss": 0.0582, "step": 1587 }, { "epoch": 0.33284426744917206, "grad_norm": 0.10280580073595047, "learning_rate": 7.786137354683472e-06, "loss": 0.0598, "step": 1588 }, { "epoch": 0.33305386711381263, "grad_norm": 0.13502821326255798, "learning_rate": 7.783317766804375e-06, "loss": 0.0586, "step": 1589 }, { "epoch": 0.33326346677845314, "grad_norm": 0.16894817352294922, "learning_rate": 7.780496895816363e-06, "loss": 0.0596, "step": 1590 }, { "epoch": 0.3334730664430937, "grad_norm": 0.21050219237804413, "learning_rate": 7.77767474301986e-06, "loss": 0.0609, "step": 1591 }, { "epoch": 0.3336826661077342, "grad_norm": 0.203968346118927, "learning_rate": 7.774851309715878e-06, "loss": 0.0625, "step": 1592 }, { "epoch": 0.33389226577237474, "grad_norm": 0.13803130388259888, "learning_rate": 7.77202659720602e-06, "loss": 0.057, "step": 1593 }, { "epoch": 0.3341018654370153, "grad_norm": 0.13375061750411987, "learning_rate": 7.769200606792476e-06, "loss": 0.057, "step": 1594 }, { "epoch": 0.3343114651016558, "grad_norm": 0.12815599143505096, "learning_rate": 7.766373339778026e-06, "loss": 0.0587, "step": 1595 }, { "epoch": 0.3345210647662964, "grad_norm": 0.14430293440818787, "learning_rate": 7.763544797466041e-06, "loss": 0.0586, "step": 1596 }, { "epoch": 0.3347306644309369, "grad_norm": 0.15994097292423248, "learning_rate": 7.76071498116048e-06, "loss": 0.0569, "step": 1597 }, { "epoch": 0.3349402640955774, "grad_norm": 0.12301892787218094, "learning_rate": 7.757883892165886e-06, "loss": 0.0596, "step": 1598 }, { "epoch": 0.335149863760218, "grad_norm": 0.12102188915014267, "learning_rate": 7.755051531787388e-06, "loss": 0.0604, "step": 1599 }, { "epoch": 0.3353594634248585, "grad_norm": 0.12119577080011368, "learning_rate": 7.752217901330707e-06, "loss": 0.0602, "step": 1600 }, { "epoch": 0.3355690630894991, "grad_norm": 0.11957602202892303, "learning_rate": 7.749383002102147e-06, "loss": 0.0608, "step": 1601 }, { "epoch": 0.3357786627541396, "grad_norm": 0.1391220986843109, "learning_rate": 7.746546835408593e-06, "loss": 0.0592, "step": 1602 }, { "epoch": 0.3359882624187801, "grad_norm": 0.14518260955810547, "learning_rate": 7.74370940255752e-06, "loss": 0.0605, "step": 1603 }, { "epoch": 0.3361978620834207, "grad_norm": 0.15618950128555298, "learning_rate": 7.74087070485698e-06, "loss": 0.0604, "step": 1604 }, { "epoch": 0.3364074617480612, "grad_norm": 0.13505420088768005, "learning_rate": 7.738030743615615e-06, "loss": 0.0605, "step": 1605 }, { "epoch": 0.33661706141270176, "grad_norm": 0.11345727741718292, "learning_rate": 7.735189520142645e-06, "loss": 0.0627, "step": 1606 }, { "epoch": 0.33682666107734227, "grad_norm": 0.10767614841461182, "learning_rate": 7.732347035747878e-06, "loss": 0.0631, "step": 1607 }, { "epoch": 0.33703626074198284, "grad_norm": 0.11709927022457123, "learning_rate": 7.72950329174169e-06, "loss": 0.0615, "step": 1608 }, { "epoch": 0.33724586040662335, "grad_norm": 0.13534541428089142, "learning_rate": 7.726658289435055e-06, "loss": 0.0617, "step": 1609 }, { "epoch": 0.33745546007126387, "grad_norm": 0.12872527539730072, "learning_rate": 7.723812030139514e-06, "loss": 0.0574, "step": 1610 }, { "epoch": 0.33766505973590444, "grad_norm": 0.12165652960538864, "learning_rate": 7.720964515167193e-06, "loss": 0.0573, "step": 1611 }, { "epoch": 0.33787465940054495, "grad_norm": 0.10215598344802856, "learning_rate": 7.718115745830797e-06, "loss": 0.0579, "step": 1612 }, { "epoch": 0.3380842590651855, "grad_norm": 0.09414192289113998, "learning_rate": 7.715265723443606e-06, "loss": 0.0607, "step": 1613 }, { "epoch": 0.33829385872982604, "grad_norm": 0.11345679312944412, "learning_rate": 7.712414449319478e-06, "loss": 0.0591, "step": 1614 }, { "epoch": 0.33850345839446655, "grad_norm": 0.11734048277139664, "learning_rate": 7.709561924772855e-06, "loss": 0.0595, "step": 1615 }, { "epoch": 0.3387130580591071, "grad_norm": 0.10740841180086136, "learning_rate": 7.706708151118747e-06, "loss": 0.0593, "step": 1616 }, { "epoch": 0.33892265772374763, "grad_norm": 0.1052752360701561, "learning_rate": 7.703853129672742e-06, "loss": 0.0576, "step": 1617 }, { "epoch": 0.3391322573883882, "grad_norm": 0.11419695615768433, "learning_rate": 7.700996861751009e-06, "loss": 0.0632, "step": 1618 }, { "epoch": 0.3393418570530287, "grad_norm": 0.10033425688743591, "learning_rate": 7.698139348670281e-06, "loss": 0.061, "step": 1619 }, { "epoch": 0.33955145671766923, "grad_norm": 0.10025940090417862, "learning_rate": 7.695280591747875e-06, "loss": 0.0609, "step": 1620 }, { "epoch": 0.3397610563823098, "grad_norm": 0.09737148880958557, "learning_rate": 7.692420592301675e-06, "loss": 0.0609, "step": 1621 }, { "epoch": 0.3399706560469503, "grad_norm": 0.09889432042837143, "learning_rate": 7.689559351650142e-06, "loss": 0.0598, "step": 1622 }, { "epoch": 0.3401802557115909, "grad_norm": 0.10725081712007523, "learning_rate": 7.686696871112306e-06, "loss": 0.0581, "step": 1623 }, { "epoch": 0.3403898553762314, "grad_norm": 0.10398346930742264, "learning_rate": 7.683833152007772e-06, "loss": 0.0577, "step": 1624 }, { "epoch": 0.3405994550408719, "grad_norm": 0.11018167436122894, "learning_rate": 7.68096819565671e-06, "loss": 0.0603, "step": 1625 }, { "epoch": 0.3408090547055125, "grad_norm": 0.10882928967475891, "learning_rate": 7.67810200337987e-06, "loss": 0.0595, "step": 1626 }, { "epoch": 0.341018654370153, "grad_norm": 0.09781121462583542, "learning_rate": 7.675234576498561e-06, "loss": 0.0611, "step": 1627 }, { "epoch": 0.34122825403479357, "grad_norm": 0.08528628945350647, "learning_rate": 7.672365916334668e-06, "loss": 0.0576, "step": 1628 }, { "epoch": 0.3414378536994341, "grad_norm": 0.08956452459096909, "learning_rate": 7.66949602421064e-06, "loss": 0.0624, "step": 1629 }, { "epoch": 0.3416474533640746, "grad_norm": 0.09840685874223709, "learning_rate": 7.666624901449503e-06, "loss": 0.0581, "step": 1630 }, { "epoch": 0.34185705302871516, "grad_norm": 0.11417452245950699, "learning_rate": 7.66375254937484e-06, "loss": 0.0581, "step": 1631 }, { "epoch": 0.3420666526933557, "grad_norm": 0.11850450932979584, "learning_rate": 7.660878969310803e-06, "loss": 0.0607, "step": 1632 }, { "epoch": 0.34227625235799625, "grad_norm": 0.12879729270935059, "learning_rate": 7.658004162582116e-06, "loss": 0.0619, "step": 1633 }, { "epoch": 0.34248585202263676, "grad_norm": 0.1304604858160019, "learning_rate": 7.655128130514061e-06, "loss": 0.0632, "step": 1634 }, { "epoch": 0.3426954516872773, "grad_norm": 0.10286222398281097, "learning_rate": 7.652250874432489e-06, "loss": 0.0611, "step": 1635 }, { "epoch": 0.34290505135191784, "grad_norm": 0.11092126369476318, "learning_rate": 7.649372395663816e-06, "loss": 0.0615, "step": 1636 }, { "epoch": 0.34311465101655836, "grad_norm": 0.109388567507267, "learning_rate": 7.646492695535018e-06, "loss": 0.0641, "step": 1637 }, { "epoch": 0.34332425068119893, "grad_norm": 0.09455049782991409, "learning_rate": 7.643611775373637e-06, "loss": 0.0613, "step": 1638 }, { "epoch": 0.34353385034583944, "grad_norm": 0.09676145017147064, "learning_rate": 7.640729636507778e-06, "loss": 0.0597, "step": 1639 }, { "epoch": 0.34374345001047996, "grad_norm": 0.09647035598754883, "learning_rate": 7.637846280266103e-06, "loss": 0.0593, "step": 1640 }, { "epoch": 0.3439530496751205, "grad_norm": 0.10977063328027725, "learning_rate": 7.634961707977843e-06, "loss": 0.0596, "step": 1641 }, { "epoch": 0.34416264933976104, "grad_norm": 0.1262187659740448, "learning_rate": 7.632075920972782e-06, "loss": 0.0604, "step": 1642 }, { "epoch": 0.3443722490044016, "grad_norm": 0.1421433538198471, "learning_rate": 7.629188920581267e-06, "loss": 0.0571, "step": 1643 }, { "epoch": 0.3445818486690421, "grad_norm": 0.14843688905239105, "learning_rate": 7.626300708134207e-06, "loss": 0.0601, "step": 1644 }, { "epoch": 0.3447914483336827, "grad_norm": 0.14421787858009338, "learning_rate": 7.623411284963066e-06, "loss": 0.0608, "step": 1645 }, { "epoch": 0.3450010479983232, "grad_norm": 0.14530403912067413, "learning_rate": 7.620520652399867e-06, "loss": 0.059, "step": 1646 }, { "epoch": 0.3452106476629637, "grad_norm": 0.14376644790172577, "learning_rate": 7.617628811777191e-06, "loss": 0.0613, "step": 1647 }, { "epoch": 0.3454202473276043, "grad_norm": 0.14656515419483185, "learning_rate": 7.614735764428178e-06, "loss": 0.0616, "step": 1648 }, { "epoch": 0.3456298469922448, "grad_norm": 0.15034882724285126, "learning_rate": 7.611841511686521e-06, "loss": 0.0645, "step": 1649 }, { "epoch": 0.3458394466568854, "grad_norm": 0.17289340496063232, "learning_rate": 7.608946054886468e-06, "loss": 0.0605, "step": 1650 }, { "epoch": 0.3460490463215259, "grad_norm": 0.20710116624832153, "learning_rate": 7.606049395362827e-06, "loss": 0.0601, "step": 1651 }, { "epoch": 0.3462586459861664, "grad_norm": 0.24352329969406128, "learning_rate": 7.6031515344509545e-06, "loss": 0.0619, "step": 1652 }, { "epoch": 0.34646824565080697, "grad_norm": 0.24319349229335785, "learning_rate": 7.6002524734867676e-06, "loss": 0.0637, "step": 1653 }, { "epoch": 0.3466778453154475, "grad_norm": 0.18296609818935394, "learning_rate": 7.597352213806729e-06, "loss": 0.0614, "step": 1654 }, { "epoch": 0.34688744498008806, "grad_norm": 0.11361169070005417, "learning_rate": 7.5944507567478585e-06, "loss": 0.0596, "step": 1655 }, { "epoch": 0.34709704464472857, "grad_norm": 0.14272648096084595, "learning_rate": 7.59154810364773e-06, "loss": 0.0609, "step": 1656 }, { "epoch": 0.3473066443093691, "grad_norm": 0.18227772414684296, "learning_rate": 7.588644255844464e-06, "loss": 0.0609, "step": 1657 }, { "epoch": 0.34751624397400965, "grad_norm": 0.15642352402210236, "learning_rate": 7.585739214676731e-06, "loss": 0.0611, "step": 1658 }, { "epoch": 0.34772584363865017, "grad_norm": 0.13368570804595947, "learning_rate": 7.582832981483761e-06, "loss": 0.0607, "step": 1659 }, { "epoch": 0.34793544330329074, "grad_norm": 0.12491488456726074, "learning_rate": 7.57992555760532e-06, "loss": 0.0628, "step": 1660 }, { "epoch": 0.34814504296793125, "grad_norm": 0.12365151941776276, "learning_rate": 7.577016944381734e-06, "loss": 0.0582, "step": 1661 }, { "epoch": 0.34835464263257176, "grad_norm": 0.14696553349494934, "learning_rate": 7.574107143153872e-06, "loss": 0.0611, "step": 1662 }, { "epoch": 0.34856424229721233, "grad_norm": 0.14201070368289948, "learning_rate": 7.571196155263152e-06, "loss": 0.0617, "step": 1663 }, { "epoch": 0.34877384196185285, "grad_norm": 0.13257716596126556, "learning_rate": 7.568283982051538e-06, "loss": 0.062, "step": 1664 }, { "epoch": 0.3489834416264934, "grad_norm": 0.10744427889585495, "learning_rate": 7.565370624861541e-06, "loss": 0.0615, "step": 1665 }, { "epoch": 0.34919304129113393, "grad_norm": 0.11801417917013168, "learning_rate": 7.562456085036221e-06, "loss": 0.0591, "step": 1666 }, { "epoch": 0.34940264095577445, "grad_norm": 0.13146089017391205, "learning_rate": 7.5595403639191775e-06, "loss": 0.0618, "step": 1667 }, { "epoch": 0.349612240620415, "grad_norm": 0.11520653963088989, "learning_rate": 7.556623462854555e-06, "loss": 0.0604, "step": 1668 }, { "epoch": 0.34982184028505553, "grad_norm": 0.111871138215065, "learning_rate": 7.553705383187051e-06, "loss": 0.0567, "step": 1669 }, { "epoch": 0.3500314399496961, "grad_norm": 0.10216851532459259, "learning_rate": 7.550786126261893e-06, "loss": 0.0601, "step": 1670 }, { "epoch": 0.3502410396143366, "grad_norm": 0.11334113776683807, "learning_rate": 7.5478656934248626e-06, "loss": 0.0607, "step": 1671 }, { "epoch": 0.3504506392789771, "grad_norm": 0.12396923452615738, "learning_rate": 7.544944086022276e-06, "loss": 0.0611, "step": 1672 }, { "epoch": 0.3506602389436177, "grad_norm": 0.11316753178834915, "learning_rate": 7.5420213054009935e-06, "loss": 0.0623, "step": 1673 }, { "epoch": 0.3508698386082582, "grad_norm": 0.12763440608978271, "learning_rate": 7.539097352908419e-06, "loss": 0.0587, "step": 1674 }, { "epoch": 0.3510794382728988, "grad_norm": 0.12276781350374222, "learning_rate": 7.536172229892491e-06, "loss": 0.0599, "step": 1675 }, { "epoch": 0.3512890379375393, "grad_norm": 0.10066503286361694, "learning_rate": 7.533245937701692e-06, "loss": 0.0587, "step": 1676 }, { "epoch": 0.35149863760217986, "grad_norm": 0.11499958485364914, "learning_rate": 7.530318477685043e-06, "loss": 0.0578, "step": 1677 }, { "epoch": 0.3517082372668204, "grad_norm": 0.10679785162210464, "learning_rate": 7.527389851192099e-06, "loss": 0.0618, "step": 1678 }, { "epoch": 0.3519178369314609, "grad_norm": 0.11170266568660736, "learning_rate": 7.52446005957296e-06, "loss": 0.0567, "step": 1679 }, { "epoch": 0.35212743659610146, "grad_norm": 0.13220727443695068, "learning_rate": 7.521529104178258e-06, "loss": 0.0624, "step": 1680 }, { "epoch": 0.352337036260742, "grad_norm": 0.11351287364959717, "learning_rate": 7.518596986359163e-06, "loss": 0.0576, "step": 1681 }, { "epoch": 0.35254663592538255, "grad_norm": 0.1113462969660759, "learning_rate": 7.51566370746738e-06, "loss": 0.0627, "step": 1682 }, { "epoch": 0.35275623559002306, "grad_norm": 0.11083466559648514, "learning_rate": 7.51272926885515e-06, "loss": 0.0582, "step": 1683 }, { "epoch": 0.3529658352546636, "grad_norm": 0.09305007755756378, "learning_rate": 7.50979367187525e-06, "loss": 0.0585, "step": 1684 }, { "epoch": 0.35317543491930414, "grad_norm": 0.09011313319206238, "learning_rate": 7.506856917880989e-06, "loss": 0.0612, "step": 1685 }, { "epoch": 0.35338503458394466, "grad_norm": 0.09725736081600189, "learning_rate": 7.503919008226208e-06, "loss": 0.0565, "step": 1686 }, { "epoch": 0.3535946342485852, "grad_norm": 0.10482776910066605, "learning_rate": 7.5009799442652856e-06, "loss": 0.0598, "step": 1687 }, { "epoch": 0.35380423391322574, "grad_norm": 0.12371653318405151, "learning_rate": 7.498039727353127e-06, "loss": 0.0615, "step": 1688 }, { "epoch": 0.35401383357786625, "grad_norm": 0.13332313299179077, "learning_rate": 7.495098358845174e-06, "loss": 0.0587, "step": 1689 }, { "epoch": 0.3542234332425068, "grad_norm": 0.11412235349416733, "learning_rate": 7.492155840097396e-06, "loss": 0.0608, "step": 1690 }, { "epoch": 0.35443303290714734, "grad_norm": 0.09121813625097275, "learning_rate": 7.4892121724662915e-06, "loss": 0.0599, "step": 1691 }, { "epoch": 0.3546426325717879, "grad_norm": 0.09299511462450027, "learning_rate": 7.486267357308896e-06, "loss": 0.0611, "step": 1692 }, { "epoch": 0.3548522322364284, "grad_norm": 0.09584937989711761, "learning_rate": 7.483321395982762e-06, "loss": 0.0628, "step": 1693 }, { "epoch": 0.35506183190106894, "grad_norm": 0.0966387689113617, "learning_rate": 7.4803742898459816e-06, "loss": 0.0595, "step": 1694 }, { "epoch": 0.3552714315657095, "grad_norm": 0.09714031964540482, "learning_rate": 7.4774260402571696e-06, "loss": 0.0597, "step": 1695 }, { "epoch": 0.35548103123035, "grad_norm": 0.09430839866399765, "learning_rate": 7.474476648575466e-06, "loss": 0.059, "step": 1696 }, { "epoch": 0.3556906308949906, "grad_norm": 0.09601857513189316, "learning_rate": 7.471526116160542e-06, "loss": 0.0576, "step": 1697 }, { "epoch": 0.3559002305596311, "grad_norm": 0.11064789444208145, "learning_rate": 7.468574444372593e-06, "loss": 0.0608, "step": 1698 }, { "epoch": 0.3561098302242716, "grad_norm": 0.11107967793941498, "learning_rate": 7.465621634572336e-06, "loss": 0.0602, "step": 1699 }, { "epoch": 0.3563194298889122, "grad_norm": 0.12140575051307678, "learning_rate": 7.462667688121019e-06, "loss": 0.0576, "step": 1700 }, { "epoch": 0.3565290295535527, "grad_norm": 0.14458388090133667, "learning_rate": 7.459712606380409e-06, "loss": 0.0606, "step": 1701 }, { "epoch": 0.35673862921819327, "grad_norm": 0.14192935824394226, "learning_rate": 7.4567563907127985e-06, "loss": 0.061, "step": 1702 }, { "epoch": 0.3569482288828338, "grad_norm": 0.14503100514411926, "learning_rate": 7.453799042481002e-06, "loss": 0.059, "step": 1703 }, { "epoch": 0.3571578285474743, "grad_norm": 0.13211016356945038, "learning_rate": 7.450840563048356e-06, "loss": 0.0612, "step": 1704 }, { "epoch": 0.35736742821211487, "grad_norm": 0.10646926611661911, "learning_rate": 7.44788095377872e-06, "loss": 0.0621, "step": 1705 }, { "epoch": 0.3575770278767554, "grad_norm": 0.09362011402845383, "learning_rate": 7.444920216036473e-06, "loss": 0.0574, "step": 1706 }, { "epoch": 0.35778662754139595, "grad_norm": 0.11936776340007782, "learning_rate": 7.441958351186514e-06, "loss": 0.0604, "step": 1707 }, { "epoch": 0.35799622720603647, "grad_norm": 0.13509775698184967, "learning_rate": 7.4389953605942634e-06, "loss": 0.0611, "step": 1708 }, { "epoch": 0.358205826870677, "grad_norm": 0.11575739830732346, "learning_rate": 7.436031245625657e-06, "loss": 0.0607, "step": 1709 }, { "epoch": 0.35841542653531755, "grad_norm": 0.10143768787384033, "learning_rate": 7.433066007647152e-06, "loss": 0.0576, "step": 1710 }, { "epoch": 0.35862502619995806, "grad_norm": 0.09407970309257507, "learning_rate": 7.430099648025723e-06, "loss": 0.0597, "step": 1711 }, { "epoch": 0.35883462586459863, "grad_norm": 0.11093775928020477, "learning_rate": 7.427132168128862e-06, "loss": 0.0594, "step": 1712 }, { "epoch": 0.35904422552923915, "grad_norm": 0.13045847415924072, "learning_rate": 7.4241635693245766e-06, "loss": 0.0631, "step": 1713 }, { "epoch": 0.3592538251938797, "grad_norm": 0.128498837351799, "learning_rate": 7.421193852981386e-06, "loss": 0.0617, "step": 1714 }, { "epoch": 0.35946342485852023, "grad_norm": 0.12185259163379669, "learning_rate": 7.418223020468335e-06, "loss": 0.0578, "step": 1715 }, { "epoch": 0.35967302452316074, "grad_norm": 0.13501186668872833, "learning_rate": 7.415251073154972e-06, "loss": 0.0589, "step": 1716 }, { "epoch": 0.3598826241878013, "grad_norm": 0.12140613049268723, "learning_rate": 7.412278012411368e-06, "loss": 0.0621, "step": 1717 }, { "epoch": 0.36009222385244183, "grad_norm": 0.10289863497018814, "learning_rate": 7.4093038396081e-06, "loss": 0.0575, "step": 1718 }, { "epoch": 0.3603018235170824, "grad_norm": 0.11283747851848602, "learning_rate": 7.4063285561162624e-06, "loss": 0.0584, "step": 1719 }, { "epoch": 0.3605114231817229, "grad_norm": 0.13699781894683838, "learning_rate": 7.40335216330746e-06, "loss": 0.0603, "step": 1720 }, { "epoch": 0.3607210228463634, "grad_norm": 0.16813865303993225, "learning_rate": 7.400374662553811e-06, "loss": 0.0605, "step": 1721 }, { "epoch": 0.360930622511004, "grad_norm": 0.1661754846572876, "learning_rate": 7.39739605522794e-06, "loss": 0.0595, "step": 1722 }, { "epoch": 0.3611402221756445, "grad_norm": 0.13529466092586517, "learning_rate": 7.394416342702986e-06, "loss": 0.0588, "step": 1723 }, { "epoch": 0.3613498218402851, "grad_norm": 0.1041502058506012, "learning_rate": 7.391435526352594e-06, "loss": 0.0619, "step": 1724 }, { "epoch": 0.3615594215049256, "grad_norm": 0.08625433593988419, "learning_rate": 7.3884536075509225e-06, "loss": 0.0591, "step": 1725 }, { "epoch": 0.3617690211695661, "grad_norm": 0.1282806396484375, "learning_rate": 7.385470587672634e-06, "loss": 0.0607, "step": 1726 }, { "epoch": 0.3619786208342067, "grad_norm": 0.15391872823238373, "learning_rate": 7.382486468092899e-06, "loss": 0.0588, "step": 1727 }, { "epoch": 0.3621882204988472, "grad_norm": 0.1479339599609375, "learning_rate": 7.379501250187399e-06, "loss": 0.0598, "step": 1728 }, { "epoch": 0.36239782016348776, "grad_norm": 0.1250709593296051, "learning_rate": 7.376514935332314e-06, "loss": 0.0605, "step": 1729 }, { "epoch": 0.3626074198281283, "grad_norm": 0.09578738361597061, "learning_rate": 7.37352752490434e-06, "loss": 0.0602, "step": 1730 }, { "epoch": 0.3628170194927688, "grad_norm": 0.09304331988096237, "learning_rate": 7.37053902028067e-06, "loss": 0.0581, "step": 1731 }, { "epoch": 0.36302661915740936, "grad_norm": 0.11090435832738876, "learning_rate": 7.367549422839002e-06, "loss": 0.0587, "step": 1732 }, { "epoch": 0.36323621882204987, "grad_norm": 0.12240754812955856, "learning_rate": 7.364558733957543e-06, "loss": 0.0576, "step": 1733 }, { "epoch": 0.36344581848669044, "grad_norm": 0.12205494195222855, "learning_rate": 7.361566955014999e-06, "loss": 0.0618, "step": 1734 }, { "epoch": 0.36365541815133096, "grad_norm": 0.1121978685259819, "learning_rate": 7.3585740873905795e-06, "loss": 0.0586, "step": 1735 }, { "epoch": 0.36386501781597147, "grad_norm": 0.09682106226682663, "learning_rate": 7.355580132463998e-06, "loss": 0.0604, "step": 1736 }, { "epoch": 0.36407461748061204, "grad_norm": 0.08918868750333786, "learning_rate": 7.352585091615461e-06, "loss": 0.0609, "step": 1737 }, { "epoch": 0.36428421714525255, "grad_norm": 0.08579257130622864, "learning_rate": 7.349588966225686e-06, "loss": 0.0565, "step": 1738 }, { "epoch": 0.3644938168098931, "grad_norm": 0.08958027511835098, "learning_rate": 7.346591757675886e-06, "loss": 0.059, "step": 1739 }, { "epoch": 0.36470341647453364, "grad_norm": 0.08919413387775421, "learning_rate": 7.3435934673477735e-06, "loss": 0.0588, "step": 1740 }, { "epoch": 0.36491301613917415, "grad_norm": 0.08150623738765717, "learning_rate": 7.340594096623559e-06, "loss": 0.0612, "step": 1741 }, { "epoch": 0.3651226158038147, "grad_norm": 0.08697859197854996, "learning_rate": 7.337593646885952e-06, "loss": 0.0579, "step": 1742 }, { "epoch": 0.36533221546845523, "grad_norm": 0.09409506618976593, "learning_rate": 7.3345921195181605e-06, "loss": 0.0609, "step": 1743 }, { "epoch": 0.3655418151330958, "grad_norm": 0.12264561653137207, "learning_rate": 7.331589515903885e-06, "loss": 0.0571, "step": 1744 }, { "epoch": 0.3657514147977363, "grad_norm": 0.15233023464679718, "learning_rate": 7.328585837427329e-06, "loss": 0.0616, "step": 1745 }, { "epoch": 0.3659610144623769, "grad_norm": 0.15437071025371552, "learning_rate": 7.3255810854731845e-06, "loss": 0.0583, "step": 1746 }, { "epoch": 0.3661706141270174, "grad_norm": 0.14194343984127045, "learning_rate": 7.322575261426643e-06, "loss": 0.0579, "step": 1747 }, { "epoch": 0.3663802137916579, "grad_norm": 0.14112666249275208, "learning_rate": 7.319568366673389e-06, "loss": 0.0576, "step": 1748 }, { "epoch": 0.3665898134562985, "grad_norm": 0.15609388053417206, "learning_rate": 7.316560402599598e-06, "loss": 0.0629, "step": 1749 }, { "epoch": 0.366799413120939, "grad_norm": 0.1522039920091629, "learning_rate": 7.313551370591944e-06, "loss": 0.0605, "step": 1750 }, { "epoch": 0.36700901278557957, "grad_norm": 0.13480624556541443, "learning_rate": 7.310541272037588e-06, "loss": 0.0589, "step": 1751 }, { "epoch": 0.3672186124502201, "grad_norm": 0.12041886895895004, "learning_rate": 7.307530108324186e-06, "loss": 0.0586, "step": 1752 }, { "epoch": 0.3674282121148606, "grad_norm": 0.11600147187709808, "learning_rate": 7.304517880839883e-06, "loss": 0.058, "step": 1753 }, { "epoch": 0.36763781177950117, "grad_norm": 0.11566165834665298, "learning_rate": 7.3015045909733165e-06, "loss": 0.0623, "step": 1754 }, { "epoch": 0.3678474114441417, "grad_norm": 0.12034988403320312, "learning_rate": 7.2984902401136115e-06, "loss": 0.059, "step": 1755 }, { "epoch": 0.36805701110878225, "grad_norm": 0.13276949524879456, "learning_rate": 7.295474829650382e-06, "loss": 0.0582, "step": 1756 }, { "epoch": 0.36826661077342276, "grad_norm": 0.14941634237766266, "learning_rate": 7.292458360973733e-06, "loss": 0.059, "step": 1757 }, { "epoch": 0.3684762104380633, "grad_norm": 0.14865432679653168, "learning_rate": 7.289440835474257e-06, "loss": 0.0625, "step": 1758 }, { "epoch": 0.36868581010270385, "grad_norm": 0.11310829967260361, "learning_rate": 7.286422254543031e-06, "loss": 0.0582, "step": 1759 }, { "epoch": 0.36889540976734436, "grad_norm": 0.0944291204214096, "learning_rate": 7.28340261957162e-06, "loss": 0.0588, "step": 1760 }, { "epoch": 0.36910500943198493, "grad_norm": 0.11601667106151581, "learning_rate": 7.2803819319520765e-06, "loss": 0.0576, "step": 1761 }, { "epoch": 0.36931460909662545, "grad_norm": 0.1313844472169876, "learning_rate": 7.277360193076936e-06, "loss": 0.062, "step": 1762 }, { "epoch": 0.36952420876126596, "grad_norm": 0.12570670247077942, "learning_rate": 7.274337404339218e-06, "loss": 0.0607, "step": 1763 }, { "epoch": 0.36973380842590653, "grad_norm": 0.10776617377996445, "learning_rate": 7.271313567132431e-06, "loss": 0.0618, "step": 1764 }, { "epoch": 0.36994340809054704, "grad_norm": 0.09985363483428955, "learning_rate": 7.26828868285056e-06, "loss": 0.0583, "step": 1765 }, { "epoch": 0.3701530077551876, "grad_norm": 0.10462814569473267, "learning_rate": 7.265262752888078e-06, "loss": 0.0603, "step": 1766 }, { "epoch": 0.3703626074198281, "grad_norm": 0.10572189837694168, "learning_rate": 7.262235778639938e-06, "loss": 0.0587, "step": 1767 }, { "epoch": 0.37057220708446864, "grad_norm": 0.12369339168071747, "learning_rate": 7.259207761501572e-06, "loss": 0.0567, "step": 1768 }, { "epoch": 0.3707818067491092, "grad_norm": 0.14378707110881805, "learning_rate": 7.256178702868899e-06, "loss": 0.0586, "step": 1769 }, { "epoch": 0.3709914064137497, "grad_norm": 0.13347645103931427, "learning_rate": 7.253148604138312e-06, "loss": 0.0572, "step": 1770 }, { "epoch": 0.3712010060783903, "grad_norm": 0.12877966463565826, "learning_rate": 7.250117466706686e-06, "loss": 0.0575, "step": 1771 }, { "epoch": 0.3714106057430308, "grad_norm": 0.15981978178024292, "learning_rate": 7.247085291971377e-06, "loss": 0.0589, "step": 1772 }, { "epoch": 0.3716202054076713, "grad_norm": 0.17421603202819824, "learning_rate": 7.244052081330214e-06, "loss": 0.0602, "step": 1773 }, { "epoch": 0.3718298050723119, "grad_norm": 0.166316419839859, "learning_rate": 7.241017836181508e-06, "loss": 0.059, "step": 1774 }, { "epoch": 0.3720394047369524, "grad_norm": 0.14877326786518097, "learning_rate": 7.237982557924044e-06, "loss": 0.0604, "step": 1775 }, { "epoch": 0.372249004401593, "grad_norm": 0.11550521105527878, "learning_rate": 7.234946247957087e-06, "loss": 0.058, "step": 1776 }, { "epoch": 0.3724586040662335, "grad_norm": 0.12925846874713898, "learning_rate": 7.231908907680373e-06, "loss": 0.0607, "step": 1777 }, { "epoch": 0.372668203730874, "grad_norm": 0.16908065974712372, "learning_rate": 7.228870538494116e-06, "loss": 0.0592, "step": 1778 }, { "epoch": 0.3728778033955146, "grad_norm": 0.1643764078617096, "learning_rate": 7.225831141799004e-06, "loss": 0.0613, "step": 1779 }, { "epoch": 0.3730874030601551, "grad_norm": 0.1174246221780777, "learning_rate": 7.222790718996199e-06, "loss": 0.058, "step": 1780 }, { "epoch": 0.37329700272479566, "grad_norm": 0.11582423746585846, "learning_rate": 7.219749271487333e-06, "loss": 0.0582, "step": 1781 }, { "epoch": 0.37350660238943617, "grad_norm": 0.1282123178243637, "learning_rate": 7.216706800674514e-06, "loss": 0.0604, "step": 1782 }, { "epoch": 0.37371620205407674, "grad_norm": 0.12925361096858978, "learning_rate": 7.213663307960321e-06, "loss": 0.0586, "step": 1783 }, { "epoch": 0.37392580171871725, "grad_norm": 0.12871502339839935, "learning_rate": 7.210618794747803e-06, "loss": 0.0582, "step": 1784 }, { "epoch": 0.37413540138335777, "grad_norm": 0.11913754045963287, "learning_rate": 7.207573262440479e-06, "loss": 0.059, "step": 1785 }, { "epoch": 0.37434500104799834, "grad_norm": 0.12632472813129425, "learning_rate": 7.20452671244234e-06, "loss": 0.0597, "step": 1786 }, { "epoch": 0.37455460071263885, "grad_norm": 0.14089874923229218, "learning_rate": 7.2014791461578445e-06, "loss": 0.0598, "step": 1787 }, { "epoch": 0.3747642003772794, "grad_norm": 0.1204831525683403, "learning_rate": 7.1984305649919195e-06, "loss": 0.0588, "step": 1788 }, { "epoch": 0.37497380004191994, "grad_norm": 0.11883781105279922, "learning_rate": 7.195380970349961e-06, "loss": 0.0595, "step": 1789 }, { "epoch": 0.37518339970656045, "grad_norm": 0.12562531232833862, "learning_rate": 7.192330363637832e-06, "loss": 0.0608, "step": 1790 }, { "epoch": 0.375392999371201, "grad_norm": 0.1165882870554924, "learning_rate": 7.18927874626186e-06, "loss": 0.059, "step": 1791 }, { "epoch": 0.37560259903584153, "grad_norm": 0.12378109246492386, "learning_rate": 7.186226119628841e-06, "loss": 0.0599, "step": 1792 }, { "epoch": 0.3758121987004821, "grad_norm": 0.11985018849372864, "learning_rate": 7.183172485146033e-06, "loss": 0.0592, "step": 1793 }, { "epoch": 0.3760217983651226, "grad_norm": 0.09804270416498184, "learning_rate": 7.180117844221166e-06, "loss": 0.0577, "step": 1794 }, { "epoch": 0.37623139802976313, "grad_norm": 0.11297204345464706, "learning_rate": 7.177062198262424e-06, "loss": 0.0617, "step": 1795 }, { "epoch": 0.3764409976944037, "grad_norm": 0.11378408223390579, "learning_rate": 7.1740055486784595e-06, "loss": 0.0601, "step": 1796 }, { "epoch": 0.3766505973590442, "grad_norm": 0.10527677834033966, "learning_rate": 7.170947896878392e-06, "loss": 0.0609, "step": 1797 }, { "epoch": 0.3768601970236848, "grad_norm": 0.11885930597782135, "learning_rate": 7.167889244271792e-06, "loss": 0.0564, "step": 1798 }, { "epoch": 0.3770697966883253, "grad_norm": 0.11827868223190308, "learning_rate": 7.164829592268702e-06, "loss": 0.0605, "step": 1799 }, { "epoch": 0.3772793963529658, "grad_norm": 0.09840241819620132, "learning_rate": 7.16176894227962e-06, "loss": 0.0598, "step": 1800 }, { "epoch": 0.3774889960176064, "grad_norm": 0.11675871908664703, "learning_rate": 7.158707295715504e-06, "loss": 0.0584, "step": 1801 }, { "epoch": 0.3776985956822469, "grad_norm": 0.1085144504904747, "learning_rate": 7.155644653987772e-06, "loss": 0.0565, "step": 1802 }, { "epoch": 0.37790819534688747, "grad_norm": 0.0793166384100914, "learning_rate": 7.152581018508305e-06, "loss": 0.0586, "step": 1803 }, { "epoch": 0.378117795011528, "grad_norm": 0.09427671879529953, "learning_rate": 7.149516390689433e-06, "loss": 0.0602, "step": 1804 }, { "epoch": 0.3783273946761685, "grad_norm": 0.09693881869316101, "learning_rate": 7.146450771943953e-06, "loss": 0.0579, "step": 1805 }, { "epoch": 0.37853699434080906, "grad_norm": 0.08774615824222565, "learning_rate": 7.143384163685112e-06, "loss": 0.0581, "step": 1806 }, { "epoch": 0.3787465940054496, "grad_norm": 0.10799058526754379, "learning_rate": 7.140316567326617e-06, "loss": 0.0581, "step": 1807 }, { "epoch": 0.37895619367009015, "grad_norm": 0.12135607749223709, "learning_rate": 7.13724798428263e-06, "loss": 0.0586, "step": 1808 }, { "epoch": 0.37916579333473066, "grad_norm": 0.12671098113059998, "learning_rate": 7.134178415967765e-06, "loss": 0.0583, "step": 1809 }, { "epoch": 0.3793753929993712, "grad_norm": 0.1515565663576126, "learning_rate": 7.131107863797093e-06, "loss": 0.0578, "step": 1810 }, { "epoch": 0.37958499266401174, "grad_norm": 0.18114930391311646, "learning_rate": 7.12803632918614e-06, "loss": 0.0587, "step": 1811 }, { "epoch": 0.37979459232865226, "grad_norm": 0.18494822084903717, "learning_rate": 7.12496381355088e-06, "loss": 0.0581, "step": 1812 }, { "epoch": 0.38000419199329283, "grad_norm": 0.14751562476158142, "learning_rate": 7.121890318307745e-06, "loss": 0.0603, "step": 1813 }, { "epoch": 0.38021379165793334, "grad_norm": 0.09215519577264786, "learning_rate": 7.1188158448736135e-06, "loss": 0.0607, "step": 1814 }, { "epoch": 0.38042339132257386, "grad_norm": 0.10190481692552567, "learning_rate": 7.115740394665816e-06, "loss": 0.0595, "step": 1815 }, { "epoch": 0.3806329909872144, "grad_norm": 0.10842154920101166, "learning_rate": 7.112663969102138e-06, "loss": 0.0562, "step": 1816 }, { "epoch": 0.38084259065185494, "grad_norm": 0.11397344619035721, "learning_rate": 7.1095865696008085e-06, "loss": 0.0586, "step": 1817 }, { "epoch": 0.3810521903164955, "grad_norm": 0.12245843559503555, "learning_rate": 7.1065081975805086e-06, "loss": 0.0613, "step": 1818 }, { "epoch": 0.381261789981136, "grad_norm": 0.11635053157806396, "learning_rate": 7.103428854460367e-06, "loss": 0.0585, "step": 1819 }, { "epoch": 0.3814713896457766, "grad_norm": 0.10383999347686768, "learning_rate": 7.100348541659961e-06, "loss": 0.0625, "step": 1820 }, { "epoch": 0.3816809893104171, "grad_norm": 0.09212907403707504, "learning_rate": 7.0972672605993106e-06, "loss": 0.0634, "step": 1821 }, { "epoch": 0.3818905889750576, "grad_norm": 0.10867664963006973, "learning_rate": 7.094185012698893e-06, "loss": 0.0571, "step": 1822 }, { "epoch": 0.3821001886396982, "grad_norm": 0.10434751957654953, "learning_rate": 7.091101799379617e-06, "loss": 0.0615, "step": 1823 }, { "epoch": 0.3823097883043387, "grad_norm": 0.09127280861139297, "learning_rate": 7.088017622062847e-06, "loss": 0.0572, "step": 1824 }, { "epoch": 0.3825193879689793, "grad_norm": 0.11248623579740524, "learning_rate": 7.084932482170385e-06, "loss": 0.06, "step": 1825 }, { "epoch": 0.3827289876336198, "grad_norm": 0.12546822428703308, "learning_rate": 7.081846381124484e-06, "loss": 0.0571, "step": 1826 }, { "epoch": 0.3829385872982603, "grad_norm": 0.11402714997529984, "learning_rate": 7.078759320347833e-06, "loss": 0.0578, "step": 1827 }, { "epoch": 0.38314818696290087, "grad_norm": 0.10201937705278397, "learning_rate": 7.075671301263568e-06, "loss": 0.0579, "step": 1828 }, { "epoch": 0.3833577866275414, "grad_norm": 0.0954611748456955, "learning_rate": 7.072582325295262e-06, "loss": 0.0569, "step": 1829 }, { "epoch": 0.38356738629218196, "grad_norm": 0.10198832303285599, "learning_rate": 7.069492393866937e-06, "loss": 0.0601, "step": 1830 }, { "epoch": 0.38377698595682247, "grad_norm": 0.09932437539100647, "learning_rate": 7.066401508403047e-06, "loss": 0.0576, "step": 1831 }, { "epoch": 0.383986585621463, "grad_norm": 0.08973968774080276, "learning_rate": 7.063309670328491e-06, "loss": 0.0588, "step": 1832 }, { "epoch": 0.38419618528610355, "grad_norm": 0.10677745938301086, "learning_rate": 7.060216881068607e-06, "loss": 0.0569, "step": 1833 }, { "epoch": 0.38440578495074407, "grad_norm": 0.1219630241394043, "learning_rate": 7.057123142049166e-06, "loss": 0.0576, "step": 1834 }, { "epoch": 0.38461538461538464, "grad_norm": 0.14446859061717987, "learning_rate": 7.0540284546963846e-06, "loss": 0.0577, "step": 1835 }, { "epoch": 0.38482498428002515, "grad_norm": 0.17997580766677856, "learning_rate": 7.050932820436915e-06, "loss": 0.0598, "step": 1836 }, { "epoch": 0.38503458394466566, "grad_norm": 0.17284150421619415, "learning_rate": 7.047836240697837e-06, "loss": 0.0602, "step": 1837 }, { "epoch": 0.38524418360930623, "grad_norm": 0.12842464447021484, "learning_rate": 7.044738716906679e-06, "loss": 0.0594, "step": 1838 }, { "epoch": 0.38545378327394675, "grad_norm": 0.12296885251998901, "learning_rate": 7.041640250491398e-06, "loss": 0.0619, "step": 1839 }, { "epoch": 0.3856633829385873, "grad_norm": 0.14263387024402618, "learning_rate": 7.038540842880386e-06, "loss": 0.06, "step": 1840 }, { "epoch": 0.38587298260322783, "grad_norm": 0.14516735076904297, "learning_rate": 7.035440495502469e-06, "loss": 0.0623, "step": 1841 }, { "epoch": 0.38608258226786835, "grad_norm": 0.12423399090766907, "learning_rate": 7.0323392097869044e-06, "loss": 0.0591, "step": 1842 }, { "epoch": 0.3862921819325089, "grad_norm": 0.10325875133275986, "learning_rate": 7.029236987163388e-06, "loss": 0.0577, "step": 1843 }, { "epoch": 0.38650178159714943, "grad_norm": 0.09640491008758545, "learning_rate": 7.026133829062041e-06, "loss": 0.0592, "step": 1844 }, { "epoch": 0.38671138126179, "grad_norm": 0.09991448372602463, "learning_rate": 7.023029736913421e-06, "loss": 0.0593, "step": 1845 }, { "epoch": 0.3869209809264305, "grad_norm": 0.10741396993398666, "learning_rate": 7.019924712148511e-06, "loss": 0.0589, "step": 1846 }, { "epoch": 0.387130580591071, "grad_norm": 0.10864882171154022, "learning_rate": 7.0168187561987285e-06, "loss": 0.0581, "step": 1847 }, { "epoch": 0.3873401802557116, "grad_norm": 0.10337284207344055, "learning_rate": 7.013711870495919e-06, "loss": 0.0576, "step": 1848 }, { "epoch": 0.3875497799203521, "grad_norm": 0.1069745123386383, "learning_rate": 7.010604056472355e-06, "loss": 0.0572, "step": 1849 }, { "epoch": 0.3877593795849927, "grad_norm": 0.09350664913654327, "learning_rate": 7.0074953155607395e-06, "loss": 0.0566, "step": 1850 }, { "epoch": 0.3879689792496332, "grad_norm": 0.08867273479700089, "learning_rate": 7.004385649194199e-06, "loss": 0.0596, "step": 1851 }, { "epoch": 0.38817857891427376, "grad_norm": 0.109881192445755, "learning_rate": 7.001275058806292e-06, "loss": 0.058, "step": 1852 }, { "epoch": 0.3883881785789143, "grad_norm": 0.1241583600640297, "learning_rate": 6.998163545830998e-06, "loss": 0.0577, "step": 1853 }, { "epoch": 0.3885977782435548, "grad_norm": 0.10716693848371506, "learning_rate": 6.995051111702724e-06, "loss": 0.0574, "step": 1854 }, { "epoch": 0.38880737790819536, "grad_norm": 0.10287656635046005, "learning_rate": 6.991937757856302e-06, "loss": 0.0589, "step": 1855 }, { "epoch": 0.3890169775728359, "grad_norm": 0.128378227353096, "learning_rate": 6.9888234857269875e-06, "loss": 0.0598, "step": 1856 }, { "epoch": 0.38922657723747645, "grad_norm": 0.13415974378585815, "learning_rate": 6.98570829675046e-06, "loss": 0.0621, "step": 1857 }, { "epoch": 0.38943617690211696, "grad_norm": 0.1083337590098381, "learning_rate": 6.98259219236282e-06, "loss": 0.0588, "step": 1858 }, { "epoch": 0.3896457765667575, "grad_norm": 0.10138161480426788, "learning_rate": 6.979475174000591e-06, "loss": 0.0616, "step": 1859 }, { "epoch": 0.38985537623139804, "grad_norm": 0.1118636503815651, "learning_rate": 6.976357243100718e-06, "loss": 0.0579, "step": 1860 }, { "epoch": 0.39006497589603856, "grad_norm": 0.11336030066013336, "learning_rate": 6.973238401100565e-06, "loss": 0.0558, "step": 1861 }, { "epoch": 0.3902745755606791, "grad_norm": 0.11859306693077087, "learning_rate": 6.970118649437919e-06, "loss": 0.0579, "step": 1862 }, { "epoch": 0.39048417522531964, "grad_norm": 0.1387511044740677, "learning_rate": 6.966997989550988e-06, "loss": 0.0584, "step": 1863 }, { "epoch": 0.39069377488996015, "grad_norm": 0.14954118430614471, "learning_rate": 6.96387642287839e-06, "loss": 0.0588, "step": 1864 }, { "epoch": 0.3909033745546007, "grad_norm": 0.14520269632339478, "learning_rate": 6.960753950859168e-06, "loss": 0.0596, "step": 1865 }, { "epoch": 0.39111297421924124, "grad_norm": 0.11415694653987885, "learning_rate": 6.957630574932784e-06, "loss": 0.0603, "step": 1866 }, { "epoch": 0.3913225738838818, "grad_norm": 0.13128365576267242, "learning_rate": 6.954506296539112e-06, "loss": 0.0591, "step": 1867 }, { "epoch": 0.3915321735485223, "grad_norm": 0.13788260519504547, "learning_rate": 6.951381117118441e-06, "loss": 0.059, "step": 1868 }, { "epoch": 0.39174177321316284, "grad_norm": 0.1196761503815651, "learning_rate": 6.948255038111482e-06, "loss": 0.0558, "step": 1869 }, { "epoch": 0.3919513728778034, "grad_norm": 0.13322733342647552, "learning_rate": 6.945128060959354e-06, "loss": 0.0576, "step": 1870 }, { "epoch": 0.3921609725424439, "grad_norm": 0.12423675507307053, "learning_rate": 6.942000187103594e-06, "loss": 0.0593, "step": 1871 }, { "epoch": 0.3923705722070845, "grad_norm": 0.1317249983549118, "learning_rate": 6.938871417986153e-06, "loss": 0.0581, "step": 1872 }, { "epoch": 0.392580171871725, "grad_norm": 0.1220802292227745, "learning_rate": 6.935741755049389e-06, "loss": 0.0602, "step": 1873 }, { "epoch": 0.3927897715363655, "grad_norm": 0.10902206599712372, "learning_rate": 6.9326111997360775e-06, "loss": 0.056, "step": 1874 }, { "epoch": 0.3929993712010061, "grad_norm": 0.12212048470973969, "learning_rate": 6.929479753489406e-06, "loss": 0.0588, "step": 1875 }, { "epoch": 0.3932089708656466, "grad_norm": 0.09955111145973206, "learning_rate": 6.926347417752969e-06, "loss": 0.0581, "step": 1876 }, { "epoch": 0.39341857053028717, "grad_norm": 0.11181198060512543, "learning_rate": 6.923214193970773e-06, "loss": 0.0576, "step": 1877 }, { "epoch": 0.3936281701949277, "grad_norm": 0.13282683491706848, "learning_rate": 6.920080083587231e-06, "loss": 0.0619, "step": 1878 }, { "epoch": 0.3938377698595682, "grad_norm": 0.09793748706579208, "learning_rate": 6.916945088047171e-06, "loss": 0.0548, "step": 1879 }, { "epoch": 0.39404736952420877, "grad_norm": 0.1649271547794342, "learning_rate": 6.913809208795823e-06, "loss": 0.06, "step": 1880 }, { "epoch": 0.3942569691888493, "grad_norm": 0.1789517104625702, "learning_rate": 6.910672447278827e-06, "loss": 0.061, "step": 1881 }, { "epoch": 0.39446656885348985, "grad_norm": 0.17387685179710388, "learning_rate": 6.90753480494223e-06, "loss": 0.0588, "step": 1882 }, { "epoch": 0.39467616851813037, "grad_norm": 0.1808399260044098, "learning_rate": 6.904396283232484e-06, "loss": 0.0592, "step": 1883 }, { "epoch": 0.3948857681827709, "grad_norm": 0.12922294437885284, "learning_rate": 6.9012568835964474e-06, "loss": 0.0611, "step": 1884 }, { "epoch": 0.39509536784741145, "grad_norm": 0.14349722862243652, "learning_rate": 6.898116607481382e-06, "loss": 0.0583, "step": 1885 }, { "epoch": 0.39530496751205196, "grad_norm": 0.14803367853164673, "learning_rate": 6.894975456334956e-06, "loss": 0.0587, "step": 1886 }, { "epoch": 0.39551456717669253, "grad_norm": 0.15976615250110626, "learning_rate": 6.891833431605237e-06, "loss": 0.0583, "step": 1887 }, { "epoch": 0.39572416684133305, "grad_norm": 0.1385822743177414, "learning_rate": 6.8886905347406985e-06, "loss": 0.0625, "step": 1888 }, { "epoch": 0.3959337665059736, "grad_norm": 0.12182749807834625, "learning_rate": 6.885546767190217e-06, "loss": 0.0548, "step": 1889 }, { "epoch": 0.39614336617061413, "grad_norm": 0.13791294395923615, "learning_rate": 6.882402130403066e-06, "loss": 0.0583, "step": 1890 }, { "epoch": 0.39635296583525464, "grad_norm": 0.10044901072978973, "learning_rate": 6.879256625828925e-06, "loss": 0.057, "step": 1891 }, { "epoch": 0.3965625654998952, "grad_norm": 0.13600683212280273, "learning_rate": 6.8761102549178706e-06, "loss": 0.0614, "step": 1892 }, { "epoch": 0.39677216516453573, "grad_norm": 0.1270650029182434, "learning_rate": 6.872963019120377e-06, "loss": 0.0601, "step": 1893 }, { "epoch": 0.3969817648291763, "grad_norm": 0.10988420993089676, "learning_rate": 6.869814919887321e-06, "loss": 0.0561, "step": 1894 }, { "epoch": 0.3971913644938168, "grad_norm": 0.11587254703044891, "learning_rate": 6.866665958669976e-06, "loss": 0.0584, "step": 1895 }, { "epoch": 0.3974009641584573, "grad_norm": 0.1132350042462349, "learning_rate": 6.863516136920012e-06, "loss": 0.0598, "step": 1896 }, { "epoch": 0.3976105638230979, "grad_norm": 0.14506936073303223, "learning_rate": 6.860365456089495e-06, "loss": 0.058, "step": 1897 }, { "epoch": 0.3978201634877384, "grad_norm": 0.12474244832992554, "learning_rate": 6.8572139176308885e-06, "loss": 0.0584, "step": 1898 }, { "epoch": 0.398029763152379, "grad_norm": 0.1177903488278389, "learning_rate": 6.854061522997053e-06, "loss": 0.0563, "step": 1899 }, { "epoch": 0.3982393628170195, "grad_norm": 0.11654134839773178, "learning_rate": 6.85090827364124e-06, "loss": 0.0556, "step": 1900 }, { "epoch": 0.39844896248166, "grad_norm": 0.10216284543275833, "learning_rate": 6.847754171017097e-06, "loss": 0.0629, "step": 1901 }, { "epoch": 0.3986585621463006, "grad_norm": 0.11993620544672012, "learning_rate": 6.844599216578667e-06, "loss": 0.0565, "step": 1902 }, { "epoch": 0.3988681618109411, "grad_norm": 0.10797383636236191, "learning_rate": 6.8414434117803785e-06, "loss": 0.0574, "step": 1903 }, { "epoch": 0.39907776147558166, "grad_norm": 0.111129529774189, "learning_rate": 6.83828675807706e-06, "loss": 0.0593, "step": 1904 }, { "epoch": 0.3992873611402222, "grad_norm": 0.1041678711771965, "learning_rate": 6.835129256923931e-06, "loss": 0.0569, "step": 1905 }, { "epoch": 0.3994969608048627, "grad_norm": 0.09219536185264587, "learning_rate": 6.831970909776593e-06, "loss": 0.0573, "step": 1906 }, { "epoch": 0.39970656046950326, "grad_norm": 0.10893040150403976, "learning_rate": 6.828811718091046e-06, "loss": 0.0586, "step": 1907 }, { "epoch": 0.39991616013414377, "grad_norm": 0.08363223820924759, "learning_rate": 6.8256516833236774e-06, "loss": 0.0584, "step": 1908 }, { "epoch": 0.40012575979878434, "grad_norm": 0.1113632470369339, "learning_rate": 6.822490806931262e-06, "loss": 0.0603, "step": 1909 }, { "epoch": 0.40033535946342486, "grad_norm": 0.13083884119987488, "learning_rate": 6.819329090370964e-06, "loss": 0.0586, "step": 1910 }, { "epoch": 0.40054495912806537, "grad_norm": 0.12002018094062805, "learning_rate": 6.816166535100332e-06, "loss": 0.0589, "step": 1911 }, { "epoch": 0.40075455879270594, "grad_norm": 0.13928495347499847, "learning_rate": 6.813003142577306e-06, "loss": 0.0586, "step": 1912 }, { "epoch": 0.40096415845734645, "grad_norm": 0.14167679846286774, "learning_rate": 6.809838914260208e-06, "loss": 0.0567, "step": 1913 }, { "epoch": 0.401173758121987, "grad_norm": 0.13765917718410492, "learning_rate": 6.806673851607745e-06, "loss": 0.0596, "step": 1914 }, { "epoch": 0.40138335778662754, "grad_norm": 0.11585170775651932, "learning_rate": 6.803507956079012e-06, "loss": 0.0562, "step": 1915 }, { "epoch": 0.40159295745126805, "grad_norm": 0.1145220398902893, "learning_rate": 6.800341229133486e-06, "loss": 0.0572, "step": 1916 }, { "epoch": 0.4018025571159086, "grad_norm": 0.11287350952625275, "learning_rate": 6.797173672231027e-06, "loss": 0.0581, "step": 1917 }, { "epoch": 0.40201215678054913, "grad_norm": 0.0974586084485054, "learning_rate": 6.794005286831878e-06, "loss": 0.0552, "step": 1918 }, { "epoch": 0.4022217564451897, "grad_norm": 0.10374786704778671, "learning_rate": 6.790836074396666e-06, "loss": 0.0583, "step": 1919 }, { "epoch": 0.4024313561098302, "grad_norm": 0.11808675527572632, "learning_rate": 6.7876660363863955e-06, "loss": 0.0586, "step": 1920 }, { "epoch": 0.4026409557744708, "grad_norm": 0.11047716438770294, "learning_rate": 6.784495174262452e-06, "loss": 0.0589, "step": 1921 }, { "epoch": 0.4028505554391113, "grad_norm": 0.09720434248447418, "learning_rate": 6.781323489486606e-06, "loss": 0.0586, "step": 1922 }, { "epoch": 0.4030601551037518, "grad_norm": 0.11186391115188599, "learning_rate": 6.778150983520999e-06, "loss": 0.0589, "step": 1923 }, { "epoch": 0.4032697547683924, "grad_norm": 0.11044489592313766, "learning_rate": 6.774977657828159e-06, "loss": 0.0598, "step": 1924 }, { "epoch": 0.4034793544330329, "grad_norm": 0.08856026083230972, "learning_rate": 6.771803513870988e-06, "loss": 0.0551, "step": 1925 }, { "epoch": 0.40368895409767347, "grad_norm": 0.11433659493923187, "learning_rate": 6.768628553112763e-06, "loss": 0.0583, "step": 1926 }, { "epoch": 0.403898553762314, "grad_norm": 0.12080615013837814, "learning_rate": 6.765452777017146e-06, "loss": 0.0574, "step": 1927 }, { "epoch": 0.4041081534269545, "grad_norm": 0.09817066043615341, "learning_rate": 6.762276187048164e-06, "loss": 0.0604, "step": 1928 }, { "epoch": 0.40431775309159507, "grad_norm": 0.14070898294448853, "learning_rate": 6.759098784670224e-06, "loss": 0.058, "step": 1929 }, { "epoch": 0.4045273527562356, "grad_norm": 0.16221067309379578, "learning_rate": 6.755920571348111e-06, "loss": 0.0593, "step": 1930 }, { "epoch": 0.40473695242087615, "grad_norm": 0.14239110052585602, "learning_rate": 6.75274154854698e-06, "loss": 0.0559, "step": 1931 }, { "epoch": 0.40494655208551666, "grad_norm": 0.1578344851732254, "learning_rate": 6.749561717732359e-06, "loss": 0.0568, "step": 1932 }, { "epoch": 0.4051561517501572, "grad_norm": 0.11431273072957993, "learning_rate": 6.7463810803701495e-06, "loss": 0.0581, "step": 1933 }, { "epoch": 0.40536575141479775, "grad_norm": 0.10527955740690231, "learning_rate": 6.743199637926623e-06, "loss": 0.0593, "step": 1934 }, { "epoch": 0.40557535107943826, "grad_norm": 0.13100096583366394, "learning_rate": 6.740017391868427e-06, "loss": 0.059, "step": 1935 }, { "epoch": 0.40578495074407883, "grad_norm": 0.11144158244132996, "learning_rate": 6.736834343662576e-06, "loss": 0.0579, "step": 1936 }, { "epoch": 0.40599455040871935, "grad_norm": 0.13539659976959229, "learning_rate": 6.73365049477645e-06, "loss": 0.0581, "step": 1937 }, { "epoch": 0.40620415007335986, "grad_norm": 0.13399067521095276, "learning_rate": 6.7304658466778095e-06, "loss": 0.0582, "step": 1938 }, { "epoch": 0.40641374973800043, "grad_norm": 0.11665608733892441, "learning_rate": 6.7272804008347705e-06, "loss": 0.0557, "step": 1939 }, { "epoch": 0.40662334940264094, "grad_norm": 0.12888075411319733, "learning_rate": 6.7240941587158274e-06, "loss": 0.0614, "step": 1940 }, { "epoch": 0.4068329490672815, "grad_norm": 0.11636313796043396, "learning_rate": 6.720907121789835e-06, "loss": 0.0591, "step": 1941 }, { "epoch": 0.407042548731922, "grad_norm": 0.11793368309736252, "learning_rate": 6.717719291526016e-06, "loss": 0.0597, "step": 1942 }, { "epoch": 0.40725214839656254, "grad_norm": 0.13560403883457184, "learning_rate": 6.71453066939396e-06, "loss": 0.0574, "step": 1943 }, { "epoch": 0.4074617480612031, "grad_norm": 0.09925508499145508, "learning_rate": 6.711341256863623e-06, "loss": 0.0591, "step": 1944 }, { "epoch": 0.4076713477258436, "grad_norm": 0.1104302629828453, "learning_rate": 6.708151055405321e-06, "loss": 0.0595, "step": 1945 }, { "epoch": 0.4078809473904842, "grad_norm": 0.1431284248828888, "learning_rate": 6.704960066489738e-06, "loss": 0.0606, "step": 1946 }, { "epoch": 0.4080905470551247, "grad_norm": 0.1400553286075592, "learning_rate": 6.701768291587918e-06, "loss": 0.0602, "step": 1947 }, { "epoch": 0.4083001467197652, "grad_norm": 0.16290639340877533, "learning_rate": 6.698575732171269e-06, "loss": 0.0591, "step": 1948 }, { "epoch": 0.4085097463844058, "grad_norm": 0.14287681877613068, "learning_rate": 6.695382389711561e-06, "loss": 0.0571, "step": 1949 }, { "epoch": 0.4087193460490463, "grad_norm": 0.13841015100479126, "learning_rate": 6.692188265680923e-06, "loss": 0.0579, "step": 1950 }, { "epoch": 0.4089289457136869, "grad_norm": 0.1272697001695633, "learning_rate": 6.688993361551847e-06, "loss": 0.0557, "step": 1951 }, { "epoch": 0.4091385453783274, "grad_norm": 0.16662158071994781, "learning_rate": 6.6857976787971815e-06, "loss": 0.0621, "step": 1952 }, { "epoch": 0.4093481450429679, "grad_norm": 0.18397577106952667, "learning_rate": 6.682601218890136e-06, "loss": 0.06, "step": 1953 }, { "epoch": 0.4095577447076085, "grad_norm": 0.14603066444396973, "learning_rate": 6.679403983304278e-06, "loss": 0.0582, "step": 1954 }, { "epoch": 0.409767344372249, "grad_norm": 0.1586339771747589, "learning_rate": 6.6762059735135325e-06, "loss": 0.0604, "step": 1955 }, { "epoch": 0.40997694403688956, "grad_norm": 0.1153809055685997, "learning_rate": 6.673007190992181e-06, "loss": 0.0626, "step": 1956 }, { "epoch": 0.41018654370153007, "grad_norm": 0.15605716407299042, "learning_rate": 6.669807637214862e-06, "loss": 0.0584, "step": 1957 }, { "epoch": 0.41039614336617064, "grad_norm": 0.17064720392227173, "learning_rate": 6.66660731365657e-06, "loss": 0.0572, "step": 1958 }, { "epoch": 0.41060574303081115, "grad_norm": 0.141871839761734, "learning_rate": 6.663406221792652e-06, "loss": 0.0552, "step": 1959 }, { "epoch": 0.41081534269545167, "grad_norm": 0.14091584086418152, "learning_rate": 6.660204363098812e-06, "loss": 0.06, "step": 1960 }, { "epoch": 0.41102494236009224, "grad_norm": 0.10206598043441772, "learning_rate": 6.657001739051105e-06, "loss": 0.0599, "step": 1961 }, { "epoch": 0.41123454202473275, "grad_norm": 0.1338677704334259, "learning_rate": 6.65379835112594e-06, "loss": 0.0609, "step": 1962 }, { "epoch": 0.4114441416893733, "grad_norm": 0.13361436128616333, "learning_rate": 6.65059420080008e-06, "loss": 0.058, "step": 1963 }, { "epoch": 0.41165374135401384, "grad_norm": 0.15927137434482574, "learning_rate": 6.647389289550635e-06, "loss": 0.061, "step": 1964 }, { "epoch": 0.41186334101865435, "grad_norm": 0.11594053357839584, "learning_rate": 6.64418361885507e-06, "loss": 0.0563, "step": 1965 }, { "epoch": 0.4120729406832949, "grad_norm": 0.10348820686340332, "learning_rate": 6.640977190191198e-06, "loss": 0.0613, "step": 1966 }, { "epoch": 0.41228254034793543, "grad_norm": 0.14770418405532837, "learning_rate": 6.637770005037182e-06, "loss": 0.0593, "step": 1967 }, { "epoch": 0.412492140012576, "grad_norm": 0.12195451557636261, "learning_rate": 6.634562064871535e-06, "loss": 0.0585, "step": 1968 }, { "epoch": 0.4127017396772165, "grad_norm": 0.11958389729261398, "learning_rate": 6.631353371173115e-06, "loss": 0.058, "step": 1969 }, { "epoch": 0.41291133934185703, "grad_norm": 0.09207571297883987, "learning_rate": 6.628143925421129e-06, "loss": 0.0556, "step": 1970 }, { "epoch": 0.4131209390064976, "grad_norm": 0.10654924809932709, "learning_rate": 6.624933729095133e-06, "loss": 0.0564, "step": 1971 }, { "epoch": 0.4133305386711381, "grad_norm": 0.12806440889835358, "learning_rate": 6.621722783675024e-06, "loss": 0.0573, "step": 1972 }, { "epoch": 0.4135401383357787, "grad_norm": 0.15015651285648346, "learning_rate": 6.6185110906410485e-06, "loss": 0.0567, "step": 1973 }, { "epoch": 0.4137497380004192, "grad_norm": 0.13939987123012543, "learning_rate": 6.615298651473799e-06, "loss": 0.058, "step": 1974 }, { "epoch": 0.4139593376650597, "grad_norm": 0.10312556475400925, "learning_rate": 6.612085467654204e-06, "loss": 0.0589, "step": 1975 }, { "epoch": 0.4141689373297003, "grad_norm": 0.11151403933763504, "learning_rate": 6.608871540663544e-06, "loss": 0.0598, "step": 1976 }, { "epoch": 0.4143785369943408, "grad_norm": 0.10755238682031631, "learning_rate": 6.605656871983439e-06, "loss": 0.0598, "step": 1977 }, { "epoch": 0.41458813665898137, "grad_norm": 0.09601911902427673, "learning_rate": 6.6024414630958475e-06, "loss": 0.0571, "step": 1978 }, { "epoch": 0.4147977363236219, "grad_norm": 0.11368954926729202, "learning_rate": 6.599225315483076e-06, "loss": 0.0556, "step": 1979 }, { "epoch": 0.4150073359882624, "grad_norm": 0.11148924380540848, "learning_rate": 6.596008430627766e-06, "loss": 0.0572, "step": 1980 }, { "epoch": 0.41521693565290296, "grad_norm": 0.11703907698392868, "learning_rate": 6.592790810012901e-06, "loss": 0.0583, "step": 1981 }, { "epoch": 0.4154265353175435, "grad_norm": 0.1259683072566986, "learning_rate": 6.589572455121804e-06, "loss": 0.0599, "step": 1982 }, { "epoch": 0.41563613498218405, "grad_norm": 0.12415824085474014, "learning_rate": 6.586353367438134e-06, "loss": 0.0568, "step": 1983 }, { "epoch": 0.41584573464682456, "grad_norm": 0.11688219755887985, "learning_rate": 6.5831335484458915e-06, "loss": 0.0564, "step": 1984 }, { "epoch": 0.4160553343114651, "grad_norm": 0.10014292597770691, "learning_rate": 6.579912999629412e-06, "loss": 0.0595, "step": 1985 }, { "epoch": 0.41626493397610564, "grad_norm": 0.09642448276281357, "learning_rate": 6.576691722473368e-06, "loss": 0.0587, "step": 1986 }, { "epoch": 0.41647453364074616, "grad_norm": 0.10601100325584412, "learning_rate": 6.573469718462768e-06, "loss": 0.0572, "step": 1987 }, { "epoch": 0.41668413330538673, "grad_norm": 0.10878970474004745, "learning_rate": 6.570246989082954e-06, "loss": 0.0598, "step": 1988 }, { "epoch": 0.41689373297002724, "grad_norm": 0.11488792300224304, "learning_rate": 6.567023535819605e-06, "loss": 0.0606, "step": 1989 }, { "epoch": 0.4171033326346678, "grad_norm": 0.1324160397052765, "learning_rate": 6.5637993601587305e-06, "loss": 0.058, "step": 1990 }, { "epoch": 0.4173129322993083, "grad_norm": 0.1504766047000885, "learning_rate": 6.560574463586677e-06, "loss": 0.0565, "step": 1991 }, { "epoch": 0.41752253196394884, "grad_norm": 0.1516587734222412, "learning_rate": 6.557348847590118e-06, "loss": 0.0564, "step": 1992 }, { "epoch": 0.4177321316285894, "grad_norm": 0.14044304192066193, "learning_rate": 6.554122513656065e-06, "loss": 0.0586, "step": 1993 }, { "epoch": 0.4179417312932299, "grad_norm": 0.11305470019578934, "learning_rate": 6.550895463271856e-06, "loss": 0.0593, "step": 1994 }, { "epoch": 0.4181513309578705, "grad_norm": 0.11471327394247055, "learning_rate": 6.547667697925161e-06, "loss": 0.055, "step": 1995 }, { "epoch": 0.418360930622511, "grad_norm": 0.15590576827526093, "learning_rate": 6.544439219103977e-06, "loss": 0.059, "step": 1996 }, { "epoch": 0.4185705302871515, "grad_norm": 0.16013385355472565, "learning_rate": 6.5412100282966366e-06, "loss": 0.0576, "step": 1997 }, { "epoch": 0.4187801299517921, "grad_norm": 0.13815461099147797, "learning_rate": 6.53798012699179e-06, "loss": 0.0577, "step": 1998 }, { "epoch": 0.4189897296164326, "grad_norm": 0.11431432515382767, "learning_rate": 6.534749516678427e-06, "loss": 0.0585, "step": 1999 }, { "epoch": 0.4191993292810732, "grad_norm": 0.1008606106042862, "learning_rate": 6.531518198845854e-06, "loss": 0.0561, "step": 2000 }, { "epoch": 0.4194089289457137, "grad_norm": 0.10297921299934387, "learning_rate": 6.52828617498371e-06, "loss": 0.0571, "step": 2001 }, { "epoch": 0.4196185286103542, "grad_norm": 0.10644082725048065, "learning_rate": 6.525053446581957e-06, "loss": 0.0551, "step": 2002 }, { "epoch": 0.41982812827499477, "grad_norm": 0.10528367012739182, "learning_rate": 6.52182001513088e-06, "loss": 0.0583, "step": 2003 }, { "epoch": 0.4200377279396353, "grad_norm": 0.09672725200653076, "learning_rate": 6.518585882121096e-06, "loss": 0.0558, "step": 2004 }, { "epoch": 0.42024732760427586, "grad_norm": 0.09464428573846817, "learning_rate": 6.515351049043533e-06, "loss": 0.058, "step": 2005 }, { "epoch": 0.42045692726891637, "grad_norm": 0.10385248810052872, "learning_rate": 6.5121155173894515e-06, "loss": 0.0584, "step": 2006 }, { "epoch": 0.4206665269335569, "grad_norm": 0.10311248153448105, "learning_rate": 6.508879288650431e-06, "loss": 0.0564, "step": 2007 }, { "epoch": 0.42087612659819745, "grad_norm": 0.08637700229883194, "learning_rate": 6.505642364318372e-06, "loss": 0.0596, "step": 2008 }, { "epoch": 0.42108572626283797, "grad_norm": 0.10172644257545471, "learning_rate": 6.502404745885495e-06, "loss": 0.0594, "step": 2009 }, { "epoch": 0.42129532592747854, "grad_norm": 0.12740617990493774, "learning_rate": 6.499166434844344e-06, "loss": 0.058, "step": 2010 }, { "epoch": 0.42150492559211905, "grad_norm": 0.12112673372030258, "learning_rate": 6.495927432687777e-06, "loss": 0.0566, "step": 2011 }, { "epoch": 0.42171452525675956, "grad_norm": 0.10495518893003464, "learning_rate": 6.492687740908973e-06, "loss": 0.0581, "step": 2012 }, { "epoch": 0.42192412492140013, "grad_norm": 0.11961285024881363, "learning_rate": 6.489447361001431e-06, "loss": 0.0561, "step": 2013 }, { "epoch": 0.42213372458604065, "grad_norm": 0.1219894215464592, "learning_rate": 6.486206294458966e-06, "loss": 0.0599, "step": 2014 }, { "epoch": 0.4223433242506812, "grad_norm": 0.11397355794906616, "learning_rate": 6.482964542775707e-06, "loss": 0.0564, "step": 2015 }, { "epoch": 0.42255292391532173, "grad_norm": 0.10773434489965439, "learning_rate": 6.479722107446102e-06, "loss": 0.0577, "step": 2016 }, { "epoch": 0.42276252357996225, "grad_norm": 0.09557615965604782, "learning_rate": 6.476478989964914e-06, "loss": 0.0588, "step": 2017 }, { "epoch": 0.4229721232446028, "grad_norm": 0.09783432632684708, "learning_rate": 6.473235191827219e-06, "loss": 0.056, "step": 2018 }, { "epoch": 0.42318172290924333, "grad_norm": 0.10292766988277435, "learning_rate": 6.469990714528403e-06, "loss": 0.0593, "step": 2019 }, { "epoch": 0.4233913225738839, "grad_norm": 0.09302861243486404, "learning_rate": 6.466745559564175e-06, "loss": 0.0565, "step": 2020 }, { "epoch": 0.4236009222385244, "grad_norm": 0.1034521758556366, "learning_rate": 6.463499728430549e-06, "loss": 0.0567, "step": 2021 }, { "epoch": 0.4238105219031649, "grad_norm": 0.11888016015291214, "learning_rate": 6.460253222623851e-06, "loss": 0.0578, "step": 2022 }, { "epoch": 0.4240201215678055, "grad_norm": 0.11663860082626343, "learning_rate": 6.457006043640722e-06, "loss": 0.0627, "step": 2023 }, { "epoch": 0.424229721232446, "grad_norm": 0.11908293515443802, "learning_rate": 6.453758192978109e-06, "loss": 0.0577, "step": 2024 }, { "epoch": 0.4244393208970866, "grad_norm": 0.11339572072029114, "learning_rate": 6.450509672133272e-06, "loss": 0.0609, "step": 2025 }, { "epoch": 0.4246489205617271, "grad_norm": 0.10132212191820145, "learning_rate": 6.447260482603776e-06, "loss": 0.0551, "step": 2026 }, { "epoch": 0.42485852022636766, "grad_norm": 0.119560107588768, "learning_rate": 6.444010625887498e-06, "loss": 0.0589, "step": 2027 }, { "epoch": 0.4250681198910082, "grad_norm": 0.12544332444667816, "learning_rate": 6.4407601034826225e-06, "loss": 0.0594, "step": 2028 }, { "epoch": 0.4252777195556487, "grad_norm": 0.10760358721017838, "learning_rate": 6.437508916887638e-06, "loss": 0.0583, "step": 2029 }, { "epoch": 0.42548731922028926, "grad_norm": 0.08328445255756378, "learning_rate": 6.4342570676013415e-06, "loss": 0.0586, "step": 2030 }, { "epoch": 0.4256969188849298, "grad_norm": 0.09850215911865234, "learning_rate": 6.4310045571228344e-06, "loss": 0.0574, "step": 2031 }, { "epoch": 0.42590651854957035, "grad_norm": 0.11578439176082611, "learning_rate": 6.427751386951525e-06, "loss": 0.0607, "step": 2032 }, { "epoch": 0.42611611821421086, "grad_norm": 0.10482225567102432, "learning_rate": 6.424497558587122e-06, "loss": 0.0572, "step": 2033 }, { "epoch": 0.4263257178788514, "grad_norm": 0.11043455451726913, "learning_rate": 6.421243073529639e-06, "loss": 0.0568, "step": 2034 }, { "epoch": 0.42653531754349194, "grad_norm": 0.12734782695770264, "learning_rate": 6.417987933279397e-06, "loss": 0.0573, "step": 2035 }, { "epoch": 0.42674491720813246, "grad_norm": 0.11755295097827911, "learning_rate": 6.41473213933701e-06, "loss": 0.0586, "step": 2036 }, { "epoch": 0.426954516872773, "grad_norm": 0.10255907475948334, "learning_rate": 6.411475693203402e-06, "loss": 0.0627, "step": 2037 }, { "epoch": 0.42716411653741354, "grad_norm": 0.07925237715244293, "learning_rate": 6.408218596379792e-06, "loss": 0.0562, "step": 2038 }, { "epoch": 0.42737371620205405, "grad_norm": 0.07128457725048065, "learning_rate": 6.404960850367701e-06, "loss": 0.0545, "step": 2039 }, { "epoch": 0.4275833158666946, "grad_norm": 0.08888307213783264, "learning_rate": 6.4017024566689515e-06, "loss": 0.0587, "step": 2040 }, { "epoch": 0.42779291553133514, "grad_norm": 0.09860040247440338, "learning_rate": 6.39844341678566e-06, "loss": 0.0571, "step": 2041 }, { "epoch": 0.4280025151959757, "grad_norm": 0.09772216528654099, "learning_rate": 6.395183732220242e-06, "loss": 0.0591, "step": 2042 }, { "epoch": 0.4282121148606162, "grad_norm": 0.08198471367359161, "learning_rate": 6.391923404475416e-06, "loss": 0.0592, "step": 2043 }, { "epoch": 0.42842171452525674, "grad_norm": 0.07226858288049698, "learning_rate": 6.388662435054187e-06, "loss": 0.0569, "step": 2044 }, { "epoch": 0.4286313141898973, "grad_norm": 0.1091211661696434, "learning_rate": 6.385400825459865e-06, "loss": 0.0546, "step": 2045 }, { "epoch": 0.4288409138545378, "grad_norm": 0.11992846429347992, "learning_rate": 6.382138577196052e-06, "loss": 0.0588, "step": 2046 }, { "epoch": 0.4290505135191784, "grad_norm": 0.09196002781391144, "learning_rate": 6.378875691766639e-06, "loss": 0.0565, "step": 2047 }, { "epoch": 0.4292601131838189, "grad_norm": 0.15287606418132782, "learning_rate": 6.375612170675821e-06, "loss": 0.0568, "step": 2048 }, { "epoch": 0.4294697128484594, "grad_norm": 0.15699946880340576, "learning_rate": 6.372348015428077e-06, "loss": 0.0595, "step": 2049 }, { "epoch": 0.4296793125131, "grad_norm": 0.12692134082317352, "learning_rate": 6.3690832275281835e-06, "loss": 0.059, "step": 2050 }, { "epoch": 0.4298889121777405, "grad_norm": 0.14513273537158966, "learning_rate": 6.3658178084812065e-06, "loss": 0.0585, "step": 2051 }, { "epoch": 0.43009851184238107, "grad_norm": 0.13621389865875244, "learning_rate": 6.3625517597925025e-06, "loss": 0.0557, "step": 2052 }, { "epoch": 0.4303081115070216, "grad_norm": 0.14017432928085327, "learning_rate": 6.359285082967721e-06, "loss": 0.0575, "step": 2053 }, { "epoch": 0.4305177111716621, "grad_norm": 0.11645541340112686, "learning_rate": 6.356017779512799e-06, "loss": 0.0586, "step": 2054 }, { "epoch": 0.43072731083630267, "grad_norm": 0.12075808644294739, "learning_rate": 6.352749850933961e-06, "loss": 0.0586, "step": 2055 }, { "epoch": 0.4309369105009432, "grad_norm": 0.1253325641155243, "learning_rate": 6.349481298737723e-06, "loss": 0.057, "step": 2056 }, { "epoch": 0.43114651016558375, "grad_norm": 0.12212934345006943, "learning_rate": 6.346212124430888e-06, "loss": 0.0561, "step": 2057 }, { "epoch": 0.43135610983022427, "grad_norm": 0.10774050652980804, "learning_rate": 6.342942329520543e-06, "loss": 0.0599, "step": 2058 }, { "epoch": 0.43156570949486484, "grad_norm": 0.08897628635168076, "learning_rate": 6.339671915514062e-06, "loss": 0.0602, "step": 2059 }, { "epoch": 0.43177530915950535, "grad_norm": 0.12079823017120361, "learning_rate": 6.336400883919106e-06, "loss": 0.0568, "step": 2060 }, { "epoch": 0.43198490882414586, "grad_norm": 0.11661111563444138, "learning_rate": 6.33312923624362e-06, "loss": 0.0562, "step": 2061 }, { "epoch": 0.43219450848878643, "grad_norm": 0.09649336338043213, "learning_rate": 6.329856973995835e-06, "loss": 0.0592, "step": 2062 }, { "epoch": 0.43240410815342695, "grad_norm": 0.12174370884895325, "learning_rate": 6.32658409868426e-06, "loss": 0.0586, "step": 2063 }, { "epoch": 0.4326137078180675, "grad_norm": 0.13494165241718292, "learning_rate": 6.32331061181769e-06, "loss": 0.0575, "step": 2064 }, { "epoch": 0.43282330748270803, "grad_norm": 0.1156393364071846, "learning_rate": 6.320036514905204e-06, "loss": 0.0577, "step": 2065 }, { "epoch": 0.43303290714734854, "grad_norm": 0.11403451859951019, "learning_rate": 6.316761809456159e-06, "loss": 0.0599, "step": 2066 }, { "epoch": 0.4332425068119891, "grad_norm": 0.1350114345550537, "learning_rate": 6.313486496980192e-06, "loss": 0.0572, "step": 2067 }, { "epoch": 0.43345210647662963, "grad_norm": 0.10050825029611588, "learning_rate": 6.310210578987225e-06, "loss": 0.0613, "step": 2068 }, { "epoch": 0.4336617061412702, "grad_norm": 0.1249486580491066, "learning_rate": 6.306934056987452e-06, "loss": 0.0597, "step": 2069 }, { "epoch": 0.4338713058059107, "grad_norm": 0.15121574699878693, "learning_rate": 6.303656932491349e-06, "loss": 0.0558, "step": 2070 }, { "epoch": 0.4340809054705512, "grad_norm": 0.12158171832561493, "learning_rate": 6.3003792070096735e-06, "loss": 0.059, "step": 2071 }, { "epoch": 0.4342905051351918, "grad_norm": 0.11560632288455963, "learning_rate": 6.297100882053451e-06, "loss": 0.0586, "step": 2072 }, { "epoch": 0.4345001047998323, "grad_norm": 0.1341363489627838, "learning_rate": 6.293821959133993e-06, "loss": 0.0616, "step": 2073 }, { "epoch": 0.4347097044644729, "grad_norm": 0.11600136011838913, "learning_rate": 6.2905424397628816e-06, "loss": 0.0593, "step": 2074 }, { "epoch": 0.4349193041291134, "grad_norm": 0.10497263818979263, "learning_rate": 6.28726232545197e-06, "loss": 0.0561, "step": 2075 }, { "epoch": 0.4351289037937539, "grad_norm": 0.10920844972133636, "learning_rate": 6.283981617713397e-06, "loss": 0.0577, "step": 2076 }, { "epoch": 0.4353385034583945, "grad_norm": 0.11021441221237183, "learning_rate": 6.280700318059563e-06, "loss": 0.0574, "step": 2077 }, { "epoch": 0.435548103123035, "grad_norm": 0.09715298563241959, "learning_rate": 6.277418428003149e-06, "loss": 0.0599, "step": 2078 }, { "epoch": 0.43575770278767556, "grad_norm": 0.0873681902885437, "learning_rate": 6.274135949057107e-06, "loss": 0.0556, "step": 2079 }, { "epoch": 0.4359673024523161, "grad_norm": 0.11400279402732849, "learning_rate": 6.270852882734654e-06, "loss": 0.0596, "step": 2080 }, { "epoch": 0.4361769021169566, "grad_norm": 0.10710210353136063, "learning_rate": 6.267569230549288e-06, "loss": 0.0607, "step": 2081 }, { "epoch": 0.43638650178159716, "grad_norm": 0.1295563280582428, "learning_rate": 6.26428499401477e-06, "loss": 0.0584, "step": 2082 }, { "epoch": 0.43659610144623767, "grad_norm": 0.1621527224779129, "learning_rate": 6.261000174645131e-06, "loss": 0.0576, "step": 2083 }, { "epoch": 0.43680570111087824, "grad_norm": 0.17567972838878632, "learning_rate": 6.257714773954674e-06, "loss": 0.057, "step": 2084 }, { "epoch": 0.43701530077551876, "grad_norm": 0.16140656173229218, "learning_rate": 6.254428793457967e-06, "loss": 0.0583, "step": 2085 }, { "epoch": 0.43722490044015927, "grad_norm": 0.13268983364105225, "learning_rate": 6.251142234669848e-06, "loss": 0.0565, "step": 2086 }, { "epoch": 0.43743450010479984, "grad_norm": 0.11374874413013458, "learning_rate": 6.24785509910542e-06, "loss": 0.056, "step": 2087 }, { "epoch": 0.43764409976944035, "grad_norm": 0.09471353888511658, "learning_rate": 6.244567388280047e-06, "loss": 0.0595, "step": 2088 }, { "epoch": 0.4378536994340809, "grad_norm": 0.12923859059810638, "learning_rate": 6.241279103709368e-06, "loss": 0.0555, "step": 2089 }, { "epoch": 0.43806329909872144, "grad_norm": 0.12571561336517334, "learning_rate": 6.23799024690928e-06, "loss": 0.0587, "step": 2090 }, { "epoch": 0.43827289876336195, "grad_norm": 0.1770731657743454, "learning_rate": 6.234700819395946e-06, "loss": 0.0601, "step": 2091 }, { "epoch": 0.4384824984280025, "grad_norm": 0.15990307927131653, "learning_rate": 6.231410822685791e-06, "loss": 0.0585, "step": 2092 }, { "epoch": 0.43869209809264303, "grad_norm": 0.11964087188243866, "learning_rate": 6.228120258295501e-06, "loss": 0.0585, "step": 2093 }, { "epoch": 0.4389016977572836, "grad_norm": 0.17662663757801056, "learning_rate": 6.224829127742028e-06, "loss": 0.0592, "step": 2094 }, { "epoch": 0.4391112974219241, "grad_norm": 0.17629125714302063, "learning_rate": 6.221537432542581e-06, "loss": 0.0581, "step": 2095 }, { "epoch": 0.4393208970865647, "grad_norm": 0.20414410531520844, "learning_rate": 6.218245174214632e-06, "loss": 0.0573, "step": 2096 }, { "epoch": 0.4395304967512052, "grad_norm": 0.16704221069812775, "learning_rate": 6.21495235427591e-06, "loss": 0.0575, "step": 2097 }, { "epoch": 0.4397400964158457, "grad_norm": 0.11289860308170319, "learning_rate": 6.211658974244407e-06, "loss": 0.0558, "step": 2098 }, { "epoch": 0.4399496960804863, "grad_norm": 0.17604808509349823, "learning_rate": 6.208365035638366e-06, "loss": 0.0585, "step": 2099 }, { "epoch": 0.4401592957451268, "grad_norm": 0.18010827898979187, "learning_rate": 6.205070539976297e-06, "loss": 0.0616, "step": 2100 }, { "epoch": 0.44036889540976737, "grad_norm": 0.15220797061920166, "learning_rate": 6.2017754887769576e-06, "loss": 0.0565, "step": 2101 }, { "epoch": 0.4405784950744079, "grad_norm": 0.12355081737041473, "learning_rate": 6.198479883559367e-06, "loss": 0.0581, "step": 2102 }, { "epoch": 0.4407880947390484, "grad_norm": 0.16100534796714783, "learning_rate": 6.195183725842799e-06, "loss": 0.0599, "step": 2103 }, { "epoch": 0.44099769440368897, "grad_norm": 0.1588708758354187, "learning_rate": 6.191887017146784e-06, "loss": 0.0561, "step": 2104 }, { "epoch": 0.4412072940683295, "grad_norm": 0.10125992447137833, "learning_rate": 6.1885897589911e-06, "loss": 0.0616, "step": 2105 }, { "epoch": 0.44141689373297005, "grad_norm": 0.16263973712921143, "learning_rate": 6.185291952895784e-06, "loss": 0.0601, "step": 2106 }, { "epoch": 0.44162649339761056, "grad_norm": 0.15129660069942474, "learning_rate": 6.181993600381126e-06, "loss": 0.0571, "step": 2107 }, { "epoch": 0.4418360930622511, "grad_norm": 0.13963650166988373, "learning_rate": 6.17869470296766e-06, "loss": 0.0569, "step": 2108 }, { "epoch": 0.44204569272689165, "grad_norm": 0.13959255814552307, "learning_rate": 6.175395262176184e-06, "loss": 0.0612, "step": 2109 }, { "epoch": 0.44225529239153216, "grad_norm": 0.13720089197158813, "learning_rate": 6.172095279527735e-06, "loss": 0.0559, "step": 2110 }, { "epoch": 0.44246489205617273, "grad_norm": 0.1654689759016037, "learning_rate": 6.168794756543605e-06, "loss": 0.057, "step": 2111 }, { "epoch": 0.44267449172081325, "grad_norm": 0.145694300532341, "learning_rate": 6.1654936947453355e-06, "loss": 0.0561, "step": 2112 }, { "epoch": 0.44288409138545376, "grad_norm": 0.13383303582668304, "learning_rate": 6.162192095654714e-06, "loss": 0.0579, "step": 2113 }, { "epoch": 0.44309369105009433, "grad_norm": 0.1771506667137146, "learning_rate": 6.158889960793779e-06, "loss": 0.0558, "step": 2114 }, { "epoch": 0.44330329071473484, "grad_norm": 0.1819649338722229, "learning_rate": 6.155587291684814e-06, "loss": 0.0574, "step": 2115 }, { "epoch": 0.4435128903793754, "grad_norm": 0.16896513104438782, "learning_rate": 6.1522840898503446e-06, "loss": 0.0588, "step": 2116 }, { "epoch": 0.4437224900440159, "grad_norm": 0.135847270488739, "learning_rate": 6.148980356813151e-06, "loss": 0.0569, "step": 2117 }, { "epoch": 0.44393208970865644, "grad_norm": 0.20298628509044647, "learning_rate": 6.145676094096251e-06, "loss": 0.0624, "step": 2118 }, { "epoch": 0.444141689373297, "grad_norm": 0.15171730518341064, "learning_rate": 6.142371303222909e-06, "loss": 0.0567, "step": 2119 }, { "epoch": 0.4443512890379375, "grad_norm": 0.1395554542541504, "learning_rate": 6.139065985716635e-06, "loss": 0.0578, "step": 2120 }, { "epoch": 0.4445608887025781, "grad_norm": 0.14991877973079681, "learning_rate": 6.135760143101177e-06, "loss": 0.0587, "step": 2121 }, { "epoch": 0.4447704883672186, "grad_norm": 0.16107945144176483, "learning_rate": 6.13245377690053e-06, "loss": 0.058, "step": 2122 }, { "epoch": 0.4449800880318591, "grad_norm": 0.1380777806043625, "learning_rate": 6.129146888638928e-06, "loss": 0.0574, "step": 2123 }, { "epoch": 0.4451896876964997, "grad_norm": 0.10554420948028564, "learning_rate": 6.1258394798408424e-06, "loss": 0.0586, "step": 2124 }, { "epoch": 0.4453992873611402, "grad_norm": 0.1480768322944641, "learning_rate": 6.122531552030992e-06, "loss": 0.0602, "step": 2125 }, { "epoch": 0.4456088870257808, "grad_norm": 0.1352657824754715, "learning_rate": 6.119223106734328e-06, "loss": 0.0581, "step": 2126 }, { "epoch": 0.4458184866904213, "grad_norm": 0.1190410628914833, "learning_rate": 6.115914145476045e-06, "loss": 0.0586, "step": 2127 }, { "epoch": 0.44602808635506186, "grad_norm": 0.13114245235919952, "learning_rate": 6.112604669781572e-06, "loss": 0.0564, "step": 2128 }, { "epoch": 0.4462376860197024, "grad_norm": 0.1338747888803482, "learning_rate": 6.109294681176578e-06, "loss": 0.0603, "step": 2129 }, { "epoch": 0.4464472856843429, "grad_norm": 0.12414805591106415, "learning_rate": 6.105984181186968e-06, "loss": 0.0564, "step": 2130 }, { "epoch": 0.44665688534898346, "grad_norm": 0.09722696989774704, "learning_rate": 6.102673171338878e-06, "loss": 0.0565, "step": 2131 }, { "epoch": 0.44686648501362397, "grad_norm": 0.13036277890205383, "learning_rate": 6.099361653158687e-06, "loss": 0.0595, "step": 2132 }, { "epoch": 0.44707608467826454, "grad_norm": 0.12158705294132233, "learning_rate": 6.0960496281729995e-06, "loss": 0.0564, "step": 2133 }, { "epoch": 0.44728568434290505, "grad_norm": 0.13005104660987854, "learning_rate": 6.092737097908663e-06, "loss": 0.0613, "step": 2134 }, { "epoch": 0.44749528400754557, "grad_norm": 0.1160380020737648, "learning_rate": 6.08942406389275e-06, "loss": 0.0599, "step": 2135 }, { "epoch": 0.44770488367218614, "grad_norm": 0.11387168616056442, "learning_rate": 6.086110527652571e-06, "loss": 0.0565, "step": 2136 }, { "epoch": 0.44791448333682665, "grad_norm": 0.15158362686634064, "learning_rate": 6.082796490715666e-06, "loss": 0.0593, "step": 2137 }, { "epoch": 0.4481240830014672, "grad_norm": 0.12060023099184036, "learning_rate": 6.0794819546098006e-06, "loss": 0.0565, "step": 2138 }, { "epoch": 0.44833368266610774, "grad_norm": 0.12229592353105545, "learning_rate": 6.076166920862979e-06, "loss": 0.0584, "step": 2139 }, { "epoch": 0.44854328233074825, "grad_norm": 0.10849461704492569, "learning_rate": 6.072851391003432e-06, "loss": 0.0565, "step": 2140 }, { "epoch": 0.4487528819953888, "grad_norm": 0.1278999298810959, "learning_rate": 6.069535366559615e-06, "loss": 0.0572, "step": 2141 }, { "epoch": 0.44896248166002933, "grad_norm": 0.1158517524600029, "learning_rate": 6.066218849060217e-06, "loss": 0.0586, "step": 2142 }, { "epoch": 0.4491720813246699, "grad_norm": 0.11099664121866226, "learning_rate": 6.0629018400341514e-06, "loss": 0.0569, "step": 2143 }, { "epoch": 0.4493816809893104, "grad_norm": 0.10765625536441803, "learning_rate": 6.059584341010556e-06, "loss": 0.0578, "step": 2144 }, { "epoch": 0.44959128065395093, "grad_norm": 0.10618588328361511, "learning_rate": 6.056266353518803e-06, "loss": 0.0557, "step": 2145 }, { "epoch": 0.4498008803185915, "grad_norm": 0.11303580552339554, "learning_rate": 6.052947879088479e-06, "loss": 0.0572, "step": 2146 }, { "epoch": 0.450010479983232, "grad_norm": 0.09245932102203369, "learning_rate": 6.0496289192494e-06, "loss": 0.0542, "step": 2147 }, { "epoch": 0.4502200796478726, "grad_norm": 0.09954038262367249, "learning_rate": 6.046309475531609e-06, "loss": 0.0579, "step": 2148 }, { "epoch": 0.4504296793125131, "grad_norm": 0.08930417150259018, "learning_rate": 6.0429895494653655e-06, "loss": 0.0557, "step": 2149 }, { "epoch": 0.4506392789771536, "grad_norm": 0.10569532960653305, "learning_rate": 6.039669142581157e-06, "loss": 0.0588, "step": 2150 }, { "epoch": 0.4508488786417942, "grad_norm": 0.10851980745792389, "learning_rate": 6.036348256409692e-06, "loss": 0.0586, "step": 2151 }, { "epoch": 0.4510584783064347, "grad_norm": 0.10847117751836777, "learning_rate": 6.0330268924818925e-06, "loss": 0.0563, "step": 2152 }, { "epoch": 0.45126807797107527, "grad_norm": 0.10027307271957397, "learning_rate": 6.029705052328912e-06, "loss": 0.058, "step": 2153 }, { "epoch": 0.4514776776357158, "grad_norm": 0.08025722205638885, "learning_rate": 6.026382737482116e-06, "loss": 0.0541, "step": 2154 }, { "epoch": 0.4516872773003563, "grad_norm": 0.09462866187095642, "learning_rate": 6.023059949473091e-06, "loss": 0.0561, "step": 2155 }, { "epoch": 0.45189687696499686, "grad_norm": 0.0991954430937767, "learning_rate": 6.019736689833643e-06, "loss": 0.0566, "step": 2156 }, { "epoch": 0.4521064766296374, "grad_norm": 0.11048531532287598, "learning_rate": 6.016412960095791e-06, "loss": 0.057, "step": 2157 }, { "epoch": 0.45231607629427795, "grad_norm": 0.09113509207963943, "learning_rate": 6.013088761791776e-06, "loss": 0.0562, "step": 2158 }, { "epoch": 0.45252567595891846, "grad_norm": 0.07936461269855499, "learning_rate": 6.0097640964540535e-06, "loss": 0.0601, "step": 2159 }, { "epoch": 0.452735275623559, "grad_norm": 0.10202574729919434, "learning_rate": 6.006438965615291e-06, "loss": 0.0573, "step": 2160 }, { "epoch": 0.45294487528819954, "grad_norm": 0.10225759446620941, "learning_rate": 6.003113370808375e-06, "loss": 0.0587, "step": 2161 }, { "epoch": 0.45315447495284006, "grad_norm": 0.09442250430583954, "learning_rate": 5.999787313566403e-06, "loss": 0.0609, "step": 2162 }, { "epoch": 0.45336407461748063, "grad_norm": 0.11422485113143921, "learning_rate": 5.996460795422688e-06, "loss": 0.0597, "step": 2163 }, { "epoch": 0.45357367428212114, "grad_norm": 0.08849114924669266, "learning_rate": 5.993133817910752e-06, "loss": 0.0596, "step": 2164 }, { "epoch": 0.4537832739467617, "grad_norm": 0.1041831523180008, "learning_rate": 5.9898063825643335e-06, "loss": 0.0561, "step": 2165 }, { "epoch": 0.4539928736114022, "grad_norm": 0.12931424379348755, "learning_rate": 5.986478490917378e-06, "loss": 0.0563, "step": 2166 }, { "epoch": 0.45420247327604274, "grad_norm": 0.07999792695045471, "learning_rate": 5.983150144504043e-06, "loss": 0.0577, "step": 2167 }, { "epoch": 0.4544120729406833, "grad_norm": 0.10252411663532257, "learning_rate": 5.979821344858695e-06, "loss": 0.0587, "step": 2168 }, { "epoch": 0.4546216726053238, "grad_norm": 0.10504443198442459, "learning_rate": 5.976492093515911e-06, "loss": 0.0596, "step": 2169 }, { "epoch": 0.4548312722699644, "grad_norm": 0.09300100803375244, "learning_rate": 5.973162392010474e-06, "loss": 0.0626, "step": 2170 }, { "epoch": 0.4550408719346049, "grad_norm": 0.11920040845870972, "learning_rate": 5.969832241877378e-06, "loss": 0.0583, "step": 2171 }, { "epoch": 0.4552504715992454, "grad_norm": 0.10893451422452927, "learning_rate": 5.966501644651817e-06, "loss": 0.055, "step": 2172 }, { "epoch": 0.455460071263886, "grad_norm": 0.08888144791126251, "learning_rate": 5.963170601869203e-06, "loss": 0.0577, "step": 2173 }, { "epoch": 0.4556696709285265, "grad_norm": 0.11214505136013031, "learning_rate": 5.959839115065138e-06, "loss": 0.057, "step": 2174 }, { "epoch": 0.4558792705931671, "grad_norm": 0.10267043858766556, "learning_rate": 5.956507185775441e-06, "loss": 0.0584, "step": 2175 }, { "epoch": 0.4560888702578076, "grad_norm": 0.10312189161777496, "learning_rate": 5.953174815536131e-06, "loss": 0.0589, "step": 2176 }, { "epoch": 0.4562984699224481, "grad_norm": 0.11400537937879562, "learning_rate": 5.949842005883428e-06, "loss": 0.0566, "step": 2177 }, { "epoch": 0.45650806958708867, "grad_norm": 0.0924360379576683, "learning_rate": 5.9465087583537594e-06, "loss": 0.0597, "step": 2178 }, { "epoch": 0.4567176692517292, "grad_norm": 0.11675729602575302, "learning_rate": 5.943175074483749e-06, "loss": 0.0566, "step": 2179 }, { "epoch": 0.45692726891636976, "grad_norm": 0.11411453783512115, "learning_rate": 5.939840955810223e-06, "loss": 0.0598, "step": 2180 }, { "epoch": 0.45713686858101027, "grad_norm": 0.11106470972299576, "learning_rate": 5.936506403870215e-06, "loss": 0.0569, "step": 2181 }, { "epoch": 0.4573464682456508, "grad_norm": 0.1371942162513733, "learning_rate": 5.933171420200946e-06, "loss": 0.0571, "step": 2182 }, { "epoch": 0.45755606791029135, "grad_norm": 0.10696960240602493, "learning_rate": 5.929836006339848e-06, "loss": 0.0614, "step": 2183 }, { "epoch": 0.45776566757493187, "grad_norm": 0.1150413230061531, "learning_rate": 5.926500163824546e-06, "loss": 0.0592, "step": 2184 }, { "epoch": 0.45797526723957244, "grad_norm": 0.10095971077680588, "learning_rate": 5.923163894192857e-06, "loss": 0.0564, "step": 2185 }, { "epoch": 0.45818486690421295, "grad_norm": 0.10621986538171768, "learning_rate": 5.9198271989828075e-06, "loss": 0.0544, "step": 2186 }, { "epoch": 0.45839446656885346, "grad_norm": 0.12330467998981476, "learning_rate": 5.91649007973261e-06, "loss": 0.0565, "step": 2187 }, { "epoch": 0.45860406623349403, "grad_norm": 0.10255663841962814, "learning_rate": 5.913152537980674e-06, "loss": 0.057, "step": 2188 }, { "epoch": 0.45881366589813455, "grad_norm": 0.13345320522785187, "learning_rate": 5.909814575265609e-06, "loss": 0.0597, "step": 2189 }, { "epoch": 0.4590232655627751, "grad_norm": 0.112071193754673, "learning_rate": 5.9064761931262135e-06, "loss": 0.0556, "step": 2190 }, { "epoch": 0.45923286522741563, "grad_norm": 0.12068294733762741, "learning_rate": 5.903137393101482e-06, "loss": 0.0554, "step": 2191 }, { "epoch": 0.45944246489205615, "grad_norm": 0.11650517582893372, "learning_rate": 5.8997981767306e-06, "loss": 0.0557, "step": 2192 }, { "epoch": 0.4596520645566967, "grad_norm": 0.1526482254266739, "learning_rate": 5.896458545552946e-06, "loss": 0.0581, "step": 2193 }, { "epoch": 0.45986166422133723, "grad_norm": 0.15084873139858246, "learning_rate": 5.893118501108087e-06, "loss": 0.0581, "step": 2194 }, { "epoch": 0.4600712638859778, "grad_norm": 0.12740178406238556, "learning_rate": 5.889778044935785e-06, "loss": 0.0579, "step": 2195 }, { "epoch": 0.4602808635506183, "grad_norm": 0.1175624206662178, "learning_rate": 5.88643717857599e-06, "loss": 0.0556, "step": 2196 }, { "epoch": 0.4604904632152589, "grad_norm": 0.12416189163923264, "learning_rate": 5.883095903568838e-06, "loss": 0.0587, "step": 2197 }, { "epoch": 0.4607000628798994, "grad_norm": 0.13820405304431915, "learning_rate": 5.87975422145466e-06, "loss": 0.0573, "step": 2198 }, { "epoch": 0.4609096625445399, "grad_norm": 0.11369778960943222, "learning_rate": 5.876412133773968e-06, "loss": 0.0573, "step": 2199 }, { "epoch": 0.4611192622091805, "grad_norm": 0.1153719425201416, "learning_rate": 5.873069642067464e-06, "loss": 0.0553, "step": 2200 }, { "epoch": 0.461328861873821, "grad_norm": 0.10116839408874512, "learning_rate": 5.869726747876036e-06, "loss": 0.0588, "step": 2201 }, { "epoch": 0.46153846153846156, "grad_norm": 0.1170465499162674, "learning_rate": 5.866383452740758e-06, "loss": 0.0555, "step": 2202 }, { "epoch": 0.4617480612031021, "grad_norm": 0.14628863334655762, "learning_rate": 5.863039758202889e-06, "loss": 0.0561, "step": 2203 }, { "epoch": 0.4619576608677426, "grad_norm": 0.15336976945400238, "learning_rate": 5.85969566580387e-06, "loss": 0.0593, "step": 2204 }, { "epoch": 0.46216726053238316, "grad_norm": 0.14001086354255676, "learning_rate": 5.856351177085327e-06, "loss": 0.0572, "step": 2205 }, { "epoch": 0.4623768601970237, "grad_norm": 0.09859530627727509, "learning_rate": 5.853006293589071e-06, "loss": 0.057, "step": 2206 }, { "epoch": 0.46258645986166425, "grad_norm": 0.10227430611848831, "learning_rate": 5.84966101685709e-06, "loss": 0.0542, "step": 2207 }, { "epoch": 0.46279605952630476, "grad_norm": 0.12590292096138, "learning_rate": 5.846315348431555e-06, "loss": 0.0573, "step": 2208 }, { "epoch": 0.4630056591909453, "grad_norm": 0.11103103309869766, "learning_rate": 5.842969289854823e-06, "loss": 0.0576, "step": 2209 }, { "epoch": 0.46321525885558584, "grad_norm": 0.12213583290576935, "learning_rate": 5.839622842669423e-06, "loss": 0.0586, "step": 2210 }, { "epoch": 0.46342485852022636, "grad_norm": 0.10279441624879837, "learning_rate": 5.836276008418065e-06, "loss": 0.0569, "step": 2211 }, { "epoch": 0.4636344581848669, "grad_norm": 0.09687735140323639, "learning_rate": 5.832928788643644e-06, "loss": 0.0582, "step": 2212 }, { "epoch": 0.46384405784950744, "grad_norm": 0.09342148154973984, "learning_rate": 5.8295811848892215e-06, "loss": 0.0585, "step": 2213 }, { "epoch": 0.46405365751414795, "grad_norm": 0.09567330032587051, "learning_rate": 5.826233198698047e-06, "loss": 0.0571, "step": 2214 }, { "epoch": 0.4642632571787885, "grad_norm": 0.10038771480321884, "learning_rate": 5.822884831613538e-06, "loss": 0.0558, "step": 2215 }, { "epoch": 0.46447285684342904, "grad_norm": 0.10112529247999191, "learning_rate": 5.819536085179293e-06, "loss": 0.0575, "step": 2216 }, { "epoch": 0.4646824565080696, "grad_norm": 0.11409956961870193, "learning_rate": 5.816186960939084e-06, "loss": 0.0551, "step": 2217 }, { "epoch": 0.4648920561727101, "grad_norm": 0.11404009908437729, "learning_rate": 5.8128374604368534e-06, "loss": 0.0572, "step": 2218 }, { "epoch": 0.46510165583735064, "grad_norm": 0.1052129939198494, "learning_rate": 5.809487585216725e-06, "loss": 0.0569, "step": 2219 }, { "epoch": 0.4653112555019912, "grad_norm": 0.1068800687789917, "learning_rate": 5.806137336822987e-06, "loss": 0.0568, "step": 2220 }, { "epoch": 0.4655208551666317, "grad_norm": 0.1187024712562561, "learning_rate": 5.802786716800102e-06, "loss": 0.0567, "step": 2221 }, { "epoch": 0.4657304548312723, "grad_norm": 0.11063768714666367, "learning_rate": 5.79943572669271e-06, "loss": 0.0565, "step": 2222 }, { "epoch": 0.4659400544959128, "grad_norm": 0.11005666106939316, "learning_rate": 5.796084368045612e-06, "loss": 0.0578, "step": 2223 }, { "epoch": 0.4661496541605533, "grad_norm": 0.09957437217235565, "learning_rate": 5.7927326424037875e-06, "loss": 0.0566, "step": 2224 }, { "epoch": 0.4663592538251939, "grad_norm": 0.10687166452407837, "learning_rate": 5.789380551312379e-06, "loss": 0.0565, "step": 2225 }, { "epoch": 0.4665688534898344, "grad_norm": 0.12073647975921631, "learning_rate": 5.7860280963167e-06, "loss": 0.0587, "step": 2226 }, { "epoch": 0.46677845315447497, "grad_norm": 0.11012930423021317, "learning_rate": 5.782675278962232e-06, "loss": 0.056, "step": 2227 }, { "epoch": 0.4669880528191155, "grad_norm": 0.10260272771120071, "learning_rate": 5.7793221007946245e-06, "loss": 0.0595, "step": 2228 }, { "epoch": 0.467197652483756, "grad_norm": 0.07964638620615005, "learning_rate": 5.775968563359688e-06, "loss": 0.0563, "step": 2229 }, { "epoch": 0.46740725214839657, "grad_norm": 0.09134583175182343, "learning_rate": 5.7726146682034055e-06, "loss": 0.0548, "step": 2230 }, { "epoch": 0.4676168518130371, "grad_norm": 0.10940870642662048, "learning_rate": 5.7692604168719225e-06, "loss": 0.0535, "step": 2231 }, { "epoch": 0.46782645147767765, "grad_norm": 0.1325157731771469, "learning_rate": 5.765905810911546e-06, "loss": 0.0571, "step": 2232 }, { "epoch": 0.46803605114231817, "grad_norm": 0.13585898280143738, "learning_rate": 5.762550851868751e-06, "loss": 0.057, "step": 2233 }, { "epoch": 0.46824565080695874, "grad_norm": 0.11632729321718216, "learning_rate": 5.759195541290171e-06, "loss": 0.0581, "step": 2234 }, { "epoch": 0.46845525047159925, "grad_norm": 0.08722345530986786, "learning_rate": 5.7558398807226045e-06, "loss": 0.0561, "step": 2235 }, { "epoch": 0.46866485013623976, "grad_norm": 0.1064450740814209, "learning_rate": 5.7524838717130095e-06, "loss": 0.0549, "step": 2236 }, { "epoch": 0.46887444980088033, "grad_norm": 0.12046857923269272, "learning_rate": 5.749127515808506e-06, "loss": 0.0576, "step": 2237 }, { "epoch": 0.46908404946552085, "grad_norm": 0.11908888071775436, "learning_rate": 5.745770814556373e-06, "loss": 0.0575, "step": 2238 }, { "epoch": 0.4692936491301614, "grad_norm": 0.10716499388217926, "learning_rate": 5.7424137695040495e-06, "loss": 0.0562, "step": 2239 }, { "epoch": 0.46950324879480193, "grad_norm": 0.09762990474700928, "learning_rate": 5.7390563821991326e-06, "loss": 0.0582, "step": 2240 }, { "epoch": 0.46971284845944244, "grad_norm": 0.11566614359617233, "learning_rate": 5.735698654189377e-06, "loss": 0.0592, "step": 2241 }, { "epoch": 0.469922448124083, "grad_norm": 0.09800199419260025, "learning_rate": 5.7323405870226955e-06, "loss": 0.0567, "step": 2242 }, { "epoch": 0.47013204778872353, "grad_norm": 0.09443154186010361, "learning_rate": 5.7289821822471545e-06, "loss": 0.058, "step": 2243 }, { "epoch": 0.4703416474533641, "grad_norm": 0.07052087038755417, "learning_rate": 5.725623441410979e-06, "loss": 0.0536, "step": 2244 }, { "epoch": 0.4705512471180046, "grad_norm": 0.10066118091344833, "learning_rate": 5.722264366062549e-06, "loss": 0.0568, "step": 2245 }, { "epoch": 0.4707608467826451, "grad_norm": 0.10691467672586441, "learning_rate": 5.718904957750394e-06, "loss": 0.0564, "step": 2246 }, { "epoch": 0.4709704464472857, "grad_norm": 0.10959997028112411, "learning_rate": 5.715545218023205e-06, "loss": 0.0544, "step": 2247 }, { "epoch": 0.4711800461119262, "grad_norm": 0.09174855053424835, "learning_rate": 5.7121851484298184e-06, "loss": 0.0534, "step": 2248 }, { "epoch": 0.4713896457765668, "grad_norm": 0.10001726448535919, "learning_rate": 5.708824750519225e-06, "loss": 0.0574, "step": 2249 }, { "epoch": 0.4715992454412073, "grad_norm": 0.10282893478870392, "learning_rate": 5.705464025840571e-06, "loss": 0.0535, "step": 2250 }, { "epoch": 0.4718088451058478, "grad_norm": 0.0975334644317627, "learning_rate": 5.702102975943147e-06, "loss": 0.0609, "step": 2251 }, { "epoch": 0.4720184447704884, "grad_norm": 0.08588805049657822, "learning_rate": 5.698741602376395e-06, "loss": 0.0538, "step": 2252 }, { "epoch": 0.4722280444351289, "grad_norm": 0.08347409963607788, "learning_rate": 5.695379906689912e-06, "loss": 0.0594, "step": 2253 }, { "epoch": 0.47243764409976946, "grad_norm": 0.0910545289516449, "learning_rate": 5.6920178904334346e-06, "loss": 0.0592, "step": 2254 }, { "epoch": 0.47264724376441, "grad_norm": 0.08542585372924805, "learning_rate": 5.688655555156854e-06, "loss": 0.058, "step": 2255 }, { "epoch": 0.4728568434290505, "grad_norm": 0.09314323216676712, "learning_rate": 5.6852929024102065e-06, "loss": 0.0546, "step": 2256 }, { "epoch": 0.47306644309369106, "grad_norm": 0.09594455361366272, "learning_rate": 5.681929933743672e-06, "loss": 0.0546, "step": 2257 }, { "epoch": 0.47327604275833157, "grad_norm": 0.09240654855966568, "learning_rate": 5.67856665070758e-06, "loss": 0.0568, "step": 2258 }, { "epoch": 0.47348564242297214, "grad_norm": 0.08861672133207321, "learning_rate": 5.675203054852403e-06, "loss": 0.0584, "step": 2259 }, { "epoch": 0.47369524208761266, "grad_norm": 0.08311017602682114, "learning_rate": 5.671839147728758e-06, "loss": 0.0548, "step": 2260 }, { "epoch": 0.47390484175225317, "grad_norm": 0.08416654914617538, "learning_rate": 5.668474930887406e-06, "loss": 0.0569, "step": 2261 }, { "epoch": 0.47411444141689374, "grad_norm": 0.07767292857170105, "learning_rate": 5.6651104058792496e-06, "loss": 0.0558, "step": 2262 }, { "epoch": 0.47432404108153425, "grad_norm": 0.09354811161756516, "learning_rate": 5.661745574255334e-06, "loss": 0.0562, "step": 2263 }, { "epoch": 0.4745336407461748, "grad_norm": 0.1013355404138565, "learning_rate": 5.658380437566846e-06, "loss": 0.0563, "step": 2264 }, { "epoch": 0.47474324041081534, "grad_norm": 0.08834841102361679, "learning_rate": 5.655014997365114e-06, "loss": 0.0581, "step": 2265 }, { "epoch": 0.4749528400754559, "grad_norm": 0.07844306528568268, "learning_rate": 5.651649255201603e-06, "loss": 0.0559, "step": 2266 }, { "epoch": 0.4751624397400964, "grad_norm": 0.08669186383485794, "learning_rate": 5.648283212627921e-06, "loss": 0.0571, "step": 2267 }, { "epoch": 0.47537203940473693, "grad_norm": 0.08236299455165863, "learning_rate": 5.6449168711958135e-06, "loss": 0.057, "step": 2268 }, { "epoch": 0.4755816390693775, "grad_norm": 0.0980667769908905, "learning_rate": 5.641550232457162e-06, "loss": 0.0546, "step": 2269 }, { "epoch": 0.475791238734018, "grad_norm": 0.11645791679620743, "learning_rate": 5.638183297963986e-06, "loss": 0.0549, "step": 2270 }, { "epoch": 0.4760008383986586, "grad_norm": 0.12081831693649292, "learning_rate": 5.634816069268442e-06, "loss": 0.0582, "step": 2271 }, { "epoch": 0.4762104380632991, "grad_norm": 0.1103171780705452, "learning_rate": 5.631448547922822e-06, "loss": 0.0567, "step": 2272 }, { "epoch": 0.4764200377279396, "grad_norm": 0.1022624745965004, "learning_rate": 5.628080735479553e-06, "loss": 0.0577, "step": 2273 }, { "epoch": 0.4766296373925802, "grad_norm": 0.0808848962187767, "learning_rate": 5.624712633491196e-06, "loss": 0.0593, "step": 2274 }, { "epoch": 0.4768392370572207, "grad_norm": 0.07358387112617493, "learning_rate": 5.621344243510444e-06, "loss": 0.0568, "step": 2275 }, { "epoch": 0.47704883672186127, "grad_norm": 0.07857120782136917, "learning_rate": 5.6179755670901245e-06, "loss": 0.0569, "step": 2276 }, { "epoch": 0.4772584363865018, "grad_norm": 0.06889036297798157, "learning_rate": 5.614606605783197e-06, "loss": 0.0586, "step": 2277 }, { "epoch": 0.4774680360511423, "grad_norm": 0.07760413736104965, "learning_rate": 5.611237361142753e-06, "loss": 0.0551, "step": 2278 }, { "epoch": 0.47767763571578287, "grad_norm": 0.08692996203899384, "learning_rate": 5.607867834722012e-06, "loss": 0.0545, "step": 2279 }, { "epoch": 0.4778872353804234, "grad_norm": 0.08127982169389725, "learning_rate": 5.604498028074323e-06, "loss": 0.0571, "step": 2280 }, { "epoch": 0.47809683504506395, "grad_norm": 0.07126957178115845, "learning_rate": 5.601127942753173e-06, "loss": 0.0577, "step": 2281 }, { "epoch": 0.47830643470970446, "grad_norm": 0.08008185029029846, "learning_rate": 5.597757580312163e-06, "loss": 0.0542, "step": 2282 }, { "epoch": 0.478516034374345, "grad_norm": 0.07614471018314362, "learning_rate": 5.594386942305035e-06, "loss": 0.0554, "step": 2283 }, { "epoch": 0.47872563403898555, "grad_norm": 0.06831208616495132, "learning_rate": 5.5910160302856486e-06, "loss": 0.0555, "step": 2284 }, { "epoch": 0.47893523370362606, "grad_norm": 0.07799834758043289, "learning_rate": 5.587644845807994e-06, "loss": 0.0569, "step": 2285 }, { "epoch": 0.47914483336826663, "grad_norm": 0.08827179670333862, "learning_rate": 5.584273390426189e-06, "loss": 0.0555, "step": 2286 }, { "epoch": 0.47935443303290715, "grad_norm": 0.07891764491796494, "learning_rate": 5.580901665694471e-06, "loss": 0.0582, "step": 2287 }, { "epoch": 0.47956403269754766, "grad_norm": 0.06794466078281403, "learning_rate": 5.577529673167208e-06, "loss": 0.0563, "step": 2288 }, { "epoch": 0.47977363236218823, "grad_norm": 0.06872694939374924, "learning_rate": 5.574157414398885e-06, "loss": 0.0551, "step": 2289 }, { "epoch": 0.47998323202682874, "grad_norm": 0.06112726777791977, "learning_rate": 5.570784890944112e-06, "loss": 0.0571, "step": 2290 }, { "epoch": 0.4801928316914693, "grad_norm": 0.07286538183689117, "learning_rate": 5.567412104357623e-06, "loss": 0.057, "step": 2291 }, { "epoch": 0.4804024313561098, "grad_norm": 0.06900748610496521, "learning_rate": 5.564039056194274e-06, "loss": 0.0567, "step": 2292 }, { "epoch": 0.48061203102075034, "grad_norm": 0.07319528609514236, "learning_rate": 5.560665748009034e-06, "loss": 0.0562, "step": 2293 }, { "epoch": 0.4808216306853909, "grad_norm": 0.07866884768009186, "learning_rate": 5.557292181357003e-06, "loss": 0.0561, "step": 2294 }, { "epoch": 0.4810312303500314, "grad_norm": 0.07217524945735931, "learning_rate": 5.553918357793391e-06, "loss": 0.0565, "step": 2295 }, { "epoch": 0.481240830014672, "grad_norm": 0.0580948144197464, "learning_rate": 5.550544278873531e-06, "loss": 0.0583, "step": 2296 }, { "epoch": 0.4814504296793125, "grad_norm": 0.0548657588660717, "learning_rate": 5.547169946152874e-06, "loss": 0.0574, "step": 2297 }, { "epoch": 0.481660029343953, "grad_norm": 0.06751734763383865, "learning_rate": 5.543795361186984e-06, "loss": 0.0567, "step": 2298 }, { "epoch": 0.4818696290085936, "grad_norm": 0.0683041512966156, "learning_rate": 5.540420525531547e-06, "loss": 0.0561, "step": 2299 }, { "epoch": 0.4820792286732341, "grad_norm": 0.0687454417347908, "learning_rate": 5.537045440742359e-06, "loss": 0.0569, "step": 2300 }, { "epoch": 0.4822888283378747, "grad_norm": 0.060074321925640106, "learning_rate": 5.533670108375334e-06, "loss": 0.0553, "step": 2301 }, { "epoch": 0.4824984280025152, "grad_norm": 0.05447131395339966, "learning_rate": 5.5302945299865005e-06, "loss": 0.057, "step": 2302 }, { "epoch": 0.48270802766715576, "grad_norm": 0.051248107105493546, "learning_rate": 5.526918707132e-06, "loss": 0.0561, "step": 2303 }, { "epoch": 0.4829176273317963, "grad_norm": 0.0613834448158741, "learning_rate": 5.523542641368083e-06, "loss": 0.0582, "step": 2304 }, { "epoch": 0.4831272269964368, "grad_norm": 0.054669518023729324, "learning_rate": 5.520166334251118e-06, "loss": 0.056, "step": 2305 }, { "epoch": 0.48333682666107736, "grad_norm": 0.058429256081581116, "learning_rate": 5.5167897873375805e-06, "loss": 0.0573, "step": 2306 }, { "epoch": 0.48354642632571787, "grad_norm": 0.06477133929729462, "learning_rate": 5.513413002184059e-06, "loss": 0.0557, "step": 2307 }, { "epoch": 0.48375602599035844, "grad_norm": 0.06107893958687782, "learning_rate": 5.510035980347249e-06, "loss": 0.0576, "step": 2308 }, { "epoch": 0.48396562565499895, "grad_norm": 0.06686937808990479, "learning_rate": 5.50665872338396e-06, "loss": 0.0586, "step": 2309 }, { "epoch": 0.48417522531963947, "grad_norm": 0.0777740553021431, "learning_rate": 5.503281232851102e-06, "loss": 0.0551, "step": 2310 }, { "epoch": 0.48438482498428004, "grad_norm": 0.08693230897188187, "learning_rate": 5.499903510305703e-06, "loss": 0.0538, "step": 2311 }, { "epoch": 0.48459442464892055, "grad_norm": 0.09828737378120422, "learning_rate": 5.496525557304888e-06, "loss": 0.0546, "step": 2312 }, { "epoch": 0.4848040243135611, "grad_norm": 0.11315979063510895, "learning_rate": 5.4931473754058935e-06, "loss": 0.0586, "step": 2313 }, { "epoch": 0.48501362397820164, "grad_norm": 0.1083892285823822, "learning_rate": 5.489768966166064e-06, "loss": 0.0606, "step": 2314 }, { "epoch": 0.48522322364284215, "grad_norm": 0.08665548264980316, "learning_rate": 5.486390331142841e-06, "loss": 0.0561, "step": 2315 }, { "epoch": 0.4854328233074827, "grad_norm": 0.06847358494997025, "learning_rate": 5.483011471893775e-06, "loss": 0.0552, "step": 2316 }, { "epoch": 0.48564242297212323, "grad_norm": 0.08172042667865753, "learning_rate": 5.479632389976524e-06, "loss": 0.0575, "step": 2317 }, { "epoch": 0.4858520226367638, "grad_norm": 0.120709627866745, "learning_rate": 5.4762530869488385e-06, "loss": 0.0572, "step": 2318 }, { "epoch": 0.4860616223014043, "grad_norm": 0.1355757862329483, "learning_rate": 5.4728735643685804e-06, "loss": 0.0587, "step": 2319 }, { "epoch": 0.48627122196604483, "grad_norm": 0.12048780918121338, "learning_rate": 5.469493823793706e-06, "loss": 0.0591, "step": 2320 }, { "epoch": 0.4864808216306854, "grad_norm": 0.07901732623577118, "learning_rate": 5.466113866782277e-06, "loss": 0.0552, "step": 2321 }, { "epoch": 0.4866904212953259, "grad_norm": 0.0863695964217186, "learning_rate": 5.462733694892452e-06, "loss": 0.0571, "step": 2322 }, { "epoch": 0.4869000209599665, "grad_norm": 0.13559545576572418, "learning_rate": 5.4593533096824906e-06, "loss": 0.0566, "step": 2323 }, { "epoch": 0.487109620624607, "grad_norm": 0.1430426388978958, "learning_rate": 5.455972712710748e-06, "loss": 0.058, "step": 2324 }, { "epoch": 0.4873192202892475, "grad_norm": 0.09979075938463211, "learning_rate": 5.4525919055356804e-06, "loss": 0.0557, "step": 2325 }, { "epoch": 0.4875288199538881, "grad_norm": 0.07715443521738052, "learning_rate": 5.449210889715837e-06, "loss": 0.0599, "step": 2326 }, { "epoch": 0.4877384196185286, "grad_norm": 0.09540057182312012, "learning_rate": 5.445829666809866e-06, "loss": 0.0527, "step": 2327 }, { "epoch": 0.48794801928316917, "grad_norm": 0.10082437098026276, "learning_rate": 5.442448238376513e-06, "loss": 0.058, "step": 2328 }, { "epoch": 0.4881576189478097, "grad_norm": 0.09437216818332672, "learning_rate": 5.439066605974615e-06, "loss": 0.0533, "step": 2329 }, { "epoch": 0.4883672186124502, "grad_norm": 0.10088086873292923, "learning_rate": 5.4356847711631015e-06, "loss": 0.0566, "step": 2330 }, { "epoch": 0.48857681827709076, "grad_norm": 0.09661588817834854, "learning_rate": 5.432302735500998e-06, "loss": 0.0525, "step": 2331 }, { "epoch": 0.4887864179417313, "grad_norm": 0.08586642891168594, "learning_rate": 5.428920500547425e-06, "loss": 0.0568, "step": 2332 }, { "epoch": 0.48899601760637185, "grad_norm": 0.09433707594871521, "learning_rate": 5.4255380678615885e-06, "loss": 0.0546, "step": 2333 }, { "epoch": 0.48920561727101236, "grad_norm": 0.1241917684674263, "learning_rate": 5.422155439002793e-06, "loss": 0.0577, "step": 2334 }, { "epoch": 0.48941521693565293, "grad_norm": 0.11522339284420013, "learning_rate": 5.418772615530426e-06, "loss": 0.0575, "step": 2335 }, { "epoch": 0.48962481660029344, "grad_norm": 0.09254017472267151, "learning_rate": 5.415389599003972e-06, "loss": 0.0549, "step": 2336 }, { "epoch": 0.48983441626493396, "grad_norm": 0.1002906784415245, "learning_rate": 5.412006390982999e-06, "loss": 0.0554, "step": 2337 }, { "epoch": 0.49004401592957453, "grad_norm": 0.09268494695425034, "learning_rate": 5.4086229930271636e-06, "loss": 0.059, "step": 2338 }, { "epoch": 0.49025361559421504, "grad_norm": 0.0953909158706665, "learning_rate": 5.405239406696216e-06, "loss": 0.054, "step": 2339 }, { "epoch": 0.4904632152588556, "grad_norm": 0.10069115459918976, "learning_rate": 5.401855633549984e-06, "loss": 0.0563, "step": 2340 }, { "epoch": 0.4906728149234961, "grad_norm": 0.09953933954238892, "learning_rate": 5.398471675148389e-06, "loss": 0.0573, "step": 2341 }, { "epoch": 0.49088241458813664, "grad_norm": 0.11302805691957474, "learning_rate": 5.395087533051434e-06, "loss": 0.0545, "step": 2342 }, { "epoch": 0.4910920142527772, "grad_norm": 0.10684003680944443, "learning_rate": 5.391703208819209e-06, "loss": 0.0559, "step": 2343 }, { "epoch": 0.4913016139174177, "grad_norm": 0.09628332406282425, "learning_rate": 5.388318704011885e-06, "loss": 0.0545, "step": 2344 }, { "epoch": 0.4915112135820583, "grad_norm": 0.10096313059329987, "learning_rate": 5.38493402018972e-06, "loss": 0.059, "step": 2345 }, { "epoch": 0.4917208132466988, "grad_norm": 0.08674436807632446, "learning_rate": 5.3815491589130506e-06, "loss": 0.0603, "step": 2346 }, { "epoch": 0.4919304129113393, "grad_norm": 0.0984383150935173, "learning_rate": 5.378164121742301e-06, "loss": 0.0552, "step": 2347 }, { "epoch": 0.4921400125759799, "grad_norm": 0.10171261429786682, "learning_rate": 5.374778910237968e-06, "loss": 0.0556, "step": 2348 }, { "epoch": 0.4923496122406204, "grad_norm": 0.09578763693571091, "learning_rate": 5.3713935259606345e-06, "loss": 0.0573, "step": 2349 }, { "epoch": 0.492559211905261, "grad_norm": 0.09438461065292358, "learning_rate": 5.368007970470964e-06, "loss": 0.0582, "step": 2350 }, { "epoch": 0.4927688115699015, "grad_norm": 0.08926350623369217, "learning_rate": 5.3646222453296936e-06, "loss": 0.0568, "step": 2351 }, { "epoch": 0.492978411234542, "grad_norm": 0.09703266620635986, "learning_rate": 5.361236352097646e-06, "loss": 0.057, "step": 2352 }, { "epoch": 0.49318801089918257, "grad_norm": 0.0997847244143486, "learning_rate": 5.357850292335715e-06, "loss": 0.0538, "step": 2353 }, { "epoch": 0.4933976105638231, "grad_norm": 0.09428234398365021, "learning_rate": 5.354464067604872e-06, "loss": 0.0584, "step": 2354 }, { "epoch": 0.49360721022846366, "grad_norm": 0.10148053616285324, "learning_rate": 5.35107767946617e-06, "loss": 0.0593, "step": 2355 }, { "epoch": 0.49381680989310417, "grad_norm": 0.11165421456098557, "learning_rate": 5.3476911294807284e-06, "loss": 0.057, "step": 2356 }, { "epoch": 0.4940264095577447, "grad_norm": 0.0888749286532402, "learning_rate": 5.344304419209748e-06, "loss": 0.0558, "step": 2357 }, { "epoch": 0.49423600922238525, "grad_norm": 0.10030533373355865, "learning_rate": 5.340917550214504e-06, "loss": 0.0587, "step": 2358 }, { "epoch": 0.49444560888702577, "grad_norm": 0.10392221808433533, "learning_rate": 5.337530524056338e-06, "loss": 0.0557, "step": 2359 }, { "epoch": 0.49465520855166634, "grad_norm": 0.09198975563049316, "learning_rate": 5.334143342296672e-06, "loss": 0.0559, "step": 2360 }, { "epoch": 0.49486480821630685, "grad_norm": 0.0988771989941597, "learning_rate": 5.3307560064969955e-06, "loss": 0.0561, "step": 2361 }, { "epoch": 0.49507440788094736, "grad_norm": 0.09191461652517319, "learning_rate": 5.327368518218866e-06, "loss": 0.0575, "step": 2362 }, { "epoch": 0.49528400754558793, "grad_norm": 0.08047354221343994, "learning_rate": 5.323980879023919e-06, "loss": 0.0582, "step": 2363 }, { "epoch": 0.49549360721022845, "grad_norm": 0.097519151866436, "learning_rate": 5.3205930904738544e-06, "loss": 0.0563, "step": 2364 }, { "epoch": 0.495703206874869, "grad_norm": 0.09642237424850464, "learning_rate": 5.317205154130442e-06, "loss": 0.0562, "step": 2365 }, { "epoch": 0.49591280653950953, "grad_norm": 0.09264837205410004, "learning_rate": 5.3138170715555186e-06, "loss": 0.0574, "step": 2366 }, { "epoch": 0.49612240620415005, "grad_norm": 0.10126832872629166, "learning_rate": 5.31042884431099e-06, "loss": 0.0553, "step": 2367 }, { "epoch": 0.4963320058687906, "grad_norm": 0.09630407392978668, "learning_rate": 5.3070404739588285e-06, "loss": 0.0527, "step": 2368 }, { "epoch": 0.49654160553343113, "grad_norm": 0.0988563820719719, "learning_rate": 5.303651962061074e-06, "loss": 0.0599, "step": 2369 }, { "epoch": 0.4967512051980717, "grad_norm": 0.09341637045145035, "learning_rate": 5.300263310179826e-06, "loss": 0.0563, "step": 2370 }, { "epoch": 0.4969608048627122, "grad_norm": 0.08345013111829758, "learning_rate": 5.296874519877256e-06, "loss": 0.0572, "step": 2371 }, { "epoch": 0.4971704045273528, "grad_norm": 0.08412396162748337, "learning_rate": 5.293485592715593e-06, "loss": 0.0528, "step": 2372 }, { "epoch": 0.4973800041919933, "grad_norm": 0.09522940963506699, "learning_rate": 5.290096530257134e-06, "loss": 0.0576, "step": 2373 }, { "epoch": 0.4975896038566338, "grad_norm": 0.0857052206993103, "learning_rate": 5.286707334064234e-06, "loss": 0.0547, "step": 2374 }, { "epoch": 0.4977992035212744, "grad_norm": 0.08157432824373245, "learning_rate": 5.283318005699313e-06, "loss": 0.0595, "step": 2375 }, { "epoch": 0.4980088031859149, "grad_norm": 0.08909313380718231, "learning_rate": 5.279928546724849e-06, "loss": 0.0555, "step": 2376 }, { "epoch": 0.49821840285055546, "grad_norm": 0.0913192629814148, "learning_rate": 5.276538958703384e-06, "loss": 0.0553, "step": 2377 }, { "epoch": 0.498428002515196, "grad_norm": 0.09321916103363037, "learning_rate": 5.273149243197517e-06, "loss": 0.0555, "step": 2378 }, { "epoch": 0.4986376021798365, "grad_norm": 0.09494626522064209, "learning_rate": 5.269759401769904e-06, "loss": 0.0588, "step": 2379 }, { "epoch": 0.49884720184447706, "grad_norm": 0.08014019578695297, "learning_rate": 5.266369435983264e-06, "loss": 0.0564, "step": 2380 }, { "epoch": 0.4990568015091176, "grad_norm": 0.08060725778341293, "learning_rate": 5.26297934740037e-06, "loss": 0.0594, "step": 2381 }, { "epoch": 0.49926640117375815, "grad_norm": 0.09616056829690933, "learning_rate": 5.259589137584049e-06, "loss": 0.0564, "step": 2382 }, { "epoch": 0.49947600083839866, "grad_norm": 0.10162606090307236, "learning_rate": 5.256198808097192e-06, "loss": 0.0553, "step": 2383 }, { "epoch": 0.4996856005030392, "grad_norm": 0.09933992475271225, "learning_rate": 5.252808360502737e-06, "loss": 0.0562, "step": 2384 }, { "epoch": 0.49989520016767974, "grad_norm": 0.106983482837677, "learning_rate": 5.2494177963636785e-06, "loss": 0.0579, "step": 2385 }, { "epoch": 0.5001047998323203, "grad_norm": 0.10930163413286209, "learning_rate": 5.246027117243071e-06, "loss": 0.0578, "step": 2386 }, { "epoch": 0.5003143994969608, "grad_norm": 0.10855498164892197, "learning_rate": 5.24263632470401e-06, "loss": 0.0547, "step": 2387 }, { "epoch": 0.5005239991616014, "grad_norm": 0.09061628580093384, "learning_rate": 5.239245420309658e-06, "loss": 0.0546, "step": 2388 }, { "epoch": 0.5007335988262419, "grad_norm": 0.07383181154727936, "learning_rate": 5.2358544056232145e-06, "loss": 0.0568, "step": 2389 }, { "epoch": 0.5009431984908824, "grad_norm": 0.07915870100259781, "learning_rate": 5.232463282207937e-06, "loss": 0.058, "step": 2390 }, { "epoch": 0.5011527981555229, "grad_norm": 0.07858150452375412, "learning_rate": 5.229072051627137e-06, "loss": 0.0581, "step": 2391 }, { "epoch": 0.5013623978201635, "grad_norm": 0.07529259473085403, "learning_rate": 5.225680715444168e-06, "loss": 0.0572, "step": 2392 }, { "epoch": 0.5015719974848041, "grad_norm": 0.07334654778242111, "learning_rate": 5.222289275222436e-06, "loss": 0.0565, "step": 2393 }, { "epoch": 0.5017815971494446, "grad_norm": 0.07713853567838669, "learning_rate": 5.218897732525397e-06, "loss": 0.0555, "step": 2394 }, { "epoch": 0.5019911968140851, "grad_norm": 0.0780978575348854, "learning_rate": 5.215506088916545e-06, "loss": 0.0557, "step": 2395 }, { "epoch": 0.5022007964787256, "grad_norm": 0.06955696642398834, "learning_rate": 5.212114345959433e-06, "loss": 0.0569, "step": 2396 }, { "epoch": 0.5024103961433661, "grad_norm": 0.07240567356348038, "learning_rate": 5.2087225052176515e-06, "loss": 0.0579, "step": 2397 }, { "epoch": 0.5026199958080068, "grad_norm": 0.08363047242164612, "learning_rate": 5.205330568254837e-06, "loss": 0.0534, "step": 2398 }, { "epoch": 0.5028295954726473, "grad_norm": 0.08383505046367645, "learning_rate": 5.201938536634674e-06, "loss": 0.0569, "step": 2399 }, { "epoch": 0.5030391951372878, "grad_norm": 0.07523591816425323, "learning_rate": 5.198546411920888e-06, "loss": 0.0564, "step": 2400 }, { "epoch": 0.5032487948019283, "grad_norm": 0.08412330597639084, "learning_rate": 5.195154195677247e-06, "loss": 0.0541, "step": 2401 }, { "epoch": 0.5034583944665688, "grad_norm": 0.08403821289539337, "learning_rate": 5.1917618894675615e-06, "loss": 0.0546, "step": 2402 }, { "epoch": 0.5036679941312094, "grad_norm": 0.07715484499931335, "learning_rate": 5.188369494855686e-06, "loss": 0.0564, "step": 2403 }, { "epoch": 0.50387759379585, "grad_norm": 0.07596497237682343, "learning_rate": 5.184977013405512e-06, "loss": 0.0592, "step": 2404 }, { "epoch": 0.5040871934604905, "grad_norm": 0.0813627615571022, "learning_rate": 5.181584446680974e-06, "loss": 0.0554, "step": 2405 }, { "epoch": 0.504296793125131, "grad_norm": 0.10759492963552475, "learning_rate": 5.178191796246043e-06, "loss": 0.0575, "step": 2406 }, { "epoch": 0.5045063927897715, "grad_norm": 0.12914326786994934, "learning_rate": 5.174799063664731e-06, "loss": 0.0546, "step": 2407 }, { "epoch": 0.5047159924544121, "grad_norm": 0.12072386592626572, "learning_rate": 5.171406250501087e-06, "loss": 0.0569, "step": 2408 }, { "epoch": 0.5049255921190526, "grad_norm": 0.10323359817266464, "learning_rate": 5.1680133583191975e-06, "loss": 0.0564, "step": 2409 }, { "epoch": 0.5051351917836931, "grad_norm": 0.10388664901256561, "learning_rate": 5.164620388683183e-06, "loss": 0.0547, "step": 2410 }, { "epoch": 0.5053447914483337, "grad_norm": 0.09523274004459381, "learning_rate": 5.1612273431572055e-06, "loss": 0.0558, "step": 2411 }, { "epoch": 0.5055543911129742, "grad_norm": 0.08987044543027878, "learning_rate": 5.1578342233054555e-06, "loss": 0.0577, "step": 2412 }, { "epoch": 0.5057639907776148, "grad_norm": 0.08983159810304642, "learning_rate": 5.154441030692162e-06, "loss": 0.0571, "step": 2413 }, { "epoch": 0.5059735904422553, "grad_norm": 0.07881300896406174, "learning_rate": 5.151047766881585e-06, "loss": 0.0565, "step": 2414 }, { "epoch": 0.5061831901068958, "grad_norm": 0.07373305410146713, "learning_rate": 5.147654433438018e-06, "loss": 0.0545, "step": 2415 }, { "epoch": 0.5063927897715363, "grad_norm": 0.07738931477069855, "learning_rate": 5.144261031925789e-06, "loss": 0.0551, "step": 2416 }, { "epoch": 0.5066023894361769, "grad_norm": 0.07858271896839142, "learning_rate": 5.140867563909254e-06, "loss": 0.0541, "step": 2417 }, { "epoch": 0.5068119891008175, "grad_norm": 0.08694379776716232, "learning_rate": 5.137474030952801e-06, "loss": 0.0558, "step": 2418 }, { "epoch": 0.507021588765458, "grad_norm": 0.09215452522039413, "learning_rate": 5.134080434620849e-06, "loss": 0.0572, "step": 2419 }, { "epoch": 0.5072311884300985, "grad_norm": 0.08612719178199768, "learning_rate": 5.1306867764778445e-06, "loss": 0.054, "step": 2420 }, { "epoch": 0.507440788094739, "grad_norm": 0.0733933225274086, "learning_rate": 5.127293058088261e-06, "loss": 0.0533, "step": 2421 }, { "epoch": 0.5076503877593795, "grad_norm": 0.06744138896465302, "learning_rate": 5.1238992810166065e-06, "loss": 0.0542, "step": 2422 }, { "epoch": 0.5078599874240202, "grad_norm": 0.062108736485242844, "learning_rate": 5.120505446827409e-06, "loss": 0.0584, "step": 2423 }, { "epoch": 0.5080695870886607, "grad_norm": 0.06326153874397278, "learning_rate": 5.117111557085225e-06, "loss": 0.0542, "step": 2424 }, { "epoch": 0.5082791867533012, "grad_norm": 0.07352180778980255, "learning_rate": 5.1137176133546376e-06, "loss": 0.0584, "step": 2425 }, { "epoch": 0.5084887864179417, "grad_norm": 0.07469355314970016, "learning_rate": 5.1103236172002534e-06, "loss": 0.0574, "step": 2426 }, { "epoch": 0.5086983860825822, "grad_norm": 0.08976007997989655, "learning_rate": 5.106929570186706e-06, "loss": 0.057, "step": 2427 }, { "epoch": 0.5089079857472228, "grad_norm": 0.09722107648849487, "learning_rate": 5.1035354738786465e-06, "loss": 0.0571, "step": 2428 }, { "epoch": 0.5091175854118634, "grad_norm": 0.07933610677719116, "learning_rate": 5.100141329840757e-06, "loss": 0.0566, "step": 2429 }, { "epoch": 0.5093271850765039, "grad_norm": 0.07435188442468643, "learning_rate": 5.096747139637737e-06, "loss": 0.0562, "step": 2430 }, { "epoch": 0.5095367847411444, "grad_norm": 0.06793338805437088, "learning_rate": 5.0933529048343025e-06, "loss": 0.0573, "step": 2431 }, { "epoch": 0.5097463844057849, "grad_norm": 0.07182233035564423, "learning_rate": 5.089958626995199e-06, "loss": 0.055, "step": 2432 }, { "epoch": 0.5099559840704255, "grad_norm": 0.09133189171552658, "learning_rate": 5.086564307685188e-06, "loss": 0.0541, "step": 2433 }, { "epoch": 0.510165583735066, "grad_norm": 0.08642099797725677, "learning_rate": 5.083169948469049e-06, "loss": 0.0556, "step": 2434 }, { "epoch": 0.5103751833997066, "grad_norm": 0.08702914416790009, "learning_rate": 5.07977555091158e-06, "loss": 0.0571, "step": 2435 }, { "epoch": 0.5105847830643471, "grad_norm": 0.08467281609773636, "learning_rate": 5.0763811165776e-06, "loss": 0.0562, "step": 2436 }, { "epoch": 0.5107943827289876, "grad_norm": 0.06674730032682419, "learning_rate": 5.07298664703194e-06, "loss": 0.0522, "step": 2437 }, { "epoch": 0.5110039823936282, "grad_norm": 0.07278907299041748, "learning_rate": 5.069592143839452e-06, "loss": 0.0542, "step": 2438 }, { "epoch": 0.5112135820582687, "grad_norm": 0.0811818316578865, "learning_rate": 5.066197608564999e-06, "loss": 0.0573, "step": 2439 }, { "epoch": 0.5114231817229092, "grad_norm": 0.07044421881437302, "learning_rate": 5.062803042773463e-06, "loss": 0.0575, "step": 2440 }, { "epoch": 0.5116327813875498, "grad_norm": 0.06003498286008835, "learning_rate": 5.059408448029737e-06, "loss": 0.0577, "step": 2441 }, { "epoch": 0.5118423810521903, "grad_norm": 0.06972794234752655, "learning_rate": 5.056013825898729e-06, "loss": 0.0543, "step": 2442 }, { "epoch": 0.5120519807168309, "grad_norm": 0.07647814601659775, "learning_rate": 5.052619177945358e-06, "loss": 0.0548, "step": 2443 }, { "epoch": 0.5122615803814714, "grad_norm": 0.06816070526838303, "learning_rate": 5.049224505734558e-06, "loss": 0.0582, "step": 2444 }, { "epoch": 0.5124711800461119, "grad_norm": 0.0557573027908802, "learning_rate": 5.045829810831271e-06, "loss": 0.0547, "step": 2445 }, { "epoch": 0.5126807797107524, "grad_norm": 0.06597109884023666, "learning_rate": 5.042435094800451e-06, "loss": 0.056, "step": 2446 }, { "epoch": 0.512890379375393, "grad_norm": 0.0730803981423378, "learning_rate": 5.0390403592070605e-06, "loss": 0.0547, "step": 2447 }, { "epoch": 0.5130999790400336, "grad_norm": 0.07265052199363708, "learning_rate": 5.0356456056160715e-06, "loss": 0.0567, "step": 2448 }, { "epoch": 0.5133095787046741, "grad_norm": 0.07946612685918808, "learning_rate": 5.032250835592467e-06, "loss": 0.0576, "step": 2449 }, { "epoch": 0.5135191783693146, "grad_norm": 0.09229972958564758, "learning_rate": 5.028856050701234e-06, "loss": 0.0536, "step": 2450 }, { "epoch": 0.5137287780339551, "grad_norm": 0.1069934144616127, "learning_rate": 5.025461252507367e-06, "loss": 0.0562, "step": 2451 }, { "epoch": 0.5139383776985956, "grad_norm": 0.12349528819322586, "learning_rate": 5.0220664425758695e-06, "loss": 0.0573, "step": 2452 }, { "epoch": 0.5141479773632363, "grad_norm": 0.1274581402540207, "learning_rate": 5.0186716224717445e-06, "loss": 0.0576, "step": 2453 }, { "epoch": 0.5143575770278768, "grad_norm": 0.11807071417570114, "learning_rate": 5.0152767937600055e-06, "loss": 0.0576, "step": 2454 }, { "epoch": 0.5145671766925173, "grad_norm": 0.10544098168611526, "learning_rate": 5.0118819580056686e-06, "loss": 0.0589, "step": 2455 }, { "epoch": 0.5147767763571578, "grad_norm": 0.091020368039608, "learning_rate": 5.008487116773752e-06, "loss": 0.0574, "step": 2456 }, { "epoch": 0.5149863760217984, "grad_norm": 0.07477325946092606, "learning_rate": 5.0050922716292745e-06, "loss": 0.0584, "step": 2457 }, { "epoch": 0.5151959756864389, "grad_norm": 0.06757844984531403, "learning_rate": 5.001697424137264e-06, "loss": 0.0552, "step": 2458 }, { "epoch": 0.5154055753510794, "grad_norm": 0.06685573607683182, "learning_rate": 4.9983025758627376e-06, "loss": 0.0533, "step": 2459 }, { "epoch": 0.51561517501572, "grad_norm": 0.0714949518442154, "learning_rate": 4.9949077283707255e-06, "loss": 0.0563, "step": 2460 }, { "epoch": 0.5158247746803605, "grad_norm": 0.06642146408557892, "learning_rate": 4.991512883226251e-06, "loss": 0.0599, "step": 2461 }, { "epoch": 0.5160343743450011, "grad_norm": 0.06495679169893265, "learning_rate": 4.988118041994332e-06, "loss": 0.0593, "step": 2462 }, { "epoch": 0.5162439740096416, "grad_norm": 0.06817002594470978, "learning_rate": 4.984723206239995e-06, "loss": 0.0564, "step": 2463 }, { "epoch": 0.5164535736742821, "grad_norm": 0.0714736357331276, "learning_rate": 4.981328377528258e-06, "loss": 0.0588, "step": 2464 }, { "epoch": 0.5166631733389226, "grad_norm": 0.07612305879592896, "learning_rate": 4.977933557424133e-06, "loss": 0.0566, "step": 2465 }, { "epoch": 0.5168727730035632, "grad_norm": 0.0783240795135498, "learning_rate": 4.974538747492634e-06, "loss": 0.056, "step": 2466 }, { "epoch": 0.5170823726682038, "grad_norm": 0.07756256312131882, "learning_rate": 4.971143949298769e-06, "loss": 0.0546, "step": 2467 }, { "epoch": 0.5172919723328443, "grad_norm": 0.07845515012741089, "learning_rate": 4.967749164407535e-06, "loss": 0.0533, "step": 2468 }, { "epoch": 0.5175015719974848, "grad_norm": 0.08055856078863144, "learning_rate": 4.964354394383929e-06, "loss": 0.0586, "step": 2469 }, { "epoch": 0.5177111716621253, "grad_norm": 0.07212040573358536, "learning_rate": 4.960959640792943e-06, "loss": 0.0549, "step": 2470 }, { "epoch": 0.5179207713267658, "grad_norm": 0.07273006439208984, "learning_rate": 4.9575649051995515e-06, "loss": 0.0545, "step": 2471 }, { "epoch": 0.5181303709914065, "grad_norm": 0.08731981366872787, "learning_rate": 4.954170189168731e-06, "loss": 0.0581, "step": 2472 }, { "epoch": 0.518339970656047, "grad_norm": 0.07897140830755234, "learning_rate": 4.950775494265443e-06, "loss": 0.0562, "step": 2473 }, { "epoch": 0.5185495703206875, "grad_norm": 0.055604368448257446, "learning_rate": 4.947380822054643e-06, "loss": 0.0545, "step": 2474 }, { "epoch": 0.518759169985328, "grad_norm": 0.05771341547369957, "learning_rate": 4.9439861741012726e-06, "loss": 0.0541, "step": 2475 }, { "epoch": 0.5189687696499685, "grad_norm": 0.08119264245033264, "learning_rate": 4.940591551970264e-06, "loss": 0.0558, "step": 2476 }, { "epoch": 0.5191783693146091, "grad_norm": 0.08719661831855774, "learning_rate": 4.93719695722654e-06, "loss": 0.0553, "step": 2477 }, { "epoch": 0.5193879689792497, "grad_norm": 0.08087395131587982, "learning_rate": 4.933802391435002e-06, "loss": 0.0536, "step": 2478 }, { "epoch": 0.5195975686438902, "grad_norm": 0.07597414404153824, "learning_rate": 4.93040785616055e-06, "loss": 0.0563, "step": 2479 }, { "epoch": 0.5198071683085307, "grad_norm": 0.07065032422542572, "learning_rate": 4.927013352968063e-06, "loss": 0.0543, "step": 2480 }, { "epoch": 0.5200167679731712, "grad_norm": 0.06194977089762688, "learning_rate": 4.9236188834224015e-06, "loss": 0.0575, "step": 2481 }, { "epoch": 0.5202263676378118, "grad_norm": 0.059151582419872284, "learning_rate": 4.920224449088421e-06, "loss": 0.0577, "step": 2482 }, { "epoch": 0.5204359673024523, "grad_norm": 0.07860930263996124, "learning_rate": 4.9168300515309515e-06, "loss": 0.0566, "step": 2483 }, { "epoch": 0.5206455669670929, "grad_norm": 0.11033756285905838, "learning_rate": 4.913435692314814e-06, "loss": 0.056, "step": 2484 }, { "epoch": 0.5208551666317334, "grad_norm": 0.13464130461215973, "learning_rate": 4.910041373004802e-06, "loss": 0.0548, "step": 2485 }, { "epoch": 0.5210647662963739, "grad_norm": 0.1375139057636261, "learning_rate": 4.906647095165698e-06, "loss": 0.0568, "step": 2486 }, { "epoch": 0.5212743659610145, "grad_norm": 0.10741981118917465, "learning_rate": 4.903252860362266e-06, "loss": 0.0593, "step": 2487 }, { "epoch": 0.521483965625655, "grad_norm": 0.08266045153141022, "learning_rate": 4.8998586701592436e-06, "loss": 0.0558, "step": 2488 }, { "epoch": 0.5216935652902955, "grad_norm": 0.0874459519982338, "learning_rate": 4.8964645261213535e-06, "loss": 0.0557, "step": 2489 }, { "epoch": 0.521903164954936, "grad_norm": 0.10738598555326462, "learning_rate": 4.8930704298132965e-06, "loss": 0.0556, "step": 2490 }, { "epoch": 0.5221127646195766, "grad_norm": 0.11535639315843582, "learning_rate": 4.889676382799748e-06, "loss": 0.0566, "step": 2491 }, { "epoch": 0.5223223642842172, "grad_norm": 0.10083887726068497, "learning_rate": 4.886282386645363e-06, "loss": 0.0567, "step": 2492 }, { "epoch": 0.5225319639488577, "grad_norm": 0.07577608525753021, "learning_rate": 4.8828884429147775e-06, "loss": 0.0539, "step": 2493 }, { "epoch": 0.5227415636134982, "grad_norm": 0.08031262457370758, "learning_rate": 4.879494553172594e-06, "loss": 0.0541, "step": 2494 }, { "epoch": 0.5229511632781387, "grad_norm": 0.10877335071563721, "learning_rate": 4.876100718983394e-06, "loss": 0.0566, "step": 2495 }, { "epoch": 0.5231607629427792, "grad_norm": 0.10455876588821411, "learning_rate": 4.872706941911739e-06, "loss": 0.0559, "step": 2496 }, { "epoch": 0.5233703626074199, "grad_norm": 0.07597562670707703, "learning_rate": 4.869313223522159e-06, "loss": 0.0565, "step": 2497 }, { "epoch": 0.5235799622720604, "grad_norm": 0.08931368589401245, "learning_rate": 4.865919565379152e-06, "loss": 0.0583, "step": 2498 }, { "epoch": 0.5237895619367009, "grad_norm": 0.09384021908044815, "learning_rate": 4.8625259690472e-06, "loss": 0.0574, "step": 2499 }, { "epoch": 0.5239991616013414, "grad_norm": 0.0911545678973198, "learning_rate": 4.859132436090748e-06, "loss": 0.0561, "step": 2500 }, { "epoch": 0.5242087612659819, "grad_norm": 0.10391653329133987, "learning_rate": 4.855738968074212e-06, "loss": 0.0552, "step": 2501 }, { "epoch": 0.5244183609306226, "grad_norm": 0.09222046285867691, "learning_rate": 4.852345566561983e-06, "loss": 0.059, "step": 2502 }, { "epoch": 0.5246279605952631, "grad_norm": 0.07000227272510529, "learning_rate": 4.848952233118417e-06, "loss": 0.0584, "step": 2503 }, { "epoch": 0.5248375602599036, "grad_norm": 0.08648461848497391, "learning_rate": 4.845558969307839e-06, "loss": 0.0541, "step": 2504 }, { "epoch": 0.5250471599245441, "grad_norm": 0.09578924626111984, "learning_rate": 4.842165776694545e-06, "loss": 0.0551, "step": 2505 }, { "epoch": 0.5252567595891846, "grad_norm": 0.091171033680439, "learning_rate": 4.8387726568427945e-06, "loss": 0.055, "step": 2506 }, { "epoch": 0.5254663592538252, "grad_norm": 0.09794994443655014, "learning_rate": 4.835379611316818e-06, "loss": 0.0602, "step": 2507 }, { "epoch": 0.5256759589184657, "grad_norm": 0.08770140260457993, "learning_rate": 4.831986641680804e-06, "loss": 0.0555, "step": 2508 }, { "epoch": 0.5258855585831063, "grad_norm": 0.07761924713850021, "learning_rate": 4.828593749498913e-06, "loss": 0.0558, "step": 2509 }, { "epoch": 0.5260951582477468, "grad_norm": 0.09006881713867188, "learning_rate": 4.825200936335272e-06, "loss": 0.0575, "step": 2510 }, { "epoch": 0.5263047579123873, "grad_norm": 0.08472940325737, "learning_rate": 4.821808203753959e-06, "loss": 0.0555, "step": 2511 }, { "epoch": 0.5265143575770279, "grad_norm": 0.06748061627149582, "learning_rate": 4.818415553319027e-06, "loss": 0.0555, "step": 2512 }, { "epoch": 0.5267239572416684, "grad_norm": 0.07318470627069473, "learning_rate": 4.815022986594491e-06, "loss": 0.0564, "step": 2513 }, { "epoch": 0.5269335569063089, "grad_norm": 0.07728718966245651, "learning_rate": 4.811630505144316e-06, "loss": 0.055, "step": 2514 }, { "epoch": 0.5271431565709495, "grad_norm": 0.06775672733783722, "learning_rate": 4.808238110532439e-06, "loss": 0.0543, "step": 2515 }, { "epoch": 0.52735275623559, "grad_norm": 0.08202599734067917, "learning_rate": 4.804845804322756e-06, "loss": 0.0562, "step": 2516 }, { "epoch": 0.5275623559002306, "grad_norm": 0.09403207898139954, "learning_rate": 4.801453588079113e-06, "loss": 0.0548, "step": 2517 }, { "epoch": 0.5277719555648711, "grad_norm": 0.08758095651865005, "learning_rate": 4.798061463365327e-06, "loss": 0.0533, "step": 2518 }, { "epoch": 0.5279815552295116, "grad_norm": 0.07951238006353378, "learning_rate": 4.7946694317451635e-06, "loss": 0.0558, "step": 2519 }, { "epoch": 0.5281911548941521, "grad_norm": 0.06479839980602264, "learning_rate": 4.791277494782351e-06, "loss": 0.056, "step": 2520 }, { "epoch": 0.5284007545587927, "grad_norm": 0.060277167707681656, "learning_rate": 4.787885654040569e-06, "loss": 0.0572, "step": 2521 }, { "epoch": 0.5286103542234333, "grad_norm": 0.06489501148462296, "learning_rate": 4.784493911083455e-06, "loss": 0.0559, "step": 2522 }, { "epoch": 0.5288199538880738, "grad_norm": 0.07337628304958344, "learning_rate": 4.781102267474606e-06, "loss": 0.0576, "step": 2523 }, { "epoch": 0.5290295535527143, "grad_norm": 0.08746179193258286, "learning_rate": 4.777710724777565e-06, "loss": 0.0556, "step": 2524 }, { "epoch": 0.5292391532173548, "grad_norm": 0.09314849972724915, "learning_rate": 4.774319284555833e-06, "loss": 0.0566, "step": 2525 }, { "epoch": 0.5294487528819954, "grad_norm": 0.08934000134468079, "learning_rate": 4.770927948372865e-06, "loss": 0.0522, "step": 2526 }, { "epoch": 0.529658352546636, "grad_norm": 0.08320695906877518, "learning_rate": 4.7675367177920645e-06, "loss": 0.0547, "step": 2527 }, { "epoch": 0.5298679522112765, "grad_norm": 0.07654823362827301, "learning_rate": 4.764145594376788e-06, "loss": 0.0531, "step": 2528 }, { "epoch": 0.530077551875917, "grad_norm": 0.07065373659133911, "learning_rate": 4.7607545796903444e-06, "loss": 0.0584, "step": 2529 }, { "epoch": 0.5302871515405575, "grad_norm": 0.06831829994916916, "learning_rate": 4.757363675295991e-06, "loss": 0.0557, "step": 2530 }, { "epoch": 0.5304967512051981, "grad_norm": 0.06134999543428421, "learning_rate": 4.753972882756931e-06, "loss": 0.0548, "step": 2531 }, { "epoch": 0.5307063508698386, "grad_norm": 0.05438331514596939, "learning_rate": 4.7505822036363214e-06, "loss": 0.0568, "step": 2532 }, { "epoch": 0.5309159505344792, "grad_norm": 0.047348763793706894, "learning_rate": 4.747191639497266e-06, "loss": 0.0568, "step": 2533 }, { "epoch": 0.5311255501991197, "grad_norm": 0.048679519444704056, "learning_rate": 4.743801191902809e-06, "loss": 0.0563, "step": 2534 }, { "epoch": 0.5313351498637602, "grad_norm": 0.045131586492061615, "learning_rate": 4.740410862415952e-06, "loss": 0.0555, "step": 2535 }, { "epoch": 0.5315447495284008, "grad_norm": 0.05211302638053894, "learning_rate": 4.737020652599633e-06, "loss": 0.0551, "step": 2536 }, { "epoch": 0.5317543491930413, "grad_norm": 0.05385665223002434, "learning_rate": 4.733630564016738e-06, "loss": 0.0541, "step": 2537 }, { "epoch": 0.5319639488576818, "grad_norm": 0.046267565339803696, "learning_rate": 4.730240598230097e-06, "loss": 0.0533, "step": 2538 }, { "epoch": 0.5321735485223223, "grad_norm": 0.04819132760167122, "learning_rate": 4.726850756802486e-06, "loss": 0.0564, "step": 2539 }, { "epoch": 0.5323831481869629, "grad_norm": 0.048858221620321274, "learning_rate": 4.723461041296618e-06, "loss": 0.0547, "step": 2540 }, { "epoch": 0.5325927478516035, "grad_norm": 0.042201049625873566, "learning_rate": 4.720071453275152e-06, "loss": 0.0573, "step": 2541 }, { "epoch": 0.532802347516244, "grad_norm": 0.04239390045404434, "learning_rate": 4.716681994300688e-06, "loss": 0.0573, "step": 2542 }, { "epoch": 0.5330119471808845, "grad_norm": 0.05800594016909599, "learning_rate": 4.7132926659357675e-06, "loss": 0.0546, "step": 2543 }, { "epoch": 0.533221546845525, "grad_norm": 0.05549288168549538, "learning_rate": 4.7099034697428676e-06, "loss": 0.0565, "step": 2544 }, { "epoch": 0.5334311465101655, "grad_norm": 0.05524330586194992, "learning_rate": 4.706514407284407e-06, "loss": 0.0544, "step": 2545 }, { "epoch": 0.5336407461748062, "grad_norm": 0.07277749478816986, "learning_rate": 4.703125480122747e-06, "loss": 0.0542, "step": 2546 }, { "epoch": 0.5338503458394467, "grad_norm": 0.06170268729329109, "learning_rate": 4.699736689820175e-06, "loss": 0.0565, "step": 2547 }, { "epoch": 0.5340599455040872, "grad_norm": 0.05076323822140694, "learning_rate": 4.696348037938927e-06, "loss": 0.0565, "step": 2548 }, { "epoch": 0.5342695451687277, "grad_norm": 0.06921599805355072, "learning_rate": 4.692959526041174e-06, "loss": 0.0565, "step": 2549 }, { "epoch": 0.5344791448333682, "grad_norm": 0.057804301381111145, "learning_rate": 4.689571155689012e-06, "loss": 0.053, "step": 2550 }, { "epoch": 0.5346887444980088, "grad_norm": 0.04741832986474037, "learning_rate": 4.686182928444484e-06, "loss": 0.0552, "step": 2551 }, { "epoch": 0.5348983441626494, "grad_norm": 0.07584595680236816, "learning_rate": 4.682794845869559e-06, "loss": 0.0525, "step": 2552 }, { "epoch": 0.5351079438272899, "grad_norm": 0.07168301194906235, "learning_rate": 4.679406909526147e-06, "loss": 0.0579, "step": 2553 }, { "epoch": 0.5353175434919304, "grad_norm": 0.04884837195277214, "learning_rate": 4.676019120976082e-06, "loss": 0.0574, "step": 2554 }, { "epoch": 0.5355271431565709, "grad_norm": 0.06782133877277374, "learning_rate": 4.672631481781134e-06, "loss": 0.0565, "step": 2555 }, { "epoch": 0.5357367428212115, "grad_norm": 0.07214924693107605, "learning_rate": 4.669243993503008e-06, "loss": 0.0584, "step": 2556 }, { "epoch": 0.535946342485852, "grad_norm": 0.05515038222074509, "learning_rate": 4.665856657703329e-06, "loss": 0.0545, "step": 2557 }, { "epoch": 0.5361559421504926, "grad_norm": 0.06522560119628906, "learning_rate": 4.662469475943662e-06, "loss": 0.0558, "step": 2558 }, { "epoch": 0.5363655418151331, "grad_norm": 0.06113879382610321, "learning_rate": 4.659082449785498e-06, "loss": 0.0558, "step": 2559 }, { "epoch": 0.5365751414797736, "grad_norm": 0.06243380904197693, "learning_rate": 4.655695580790254e-06, "loss": 0.0578, "step": 2560 }, { "epoch": 0.5367847411444142, "grad_norm": 0.07164395600557327, "learning_rate": 4.652308870519272e-06, "loss": 0.0574, "step": 2561 }, { "epoch": 0.5369943408090547, "grad_norm": 0.06726083904504776, "learning_rate": 4.648922320533833e-06, "loss": 0.0546, "step": 2562 }, { "epoch": 0.5372039404736952, "grad_norm": 0.054155874997377396, "learning_rate": 4.645535932395129e-06, "loss": 0.0536, "step": 2563 }, { "epoch": 0.5374135401383358, "grad_norm": 0.0629991814494133, "learning_rate": 4.6421497076642864e-06, "loss": 0.0576, "step": 2564 }, { "epoch": 0.5376231398029763, "grad_norm": 0.06752461940050125, "learning_rate": 4.638763647902355e-06, "loss": 0.0567, "step": 2565 }, { "epoch": 0.5378327394676169, "grad_norm": 0.0500505268573761, "learning_rate": 4.635377754670307e-06, "loss": 0.0546, "step": 2566 }, { "epoch": 0.5380423391322574, "grad_norm": 0.043680962175130844, "learning_rate": 4.631992029529037e-06, "loss": 0.0556, "step": 2567 }, { "epoch": 0.5382519387968979, "grad_norm": 0.06089196354150772, "learning_rate": 4.628606474039366e-06, "loss": 0.0545, "step": 2568 }, { "epoch": 0.5384615384615384, "grad_norm": 0.07623324543237686, "learning_rate": 4.625221089762034e-06, "loss": 0.0577, "step": 2569 }, { "epoch": 0.538671138126179, "grad_norm": 0.09632382541894913, "learning_rate": 4.621835878257701e-06, "loss": 0.0567, "step": 2570 }, { "epoch": 0.5388807377908196, "grad_norm": 0.12334061414003372, "learning_rate": 4.6184508410869486e-06, "loss": 0.0533, "step": 2571 }, { "epoch": 0.5390903374554601, "grad_norm": 0.1471899002790451, "learning_rate": 4.615065979810282e-06, "loss": 0.0571, "step": 2572 }, { "epoch": 0.5392999371201006, "grad_norm": 0.16688735783100128, "learning_rate": 4.6116812959881154e-06, "loss": 0.0543, "step": 2573 }, { "epoch": 0.5395095367847411, "grad_norm": 0.17000025510787964, "learning_rate": 4.608296791180793e-06, "loss": 0.0555, "step": 2574 }, { "epoch": 0.5397191364493816, "grad_norm": 0.13026584684848785, "learning_rate": 4.604912466948568e-06, "loss": 0.058, "step": 2575 }, { "epoch": 0.5399287361140223, "grad_norm": 0.06306587904691696, "learning_rate": 4.601528324851613e-06, "loss": 0.0546, "step": 2576 }, { "epoch": 0.5401383357786628, "grad_norm": 0.06573072820901871, "learning_rate": 4.598144366450018e-06, "loss": 0.057, "step": 2577 }, { "epoch": 0.5403479354433033, "grad_norm": 0.1011551097035408, "learning_rate": 4.594760593303785e-06, "loss": 0.0563, "step": 2578 }, { "epoch": 0.5405575351079438, "grad_norm": 0.13201986253261566, "learning_rate": 4.591377006972837e-06, "loss": 0.0559, "step": 2579 }, { "epoch": 0.5407671347725843, "grad_norm": 0.13169518113136292, "learning_rate": 4.587993609017003e-06, "loss": 0.0583, "step": 2580 }, { "epoch": 0.5409767344372249, "grad_norm": 0.07675211131572723, "learning_rate": 4.584610400996028e-06, "loss": 0.0547, "step": 2581 }, { "epoch": 0.5411863341018655, "grad_norm": 0.07180249691009521, "learning_rate": 4.581227384469575e-06, "loss": 0.056, "step": 2582 }, { "epoch": 0.541395933766506, "grad_norm": 0.11214875429868698, "learning_rate": 4.577844560997208e-06, "loss": 0.0561, "step": 2583 }, { "epoch": 0.5416055334311465, "grad_norm": 0.1112339124083519, "learning_rate": 4.574461932138412e-06, "loss": 0.0535, "step": 2584 }, { "epoch": 0.541815133095787, "grad_norm": 0.08749670535326004, "learning_rate": 4.571079499452578e-06, "loss": 0.057, "step": 2585 }, { "epoch": 0.5420247327604276, "grad_norm": 0.08394355326890945, "learning_rate": 4.567697264499003e-06, "loss": 0.0547, "step": 2586 }, { "epoch": 0.5422343324250681, "grad_norm": 0.12321101129055023, "learning_rate": 4.564315228836901e-06, "loss": 0.0577, "step": 2587 }, { "epoch": 0.5424439320897086, "grad_norm": 0.13158167898654938, "learning_rate": 4.560933394025386e-06, "loss": 0.0583, "step": 2588 }, { "epoch": 0.5426535317543492, "grad_norm": 0.08250062167644501, "learning_rate": 4.5575517616234874e-06, "loss": 0.0547, "step": 2589 }, { "epoch": 0.5428631314189897, "grad_norm": 0.07321292906999588, "learning_rate": 4.5541703331901346e-06, "loss": 0.0575, "step": 2590 }, { "epoch": 0.5430727310836303, "grad_norm": 0.10240411758422852, "learning_rate": 4.550789110284164e-06, "loss": 0.0548, "step": 2591 }, { "epoch": 0.5432823307482708, "grad_norm": 0.09402070939540863, "learning_rate": 4.547408094464322e-06, "loss": 0.0568, "step": 2592 }, { "epoch": 0.5434919304129113, "grad_norm": 0.06143482029438019, "learning_rate": 4.5440272872892546e-06, "loss": 0.0552, "step": 2593 }, { "epoch": 0.5437015300775518, "grad_norm": 0.07297394424676895, "learning_rate": 4.54064669031751e-06, "loss": 0.0561, "step": 2594 }, { "epoch": 0.5439111297421925, "grad_norm": 0.08411424607038498, "learning_rate": 4.537266305107549e-06, "loss": 0.0554, "step": 2595 }, { "epoch": 0.544120729406833, "grad_norm": 0.06542636454105377, "learning_rate": 4.533886133217725e-06, "loss": 0.0563, "step": 2596 }, { "epoch": 0.5443303290714735, "grad_norm": 0.06550241261720657, "learning_rate": 4.5305061762062945e-06, "loss": 0.0535, "step": 2597 }, { "epoch": 0.544539928736114, "grad_norm": 0.08619559556245804, "learning_rate": 4.527126435631422e-06, "loss": 0.0566, "step": 2598 }, { "epoch": 0.5447495284007545, "grad_norm": 0.07745786011219025, "learning_rate": 4.523746913051163e-06, "loss": 0.0571, "step": 2599 }, { "epoch": 0.5449591280653951, "grad_norm": 0.0609513558447361, "learning_rate": 4.520367610023477e-06, "loss": 0.0552, "step": 2600 }, { "epoch": 0.5451687277300357, "grad_norm": 0.0679200291633606, "learning_rate": 4.516988528106225e-06, "loss": 0.0538, "step": 2601 }, { "epoch": 0.5453783273946762, "grad_norm": 0.067595936357975, "learning_rate": 4.513609668857162e-06, "loss": 0.0541, "step": 2602 }, { "epoch": 0.5455879270593167, "grad_norm": 0.06276869773864746, "learning_rate": 4.510231033833938e-06, "loss": 0.0549, "step": 2603 }, { "epoch": 0.5457975267239572, "grad_norm": 0.058919332921504974, "learning_rate": 4.506852624594107e-06, "loss": 0.0553, "step": 2604 }, { "epoch": 0.5460071263885978, "grad_norm": 0.06877582520246506, "learning_rate": 4.503474442695115e-06, "loss": 0.0558, "step": 2605 }, { "epoch": 0.5462167260532383, "grad_norm": 0.07054632157087326, "learning_rate": 4.500096489694299e-06, "loss": 0.0591, "step": 2606 }, { "epoch": 0.5464263257178789, "grad_norm": 0.05597640573978424, "learning_rate": 4.496718767148898e-06, "loss": 0.0552, "step": 2607 }, { "epoch": 0.5466359253825194, "grad_norm": 0.052397601306438446, "learning_rate": 4.493341276616044e-06, "loss": 0.0543, "step": 2608 }, { "epoch": 0.5468455250471599, "grad_norm": 0.06026868522167206, "learning_rate": 4.489964019652752e-06, "loss": 0.0578, "step": 2609 }, { "epoch": 0.5470551247118005, "grad_norm": 0.05290864408016205, "learning_rate": 4.486586997815942e-06, "loss": 0.0533, "step": 2610 }, { "epoch": 0.547264724376441, "grad_norm": 0.05662361532449722, "learning_rate": 4.48321021266242e-06, "loss": 0.0552, "step": 2611 }, { "epoch": 0.5474743240410815, "grad_norm": 0.057523008435964584, "learning_rate": 4.479833665748884e-06, "loss": 0.0595, "step": 2612 }, { "epoch": 0.547683923705722, "grad_norm": 0.054744523018598557, "learning_rate": 4.476457358631918e-06, "loss": 0.0589, "step": 2613 }, { "epoch": 0.5478935233703626, "grad_norm": 0.06096423789858818, "learning_rate": 4.473081292868001e-06, "loss": 0.054, "step": 2614 }, { "epoch": 0.5481031230350032, "grad_norm": 0.06122094392776489, "learning_rate": 4.4697054700135e-06, "loss": 0.0524, "step": 2615 }, { "epoch": 0.5483127226996437, "grad_norm": 0.06579925119876862, "learning_rate": 4.4663298916246665e-06, "loss": 0.0554, "step": 2616 }, { "epoch": 0.5485223223642842, "grad_norm": 0.0574062280356884, "learning_rate": 4.4629545592576415e-06, "loss": 0.0563, "step": 2617 }, { "epoch": 0.5487319220289247, "grad_norm": 0.05878579616546631, "learning_rate": 4.459579474468455e-06, "loss": 0.0541, "step": 2618 }, { "epoch": 0.5489415216935652, "grad_norm": 0.06138122081756592, "learning_rate": 4.456204638813017e-06, "loss": 0.0564, "step": 2619 }, { "epoch": 0.5491511213582059, "grad_norm": 0.04858166351914406, "learning_rate": 4.452830053847127e-06, "loss": 0.0547, "step": 2620 }, { "epoch": 0.5493607210228464, "grad_norm": 0.046725232154130936, "learning_rate": 4.4494557211264715e-06, "loss": 0.0551, "step": 2621 }, { "epoch": 0.5495703206874869, "grad_norm": 0.05486287176609039, "learning_rate": 4.446081642206611e-06, "loss": 0.0574, "step": 2622 }, { "epoch": 0.5497799203521274, "grad_norm": 0.052558574825525284, "learning_rate": 4.442707818642999e-06, "loss": 0.0535, "step": 2623 }, { "epoch": 0.5499895200167679, "grad_norm": 0.04250088334083557, "learning_rate": 4.439334251990966e-06, "loss": 0.0561, "step": 2624 }, { "epoch": 0.5501991196814086, "grad_norm": 0.047486599534749985, "learning_rate": 4.435960943805729e-06, "loss": 0.0572, "step": 2625 }, { "epoch": 0.5504087193460491, "grad_norm": 0.04519723728299141, "learning_rate": 4.432587895642378e-06, "loss": 0.0536, "step": 2626 }, { "epoch": 0.5506183190106896, "grad_norm": 0.042785726487636566, "learning_rate": 4.4292151090558884e-06, "loss": 0.0544, "step": 2627 }, { "epoch": 0.5508279186753301, "grad_norm": 0.04462413862347603, "learning_rate": 4.425842585601117e-06, "loss": 0.0569, "step": 2628 }, { "epoch": 0.5510375183399706, "grad_norm": 0.041346605867147446, "learning_rate": 4.422470326832794e-06, "loss": 0.0567, "step": 2629 }, { "epoch": 0.5512471180046112, "grad_norm": 0.04267747327685356, "learning_rate": 4.419098334305529e-06, "loss": 0.0542, "step": 2630 }, { "epoch": 0.5514567176692518, "grad_norm": 0.05388062447309494, "learning_rate": 4.4157266095738125e-06, "loss": 0.0559, "step": 2631 }, { "epoch": 0.5516663173338923, "grad_norm": 0.064181849360466, "learning_rate": 4.412355154192007e-06, "loss": 0.0576, "step": 2632 }, { "epoch": 0.5518759169985328, "grad_norm": 0.0693066269159317, "learning_rate": 4.408983969714353e-06, "loss": 0.056, "step": 2633 }, { "epoch": 0.5520855166631733, "grad_norm": 0.07318708300590515, "learning_rate": 4.405613057694967e-06, "loss": 0.054, "step": 2634 }, { "epoch": 0.5522951163278139, "grad_norm": 0.07104630768299103, "learning_rate": 4.402242419687839e-06, "loss": 0.0536, "step": 2635 }, { "epoch": 0.5525047159924544, "grad_norm": 0.057221170514822006, "learning_rate": 4.398872057246829e-06, "loss": 0.0522, "step": 2636 }, { "epoch": 0.552714315657095, "grad_norm": 0.05032278224825859, "learning_rate": 4.395501971925677e-06, "loss": 0.0552, "step": 2637 }, { "epoch": 0.5529239153217355, "grad_norm": 0.050015486776828766, "learning_rate": 4.392132165277991e-06, "loss": 0.0571, "step": 2638 }, { "epoch": 0.553133514986376, "grad_norm": 0.05708552524447441, "learning_rate": 4.388762638857249e-06, "loss": 0.054, "step": 2639 }, { "epoch": 0.5533431146510166, "grad_norm": 0.06874597072601318, "learning_rate": 4.385393394216804e-06, "loss": 0.0553, "step": 2640 }, { "epoch": 0.5535527143156571, "grad_norm": 0.059858787804841995, "learning_rate": 4.382024432909878e-06, "loss": 0.0597, "step": 2641 }, { "epoch": 0.5537623139802976, "grad_norm": 0.04494311660528183, "learning_rate": 4.378655756489558e-06, "loss": 0.0579, "step": 2642 }, { "epoch": 0.5539719136449381, "grad_norm": 0.062470775097608566, "learning_rate": 4.3752873665088055e-06, "loss": 0.0557, "step": 2643 }, { "epoch": 0.5541815133095787, "grad_norm": 0.07735218852758408, "learning_rate": 4.371919264520449e-06, "loss": 0.0562, "step": 2644 }, { "epoch": 0.5543911129742193, "grad_norm": 0.07211725413799286, "learning_rate": 4.368551452077179e-06, "loss": 0.056, "step": 2645 }, { "epoch": 0.5546007126388598, "grad_norm": 0.0716887041926384, "learning_rate": 4.365183930731559e-06, "loss": 0.0586, "step": 2646 }, { "epoch": 0.5548103123035003, "grad_norm": 0.06757423281669617, "learning_rate": 4.361816702036015e-06, "loss": 0.0548, "step": 2647 }, { "epoch": 0.5550199119681408, "grad_norm": 0.05915999040007591, "learning_rate": 4.35844976754284e-06, "loss": 0.0561, "step": 2648 }, { "epoch": 0.5552295116327813, "grad_norm": 0.06384500861167908, "learning_rate": 4.355083128804188e-06, "loss": 0.0545, "step": 2649 }, { "epoch": 0.555439111297422, "grad_norm": 0.08520271629095078, "learning_rate": 4.351716787372079e-06, "loss": 0.0544, "step": 2650 }, { "epoch": 0.5556487109620625, "grad_norm": 0.09145956486463547, "learning_rate": 4.348350744798399e-06, "loss": 0.0539, "step": 2651 }, { "epoch": 0.555858310626703, "grad_norm": 0.08064839243888855, "learning_rate": 4.344985002634888e-06, "loss": 0.0575, "step": 2652 }, { "epoch": 0.5560679102913435, "grad_norm": 0.055196382105350494, "learning_rate": 4.341619562433154e-06, "loss": 0.0543, "step": 2653 }, { "epoch": 0.556277509955984, "grad_norm": 0.05988939851522446, "learning_rate": 4.338254425744669e-06, "loss": 0.0568, "step": 2654 }, { "epoch": 0.5564871096206246, "grad_norm": 0.06533896923065186, "learning_rate": 4.334889594120751e-06, "loss": 0.0567, "step": 2655 }, { "epoch": 0.5566967092852652, "grad_norm": 0.0704990103840828, "learning_rate": 4.331525069112595e-06, "loss": 0.0541, "step": 2656 }, { "epoch": 0.5569063089499057, "grad_norm": 0.07413534820079803, "learning_rate": 4.328160852271241e-06, "loss": 0.0561, "step": 2657 }, { "epoch": 0.5571159086145462, "grad_norm": 0.06026620417833328, "learning_rate": 4.324796945147598e-06, "loss": 0.0552, "step": 2658 }, { "epoch": 0.5573255082791867, "grad_norm": 0.05583348870277405, "learning_rate": 4.321433349292422e-06, "loss": 0.0569, "step": 2659 }, { "epoch": 0.5575351079438273, "grad_norm": 0.06051694229245186, "learning_rate": 4.318070066256328e-06, "loss": 0.061, "step": 2660 }, { "epoch": 0.5577447076084678, "grad_norm": 0.06352775543928146, "learning_rate": 4.314707097589796e-06, "loss": 0.0561, "step": 2661 }, { "epoch": 0.5579543072731084, "grad_norm": 0.06398675590753555, "learning_rate": 4.311344444843147e-06, "loss": 0.0535, "step": 2662 }, { "epoch": 0.5581639069377489, "grad_norm": 0.06229983642697334, "learning_rate": 4.307982109566566e-06, "loss": 0.0519, "step": 2663 }, { "epoch": 0.5583735066023895, "grad_norm": 0.05653541907668114, "learning_rate": 4.3046200933100905e-06, "loss": 0.0557, "step": 2664 }, { "epoch": 0.55858310626703, "grad_norm": 0.047429151833057404, "learning_rate": 4.301258397623606e-06, "loss": 0.0534, "step": 2665 }, { "epoch": 0.5587927059316705, "grad_norm": 0.05258271098136902, "learning_rate": 4.2978970240568556e-06, "loss": 0.0542, "step": 2666 }, { "epoch": 0.559002305596311, "grad_norm": 0.05701644718647003, "learning_rate": 4.2945359741594315e-06, "loss": 0.0556, "step": 2667 }, { "epoch": 0.5592119052609515, "grad_norm": 0.06717483699321747, "learning_rate": 4.291175249480777e-06, "loss": 0.054, "step": 2668 }, { "epoch": 0.5594215049255922, "grad_norm": 0.06235239654779434, "learning_rate": 4.287814851570183e-06, "loss": 0.0587, "step": 2669 }, { "epoch": 0.5596311045902327, "grad_norm": 0.06835221499204636, "learning_rate": 4.284454781976796e-06, "loss": 0.0562, "step": 2670 }, { "epoch": 0.5598407042548732, "grad_norm": 0.07527367770671844, "learning_rate": 4.281095042249608e-06, "loss": 0.0565, "step": 2671 }, { "epoch": 0.5600503039195137, "grad_norm": 0.062950000166893, "learning_rate": 4.2777356339374526e-06, "loss": 0.0572, "step": 2672 }, { "epoch": 0.5602599035841542, "grad_norm": 0.06541959196329117, "learning_rate": 4.274376558589022e-06, "loss": 0.0521, "step": 2673 }, { "epoch": 0.5604695032487949, "grad_norm": 0.08955197781324387, "learning_rate": 4.271017817752847e-06, "loss": 0.053, "step": 2674 }, { "epoch": 0.5606791029134354, "grad_norm": 0.07426553964614868, "learning_rate": 4.267659412977306e-06, "loss": 0.0531, "step": 2675 }, { "epoch": 0.5608887025780759, "grad_norm": 0.061013299971818924, "learning_rate": 4.264301345810623e-06, "loss": 0.0548, "step": 2676 }, { "epoch": 0.5610983022427164, "grad_norm": 0.06869885325431824, "learning_rate": 4.260943617800869e-06, "loss": 0.0564, "step": 2677 }, { "epoch": 0.5613079019073569, "grad_norm": 0.060993682593107224, "learning_rate": 4.257586230495951e-06, "loss": 0.0556, "step": 2678 }, { "epoch": 0.5615175015719975, "grad_norm": 0.08102905750274658, "learning_rate": 4.254229185443628e-06, "loss": 0.0537, "step": 2679 }, { "epoch": 0.561727101236638, "grad_norm": 0.0864529013633728, "learning_rate": 4.250872484191495e-06, "loss": 0.0589, "step": 2680 }, { "epoch": 0.5619367009012786, "grad_norm": 0.07401063293218613, "learning_rate": 4.247516128286992e-06, "loss": 0.055, "step": 2681 }, { "epoch": 0.5621463005659191, "grad_norm": 0.06574457883834839, "learning_rate": 4.244160119277397e-06, "loss": 0.059, "step": 2682 }, { "epoch": 0.5623559002305596, "grad_norm": 0.05600839853286743, "learning_rate": 4.24080445870983e-06, "loss": 0.0577, "step": 2683 }, { "epoch": 0.5625654998952002, "grad_norm": 0.058064643293619156, "learning_rate": 4.2374491481312506e-06, "loss": 0.0574, "step": 2684 }, { "epoch": 0.5627750995598407, "grad_norm": 0.05819283798336983, "learning_rate": 4.234094189088455e-06, "loss": 0.0563, "step": 2685 }, { "epoch": 0.5629846992244812, "grad_norm": 0.0627334788441658, "learning_rate": 4.230739583128078e-06, "loss": 0.0543, "step": 2686 }, { "epoch": 0.5631942988891218, "grad_norm": 0.07298675179481506, "learning_rate": 4.227385331796596e-06, "loss": 0.0586, "step": 2687 }, { "epoch": 0.5634038985537623, "grad_norm": 0.07162720710039139, "learning_rate": 4.2240314366403135e-06, "loss": 0.0575, "step": 2688 }, { "epoch": 0.5636134982184029, "grad_norm": 0.06274513155221939, "learning_rate": 4.220677899205376e-06, "loss": 0.0544, "step": 2689 }, { "epoch": 0.5638230978830434, "grad_norm": 0.049171797931194305, "learning_rate": 4.21732472103777e-06, "loss": 0.0564, "step": 2690 }, { "epoch": 0.5640326975476839, "grad_norm": 0.04283327981829643, "learning_rate": 4.213971903683301e-06, "loss": 0.0541, "step": 2691 }, { "epoch": 0.5642422972123244, "grad_norm": 0.05370490252971649, "learning_rate": 4.210619448687622e-06, "loss": 0.0539, "step": 2692 }, { "epoch": 0.564451896876965, "grad_norm": 0.05934003368020058, "learning_rate": 4.2072673575962125e-06, "loss": 0.0545, "step": 2693 }, { "epoch": 0.5646614965416056, "grad_norm": 0.06390709429979324, "learning_rate": 4.203915631954389e-06, "loss": 0.0548, "step": 2694 }, { "epoch": 0.5648710962062461, "grad_norm": 0.0646679550409317, "learning_rate": 4.200564273307292e-06, "loss": 0.0554, "step": 2695 }, { "epoch": 0.5650806958708866, "grad_norm": 0.05779599770903587, "learning_rate": 4.197213283199898e-06, "loss": 0.0591, "step": 2696 }, { "epoch": 0.5652902955355271, "grad_norm": 0.06428996473550797, "learning_rate": 4.193862663177016e-06, "loss": 0.0544, "step": 2697 }, { "epoch": 0.5654998952001676, "grad_norm": 0.07036624848842621, "learning_rate": 4.190512414783278e-06, "loss": 0.0551, "step": 2698 }, { "epoch": 0.5657094948648083, "grad_norm": 0.07967984676361084, "learning_rate": 4.187162539563147e-06, "loss": 0.0584, "step": 2699 }, { "epoch": 0.5659190945294488, "grad_norm": 0.09749884158372879, "learning_rate": 4.183813039060919e-06, "loss": 0.0543, "step": 2700 }, { "epoch": 0.5661286941940893, "grad_norm": 0.09949739277362823, "learning_rate": 4.180463914820709e-06, "loss": 0.0562, "step": 2701 }, { "epoch": 0.5663382938587298, "grad_norm": 0.09305305033922195, "learning_rate": 4.177115168386463e-06, "loss": 0.055, "step": 2702 }, { "epoch": 0.5665478935233703, "grad_norm": 0.07777238637208939, "learning_rate": 4.173766801301956e-06, "loss": 0.0574, "step": 2703 }, { "epoch": 0.5667574931880109, "grad_norm": 0.06988883018493652, "learning_rate": 4.17041881511078e-06, "loss": 0.0547, "step": 2704 }, { "epoch": 0.5669670928526515, "grad_norm": 0.07308480888605118, "learning_rate": 4.167071211356358e-06, "loss": 0.0539, "step": 2705 }, { "epoch": 0.567176692517292, "grad_norm": 0.053045183420181274, "learning_rate": 4.163723991581935e-06, "loss": 0.057, "step": 2706 }, { "epoch": 0.5673862921819325, "grad_norm": 0.05672500282526016, "learning_rate": 4.160377157330579e-06, "loss": 0.0545, "step": 2707 }, { "epoch": 0.567595891846573, "grad_norm": 0.06733475625514984, "learning_rate": 4.157030710145178e-06, "loss": 0.0554, "step": 2708 }, { "epoch": 0.5678054915112136, "grad_norm": 0.07970572263002396, "learning_rate": 4.153684651568445e-06, "loss": 0.0561, "step": 2709 }, { "epoch": 0.5680150911758541, "grad_norm": 0.1028270497918129, "learning_rate": 4.150338983142913e-06, "loss": 0.0553, "step": 2710 }, { "epoch": 0.5682246908404947, "grad_norm": 0.10650909692049026, "learning_rate": 4.1469937064109305e-06, "loss": 0.0545, "step": 2711 }, { "epoch": 0.5684342905051352, "grad_norm": 0.09105932712554932, "learning_rate": 4.1436488229146735e-06, "loss": 0.0579, "step": 2712 }, { "epoch": 0.5686438901697757, "grad_norm": 0.0559769943356514, "learning_rate": 4.140304334196133e-06, "loss": 0.0559, "step": 2713 }, { "epoch": 0.5688534898344163, "grad_norm": 0.056230757385492325, "learning_rate": 4.136960241797113e-06, "loss": 0.0553, "step": 2714 }, { "epoch": 0.5690630894990568, "grad_norm": 0.07475091516971588, "learning_rate": 4.1336165472592434e-06, "loss": 0.0565, "step": 2715 }, { "epoch": 0.5692726891636973, "grad_norm": 0.07072797417640686, "learning_rate": 4.130273252123965e-06, "loss": 0.0563, "step": 2716 }, { "epoch": 0.5694822888283378, "grad_norm": 0.05199355632066727, "learning_rate": 4.1269303579325385e-06, "loss": 0.0579, "step": 2717 }, { "epoch": 0.5696918884929784, "grad_norm": 0.047952428460121155, "learning_rate": 4.123587866226035e-06, "loss": 0.0544, "step": 2718 }, { "epoch": 0.569901488157619, "grad_norm": 0.059139080345630646, "learning_rate": 4.120245778545341e-06, "loss": 0.0557, "step": 2719 }, { "epoch": 0.5701110878222595, "grad_norm": 0.08245202153921127, "learning_rate": 4.116904096431163e-06, "loss": 0.0564, "step": 2720 }, { "epoch": 0.5703206874869, "grad_norm": 0.09880422055721283, "learning_rate": 4.113562821424012e-06, "loss": 0.0558, "step": 2721 }, { "epoch": 0.5705302871515405, "grad_norm": 0.09177903831005096, "learning_rate": 4.1102219550642154e-06, "loss": 0.0563, "step": 2722 }, { "epoch": 0.570739886816181, "grad_norm": 0.06406284123659134, "learning_rate": 4.1068814988919156e-06, "loss": 0.0579, "step": 2723 }, { "epoch": 0.5709494864808217, "grad_norm": 0.055943313986063004, "learning_rate": 4.103541454447057e-06, "loss": 0.0544, "step": 2724 }, { "epoch": 0.5711590861454622, "grad_norm": 0.07301504909992218, "learning_rate": 4.100201823269401e-06, "loss": 0.054, "step": 2725 }, { "epoch": 0.5713686858101027, "grad_norm": 0.08340940624475479, "learning_rate": 4.0968626068985205e-06, "loss": 0.0556, "step": 2726 }, { "epoch": 0.5715782854747432, "grad_norm": 0.08021462708711624, "learning_rate": 4.093523806873787e-06, "loss": 0.0534, "step": 2727 }, { "epoch": 0.5717878851393837, "grad_norm": 0.07112640142440796, "learning_rate": 4.090185424734392e-06, "loss": 0.0529, "step": 2728 }, { "epoch": 0.5719974848040243, "grad_norm": 0.06481733918190002, "learning_rate": 4.086847462019326e-06, "loss": 0.0575, "step": 2729 }, { "epoch": 0.5722070844686649, "grad_norm": 0.06660385429859161, "learning_rate": 4.0835099202673926e-06, "loss": 0.0535, "step": 2730 }, { "epoch": 0.5724166841333054, "grad_norm": 0.07477893680334091, "learning_rate": 4.080172801017195e-06, "loss": 0.0567, "step": 2731 }, { "epoch": 0.5726262837979459, "grad_norm": 0.07714486122131348, "learning_rate": 4.076836105807143e-06, "loss": 0.0576, "step": 2732 }, { "epoch": 0.5728358834625865, "grad_norm": 0.05599776282906532, "learning_rate": 4.073499836175457e-06, "loss": 0.0559, "step": 2733 }, { "epoch": 0.573045483127227, "grad_norm": 0.03909073770046234, "learning_rate": 4.0701639936601535e-06, "loss": 0.0551, "step": 2734 }, { "epoch": 0.5732550827918675, "grad_norm": 0.06613650918006897, "learning_rate": 4.066828579799054e-06, "loss": 0.0566, "step": 2735 }, { "epoch": 0.5734646824565081, "grad_norm": 0.0671084001660347, "learning_rate": 4.063493596129788e-06, "loss": 0.0562, "step": 2736 }, { "epoch": 0.5736742821211486, "grad_norm": 0.058822277933359146, "learning_rate": 4.060159044189778e-06, "loss": 0.054, "step": 2737 }, { "epoch": 0.5738838817857892, "grad_norm": 0.05256880447268486, "learning_rate": 4.0568249255162526e-06, "loss": 0.0573, "step": 2738 }, { "epoch": 0.5740934814504297, "grad_norm": 0.05657995492219925, "learning_rate": 4.053491241646242e-06, "loss": 0.0553, "step": 2739 }, { "epoch": 0.5743030811150702, "grad_norm": 0.05349545180797577, "learning_rate": 4.050157994116573e-06, "loss": 0.0562, "step": 2740 }, { "epoch": 0.5745126807797107, "grad_norm": 0.04297221451997757, "learning_rate": 4.04682518446387e-06, "loss": 0.0556, "step": 2741 }, { "epoch": 0.5747222804443513, "grad_norm": 0.04681278392672539, "learning_rate": 4.043492814224559e-06, "loss": 0.0543, "step": 2742 }, { "epoch": 0.5749318801089919, "grad_norm": 0.06194974109530449, "learning_rate": 4.040160884934864e-06, "loss": 0.0546, "step": 2743 }, { "epoch": 0.5751414797736324, "grad_norm": 0.08011812716722488, "learning_rate": 4.036829398130799e-06, "loss": 0.0552, "step": 2744 }, { "epoch": 0.5753510794382729, "grad_norm": 0.08187726140022278, "learning_rate": 4.033498355348183e-06, "loss": 0.0533, "step": 2745 }, { "epoch": 0.5755606791029134, "grad_norm": 0.06547623127698898, "learning_rate": 4.030167758122625e-06, "loss": 0.0533, "step": 2746 }, { "epoch": 0.5757702787675539, "grad_norm": 0.0580391101539135, "learning_rate": 4.026837607989527e-06, "loss": 0.055, "step": 2747 }, { "epoch": 0.5759798784321946, "grad_norm": 0.06372876465320587, "learning_rate": 4.0235079064840905e-06, "loss": 0.0547, "step": 2748 }, { "epoch": 0.5761894780968351, "grad_norm": 0.0708019807934761, "learning_rate": 4.020178655141307e-06, "loss": 0.0543, "step": 2749 }, { "epoch": 0.5763990777614756, "grad_norm": 0.06585292518138885, "learning_rate": 4.016849855495959e-06, "loss": 0.0568, "step": 2750 }, { "epoch": 0.5766086774261161, "grad_norm": 0.05741509795188904, "learning_rate": 4.013521509082624e-06, "loss": 0.0556, "step": 2751 }, { "epoch": 0.5768182770907566, "grad_norm": 0.05286962538957596, "learning_rate": 4.0101936174356665e-06, "loss": 0.0564, "step": 2752 }, { "epoch": 0.5770278767553972, "grad_norm": 0.04501480236649513, "learning_rate": 4.0068661820892485e-06, "loss": 0.0571, "step": 2753 }, { "epoch": 0.5772374764200378, "grad_norm": 0.045843351632356644, "learning_rate": 4.003539204577313e-06, "loss": 0.0557, "step": 2754 }, { "epoch": 0.5774470760846783, "grad_norm": 0.062015220522880554, "learning_rate": 4.000212686433597e-06, "loss": 0.053, "step": 2755 }, { "epoch": 0.5776566757493188, "grad_norm": 0.06968379020690918, "learning_rate": 3.9968866291916254e-06, "loss": 0.0549, "step": 2756 }, { "epoch": 0.5778662754139593, "grad_norm": 0.06381241232156754, "learning_rate": 3.99356103438471e-06, "loss": 0.0555, "step": 2757 }, { "epoch": 0.5780758750785999, "grad_norm": 0.051279522478580475, "learning_rate": 3.990235903545947e-06, "loss": 0.0565, "step": 2758 }, { "epoch": 0.5782854747432404, "grad_norm": 0.04932436719536781, "learning_rate": 3.9869112382082255e-06, "loss": 0.0562, "step": 2759 }, { "epoch": 0.578495074407881, "grad_norm": 0.05086817964911461, "learning_rate": 3.98358703990421e-06, "loss": 0.0573, "step": 2760 }, { "epoch": 0.5787046740725215, "grad_norm": 0.03960997983813286, "learning_rate": 3.980263310166359e-06, "loss": 0.0557, "step": 2761 }, { "epoch": 0.578914273737162, "grad_norm": 0.04365481063723564, "learning_rate": 3.976940050526909e-06, "loss": 0.0548, "step": 2762 }, { "epoch": 0.5791238734018026, "grad_norm": 0.0648135393857956, "learning_rate": 3.973617262517886e-06, "loss": 0.0554, "step": 2763 }, { "epoch": 0.5793334730664431, "grad_norm": 0.0678715854883194, "learning_rate": 3.970294947671089e-06, "loss": 0.0566, "step": 2764 }, { "epoch": 0.5795430727310836, "grad_norm": 0.06538737565279007, "learning_rate": 3.9669731075181074e-06, "loss": 0.0556, "step": 2765 }, { "epoch": 0.5797526723957241, "grad_norm": 0.058645885437726974, "learning_rate": 3.963651743590311e-06, "loss": 0.0545, "step": 2766 }, { "epoch": 0.5799622720603647, "grad_norm": 0.04971649497747421, "learning_rate": 3.960330857418844e-06, "loss": 0.0548, "step": 2767 }, { "epoch": 0.5801718717250053, "grad_norm": 0.05847443640232086, "learning_rate": 3.9570104505346345e-06, "loss": 0.0528, "step": 2768 }, { "epoch": 0.5803814713896458, "grad_norm": 0.0770886018872261, "learning_rate": 3.953690524468393e-06, "loss": 0.058, "step": 2769 }, { "epoch": 0.5805910710542863, "grad_norm": 0.08351907879114151, "learning_rate": 3.950371080750602e-06, "loss": 0.0587, "step": 2770 }, { "epoch": 0.5808006707189268, "grad_norm": 0.06995301693677902, "learning_rate": 3.947052120911523e-06, "loss": 0.0554, "step": 2771 }, { "epoch": 0.5810102703835673, "grad_norm": 0.05648371949791908, "learning_rate": 3.9437336464812e-06, "loss": 0.0523, "step": 2772 }, { "epoch": 0.581219870048208, "grad_norm": 0.06814776360988617, "learning_rate": 3.940415658989445e-06, "loss": 0.0542, "step": 2773 }, { "epoch": 0.5814294697128485, "grad_norm": 0.07342953979969025, "learning_rate": 3.93709815996585e-06, "loss": 0.0562, "step": 2774 }, { "epoch": 0.581639069377489, "grad_norm": 0.06355999410152435, "learning_rate": 3.933781150939784e-06, "loss": 0.0555, "step": 2775 }, { "epoch": 0.5818486690421295, "grad_norm": 0.05689748376607895, "learning_rate": 3.9304646334403875e-06, "loss": 0.054, "step": 2776 }, { "epoch": 0.58205826870677, "grad_norm": 0.06319185346364975, "learning_rate": 3.927148608996569e-06, "loss": 0.0562, "step": 2777 }, { "epoch": 0.5822678683714106, "grad_norm": 0.05822164565324783, "learning_rate": 3.923833079137022e-06, "loss": 0.0561, "step": 2778 }, { "epoch": 0.5824774680360512, "grad_norm": 0.058942124247550964, "learning_rate": 3.920518045390201e-06, "loss": 0.0585, "step": 2779 }, { "epoch": 0.5826870677006917, "grad_norm": 0.0649891048669815, "learning_rate": 3.9172035092843365e-06, "loss": 0.0539, "step": 2780 }, { "epoch": 0.5828966673653322, "grad_norm": 0.06785421073436737, "learning_rate": 3.91388947234743e-06, "loss": 0.0573, "step": 2781 }, { "epoch": 0.5831062670299727, "grad_norm": 0.07181856036186218, "learning_rate": 3.9105759361072516e-06, "loss": 0.0548, "step": 2782 }, { "epoch": 0.5833158666946133, "grad_norm": 0.07179451733827591, "learning_rate": 3.907262902091338e-06, "loss": 0.0551, "step": 2783 }, { "epoch": 0.5835254663592538, "grad_norm": 0.0645546168088913, "learning_rate": 3.903950371827001e-06, "loss": 0.0542, "step": 2784 }, { "epoch": 0.5837350660238944, "grad_norm": 0.05469752475619316, "learning_rate": 3.900638346841314e-06, "loss": 0.0533, "step": 2785 }, { "epoch": 0.5839446656885349, "grad_norm": 0.04990299046039581, "learning_rate": 3.897326828661123e-06, "loss": 0.0538, "step": 2786 }, { "epoch": 0.5841542653531754, "grad_norm": 0.05425114557147026, "learning_rate": 3.894015818813034e-06, "loss": 0.0566, "step": 2787 }, { "epoch": 0.584363865017816, "grad_norm": 0.07104803621768951, "learning_rate": 3.890705318823421e-06, "loss": 0.0536, "step": 2788 }, { "epoch": 0.5845734646824565, "grad_norm": 0.0836774930357933, "learning_rate": 3.887395330218429e-06, "loss": 0.055, "step": 2789 }, { "epoch": 0.584783064347097, "grad_norm": 0.08817489445209503, "learning_rate": 3.884085854523956e-06, "loss": 0.0544, "step": 2790 }, { "epoch": 0.5849926640117376, "grad_norm": 0.08014960587024689, "learning_rate": 3.880776893265673e-06, "loss": 0.0536, "step": 2791 }, { "epoch": 0.5852022636763781, "grad_norm": 0.07269839197397232, "learning_rate": 3.877468447969011e-06, "loss": 0.0537, "step": 2792 }, { "epoch": 0.5854118633410187, "grad_norm": 0.06357665359973907, "learning_rate": 3.874160520159159e-06, "loss": 0.0549, "step": 2793 }, { "epoch": 0.5856214630056592, "grad_norm": 0.051662154495716095, "learning_rate": 3.8708531113610735e-06, "loss": 0.0525, "step": 2794 }, { "epoch": 0.5858310626702997, "grad_norm": 0.040864262729883194, "learning_rate": 3.8675462230994725e-06, "loss": 0.0547, "step": 2795 }, { "epoch": 0.5860406623349402, "grad_norm": 0.04362620413303375, "learning_rate": 3.864239856898824e-06, "loss": 0.0537, "step": 2796 }, { "epoch": 0.5862502619995807, "grad_norm": 0.047262124717235565, "learning_rate": 3.860934014283366e-06, "loss": 0.054, "step": 2797 }, { "epoch": 0.5864598616642214, "grad_norm": 0.043658629059791565, "learning_rate": 3.85762869677709e-06, "loss": 0.0525, "step": 2798 }, { "epoch": 0.5866694613288619, "grad_norm": 0.040076591074466705, "learning_rate": 3.854323905903751e-06, "loss": 0.0557, "step": 2799 }, { "epoch": 0.5868790609935024, "grad_norm": 0.03843994066119194, "learning_rate": 3.851019643186851e-06, "loss": 0.0569, "step": 2800 }, { "epoch": 0.5870886606581429, "grad_norm": 0.04533432796597481, "learning_rate": 3.847715910149656e-06, "loss": 0.0561, "step": 2801 }, { "epoch": 0.5872982603227835, "grad_norm": 0.042377371340990067, "learning_rate": 3.8444127083151885e-06, "loss": 0.0557, "step": 2802 }, { "epoch": 0.587507859987424, "grad_norm": 0.03570021688938141, "learning_rate": 3.841110039206222e-06, "loss": 0.0541, "step": 2803 }, { "epoch": 0.5877174596520646, "grad_norm": 0.03966277092695236, "learning_rate": 3.837807904345286e-06, "loss": 0.0542, "step": 2804 }, { "epoch": 0.5879270593167051, "grad_norm": 0.041379570960998535, "learning_rate": 3.834506305254667e-06, "loss": 0.0551, "step": 2805 }, { "epoch": 0.5881366589813456, "grad_norm": 0.03879513218998909, "learning_rate": 3.831205243456397e-06, "loss": 0.056, "step": 2806 }, { "epoch": 0.5883462586459862, "grad_norm": 0.041930824518203735, "learning_rate": 3.827904720472267e-06, "loss": 0.0544, "step": 2807 }, { "epoch": 0.5885558583106267, "grad_norm": 0.04247865080833435, "learning_rate": 3.824604737823819e-06, "loss": 0.0552, "step": 2808 }, { "epoch": 0.5887654579752672, "grad_norm": 0.03654211759567261, "learning_rate": 3.821305297032342e-06, "loss": 0.0553, "step": 2809 }, { "epoch": 0.5889750576399078, "grad_norm": 0.04415356367826462, "learning_rate": 3.818006399618877e-06, "loss": 0.0576, "step": 2810 }, { "epoch": 0.5891846573045483, "grad_norm": 0.058614566922187805, "learning_rate": 3.8147080471042166e-06, "loss": 0.057, "step": 2811 }, { "epoch": 0.5893942569691889, "grad_norm": 0.05693338066339493, "learning_rate": 3.811410241008902e-06, "loss": 0.053, "step": 2812 }, { "epoch": 0.5896038566338294, "grad_norm": 0.04473234340548515, "learning_rate": 3.808112982853217e-06, "loss": 0.0539, "step": 2813 }, { "epoch": 0.5898134562984699, "grad_norm": 0.04146963357925415, "learning_rate": 3.8048162741572008e-06, "loss": 0.056, "step": 2814 }, { "epoch": 0.5900230559631104, "grad_norm": 0.03515983745455742, "learning_rate": 3.801520116440635e-06, "loss": 0.0558, "step": 2815 }, { "epoch": 0.590232655627751, "grad_norm": 0.040382951498031616, "learning_rate": 3.798224511223044e-06, "loss": 0.0587, "step": 2816 }, { "epoch": 0.5904422552923916, "grad_norm": 0.042147841304540634, "learning_rate": 3.794929460023705e-06, "loss": 0.0541, "step": 2817 }, { "epoch": 0.5906518549570321, "grad_norm": 0.0410742312669754, "learning_rate": 3.7916349643616357e-06, "loss": 0.0568, "step": 2818 }, { "epoch": 0.5908614546216726, "grad_norm": 0.03527839481830597, "learning_rate": 3.788341025755595e-06, "loss": 0.0572, "step": 2819 }, { "epoch": 0.5910710542863131, "grad_norm": 0.04356502741575241, "learning_rate": 3.7850476457240905e-06, "loss": 0.0555, "step": 2820 }, { "epoch": 0.5912806539509536, "grad_norm": 0.053171779960393906, "learning_rate": 3.781754825785368e-06, "loss": 0.0572, "step": 2821 }, { "epoch": 0.5914902536155943, "grad_norm": 0.06760048121213913, "learning_rate": 3.77846256745742e-06, "loss": 0.0539, "step": 2822 }, { "epoch": 0.5916998532802348, "grad_norm": 0.06500531733036041, "learning_rate": 3.7751708722579733e-06, "loss": 0.0529, "step": 2823 }, { "epoch": 0.5919094529448753, "grad_norm": 0.0501357838511467, "learning_rate": 3.771879741704499e-06, "loss": 0.053, "step": 2824 }, { "epoch": 0.5921190526095158, "grad_norm": 0.05668150261044502, "learning_rate": 3.768589177314211e-06, "loss": 0.0561, "step": 2825 }, { "epoch": 0.5923286522741563, "grad_norm": 0.05575096979737282, "learning_rate": 3.765299180604055e-06, "loss": 0.0568, "step": 2826 }, { "epoch": 0.592538251938797, "grad_norm": 0.05972772836685181, "learning_rate": 3.7620097530907196e-06, "loss": 0.0563, "step": 2827 }, { "epoch": 0.5927478516034375, "grad_norm": 0.05793767794966698, "learning_rate": 3.758720896290634e-06, "loss": 0.0539, "step": 2828 }, { "epoch": 0.592957451268078, "grad_norm": 0.05584365129470825, "learning_rate": 3.755432611719954e-06, "loss": 0.0543, "step": 2829 }, { "epoch": 0.5931670509327185, "grad_norm": 0.06453042477369308, "learning_rate": 3.752144900894582e-06, "loss": 0.054, "step": 2830 }, { "epoch": 0.593376650597359, "grad_norm": 0.05443593114614487, "learning_rate": 3.7488577653301538e-06, "loss": 0.0543, "step": 2831 }, { "epoch": 0.5935862502619996, "grad_norm": 0.05260578915476799, "learning_rate": 3.7455712065420335e-06, "loss": 0.0575, "step": 2832 }, { "epoch": 0.5937958499266401, "grad_norm": 0.057504259049892426, "learning_rate": 3.7422852260453274e-06, "loss": 0.0539, "step": 2833 }, { "epoch": 0.5940054495912807, "grad_norm": 0.05131387338042259, "learning_rate": 3.7389998253548698e-06, "loss": 0.055, "step": 2834 }, { "epoch": 0.5942150492559212, "grad_norm": 0.04155094549059868, "learning_rate": 3.7357150059852325e-06, "loss": 0.0557, "step": 2835 }, { "epoch": 0.5944246489205617, "grad_norm": 0.0489707812666893, "learning_rate": 3.732430769450714e-06, "loss": 0.0533, "step": 2836 }, { "epoch": 0.5946342485852023, "grad_norm": 0.06095966696739197, "learning_rate": 3.7291471172653463e-06, "loss": 0.0531, "step": 2837 }, { "epoch": 0.5948438482498428, "grad_norm": 0.05945379659533501, "learning_rate": 3.7258640509428955e-06, "loss": 0.0565, "step": 2838 }, { "epoch": 0.5950534479144833, "grad_norm": 0.05267561972141266, "learning_rate": 3.7225815719968522e-06, "loss": 0.0533, "step": 2839 }, { "epoch": 0.5952630475791238, "grad_norm": 0.04585307091474533, "learning_rate": 3.719299681940437e-06, "loss": 0.0541, "step": 2840 }, { "epoch": 0.5954726472437644, "grad_norm": 0.057470791041851044, "learning_rate": 3.716018382286605e-06, "loss": 0.0551, "step": 2841 }, { "epoch": 0.595682246908405, "grad_norm": 0.07041089981794357, "learning_rate": 3.7127376745480313e-06, "loss": 0.0542, "step": 2842 }, { "epoch": 0.5958918465730455, "grad_norm": 0.07431340962648392, "learning_rate": 3.709457560237121e-06, "loss": 0.0535, "step": 2843 }, { "epoch": 0.596101446237686, "grad_norm": 0.06186486408114433, "learning_rate": 3.7061780408660075e-06, "loss": 0.054, "step": 2844 }, { "epoch": 0.5963110459023265, "grad_norm": 0.047516003251075745, "learning_rate": 3.7028991179465502e-06, "loss": 0.0556, "step": 2845 }, { "epoch": 0.596520645566967, "grad_norm": 0.04875975474715233, "learning_rate": 3.699620792990328e-06, "loss": 0.0547, "step": 2846 }, { "epoch": 0.5967302452316077, "grad_norm": 0.05895498767495155, "learning_rate": 3.696343067508651e-06, "loss": 0.0538, "step": 2847 }, { "epoch": 0.5969398448962482, "grad_norm": 0.05717795342206955, "learning_rate": 3.6930659430125506e-06, "loss": 0.0563, "step": 2848 }, { "epoch": 0.5971494445608887, "grad_norm": 0.06923867017030716, "learning_rate": 3.6897894210127765e-06, "loss": 0.056, "step": 2849 }, { "epoch": 0.5973590442255292, "grad_norm": 0.07923050224781036, "learning_rate": 3.6865135030198084e-06, "loss": 0.0544, "step": 2850 }, { "epoch": 0.5975686438901697, "grad_norm": 0.07306721806526184, "learning_rate": 3.683238190543843e-06, "loss": 0.055, "step": 2851 }, { "epoch": 0.5977782435548104, "grad_norm": 0.05905516818165779, "learning_rate": 3.679963485094797e-06, "loss": 0.0547, "step": 2852 }, { "epoch": 0.5979878432194509, "grad_norm": 0.05046588554978371, "learning_rate": 3.6766893881823106e-06, "loss": 0.0556, "step": 2853 }, { "epoch": 0.5981974428840914, "grad_norm": 0.06019177287817001, "learning_rate": 3.673415901315743e-06, "loss": 0.0556, "step": 2854 }, { "epoch": 0.5984070425487319, "grad_norm": 0.06949783116579056, "learning_rate": 3.6701430260041672e-06, "loss": 0.0553, "step": 2855 }, { "epoch": 0.5986166422133724, "grad_norm": 0.06795257329940796, "learning_rate": 3.6668707637563804e-06, "loss": 0.0538, "step": 2856 }, { "epoch": 0.598826241878013, "grad_norm": 0.05731310322880745, "learning_rate": 3.6635991160808943e-06, "loss": 0.0557, "step": 2857 }, { "epoch": 0.5990358415426535, "grad_norm": 0.04602685570716858, "learning_rate": 3.66032808448594e-06, "loss": 0.0542, "step": 2858 }, { "epoch": 0.5992454412072941, "grad_norm": 0.055196985602378845, "learning_rate": 3.6570576704794593e-06, "loss": 0.0562, "step": 2859 }, { "epoch": 0.5994550408719346, "grad_norm": 0.06921806186437607, "learning_rate": 3.6537878755691124e-06, "loss": 0.0544, "step": 2860 }, { "epoch": 0.5996646405365751, "grad_norm": 0.0728953555226326, "learning_rate": 3.650518701262278e-06, "loss": 0.0534, "step": 2861 }, { "epoch": 0.5998742402012157, "grad_norm": 0.07024196535348892, "learning_rate": 3.6472501490660407e-06, "loss": 0.0554, "step": 2862 }, { "epoch": 0.6000838398658562, "grad_norm": 0.06623408198356628, "learning_rate": 3.643982220487202e-06, "loss": 0.0524, "step": 2863 }, { "epoch": 0.6002934395304967, "grad_norm": 0.0592159740626812, "learning_rate": 3.640714917032281e-06, "loss": 0.054, "step": 2864 }, { "epoch": 0.6005030391951373, "grad_norm": 0.04894890636205673, "learning_rate": 3.637448240207499e-06, "loss": 0.0521, "step": 2865 }, { "epoch": 0.6007126388597778, "grad_norm": 0.04575192928314209, "learning_rate": 3.634182191518796e-06, "loss": 0.059, "step": 2866 }, { "epoch": 0.6009222385244184, "grad_norm": 0.05822861194610596, "learning_rate": 3.630916772471817e-06, "loss": 0.0532, "step": 2867 }, { "epoch": 0.6011318381890589, "grad_norm": 0.08098480850458145, "learning_rate": 3.6276519845719237e-06, "loss": 0.0572, "step": 2868 }, { "epoch": 0.6013414378536994, "grad_norm": 0.10072686523199081, "learning_rate": 3.624387829324181e-06, "loss": 0.0538, "step": 2869 }, { "epoch": 0.6015510375183399, "grad_norm": 0.09352651983499527, "learning_rate": 3.62112430823336e-06, "loss": 0.0559, "step": 2870 }, { "epoch": 0.6017606371829806, "grad_norm": 0.06254759430885315, "learning_rate": 3.61786142280395e-06, "loss": 0.0529, "step": 2871 }, { "epoch": 0.6019702368476211, "grad_norm": 0.05310570448637009, "learning_rate": 3.6145991745401354e-06, "loss": 0.0568, "step": 2872 }, { "epoch": 0.6021798365122616, "grad_norm": 0.051381923258304596, "learning_rate": 3.6113375649458126e-06, "loss": 0.0545, "step": 2873 }, { "epoch": 0.6023894361769021, "grad_norm": 0.04709269851446152, "learning_rate": 3.6080765955245867e-06, "loss": 0.0551, "step": 2874 }, { "epoch": 0.6025990358415426, "grad_norm": 0.05449269711971283, "learning_rate": 3.6048162677797595e-06, "loss": 0.053, "step": 2875 }, { "epoch": 0.6028086355061832, "grad_norm": 0.0537823885679245, "learning_rate": 3.601556583214342e-06, "loss": 0.0567, "step": 2876 }, { "epoch": 0.6030182351708238, "grad_norm": 0.041623324155807495, "learning_rate": 3.5982975433310506e-06, "loss": 0.0548, "step": 2877 }, { "epoch": 0.6032278348354643, "grad_norm": 0.04960298910737038, "learning_rate": 3.5950391496323007e-06, "loss": 0.0561, "step": 2878 }, { "epoch": 0.6034374345001048, "grad_norm": 0.06087161973118782, "learning_rate": 3.591781403620209e-06, "loss": 0.0552, "step": 2879 }, { "epoch": 0.6036470341647453, "grad_norm": 0.050851162523031235, "learning_rate": 3.5885243067965992e-06, "loss": 0.0517, "step": 2880 }, { "epoch": 0.6038566338293859, "grad_norm": 0.049509234726428986, "learning_rate": 3.585267860662992e-06, "loss": 0.0563, "step": 2881 }, { "epoch": 0.6040662334940264, "grad_norm": 0.04799444228410721, "learning_rate": 3.582012066720605e-06, "loss": 0.0555, "step": 2882 }, { "epoch": 0.604275833158667, "grad_norm": 0.05108596384525299, "learning_rate": 3.5787569264703614e-06, "loss": 0.0555, "step": 2883 }, { "epoch": 0.6044854328233075, "grad_norm": 0.05567923188209534, "learning_rate": 3.575502441412881e-06, "loss": 0.0538, "step": 2884 }, { "epoch": 0.604695032487948, "grad_norm": 0.052811216562986374, "learning_rate": 3.572248613048477e-06, "loss": 0.0541, "step": 2885 }, { "epoch": 0.6049046321525886, "grad_norm": 0.05271197855472565, "learning_rate": 3.568995442877167e-06, "loss": 0.0536, "step": 2886 }, { "epoch": 0.6051142318172291, "grad_norm": 0.056101392954587936, "learning_rate": 3.565742932398661e-06, "loss": 0.0548, "step": 2887 }, { "epoch": 0.6053238314818696, "grad_norm": 0.06310348957777023, "learning_rate": 3.5624910831123633e-06, "loss": 0.0554, "step": 2888 }, { "epoch": 0.6055334311465101, "grad_norm": 0.05690898001194, "learning_rate": 3.559239896517379e-06, "loss": 0.056, "step": 2889 }, { "epoch": 0.6057430308111507, "grad_norm": 0.037709228694438934, "learning_rate": 3.5559893741125018e-06, "loss": 0.0556, "step": 2890 }, { "epoch": 0.6059526304757913, "grad_norm": 0.043450452387332916, "learning_rate": 3.5527395173962255e-06, "loss": 0.053, "step": 2891 }, { "epoch": 0.6061622301404318, "grad_norm": 0.051693037152290344, "learning_rate": 3.5494903278667305e-06, "loss": 0.0546, "step": 2892 }, { "epoch": 0.6063718298050723, "grad_norm": 0.04153525084257126, "learning_rate": 3.5462418070218913e-06, "loss": 0.0546, "step": 2893 }, { "epoch": 0.6065814294697128, "grad_norm": 0.043478380888700485, "learning_rate": 3.5429939563592795e-06, "loss": 0.0527, "step": 2894 }, { "epoch": 0.6067910291343533, "grad_norm": 0.053386226296424866, "learning_rate": 3.5397467773761495e-06, "loss": 0.0544, "step": 2895 }, { "epoch": 0.607000628798994, "grad_norm": 0.037870582193136215, "learning_rate": 3.536500271569452e-06, "loss": 0.0549, "step": 2896 }, { "epoch": 0.6072102284636345, "grad_norm": 0.043722059577703476, "learning_rate": 3.533254440435826e-06, "loss": 0.0551, "step": 2897 }, { "epoch": 0.607419828128275, "grad_norm": 0.05376339703798294, "learning_rate": 3.5300092854715985e-06, "loss": 0.0537, "step": 2898 }, { "epoch": 0.6076294277929155, "grad_norm": 0.04392022639513016, "learning_rate": 3.5267648081727834e-06, "loss": 0.057, "step": 2899 }, { "epoch": 0.607839027457556, "grad_norm": 0.04517117887735367, "learning_rate": 3.523521010035089e-06, "loss": 0.0543, "step": 2900 }, { "epoch": 0.6080486271221966, "grad_norm": 0.05903865396976471, "learning_rate": 3.520277892553899e-06, "loss": 0.0538, "step": 2901 }, { "epoch": 0.6082582267868372, "grad_norm": 0.049636900424957275, "learning_rate": 3.5170354572242936e-06, "loss": 0.0557, "step": 2902 }, { "epoch": 0.6084678264514777, "grad_norm": 0.037783827632665634, "learning_rate": 3.5137937055410343e-06, "loss": 0.0566, "step": 2903 }, { "epoch": 0.6086774261161182, "grad_norm": 0.04359547793865204, "learning_rate": 3.51055263899857e-06, "loss": 0.0542, "step": 2904 }, { "epoch": 0.6088870257807587, "grad_norm": 0.042558085173368454, "learning_rate": 3.5073122590910285e-06, "loss": 0.057, "step": 2905 }, { "epoch": 0.6090966254453993, "grad_norm": 0.04583901911973953, "learning_rate": 3.5040725673122246e-06, "loss": 0.0591, "step": 2906 }, { "epoch": 0.6093062251100398, "grad_norm": 0.06440989673137665, "learning_rate": 3.500833565155658e-06, "loss": 0.057, "step": 2907 }, { "epoch": 0.6095158247746804, "grad_norm": 0.057344041764736176, "learning_rate": 3.4975952541145063e-06, "loss": 0.0544, "step": 2908 }, { "epoch": 0.6097254244393209, "grad_norm": 0.04767520725727081, "learning_rate": 3.4943576356816287e-06, "loss": 0.0573, "step": 2909 }, { "epoch": 0.6099350241039614, "grad_norm": 0.04085380956530571, "learning_rate": 3.4911207113495703e-06, "loss": 0.0563, "step": 2910 }, { "epoch": 0.610144623768602, "grad_norm": 0.03516289219260216, "learning_rate": 3.4878844826105497e-06, "loss": 0.0583, "step": 2911 }, { "epoch": 0.6103542234332425, "grad_norm": 0.048390522599220276, "learning_rate": 3.4846489509564674e-06, "loss": 0.0521, "step": 2912 }, { "epoch": 0.610563823097883, "grad_norm": 0.05823476240038872, "learning_rate": 3.481414117878906e-06, "loss": 0.0571, "step": 2913 }, { "epoch": 0.6107734227625236, "grad_norm": 0.05212031677365303, "learning_rate": 3.47817998486912e-06, "loss": 0.0531, "step": 2914 }, { "epoch": 0.6109830224271641, "grad_norm": 0.054088983684778214, "learning_rate": 3.474946553418044e-06, "loss": 0.0541, "step": 2915 }, { "epoch": 0.6111926220918047, "grad_norm": 0.05518088862299919, "learning_rate": 3.4717138250162908e-06, "loss": 0.0524, "step": 2916 }, { "epoch": 0.6114022217564452, "grad_norm": 0.04526456817984581, "learning_rate": 3.4684818011541484e-06, "loss": 0.0545, "step": 2917 }, { "epoch": 0.6116118214210857, "grad_norm": 0.036848414689302444, "learning_rate": 3.465250483321575e-06, "loss": 0.053, "step": 2918 }, { "epoch": 0.6118214210857262, "grad_norm": 0.03322865068912506, "learning_rate": 3.462019873008211e-06, "loss": 0.0532, "step": 2919 }, { "epoch": 0.6120310207503667, "grad_norm": 0.035156525671482086, "learning_rate": 3.458789971703367e-06, "loss": 0.0583, "step": 2920 }, { "epoch": 0.6122406204150074, "grad_norm": 0.03802697733044624, "learning_rate": 3.4555607808960232e-06, "loss": 0.0545, "step": 2921 }, { "epoch": 0.6124502200796479, "grad_norm": 0.0401364266872406, "learning_rate": 3.4523323020748413e-06, "loss": 0.0569, "step": 2922 }, { "epoch": 0.6126598197442884, "grad_norm": 0.04033052548766136, "learning_rate": 3.449104536728146e-06, "loss": 0.054, "step": 2923 }, { "epoch": 0.6128694194089289, "grad_norm": 0.0390617661178112, "learning_rate": 3.4458774863439366e-06, "loss": 0.0552, "step": 2924 }, { "epoch": 0.6130790190735694, "grad_norm": 0.042450230568647385, "learning_rate": 3.4426511524098834e-06, "loss": 0.0538, "step": 2925 }, { "epoch": 0.6132886187382101, "grad_norm": 0.038516972213983536, "learning_rate": 3.4394255364133245e-06, "loss": 0.0565, "step": 2926 }, { "epoch": 0.6134982184028506, "grad_norm": 0.029110699892044067, "learning_rate": 3.436200639841271e-06, "loss": 0.0553, "step": 2927 }, { "epoch": 0.6137078180674911, "grad_norm": 0.0337090939283371, "learning_rate": 3.432976464180397e-06, "loss": 0.0579, "step": 2928 }, { "epoch": 0.6139174177321316, "grad_norm": 0.04489186406135559, "learning_rate": 3.4297530109170463e-06, "loss": 0.0531, "step": 2929 }, { "epoch": 0.6141270173967721, "grad_norm": 0.04716401919722557, "learning_rate": 3.426530281537234e-06, "loss": 0.0538, "step": 2930 }, { "epoch": 0.6143366170614127, "grad_norm": 0.053189124912023544, "learning_rate": 3.423308277526633e-06, "loss": 0.0568, "step": 2931 }, { "epoch": 0.6145462167260533, "grad_norm": 0.05599994957447052, "learning_rate": 3.4200870003705883e-06, "loss": 0.056, "step": 2932 }, { "epoch": 0.6147558163906938, "grad_norm": 0.04985819011926651, "learning_rate": 3.41686645155411e-06, "loss": 0.0561, "step": 2933 }, { "epoch": 0.6149654160553343, "grad_norm": 0.04989127814769745, "learning_rate": 3.413646632561868e-06, "loss": 0.0537, "step": 2934 }, { "epoch": 0.6151750157199748, "grad_norm": 0.04989814758300781, "learning_rate": 3.410427544878198e-06, "loss": 0.0575, "step": 2935 }, { "epoch": 0.6153846153846154, "grad_norm": 0.03400817885994911, "learning_rate": 3.4072091899871016e-06, "loss": 0.0551, "step": 2936 }, { "epoch": 0.6155942150492559, "grad_norm": 0.034855592995882034, "learning_rate": 3.403991569372235e-06, "loss": 0.0567, "step": 2937 }, { "epoch": 0.6158038147138964, "grad_norm": 0.042559489607810974, "learning_rate": 3.4007746845169253e-06, "loss": 0.0539, "step": 2938 }, { "epoch": 0.616013414378537, "grad_norm": 0.042176634073257446, "learning_rate": 3.397558536904152e-06, "loss": 0.0521, "step": 2939 }, { "epoch": 0.6162230140431776, "grad_norm": 0.04478650167584419, "learning_rate": 3.394343128016563e-06, "loss": 0.0573, "step": 2940 }, { "epoch": 0.6164326137078181, "grad_norm": 0.04596136510372162, "learning_rate": 3.3911284593364568e-06, "loss": 0.056, "step": 2941 }, { "epoch": 0.6166422133724586, "grad_norm": 0.044569674879312515, "learning_rate": 3.387914532345796e-06, "loss": 0.0557, "step": 2942 }, { "epoch": 0.6168518130370991, "grad_norm": 0.03586115688085556, "learning_rate": 3.3847013485262037e-06, "loss": 0.0556, "step": 2943 }, { "epoch": 0.6170614127017396, "grad_norm": 0.05500089004635811, "learning_rate": 3.381488909358952e-06, "loss": 0.0532, "step": 2944 }, { "epoch": 0.6172710123663803, "grad_norm": 0.06483340263366699, "learning_rate": 3.3782772163249767e-06, "loss": 0.0562, "step": 2945 }, { "epoch": 0.6174806120310208, "grad_norm": 0.057424839586019516, "learning_rate": 3.375066270904869e-06, "loss": 0.0568, "step": 2946 }, { "epoch": 0.6176902116956613, "grad_norm": 0.0566512756049633, "learning_rate": 3.3718560745788724e-06, "loss": 0.0588, "step": 2947 }, { "epoch": 0.6178998113603018, "grad_norm": 0.05279812961816788, "learning_rate": 3.368646628826886e-06, "loss": 0.0549, "step": 2948 }, { "epoch": 0.6181094110249423, "grad_norm": 0.03966805338859558, "learning_rate": 3.365437935128466e-06, "loss": 0.0557, "step": 2949 }, { "epoch": 0.618319010689583, "grad_norm": 0.03704335168004036, "learning_rate": 3.3622299949628197e-06, "loss": 0.0535, "step": 2950 }, { "epoch": 0.6185286103542235, "grad_norm": 0.03752034902572632, "learning_rate": 3.359022809808803e-06, "loss": 0.055, "step": 2951 }, { "epoch": 0.618738210018864, "grad_norm": 0.04068942740559578, "learning_rate": 3.3558163811449317e-06, "loss": 0.0546, "step": 2952 }, { "epoch": 0.6189478096835045, "grad_norm": 0.03766583278775215, "learning_rate": 3.352610710449368e-06, "loss": 0.0553, "step": 2953 }, { "epoch": 0.619157409348145, "grad_norm": 0.044414669275283813, "learning_rate": 3.349405799199922e-06, "loss": 0.0573, "step": 2954 }, { "epoch": 0.6193670090127856, "grad_norm": 0.06159931421279907, "learning_rate": 3.3462016488740612e-06, "loss": 0.0544, "step": 2955 }, { "epoch": 0.6195766086774261, "grad_norm": 0.05573199316859245, "learning_rate": 3.3429982609488976e-06, "loss": 0.0549, "step": 2956 }, { "epoch": 0.6197862083420667, "grad_norm": 0.03836704418063164, "learning_rate": 3.3397956369011897e-06, "loss": 0.0552, "step": 2957 }, { "epoch": 0.6199958080067072, "grad_norm": 0.05629691854119301, "learning_rate": 3.3365937782073486e-06, "loss": 0.0566, "step": 2958 }, { "epoch": 0.6202054076713477, "grad_norm": 0.07051456719636917, "learning_rate": 3.3333926863434317e-06, "loss": 0.0524, "step": 2959 }, { "epoch": 0.6204150073359883, "grad_norm": 0.0706133171916008, "learning_rate": 3.3301923627851385e-06, "loss": 0.0567, "step": 2960 }, { "epoch": 0.6206246070006288, "grad_norm": 0.058882150799036026, "learning_rate": 3.3269928090078204e-06, "loss": 0.054, "step": 2961 }, { "epoch": 0.6208342066652693, "grad_norm": 0.04647863656282425, "learning_rate": 3.3237940264864684e-06, "loss": 0.0558, "step": 2962 }, { "epoch": 0.6210438063299099, "grad_norm": 0.046870891004800797, "learning_rate": 3.320596016695724e-06, "loss": 0.0534, "step": 2963 }, { "epoch": 0.6212534059945504, "grad_norm": 0.043968793004751205, "learning_rate": 3.3173987811098664e-06, "loss": 0.0568, "step": 2964 }, { "epoch": 0.621463005659191, "grad_norm": 0.05325428396463394, "learning_rate": 3.3142023212028197e-06, "loss": 0.0536, "step": 2965 }, { "epoch": 0.6216726053238315, "grad_norm": 0.049214769154787064, "learning_rate": 3.311006638448155e-06, "loss": 0.0545, "step": 2966 }, { "epoch": 0.621882204988472, "grad_norm": 0.04188595712184906, "learning_rate": 3.307811734319078e-06, "loss": 0.0546, "step": 2967 }, { "epoch": 0.6220918046531125, "grad_norm": 0.040931567549705505, "learning_rate": 3.304617610288439e-06, "loss": 0.0557, "step": 2968 }, { "epoch": 0.622301404317753, "grad_norm": 0.03652331605553627, "learning_rate": 3.3014242678287327e-06, "loss": 0.054, "step": 2969 }, { "epoch": 0.6225110039823937, "grad_norm": 0.0368216410279274, "learning_rate": 3.298231708412083e-06, "loss": 0.0558, "step": 2970 }, { "epoch": 0.6227206036470342, "grad_norm": 0.03968511521816254, "learning_rate": 3.295039933510262e-06, "loss": 0.0557, "step": 2971 }, { "epoch": 0.6229302033116747, "grad_norm": 0.04443355277180672, "learning_rate": 3.291848944594679e-06, "loss": 0.0524, "step": 2972 }, { "epoch": 0.6231398029763152, "grad_norm": 0.05209925025701523, "learning_rate": 3.288658743136378e-06, "loss": 0.0553, "step": 2973 }, { "epoch": 0.6233494026409557, "grad_norm": 0.05424797162413597, "learning_rate": 3.2854693306060407e-06, "loss": 0.0547, "step": 2974 }, { "epoch": 0.6235590023055964, "grad_norm": 0.04404807463288307, "learning_rate": 3.282280708473985e-06, "loss": 0.0548, "step": 2975 }, { "epoch": 0.6237686019702369, "grad_norm": 0.03802521154284477, "learning_rate": 3.2790928782101674e-06, "loss": 0.0596, "step": 2976 }, { "epoch": 0.6239782016348774, "grad_norm": 0.045362699776887894, "learning_rate": 3.2759058412841742e-06, "loss": 0.0551, "step": 2977 }, { "epoch": 0.6241878012995179, "grad_norm": 0.05245572328567505, "learning_rate": 3.2727195991652295e-06, "loss": 0.0543, "step": 2978 }, { "epoch": 0.6243974009641584, "grad_norm": 0.056894753128290176, "learning_rate": 3.2695341533221926e-06, "loss": 0.0573, "step": 2979 }, { "epoch": 0.624607000628799, "grad_norm": 0.05126248672604561, "learning_rate": 3.2663495052235505e-06, "loss": 0.0536, "step": 2980 }, { "epoch": 0.6248166002934396, "grad_norm": 0.05021638423204422, "learning_rate": 3.263165656337426e-06, "loss": 0.0562, "step": 2981 }, { "epoch": 0.6250261999580801, "grad_norm": 0.04641691595315933, "learning_rate": 3.2599826081315744e-06, "loss": 0.0555, "step": 2982 }, { "epoch": 0.6252357996227206, "grad_norm": 0.04853666201233864, "learning_rate": 3.2568003620733778e-06, "loss": 0.0559, "step": 2983 }, { "epoch": 0.6254453992873611, "grad_norm": 0.05443096533417702, "learning_rate": 3.2536189196298518e-06, "loss": 0.0531, "step": 2984 }, { "epoch": 0.6256549989520017, "grad_norm": 0.048564642667770386, "learning_rate": 3.250438282267642e-06, "loss": 0.0532, "step": 2985 }, { "epoch": 0.6258645986166422, "grad_norm": 0.04952483996748924, "learning_rate": 3.247258451453022e-06, "loss": 0.0535, "step": 2986 }, { "epoch": 0.6260741982812827, "grad_norm": 0.038559552282094955, "learning_rate": 3.2440794286518896e-06, "loss": 0.0568, "step": 2987 }, { "epoch": 0.6262837979459233, "grad_norm": 0.0397375226020813, "learning_rate": 3.2409012153297762e-06, "loss": 0.0548, "step": 2988 }, { "epoch": 0.6264933976105638, "grad_norm": 0.050432708114385605, "learning_rate": 3.2377238129518392e-06, "loss": 0.0539, "step": 2989 }, { "epoch": 0.6267029972752044, "grad_norm": 0.04017667844891548, "learning_rate": 3.2345472229828556e-06, "loss": 0.0548, "step": 2990 }, { "epoch": 0.6269125969398449, "grad_norm": 0.035998113453388214, "learning_rate": 3.231371446887237e-06, "loss": 0.0556, "step": 2991 }, { "epoch": 0.6271221966044854, "grad_norm": 0.044560160487890244, "learning_rate": 3.2281964861290146e-06, "loss": 0.0553, "step": 2992 }, { "epoch": 0.6273317962691259, "grad_norm": 0.04030834883451462, "learning_rate": 3.225022342171842e-06, "loss": 0.0535, "step": 2993 }, { "epoch": 0.6275413959337665, "grad_norm": 0.03907003253698349, "learning_rate": 3.2218490164790015e-06, "loss": 0.0563, "step": 2994 }, { "epoch": 0.6277509955984071, "grad_norm": 0.037859588861465454, "learning_rate": 3.2186765105133955e-06, "loss": 0.0553, "step": 2995 }, { "epoch": 0.6279605952630476, "grad_norm": 0.04589460417628288, "learning_rate": 3.215504825737549e-06, "loss": 0.0531, "step": 2996 }, { "epoch": 0.6281701949276881, "grad_norm": 0.05734090879559517, "learning_rate": 3.2123339636136065e-06, "loss": 0.0535, "step": 2997 }, { "epoch": 0.6283797945923286, "grad_norm": 0.04286714643239975, "learning_rate": 3.209163925603335e-06, "loss": 0.0539, "step": 2998 }, { "epoch": 0.6285893942569691, "grad_norm": 0.04124724492430687, "learning_rate": 3.2059947131681226e-06, "loss": 0.0566, "step": 2999 }, { "epoch": 0.6287989939216098, "grad_norm": 0.06533816456794739, "learning_rate": 3.202826327768974e-06, "loss": 0.0547, "step": 3000 }, { "epoch": 0.6290085935862503, "grad_norm": 0.08003267645835876, "learning_rate": 3.199658770866515e-06, "loss": 0.0532, "step": 3001 }, { "epoch": 0.6292181932508908, "grad_norm": 0.05155742168426514, "learning_rate": 3.19649204392099e-06, "loss": 0.0543, "step": 3002 }, { "epoch": 0.6294277929155313, "grad_norm": 0.0488094687461853, "learning_rate": 3.193326148392257e-06, "loss": 0.0571, "step": 3003 }, { "epoch": 0.6296373925801718, "grad_norm": 0.07249227166175842, "learning_rate": 3.1901610857397936e-06, "loss": 0.0532, "step": 3004 }, { "epoch": 0.6298469922448124, "grad_norm": 0.09708064049482346, "learning_rate": 3.1869968574226966e-06, "loss": 0.0545, "step": 3005 }, { "epoch": 0.630056591909453, "grad_norm": 0.11073527485132217, "learning_rate": 3.183833464899669e-06, "loss": 0.055, "step": 3006 }, { "epoch": 0.6302661915740935, "grad_norm": 0.10113838315010071, "learning_rate": 3.1806709096290377e-06, "loss": 0.0557, "step": 3007 }, { "epoch": 0.630475791238734, "grad_norm": 0.06188422441482544, "learning_rate": 3.1775091930687374e-06, "loss": 0.0557, "step": 3008 }, { "epoch": 0.6306853909033746, "grad_norm": 0.04998209327459335, "learning_rate": 3.1743483166763234e-06, "loss": 0.0554, "step": 3009 }, { "epoch": 0.6308949905680151, "grad_norm": 0.11552390456199646, "learning_rate": 3.1711882819089553e-06, "loss": 0.0551, "step": 3010 }, { "epoch": 0.6311045902326556, "grad_norm": 0.1522252857685089, "learning_rate": 3.1680290902234078e-06, "loss": 0.0541, "step": 3011 }, { "epoch": 0.6313141898972962, "grad_norm": 0.12954400479793549, "learning_rate": 3.164870743076072e-06, "loss": 0.0543, "step": 3012 }, { "epoch": 0.6315237895619367, "grad_norm": 0.06520622968673706, "learning_rate": 3.16171324192294e-06, "loss": 0.0547, "step": 3013 }, { "epoch": 0.6317333892265773, "grad_norm": 0.06795618683099747, "learning_rate": 3.1585565882196223e-06, "loss": 0.052, "step": 3014 }, { "epoch": 0.6319429888912178, "grad_norm": 0.12307886779308319, "learning_rate": 3.1554007834213357e-06, "loss": 0.0545, "step": 3015 }, { "epoch": 0.6321525885558583, "grad_norm": 0.10787169635295868, "learning_rate": 3.1522458289829045e-06, "loss": 0.0564, "step": 3016 }, { "epoch": 0.6323621882204988, "grad_norm": 0.04827128350734711, "learning_rate": 3.1490917263587607e-06, "loss": 0.0542, "step": 3017 }, { "epoch": 0.6325717878851393, "grad_norm": 0.10941686481237411, "learning_rate": 3.1459384770029476e-06, "loss": 0.0537, "step": 3018 }, { "epoch": 0.63278138754978, "grad_norm": 0.13342756032943726, "learning_rate": 3.1427860823691136e-06, "loss": 0.054, "step": 3019 }, { "epoch": 0.6329909872144205, "grad_norm": 0.0916086882352829, "learning_rate": 3.139634543910507e-06, "loss": 0.0536, "step": 3020 }, { "epoch": 0.633200586879061, "grad_norm": 0.06962735950946808, "learning_rate": 3.13648386307999e-06, "loss": 0.0578, "step": 3021 }, { "epoch": 0.6334101865437015, "grad_norm": 0.11727678030729294, "learning_rate": 3.1333340413300263e-06, "loss": 0.054, "step": 3022 }, { "epoch": 0.633619786208342, "grad_norm": 0.11866160482168198, "learning_rate": 3.1301850801126797e-06, "loss": 0.0551, "step": 3023 }, { "epoch": 0.6338293858729827, "grad_norm": 0.06939590722322464, "learning_rate": 3.127036980879624e-06, "loss": 0.0589, "step": 3024 }, { "epoch": 0.6340389855376232, "grad_norm": 0.07698585093021393, "learning_rate": 3.123889745082132e-06, "loss": 0.0568, "step": 3025 }, { "epoch": 0.6342485852022637, "grad_norm": 0.10211010277271271, "learning_rate": 3.1207433741710757e-06, "loss": 0.0565, "step": 3026 }, { "epoch": 0.6344581848669042, "grad_norm": 0.08808134496212006, "learning_rate": 3.1175978695969344e-06, "loss": 0.0556, "step": 3027 }, { "epoch": 0.6346677845315447, "grad_norm": 0.05355629697442055, "learning_rate": 3.1144532328097853e-06, "loss": 0.0566, "step": 3028 }, { "epoch": 0.6348773841961853, "grad_norm": 0.06826507300138474, "learning_rate": 3.1113094652593023e-06, "loss": 0.0551, "step": 3029 }, { "epoch": 0.6350869838608258, "grad_norm": 0.07204960286617279, "learning_rate": 3.108166568394765e-06, "loss": 0.0561, "step": 3030 }, { "epoch": 0.6352965835254664, "grad_norm": 0.06383077800273895, "learning_rate": 3.105024543665045e-06, "loss": 0.0559, "step": 3031 }, { "epoch": 0.6355061831901069, "grad_norm": 0.050440918654203415, "learning_rate": 3.1018833925186194e-06, "loss": 0.0551, "step": 3032 }, { "epoch": 0.6357157828547474, "grad_norm": 0.05051702260971069, "learning_rate": 3.0987431164035542e-06, "loss": 0.0548, "step": 3033 }, { "epoch": 0.635925382519388, "grad_norm": 0.05574216693639755, "learning_rate": 3.0956037167675164e-06, "loss": 0.0549, "step": 3034 }, { "epoch": 0.6361349821840285, "grad_norm": 0.047421135008335114, "learning_rate": 3.092465195057771e-06, "loss": 0.0559, "step": 3035 }, { "epoch": 0.636344581848669, "grad_norm": 0.0535261407494545, "learning_rate": 3.0893275527211742e-06, "loss": 0.055, "step": 3036 }, { "epoch": 0.6365541815133096, "grad_norm": 0.05517066270112991, "learning_rate": 3.0861907912041776e-06, "loss": 0.0564, "step": 3037 }, { "epoch": 0.6367637811779501, "grad_norm": 0.04538208991289139, "learning_rate": 3.083054911952831e-06, "loss": 0.0563, "step": 3038 }, { "epoch": 0.6369733808425907, "grad_norm": 0.054025497287511826, "learning_rate": 3.0799199164127704e-06, "loss": 0.0538, "step": 3039 }, { "epoch": 0.6371829805072312, "grad_norm": 0.049201078712940216, "learning_rate": 3.0767858060292285e-06, "loss": 0.057, "step": 3040 }, { "epoch": 0.6373925801718717, "grad_norm": 0.042893946170806885, "learning_rate": 3.073652582247033e-06, "loss": 0.0548, "step": 3041 }, { "epoch": 0.6376021798365122, "grad_norm": 0.04716379567980766, "learning_rate": 3.070520246510595e-06, "loss": 0.0529, "step": 3042 }, { "epoch": 0.6378117795011528, "grad_norm": 0.05528158321976662, "learning_rate": 3.067388800263923e-06, "loss": 0.055, "step": 3043 }, { "epoch": 0.6380213791657934, "grad_norm": 0.052032019942998886, "learning_rate": 3.064258244950612e-06, "loss": 0.0524, "step": 3044 }, { "epoch": 0.6382309788304339, "grad_norm": 0.041680578142404556, "learning_rate": 3.0611285820138493e-06, "loss": 0.0556, "step": 3045 }, { "epoch": 0.6384405784950744, "grad_norm": 0.050715796649456024, "learning_rate": 3.0579998128964073e-06, "loss": 0.0546, "step": 3046 }, { "epoch": 0.6386501781597149, "grad_norm": 0.057833991944789886, "learning_rate": 3.0548719390406467e-06, "loss": 0.0522, "step": 3047 }, { "epoch": 0.6388597778243554, "grad_norm": 0.05053876340389252, "learning_rate": 3.051744961888521e-06, "loss": 0.0559, "step": 3048 }, { "epoch": 0.6390693774889961, "grad_norm": 0.047344524413347244, "learning_rate": 3.048618882881561e-06, "loss": 0.0547, "step": 3049 }, { "epoch": 0.6392789771536366, "grad_norm": 0.05755054950714111, "learning_rate": 3.04549370346089e-06, "loss": 0.0537, "step": 3050 }, { "epoch": 0.6394885768182771, "grad_norm": 0.062311019748449326, "learning_rate": 3.0423694250672182e-06, "loss": 0.0551, "step": 3051 }, { "epoch": 0.6396981764829176, "grad_norm": 0.05935823917388916, "learning_rate": 3.0392460491408333e-06, "loss": 0.0545, "step": 3052 }, { "epoch": 0.6399077761475581, "grad_norm": 0.05066816136240959, "learning_rate": 3.0361235771216114e-06, "loss": 0.0509, "step": 3053 }, { "epoch": 0.6401173758121987, "grad_norm": 0.06608299165964127, "learning_rate": 3.033002010449014e-06, "loss": 0.0565, "step": 3054 }, { "epoch": 0.6403269754768393, "grad_norm": 0.05467569828033447, "learning_rate": 3.0298813505620816e-06, "loss": 0.0547, "step": 3055 }, { "epoch": 0.6405365751414798, "grad_norm": 0.048877786844968796, "learning_rate": 3.0267615988994357e-06, "loss": 0.0563, "step": 3056 }, { "epoch": 0.6407461748061203, "grad_norm": 0.053627368062734604, "learning_rate": 3.0236427568992845e-06, "loss": 0.0565, "step": 3057 }, { "epoch": 0.6409557744707608, "grad_norm": 0.05301428213715553, "learning_rate": 3.020524825999412e-06, "loss": 0.0546, "step": 3058 }, { "epoch": 0.6411653741354014, "grad_norm": 0.0528411827981472, "learning_rate": 3.0174078076371815e-06, "loss": 0.053, "step": 3059 }, { "epoch": 0.6413749738000419, "grad_norm": 0.055189281702041626, "learning_rate": 3.014291703249541e-06, "loss": 0.0531, "step": 3060 }, { "epoch": 0.6415845734646825, "grad_norm": 0.0514843687415123, "learning_rate": 3.011176514273014e-06, "loss": 0.0567, "step": 3061 }, { "epoch": 0.641794173129323, "grad_norm": 0.04860663786530495, "learning_rate": 3.008062242143699e-06, "loss": 0.0563, "step": 3062 }, { "epoch": 0.6420037727939635, "grad_norm": 0.05139222368597984, "learning_rate": 3.0049488882972773e-06, "loss": 0.0565, "step": 3063 }, { "epoch": 0.6422133724586041, "grad_norm": 0.04456450790166855, "learning_rate": 3.0018364541690048e-06, "loss": 0.0534, "step": 3064 }, { "epoch": 0.6424229721232446, "grad_norm": 0.05100685730576515, "learning_rate": 2.9987249411937096e-06, "loss": 0.0531, "step": 3065 }, { "epoch": 0.6426325717878851, "grad_norm": 0.04359569400548935, "learning_rate": 2.9956143508058023e-06, "loss": 0.0551, "step": 3066 }, { "epoch": 0.6428421714525256, "grad_norm": 0.049825966358184814, "learning_rate": 2.9925046844392613e-06, "loss": 0.0554, "step": 3067 }, { "epoch": 0.6430517711171662, "grad_norm": 0.05026031658053398, "learning_rate": 2.9893959435276464e-06, "loss": 0.0543, "step": 3068 }, { "epoch": 0.6432613707818068, "grad_norm": 0.04848875477910042, "learning_rate": 2.9862881295040826e-06, "loss": 0.055, "step": 3069 }, { "epoch": 0.6434709704464473, "grad_norm": 0.0475265197455883, "learning_rate": 2.983181243801272e-06, "loss": 0.0545, "step": 3070 }, { "epoch": 0.6436805701110878, "grad_norm": 0.05732462555170059, "learning_rate": 2.9800752878514903e-06, "loss": 0.0564, "step": 3071 }, { "epoch": 0.6438901697757283, "grad_norm": 0.049768634140491486, "learning_rate": 2.9769702630865814e-06, "loss": 0.0524, "step": 3072 }, { "epoch": 0.6440997694403688, "grad_norm": 0.0468343161046505, "learning_rate": 2.973866170937959e-06, "loss": 0.0559, "step": 3073 }, { "epoch": 0.6443093691050095, "grad_norm": 0.05665425956249237, "learning_rate": 2.9707630128366137e-06, "loss": 0.0539, "step": 3074 }, { "epoch": 0.64451896876965, "grad_norm": 0.04687637463212013, "learning_rate": 2.9676607902130972e-06, "loss": 0.0534, "step": 3075 }, { "epoch": 0.6447285684342905, "grad_norm": 0.04909089207649231, "learning_rate": 2.9645595044975328e-06, "loss": 0.0555, "step": 3076 }, { "epoch": 0.644938168098931, "grad_norm": 0.046408187597990036, "learning_rate": 2.961459157119615e-06, "loss": 0.0535, "step": 3077 }, { "epoch": 0.6451477677635716, "grad_norm": 0.04547589272260666, "learning_rate": 2.958359749508603e-06, "loss": 0.0563, "step": 3078 }, { "epoch": 0.6453573674282121, "grad_norm": 0.04302079603075981, "learning_rate": 2.9552612830933216e-06, "loss": 0.0552, "step": 3079 }, { "epoch": 0.6455669670928527, "grad_norm": 0.0457628108561039, "learning_rate": 2.9521637593021636e-06, "loss": 0.0536, "step": 3080 }, { "epoch": 0.6457765667574932, "grad_norm": 0.04492702707648277, "learning_rate": 2.9490671795630884e-06, "loss": 0.0543, "step": 3081 }, { "epoch": 0.6459861664221337, "grad_norm": 0.04878842458128929, "learning_rate": 2.9459715453036163e-06, "loss": 0.0547, "step": 3082 }, { "epoch": 0.6461957660867743, "grad_norm": 0.052640918642282486, "learning_rate": 2.9428768579508347e-06, "loss": 0.0546, "step": 3083 }, { "epoch": 0.6464053657514148, "grad_norm": 0.05040021613240242, "learning_rate": 2.9397831189313963e-06, "loss": 0.0541, "step": 3084 }, { "epoch": 0.6466149654160553, "grad_norm": 0.04280049726366997, "learning_rate": 2.936690329671511e-06, "loss": 0.0556, "step": 3085 }, { "epoch": 0.6468245650806959, "grad_norm": 0.03978101909160614, "learning_rate": 2.933598491596954e-06, "loss": 0.0547, "step": 3086 }, { "epoch": 0.6470341647453364, "grad_norm": 0.03757026419043541, "learning_rate": 2.930507606133065e-06, "loss": 0.0551, "step": 3087 }, { "epoch": 0.647243764409977, "grad_norm": 0.039234839379787445, "learning_rate": 2.927417674704739e-06, "loss": 0.0582, "step": 3088 }, { "epoch": 0.6474533640746175, "grad_norm": 0.03372887521982193, "learning_rate": 2.924328698736434e-06, "loss": 0.0578, "step": 3089 }, { "epoch": 0.647662963739258, "grad_norm": 0.04041403532028198, "learning_rate": 2.9212406796521675e-06, "loss": 0.0554, "step": 3090 }, { "epoch": 0.6478725634038985, "grad_norm": 0.04249253869056702, "learning_rate": 2.9181536188755178e-06, "loss": 0.0546, "step": 3091 }, { "epoch": 0.648082163068539, "grad_norm": 0.04245641827583313, "learning_rate": 2.915067517829615e-06, "loss": 0.0551, "step": 3092 }, { "epoch": 0.6482917627331797, "grad_norm": 0.044679149985313416, "learning_rate": 2.9119823779371536e-06, "loss": 0.0552, "step": 3093 }, { "epoch": 0.6485013623978202, "grad_norm": 0.03879356011748314, "learning_rate": 2.9088982006203835e-06, "loss": 0.0526, "step": 3094 }, { "epoch": 0.6487109620624607, "grad_norm": 0.04242287203669548, "learning_rate": 2.90581498730111e-06, "loss": 0.0546, "step": 3095 }, { "epoch": 0.6489205617271012, "grad_norm": 0.05261940881609917, "learning_rate": 2.902732739400689e-06, "loss": 0.0519, "step": 3096 }, { "epoch": 0.6491301613917417, "grad_norm": 0.054223619401454926, "learning_rate": 2.8996514583400425e-06, "loss": 0.0544, "step": 3097 }, { "epoch": 0.6493397610563824, "grad_norm": 0.04815622419118881, "learning_rate": 2.8965711455396343e-06, "loss": 0.0534, "step": 3098 }, { "epoch": 0.6495493607210229, "grad_norm": 0.0753355398774147, "learning_rate": 2.893491802419492e-06, "loss": 0.0551, "step": 3099 }, { "epoch": 0.6497589603856634, "grad_norm": 0.07607095688581467, "learning_rate": 2.8904134303991928e-06, "loss": 0.0535, "step": 3100 }, { "epoch": 0.6499685600503039, "grad_norm": 0.05531083047389984, "learning_rate": 2.887336030897864e-06, "loss": 0.0524, "step": 3101 }, { "epoch": 0.6501781597149444, "grad_norm": 0.05754781886935234, "learning_rate": 2.884259605334184e-06, "loss": 0.0536, "step": 3102 }, { "epoch": 0.650387759379585, "grad_norm": 0.05980090796947479, "learning_rate": 2.8811841551263873e-06, "loss": 0.0591, "step": 3103 }, { "epoch": 0.6505973590442256, "grad_norm": 0.05938847362995148, "learning_rate": 2.878109681692256e-06, "loss": 0.0561, "step": 3104 }, { "epoch": 0.6508069587088661, "grad_norm": 0.06025892496109009, "learning_rate": 2.8750361864491195e-06, "loss": 0.0583, "step": 3105 }, { "epoch": 0.6510165583735066, "grad_norm": 0.0442025326192379, "learning_rate": 2.871963670813861e-06, "loss": 0.057, "step": 3106 }, { "epoch": 0.6512261580381471, "grad_norm": 0.05523305386304855, "learning_rate": 2.868892136202909e-06, "loss": 0.0543, "step": 3107 }, { "epoch": 0.6514357577027877, "grad_norm": 0.05948861315846443, "learning_rate": 2.865821584032237e-06, "loss": 0.0551, "step": 3108 }, { "epoch": 0.6516453573674282, "grad_norm": 0.05912363529205322, "learning_rate": 2.862752015717371e-06, "loss": 0.0561, "step": 3109 }, { "epoch": 0.6518549570320687, "grad_norm": 0.05048060417175293, "learning_rate": 2.859683432673385e-06, "loss": 0.056, "step": 3110 }, { "epoch": 0.6520645566967093, "grad_norm": 0.07560472190380096, "learning_rate": 2.856615836314889e-06, "loss": 0.0548, "step": 3111 }, { "epoch": 0.6522741563613498, "grad_norm": 0.07296677678823471, "learning_rate": 2.8535492280560487e-06, "loss": 0.055, "step": 3112 }, { "epoch": 0.6524837560259904, "grad_norm": 0.0519208163022995, "learning_rate": 2.850483609310567e-06, "loss": 0.0532, "step": 3113 }, { "epoch": 0.6526933556906309, "grad_norm": 0.05058110132813454, "learning_rate": 2.8474189814916973e-06, "loss": 0.0544, "step": 3114 }, { "epoch": 0.6529029553552714, "grad_norm": 0.06586900353431702, "learning_rate": 2.844355346012228e-06, "loss": 0.0592, "step": 3115 }, { "epoch": 0.653112555019912, "grad_norm": 0.056768354028463364, "learning_rate": 2.8412927042844985e-06, "loss": 0.0548, "step": 3116 }, { "epoch": 0.6533221546845525, "grad_norm": 0.04887418821454048, "learning_rate": 2.838231057720383e-06, "loss": 0.053, "step": 3117 }, { "epoch": 0.6535317543491931, "grad_norm": 0.06352341920137405, "learning_rate": 2.8351704077312998e-06, "loss": 0.0511, "step": 3118 }, { "epoch": 0.6537413540138336, "grad_norm": 0.0803912952542305, "learning_rate": 2.8321107557282083e-06, "loss": 0.054, "step": 3119 }, { "epoch": 0.6539509536784741, "grad_norm": 0.052244700491428375, "learning_rate": 2.829052103121611e-06, "loss": 0.0523, "step": 3120 }, { "epoch": 0.6541605533431146, "grad_norm": 0.04686099290847778, "learning_rate": 2.8259944513215405e-06, "loss": 0.0567, "step": 3121 }, { "epoch": 0.6543701530077551, "grad_norm": 0.07411143183708191, "learning_rate": 2.8229378017375784e-06, "loss": 0.055, "step": 3122 }, { "epoch": 0.6545797526723958, "grad_norm": 0.06782218813896179, "learning_rate": 2.819882155778836e-06, "loss": 0.0541, "step": 3123 }, { "epoch": 0.6547893523370363, "grad_norm": 0.04807867109775543, "learning_rate": 2.816827514853968e-06, "loss": 0.0563, "step": 3124 }, { "epoch": 0.6549989520016768, "grad_norm": 0.047602880746126175, "learning_rate": 2.8137738803711607e-06, "loss": 0.0538, "step": 3125 }, { "epoch": 0.6552085516663173, "grad_norm": 0.05647674947977066, "learning_rate": 2.81072125373814e-06, "loss": 0.0552, "step": 3126 }, { "epoch": 0.6554181513309578, "grad_norm": 0.04610821232199669, "learning_rate": 2.807669636362169e-06, "loss": 0.0572, "step": 3127 }, { "epoch": 0.6556277509955984, "grad_norm": 0.04513276740908623, "learning_rate": 2.8046190296500407e-06, "loss": 0.0562, "step": 3128 }, { "epoch": 0.655837350660239, "grad_norm": 0.06186923012137413, "learning_rate": 2.8015694350080813e-06, "loss": 0.0527, "step": 3129 }, { "epoch": 0.6560469503248795, "grad_norm": 0.04966472089290619, "learning_rate": 2.7985208538421576e-06, "loss": 0.0564, "step": 3130 }, { "epoch": 0.65625654998952, "grad_norm": 0.04229920357465744, "learning_rate": 2.7954732875576606e-06, "loss": 0.0569, "step": 3131 }, { "epoch": 0.6564661496541605, "grad_norm": 0.0393625944852829, "learning_rate": 2.792426737559521e-06, "loss": 0.0555, "step": 3132 }, { "epoch": 0.6566757493188011, "grad_norm": 0.04377404600381851, "learning_rate": 2.7893812052521984e-06, "loss": 0.0534, "step": 3133 }, { "epoch": 0.6568853489834416, "grad_norm": 0.04195486381649971, "learning_rate": 2.7863366920396805e-06, "loss": 0.0553, "step": 3134 }, { "epoch": 0.6570949486480822, "grad_norm": 0.03995470330119133, "learning_rate": 2.7832931993254865e-06, "loss": 0.0516, "step": 3135 }, { "epoch": 0.6573045483127227, "grad_norm": 0.041193921118974686, "learning_rate": 2.7802507285126666e-06, "loss": 0.0564, "step": 3136 }, { "epoch": 0.6575141479773632, "grad_norm": 0.04172206670045853, "learning_rate": 2.7772092810038027e-06, "loss": 0.052, "step": 3137 }, { "epoch": 0.6577237476420038, "grad_norm": 0.040663424879312515, "learning_rate": 2.774168858200996e-06, "loss": 0.0556, "step": 3138 }, { "epoch": 0.6579333473066443, "grad_norm": 0.03617934137582779, "learning_rate": 2.771129461505885e-06, "loss": 0.0541, "step": 3139 }, { "epoch": 0.6581429469712848, "grad_norm": 0.04274788871407509, "learning_rate": 2.7680910923196293e-06, "loss": 0.061, "step": 3140 }, { "epoch": 0.6583525466359254, "grad_norm": 0.040159981697797775, "learning_rate": 2.765053752042915e-06, "loss": 0.0571, "step": 3141 }, { "epoch": 0.6585621463005659, "grad_norm": 0.04021866247057915, "learning_rate": 2.762017442075956e-06, "loss": 0.0527, "step": 3142 }, { "epoch": 0.6587717459652065, "grad_norm": 0.036748629063367844, "learning_rate": 2.7589821638184942e-06, "loss": 0.0545, "step": 3143 }, { "epoch": 0.658981345629847, "grad_norm": 0.047550056129693985, "learning_rate": 2.7559479186697868e-06, "loss": 0.0544, "step": 3144 }, { "epoch": 0.6591909452944875, "grad_norm": 0.05852500721812248, "learning_rate": 2.7529147080286246e-06, "loss": 0.0522, "step": 3145 }, { "epoch": 0.659400544959128, "grad_norm": 0.05024390295147896, "learning_rate": 2.7498825332933154e-06, "loss": 0.0573, "step": 3146 }, { "epoch": 0.6596101446237687, "grad_norm": 0.03166329488158226, "learning_rate": 2.7468513958616895e-06, "loss": 0.0534, "step": 3147 }, { "epoch": 0.6598197442884092, "grad_norm": 0.03988707438111305, "learning_rate": 2.7438212971311016e-06, "loss": 0.0526, "step": 3148 }, { "epoch": 0.6600293439530497, "grad_norm": 0.0417884886264801, "learning_rate": 2.740792238498427e-06, "loss": 0.0536, "step": 3149 }, { "epoch": 0.6602389436176902, "grad_norm": 0.04264437034726143, "learning_rate": 2.7377642213600652e-06, "loss": 0.0557, "step": 3150 }, { "epoch": 0.6604485432823307, "grad_norm": 0.039088521152734756, "learning_rate": 2.734737247111924e-06, "loss": 0.0562, "step": 3151 }, { "epoch": 0.6606581429469713, "grad_norm": 0.0382147878408432, "learning_rate": 2.7317113171494407e-06, "loss": 0.054, "step": 3152 }, { "epoch": 0.6608677426116119, "grad_norm": 0.05300328880548477, "learning_rate": 2.728686432867571e-06, "loss": 0.0522, "step": 3153 }, { "epoch": 0.6610773422762524, "grad_norm": 0.043220195919275284, "learning_rate": 2.7256625956607822e-06, "loss": 0.0544, "step": 3154 }, { "epoch": 0.6612869419408929, "grad_norm": 0.034808751195669174, "learning_rate": 2.722639806923066e-06, "loss": 0.0513, "step": 3155 }, { "epoch": 0.6614965416055334, "grad_norm": 0.045243218541145325, "learning_rate": 2.719618068047927e-06, "loss": 0.0548, "step": 3156 }, { "epoch": 0.661706141270174, "grad_norm": 0.0599578395485878, "learning_rate": 2.716597380428382e-06, "loss": 0.052, "step": 3157 }, { "epoch": 0.6619157409348145, "grad_norm": 0.0630265325307846, "learning_rate": 2.7135777454569703e-06, "loss": 0.0534, "step": 3158 }, { "epoch": 0.662125340599455, "grad_norm": 0.04426709935069084, "learning_rate": 2.7105591645257432e-06, "loss": 0.0565, "step": 3159 }, { "epoch": 0.6623349402640956, "grad_norm": 0.030477387830615044, "learning_rate": 2.7075416390262676e-06, "loss": 0.0565, "step": 3160 }, { "epoch": 0.6625445399287361, "grad_norm": 0.04804522171616554, "learning_rate": 2.70452517034962e-06, "loss": 0.0542, "step": 3161 }, { "epoch": 0.6627541395933767, "grad_norm": 0.06287235021591187, "learning_rate": 2.7015097598863906e-06, "loss": 0.052, "step": 3162 }, { "epoch": 0.6629637392580172, "grad_norm": 0.04516349360346794, "learning_rate": 2.6984954090266856e-06, "loss": 0.0546, "step": 3163 }, { "epoch": 0.6631733389226577, "grad_norm": 0.03801724314689636, "learning_rate": 2.6954821191601175e-06, "loss": 0.0541, "step": 3164 }, { "epoch": 0.6633829385872982, "grad_norm": 0.07248124480247498, "learning_rate": 2.692469891675814e-06, "loss": 0.0553, "step": 3165 }, { "epoch": 0.6635925382519388, "grad_norm": 0.06941161304712296, "learning_rate": 2.689458727962413e-06, "loss": 0.0552, "step": 3166 }, { "epoch": 0.6638021379165794, "grad_norm": 0.042739253491163254, "learning_rate": 2.6864486294080585e-06, "loss": 0.0544, "step": 3167 }, { "epoch": 0.6640117375812199, "grad_norm": 0.05017360299825668, "learning_rate": 2.683439597400403e-06, "loss": 0.0566, "step": 3168 }, { "epoch": 0.6642213372458604, "grad_norm": 0.07375095039606094, "learning_rate": 2.680431633326614e-06, "loss": 0.0569, "step": 3169 }, { "epoch": 0.6644309369105009, "grad_norm": 0.0569414347410202, "learning_rate": 2.677424738573359e-06, "loss": 0.0524, "step": 3170 }, { "epoch": 0.6646405365751414, "grad_norm": 0.041695497930049896, "learning_rate": 2.6744189145268155e-06, "loss": 0.0548, "step": 3171 }, { "epoch": 0.6648501362397821, "grad_norm": 0.06951986998319626, "learning_rate": 2.6714141625726725e-06, "loss": 0.0538, "step": 3172 }, { "epoch": 0.6650597359044226, "grad_norm": 0.06547250598669052, "learning_rate": 2.6684104840961167e-06, "loss": 0.0563, "step": 3173 }, { "epoch": 0.6652693355690631, "grad_norm": 0.041869353502988815, "learning_rate": 2.665407880481841e-06, "loss": 0.0529, "step": 3174 }, { "epoch": 0.6654789352337036, "grad_norm": 0.05186137557029724, "learning_rate": 2.6624063531140477e-06, "loss": 0.0539, "step": 3175 }, { "epoch": 0.6656885348983441, "grad_norm": 0.06323316693305969, "learning_rate": 2.659405903376442e-06, "loss": 0.0561, "step": 3176 }, { "epoch": 0.6658981345629847, "grad_norm": 0.05578729510307312, "learning_rate": 2.656406532652227e-06, "loss": 0.0543, "step": 3177 }, { "epoch": 0.6661077342276253, "grad_norm": 0.04175778478384018, "learning_rate": 2.6534082423241154e-06, "loss": 0.0538, "step": 3178 }, { "epoch": 0.6663173338922658, "grad_norm": 0.039774637669324875, "learning_rate": 2.6504110337743166e-06, "loss": 0.0528, "step": 3179 }, { "epoch": 0.6665269335569063, "grad_norm": 0.04265711456537247, "learning_rate": 2.6474149083845412e-06, "loss": 0.0546, "step": 3180 }, { "epoch": 0.6667365332215468, "grad_norm": 0.04271739721298218, "learning_rate": 2.6444198675360044e-06, "loss": 0.055, "step": 3181 }, { "epoch": 0.6669461328861874, "grad_norm": 0.037498895078897476, "learning_rate": 2.6414259126094192e-06, "loss": 0.0567, "step": 3182 }, { "epoch": 0.6671557325508279, "grad_norm": 0.040308877825737, "learning_rate": 2.6384330449850028e-06, "loss": 0.0524, "step": 3183 }, { "epoch": 0.6673653322154685, "grad_norm": 0.04669109731912613, "learning_rate": 2.6354412660424577e-06, "loss": 0.0537, "step": 3184 }, { "epoch": 0.667574931880109, "grad_norm": 0.03690009191632271, "learning_rate": 2.6324505771609976e-06, "loss": 0.0583, "step": 3185 }, { "epoch": 0.6677845315447495, "grad_norm": 0.04083159938454628, "learning_rate": 2.6294609797193326e-06, "loss": 0.0584, "step": 3186 }, { "epoch": 0.6679941312093901, "grad_norm": 0.041967883706092834, "learning_rate": 2.6264724750956605e-06, "loss": 0.055, "step": 3187 }, { "epoch": 0.6682037308740306, "grad_norm": 0.034479230642318726, "learning_rate": 2.623485064667687e-06, "loss": 0.0565, "step": 3188 }, { "epoch": 0.6684133305386711, "grad_norm": 0.04071040078997612, "learning_rate": 2.6204987498126046e-06, "loss": 0.0576, "step": 3189 }, { "epoch": 0.6686229302033116, "grad_norm": 0.04605560004711151, "learning_rate": 2.617513531907103e-06, "loss": 0.0516, "step": 3190 }, { "epoch": 0.6688325298679522, "grad_norm": 0.042507972568273544, "learning_rate": 2.6145294123273677e-06, "loss": 0.0521, "step": 3191 }, { "epoch": 0.6690421295325928, "grad_norm": 0.03638184443116188, "learning_rate": 2.6115463924490796e-06, "loss": 0.0575, "step": 3192 }, { "epoch": 0.6692517291972333, "grad_norm": 0.035932447761297226, "learning_rate": 2.608564473647407e-06, "loss": 0.0562, "step": 3193 }, { "epoch": 0.6694613288618738, "grad_norm": 0.0350482314825058, "learning_rate": 2.605583657297017e-06, "loss": 0.0577, "step": 3194 }, { "epoch": 0.6696709285265143, "grad_norm": 0.03746052831411362, "learning_rate": 2.602603944772062e-06, "loss": 0.0553, "step": 3195 }, { "epoch": 0.6698805281911548, "grad_norm": 0.031886544078588486, "learning_rate": 2.5996253374461924e-06, "loss": 0.0555, "step": 3196 }, { "epoch": 0.6700901278557955, "grad_norm": 0.03672702610492706, "learning_rate": 2.5966478366925406e-06, "loss": 0.055, "step": 3197 }, { "epoch": 0.670299727520436, "grad_norm": 0.03667857125401497, "learning_rate": 2.593671443883738e-06, "loss": 0.0557, "step": 3198 }, { "epoch": 0.6705093271850765, "grad_norm": 0.03002052754163742, "learning_rate": 2.590696160391901e-06, "loss": 0.0557, "step": 3199 }, { "epoch": 0.670718926849717, "grad_norm": 0.034151580184698105, "learning_rate": 2.587721987588635e-06, "loss": 0.0556, "step": 3200 }, { "epoch": 0.6709285265143575, "grad_norm": 0.037925586104393005, "learning_rate": 2.5847489268450287e-06, "loss": 0.0564, "step": 3201 }, { "epoch": 0.6711381261789982, "grad_norm": 0.0316159762442112, "learning_rate": 2.5817769795316674e-06, "loss": 0.0528, "step": 3202 }, { "epoch": 0.6713477258436387, "grad_norm": 0.03222265467047691, "learning_rate": 2.578806147018614e-06, "loss": 0.0547, "step": 3203 }, { "epoch": 0.6715573255082792, "grad_norm": 0.0310065895318985, "learning_rate": 2.5758364306754247e-06, "loss": 0.0515, "step": 3204 }, { "epoch": 0.6717669251729197, "grad_norm": 0.03238251060247421, "learning_rate": 2.5728678318711385e-06, "loss": 0.058, "step": 3205 }, { "epoch": 0.6719765248375602, "grad_norm": 0.026389988139271736, "learning_rate": 2.5699003519742783e-06, "loss": 0.0564, "step": 3206 }, { "epoch": 0.6721861245022008, "grad_norm": 0.028875682502985, "learning_rate": 2.566933992352849e-06, "loss": 0.0546, "step": 3207 }, { "epoch": 0.6723957241668413, "grad_norm": 0.03320210054516792, "learning_rate": 2.563968754374344e-06, "loss": 0.0556, "step": 3208 }, { "epoch": 0.6726053238314819, "grad_norm": 0.02969476394355297, "learning_rate": 2.5610046394057386e-06, "loss": 0.0555, "step": 3209 }, { "epoch": 0.6728149234961224, "grad_norm": 0.029616905376315117, "learning_rate": 2.5580416488134864e-06, "loss": 0.056, "step": 3210 }, { "epoch": 0.6730245231607629, "grad_norm": 0.023642316460609436, "learning_rate": 2.5550797839635283e-06, "loss": 0.0539, "step": 3211 }, { "epoch": 0.6732341228254035, "grad_norm": 0.028511328622698784, "learning_rate": 2.552119046221282e-06, "loss": 0.0526, "step": 3212 }, { "epoch": 0.673443722490044, "grad_norm": 0.030537253245711327, "learning_rate": 2.5491594369516452e-06, "loss": 0.0572, "step": 3213 }, { "epoch": 0.6736533221546845, "grad_norm": 0.029667936265468597, "learning_rate": 2.546200957518999e-06, "loss": 0.0561, "step": 3214 }, { "epoch": 0.6738629218193251, "grad_norm": 0.025500137358903885, "learning_rate": 2.5432436092872036e-06, "loss": 0.0569, "step": 3215 }, { "epoch": 0.6740725214839657, "grad_norm": 0.024678369984030724, "learning_rate": 2.5402873936195914e-06, "loss": 0.0559, "step": 3216 }, { "epoch": 0.6742821211486062, "grad_norm": 0.031020864844322205, "learning_rate": 2.537332311878983e-06, "loss": 0.0552, "step": 3217 }, { "epoch": 0.6744917208132467, "grad_norm": 0.034975748509168625, "learning_rate": 2.5343783654276644e-06, "loss": 0.0541, "step": 3218 }, { "epoch": 0.6747013204778872, "grad_norm": 0.03894275054335594, "learning_rate": 2.5314255556274092e-06, "loss": 0.0545, "step": 3219 }, { "epoch": 0.6749109201425277, "grad_norm": 0.029810387641191483, "learning_rate": 2.5284738838394586e-06, "loss": 0.0543, "step": 3220 }, { "epoch": 0.6751205198071684, "grad_norm": 0.02972550317645073, "learning_rate": 2.5255233514245358e-06, "loss": 0.0532, "step": 3221 }, { "epoch": 0.6753301194718089, "grad_norm": 0.02595905028283596, "learning_rate": 2.5225739597428334e-06, "loss": 0.0548, "step": 3222 }, { "epoch": 0.6755397191364494, "grad_norm": 0.025289386510849, "learning_rate": 2.51962571015402e-06, "loss": 0.0532, "step": 3223 }, { "epoch": 0.6757493188010899, "grad_norm": 0.028770189732313156, "learning_rate": 2.5166786040172387e-06, "loss": 0.0516, "step": 3224 }, { "epoch": 0.6759589184657304, "grad_norm": 0.024205055087804794, "learning_rate": 2.5137326426911067e-06, "loss": 0.052, "step": 3225 }, { "epoch": 0.676168518130371, "grad_norm": 0.02634621225297451, "learning_rate": 2.5107878275337084e-06, "loss": 0.0547, "step": 3226 }, { "epoch": 0.6763781177950116, "grad_norm": 0.031177837401628494, "learning_rate": 2.507844159902606e-06, "loss": 0.0581, "step": 3227 }, { "epoch": 0.6765877174596521, "grad_norm": 0.032176099717617035, "learning_rate": 2.5049016411548273e-06, "loss": 0.0547, "step": 3228 }, { "epoch": 0.6767973171242926, "grad_norm": 0.03147950395941734, "learning_rate": 2.501960272646875e-06, "loss": 0.0561, "step": 3229 }, { "epoch": 0.6770069167889331, "grad_norm": 0.02897576615214348, "learning_rate": 2.499020055734716e-06, "loss": 0.0564, "step": 3230 }, { "epoch": 0.6772165164535737, "grad_norm": 0.03353496268391609, "learning_rate": 2.496080991773792e-06, "loss": 0.0531, "step": 3231 }, { "epoch": 0.6774261161182142, "grad_norm": 0.03561830520629883, "learning_rate": 2.493143082119013e-06, "loss": 0.0565, "step": 3232 }, { "epoch": 0.6776357157828548, "grad_norm": 0.027927443385124207, "learning_rate": 2.490206328124752e-06, "loss": 0.0541, "step": 3233 }, { "epoch": 0.6778453154474953, "grad_norm": 0.037114545702934265, "learning_rate": 2.4872707311448504e-06, "loss": 0.0534, "step": 3234 }, { "epoch": 0.6780549151121358, "grad_norm": 0.03275038301944733, "learning_rate": 2.484336292532622e-06, "loss": 0.0519, "step": 3235 }, { "epoch": 0.6782645147767764, "grad_norm": 0.02878388576209545, "learning_rate": 2.481403013640838e-06, "loss": 0.0572, "step": 3236 }, { "epoch": 0.6784741144414169, "grad_norm": 0.02609802968800068, "learning_rate": 2.478470895821742e-06, "loss": 0.052, "step": 3237 }, { "epoch": 0.6786837141060574, "grad_norm": 0.024750111624598503, "learning_rate": 2.475539940427041e-06, "loss": 0.0553, "step": 3238 }, { "epoch": 0.678893313770698, "grad_norm": 0.02313164807856083, "learning_rate": 2.472610148807903e-06, "loss": 0.0561, "step": 3239 }, { "epoch": 0.6791029134353385, "grad_norm": 0.02588365226984024, "learning_rate": 2.469681522314959e-06, "loss": 0.0566, "step": 3240 }, { "epoch": 0.6793125130999791, "grad_norm": 0.02386978268623352, "learning_rate": 2.4667540622983083e-06, "loss": 0.053, "step": 3241 }, { "epoch": 0.6795221127646196, "grad_norm": 0.025168968364596367, "learning_rate": 2.4638277701075103e-06, "loss": 0.0536, "step": 3242 }, { "epoch": 0.6797317124292601, "grad_norm": 0.02500263787806034, "learning_rate": 2.460902647091582e-06, "loss": 0.0524, "step": 3243 }, { "epoch": 0.6799413120939006, "grad_norm": 0.02609972469508648, "learning_rate": 2.4579786945990073e-06, "loss": 0.0566, "step": 3244 }, { "epoch": 0.6801509117585411, "grad_norm": 0.03376591205596924, "learning_rate": 2.4550559139777264e-06, "loss": 0.0542, "step": 3245 }, { "epoch": 0.6803605114231818, "grad_norm": 0.03760766610503197, "learning_rate": 2.452134306575139e-06, "loss": 0.0535, "step": 3246 }, { "epoch": 0.6805701110878223, "grad_norm": 0.029737234115600586, "learning_rate": 2.4492138737381066e-06, "loss": 0.0522, "step": 3247 }, { "epoch": 0.6807797107524628, "grad_norm": 0.030677396804094315, "learning_rate": 2.446294616812951e-06, "loss": 0.0523, "step": 3248 }, { "epoch": 0.6809893104171033, "grad_norm": 0.05189737677574158, "learning_rate": 2.443376537145444e-06, "loss": 0.0541, "step": 3249 }, { "epoch": 0.6811989100817438, "grad_norm": 0.0667051374912262, "learning_rate": 2.4404596360808255e-06, "loss": 0.056, "step": 3250 }, { "epoch": 0.6814085097463844, "grad_norm": 0.05018071457743645, "learning_rate": 2.437543914963782e-06, "loss": 0.0542, "step": 3251 }, { "epoch": 0.681618109411025, "grad_norm": 0.02675667405128479, "learning_rate": 2.4346293751384597e-06, "loss": 0.0551, "step": 3252 }, { "epoch": 0.6818277090756655, "grad_norm": 0.04382751137018204, "learning_rate": 2.431716017948462e-06, "loss": 0.0544, "step": 3253 }, { "epoch": 0.682037308740306, "grad_norm": 0.051815591752529144, "learning_rate": 2.428803844736848e-06, "loss": 0.0546, "step": 3254 }, { "epoch": 0.6822469084049465, "grad_norm": 0.034245654940605164, "learning_rate": 2.4258928568461303e-06, "loss": 0.0518, "step": 3255 }, { "epoch": 0.6824565080695871, "grad_norm": 0.029311811551451683, "learning_rate": 2.422983055618267e-06, "loss": 0.0555, "step": 3256 }, { "epoch": 0.6826661077342276, "grad_norm": 0.03641844168305397, "learning_rate": 2.4200744423946803e-06, "loss": 0.0532, "step": 3257 }, { "epoch": 0.6828757073988682, "grad_norm": 0.03218969702720642, "learning_rate": 2.4171670185162415e-06, "loss": 0.0543, "step": 3258 }, { "epoch": 0.6830853070635087, "grad_norm": 0.024815967306494713, "learning_rate": 2.4142607853232687e-06, "loss": 0.0584, "step": 3259 }, { "epoch": 0.6832949067281492, "grad_norm": 0.025883564725518227, "learning_rate": 2.4113557441555384e-06, "loss": 0.0544, "step": 3260 }, { "epoch": 0.6835045063927898, "grad_norm": 0.03328178450465202, "learning_rate": 2.4084518963522724e-06, "loss": 0.0541, "step": 3261 }, { "epoch": 0.6837141060574303, "grad_norm": 0.028745753690600395, "learning_rate": 2.4055492432521428e-06, "loss": 0.0541, "step": 3262 }, { "epoch": 0.6839237057220708, "grad_norm": 0.023591186851263046, "learning_rate": 2.402647786193272e-06, "loss": 0.0531, "step": 3263 }, { "epoch": 0.6841333053867114, "grad_norm": 0.026261072605848312, "learning_rate": 2.3997475265132333e-06, "loss": 0.058, "step": 3264 }, { "epoch": 0.6843429050513519, "grad_norm": 0.023627085611224174, "learning_rate": 2.3968484655490455e-06, "loss": 0.0551, "step": 3265 }, { "epoch": 0.6845525047159925, "grad_norm": 0.024966632947325706, "learning_rate": 2.3939506046371753e-06, "loss": 0.0562, "step": 3266 }, { "epoch": 0.684762104380633, "grad_norm": 0.02931971289217472, "learning_rate": 2.391053945113533e-06, "loss": 0.0526, "step": 3267 }, { "epoch": 0.6849717040452735, "grad_norm": 0.035215772688388824, "learning_rate": 2.388158488313481e-06, "loss": 0.0566, "step": 3268 }, { "epoch": 0.685181303709914, "grad_norm": 0.02961471863090992, "learning_rate": 2.3852642355718224e-06, "loss": 0.0554, "step": 3269 }, { "epoch": 0.6853909033745545, "grad_norm": 0.028823746368288994, "learning_rate": 2.3823711882228077e-06, "loss": 0.053, "step": 3270 }, { "epoch": 0.6856005030391952, "grad_norm": 0.04585007578134537, "learning_rate": 2.379479347600134e-06, "loss": 0.0535, "step": 3271 }, { "epoch": 0.6858101027038357, "grad_norm": 0.03799903392791748, "learning_rate": 2.3765887150369366e-06, "loss": 0.0529, "step": 3272 }, { "epoch": 0.6860197023684762, "grad_norm": 0.02885265089571476, "learning_rate": 2.373699291865794e-06, "loss": 0.0559, "step": 3273 }, { "epoch": 0.6862293020331167, "grad_norm": 0.03915941342711449, "learning_rate": 2.370811079418735e-06, "loss": 0.0528, "step": 3274 }, { "epoch": 0.6864389016977572, "grad_norm": 0.03368467465043068, "learning_rate": 2.3679240790272203e-06, "loss": 0.0532, "step": 3275 }, { "epoch": 0.6866485013623979, "grad_norm": 0.03050885535776615, "learning_rate": 2.3650382920221578e-06, "loss": 0.0564, "step": 3276 }, { "epoch": 0.6868581010270384, "grad_norm": 0.03771447017788887, "learning_rate": 2.3621537197338977e-06, "loss": 0.0533, "step": 3277 }, { "epoch": 0.6870677006916789, "grad_norm": 0.040800098329782486, "learning_rate": 2.359270363492225e-06, "loss": 0.056, "step": 3278 }, { "epoch": 0.6872773003563194, "grad_norm": 0.03476854786276817, "learning_rate": 2.356388224626364e-06, "loss": 0.0529, "step": 3279 }, { "epoch": 0.6874869000209599, "grad_norm": 0.026112914085388184, "learning_rate": 2.3535073044649824e-06, "loss": 0.055, "step": 3280 }, { "epoch": 0.6876964996856005, "grad_norm": 0.03609404340386391, "learning_rate": 2.350627604336186e-06, "loss": 0.0527, "step": 3281 }, { "epoch": 0.687906099350241, "grad_norm": 0.039491958916187286, "learning_rate": 2.347749125567511e-06, "loss": 0.054, "step": 3282 }, { "epoch": 0.6881156990148816, "grad_norm": 0.02687055617570877, "learning_rate": 2.344871869485941e-06, "loss": 0.0556, "step": 3283 }, { "epoch": 0.6883252986795221, "grad_norm": 0.03269565477967262, "learning_rate": 2.341995837417887e-06, "loss": 0.0552, "step": 3284 }, { "epoch": 0.6885348983441627, "grad_norm": 0.037071727216243744, "learning_rate": 2.3391210306891977e-06, "loss": 0.0513, "step": 3285 }, { "epoch": 0.6887444980088032, "grad_norm": 0.03246400132775307, "learning_rate": 2.336247450625161e-06, "loss": 0.0536, "step": 3286 }, { "epoch": 0.6889540976734437, "grad_norm": 0.02836901880800724, "learning_rate": 2.333375098550496e-06, "loss": 0.0533, "step": 3287 }, { "epoch": 0.6891636973380842, "grad_norm": 0.04530966654419899, "learning_rate": 2.330503975789361e-06, "loss": 0.0553, "step": 3288 }, { "epoch": 0.6893732970027248, "grad_norm": 0.05676478147506714, "learning_rate": 2.3276340836653342e-06, "loss": 0.0544, "step": 3289 }, { "epoch": 0.6895828966673654, "grad_norm": 0.03974361717700958, "learning_rate": 2.3247654235014403e-06, "loss": 0.056, "step": 3290 }, { "epoch": 0.6897924963320059, "grad_norm": 0.022683048620820045, "learning_rate": 2.321897996620132e-06, "loss": 0.0571, "step": 3291 }, { "epoch": 0.6900020959966464, "grad_norm": 0.03632507100701332, "learning_rate": 2.3190318043432892e-06, "loss": 0.053, "step": 3292 }, { "epoch": 0.6902116956612869, "grad_norm": 0.029882922768592834, "learning_rate": 2.3161668479922293e-06, "loss": 0.0551, "step": 3293 }, { "epoch": 0.6904212953259274, "grad_norm": 0.0291314534842968, "learning_rate": 2.3133031288876955e-06, "loss": 0.0527, "step": 3294 }, { "epoch": 0.6906308949905681, "grad_norm": 0.03613777831196785, "learning_rate": 2.3104406483498593e-06, "loss": 0.0545, "step": 3295 }, { "epoch": 0.6908404946552086, "grad_norm": 0.04006670415401459, "learning_rate": 2.3075794076983254e-06, "loss": 0.0551, "step": 3296 }, { "epoch": 0.6910500943198491, "grad_norm": 0.031675100326538086, "learning_rate": 2.304719408252127e-06, "loss": 0.0566, "step": 3297 }, { "epoch": 0.6912596939844896, "grad_norm": 0.03170279040932655, "learning_rate": 2.30186065132972e-06, "loss": 0.0545, "step": 3298 }, { "epoch": 0.6914692936491301, "grad_norm": 0.04302024096250534, "learning_rate": 2.2990031382489935e-06, "loss": 0.055, "step": 3299 }, { "epoch": 0.6916788933137707, "grad_norm": 0.03898413106799126, "learning_rate": 2.296146870327258e-06, "loss": 0.0566, "step": 3300 }, { "epoch": 0.6918884929784113, "grad_norm": 0.021424876525998116, "learning_rate": 2.293291848881255e-06, "loss": 0.0572, "step": 3301 }, { "epoch": 0.6920980926430518, "grad_norm": 0.03237781301140785, "learning_rate": 2.290438075227146e-06, "loss": 0.0543, "step": 3302 }, { "epoch": 0.6923076923076923, "grad_norm": 0.03710688278079033, "learning_rate": 2.2875855506805217e-06, "loss": 0.0557, "step": 3303 }, { "epoch": 0.6925172919723328, "grad_norm": 0.030332237482070923, "learning_rate": 2.284734276556396e-06, "loss": 0.056, "step": 3304 }, { "epoch": 0.6927268916369734, "grad_norm": 0.02461421862244606, "learning_rate": 2.281884254169206e-06, "loss": 0.0517, "step": 3305 }, { "epoch": 0.6929364913016139, "grad_norm": 0.02470548450946808, "learning_rate": 2.279035484832808e-06, "loss": 0.0545, "step": 3306 }, { "epoch": 0.6931460909662545, "grad_norm": 0.025578899309039116, "learning_rate": 2.276187969860488e-06, "loss": 0.0556, "step": 3307 }, { "epoch": 0.693355690630895, "grad_norm": 0.024182526394724846, "learning_rate": 2.2733417105649458e-06, "loss": 0.0549, "step": 3308 }, { "epoch": 0.6935652902955355, "grad_norm": 0.02719123847782612, "learning_rate": 2.270496708258309e-06, "loss": 0.0541, "step": 3309 }, { "epoch": 0.6937748899601761, "grad_norm": 0.03566061705350876, "learning_rate": 2.2676529642521244e-06, "loss": 0.0548, "step": 3310 }, { "epoch": 0.6939844896248166, "grad_norm": 0.036818746477365494, "learning_rate": 2.264810479857356e-06, "loss": 0.0577, "step": 3311 }, { "epoch": 0.6941940892894571, "grad_norm": 0.03650686517357826, "learning_rate": 2.2619692563843865e-06, "loss": 0.0557, "step": 3312 }, { "epoch": 0.6944036889540977, "grad_norm": 0.02722245827317238, "learning_rate": 2.259129295143021e-06, "loss": 0.0572, "step": 3313 }, { "epoch": 0.6946132886187382, "grad_norm": 0.023747239261865616, "learning_rate": 2.2562905974424824e-06, "loss": 0.0539, "step": 3314 }, { "epoch": 0.6948228882833788, "grad_norm": 0.027470340952277184, "learning_rate": 2.2534531645914078e-06, "loss": 0.0524, "step": 3315 }, { "epoch": 0.6950324879480193, "grad_norm": 0.02598888985812664, "learning_rate": 2.2506169978978543e-06, "loss": 0.0545, "step": 3316 }, { "epoch": 0.6952420876126598, "grad_norm": 0.024863192811608315, "learning_rate": 2.2477820986692945e-06, "loss": 0.0519, "step": 3317 }, { "epoch": 0.6954516872773003, "grad_norm": 0.0245378315448761, "learning_rate": 2.2449484682126133e-06, "loss": 0.0544, "step": 3318 }, { "epoch": 0.6956612869419408, "grad_norm": 0.02471884898841381, "learning_rate": 2.2421161078341163e-06, "loss": 0.0544, "step": 3319 }, { "epoch": 0.6958708866065815, "grad_norm": 0.02821512520313263, "learning_rate": 2.2392850188395227e-06, "loss": 0.0583, "step": 3320 }, { "epoch": 0.696080486271222, "grad_norm": 0.024967767298221588, "learning_rate": 2.23645520253396e-06, "loss": 0.0552, "step": 3321 }, { "epoch": 0.6962900859358625, "grad_norm": 0.022636888548731804, "learning_rate": 2.2336266602219762e-06, "loss": 0.0556, "step": 3322 }, { "epoch": 0.696499685600503, "grad_norm": 0.02549467422068119, "learning_rate": 2.230799393207526e-06, "loss": 0.0551, "step": 3323 }, { "epoch": 0.6967092852651435, "grad_norm": 0.022535137832164764, "learning_rate": 2.227973402793982e-06, "loss": 0.0537, "step": 3324 }, { "epoch": 0.6969188849297842, "grad_norm": 0.028567228466272354, "learning_rate": 2.225148690284122e-06, "loss": 0.0531, "step": 3325 }, { "epoch": 0.6971284845944247, "grad_norm": 0.025192799046635628, "learning_rate": 2.2223252569801412e-06, "loss": 0.0575, "step": 3326 }, { "epoch": 0.6973380842590652, "grad_norm": 0.02626647800207138, "learning_rate": 2.2195031041836396e-06, "loss": 0.0538, "step": 3327 }, { "epoch": 0.6975476839237057, "grad_norm": 0.026845086365938187, "learning_rate": 2.216682233195628e-06, "loss": 0.0532, "step": 3328 }, { "epoch": 0.6977572835883462, "grad_norm": 0.027518998831510544, "learning_rate": 2.2138626453165286e-06, "loss": 0.0531, "step": 3329 }, { "epoch": 0.6979668832529868, "grad_norm": 0.0274403914809227, "learning_rate": 2.2110443418461723e-06, "loss": 0.0507, "step": 3330 }, { "epoch": 0.6981764829176274, "grad_norm": 0.03426744416356087, "learning_rate": 2.208227324083794e-06, "loss": 0.0554, "step": 3331 }, { "epoch": 0.6983860825822679, "grad_norm": 0.029531171545386314, "learning_rate": 2.2054115933280407e-06, "loss": 0.0547, "step": 3332 }, { "epoch": 0.6985956822469084, "grad_norm": 0.02566445991396904, "learning_rate": 2.2025971508769607e-06, "loss": 0.0586, "step": 3333 }, { "epoch": 0.6988052819115489, "grad_norm": 0.04020659625530243, "learning_rate": 2.199783998028015e-06, "loss": 0.0545, "step": 3334 }, { "epoch": 0.6990148815761895, "grad_norm": 0.03343269228935242, "learning_rate": 2.1969721360780626e-06, "loss": 0.0539, "step": 3335 }, { "epoch": 0.69922448124083, "grad_norm": 0.02812224067747593, "learning_rate": 2.1941615663233733e-06, "loss": 0.0545, "step": 3336 }, { "epoch": 0.6994340809054705, "grad_norm": 0.04151785001158714, "learning_rate": 2.191352290059621e-06, "loss": 0.055, "step": 3337 }, { "epoch": 0.6996436805701111, "grad_norm": 0.028555648401379585, "learning_rate": 2.1885443085818795e-06, "loss": 0.0527, "step": 3338 }, { "epoch": 0.6998532802347516, "grad_norm": 0.034111034125089645, "learning_rate": 2.185737623184626e-06, "loss": 0.0539, "step": 3339 }, { "epoch": 0.7000628798993922, "grad_norm": 0.035869497805833817, "learning_rate": 2.1829322351617456e-06, "loss": 0.0548, "step": 3340 }, { "epoch": 0.7002724795640327, "grad_norm": 0.029365766793489456, "learning_rate": 2.1801281458065177e-06, "loss": 0.0542, "step": 3341 }, { "epoch": 0.7004820792286732, "grad_norm": 0.03768596425652504, "learning_rate": 2.1773253564116288e-06, "loss": 0.0529, "step": 3342 }, { "epoch": 0.7006916788933137, "grad_norm": 0.03441116586327553, "learning_rate": 2.174523868269166e-06, "loss": 0.056, "step": 3343 }, { "epoch": 0.7009012785579543, "grad_norm": 0.03129161149263382, "learning_rate": 2.171723682670613e-06, "loss": 0.054, "step": 3344 }, { "epoch": 0.7011108782225949, "grad_norm": 0.03608183562755585, "learning_rate": 2.1689248009068532e-06, "loss": 0.0549, "step": 3345 }, { "epoch": 0.7013204778872354, "grad_norm": 0.026896623894572258, "learning_rate": 2.166127224268172e-06, "loss": 0.055, "step": 3346 }, { "epoch": 0.7015300775518759, "grad_norm": 0.03324010595679283, "learning_rate": 2.163330954044253e-06, "loss": 0.0538, "step": 3347 }, { "epoch": 0.7017396772165164, "grad_norm": 0.02656789869070053, "learning_rate": 2.160535991524174e-06, "loss": 0.0549, "step": 3348 }, { "epoch": 0.7019492768811569, "grad_norm": 0.029164310544729233, "learning_rate": 2.1577423379964147e-06, "loss": 0.0556, "step": 3349 }, { "epoch": 0.7021588765457976, "grad_norm": 0.02474060468375683, "learning_rate": 2.154949994748847e-06, "loss": 0.0542, "step": 3350 }, { "epoch": 0.7023684762104381, "grad_norm": 0.031023385003209114, "learning_rate": 2.152158963068739e-06, "loss": 0.0539, "step": 3351 }, { "epoch": 0.7025780758750786, "grad_norm": 0.02580900862812996, "learning_rate": 2.149369244242758e-06, "loss": 0.0536, "step": 3352 }, { "epoch": 0.7027876755397191, "grad_norm": 0.027426833286881447, "learning_rate": 2.1465808395569644e-06, "loss": 0.0532, "step": 3353 }, { "epoch": 0.7029972752043597, "grad_norm": 0.031186439096927643, "learning_rate": 2.1437937502968093e-06, "loss": 0.0555, "step": 3354 }, { "epoch": 0.7032068748690002, "grad_norm": 0.031483497470617294, "learning_rate": 2.1410079777471444e-06, "loss": 0.0527, "step": 3355 }, { "epoch": 0.7034164745336408, "grad_norm": 0.03852154687047005, "learning_rate": 2.1382235231922053e-06, "loss": 0.0592, "step": 3356 }, { "epoch": 0.7036260741982813, "grad_norm": 0.03263459354639053, "learning_rate": 2.13544038791563e-06, "loss": 0.0546, "step": 3357 }, { "epoch": 0.7038356738629218, "grad_norm": 0.028121495619416237, "learning_rate": 2.1326585732004384e-06, "loss": 0.0508, "step": 3358 }, { "epoch": 0.7040452735275624, "grad_norm": 0.036633770912885666, "learning_rate": 2.1298780803290497e-06, "loss": 0.0558, "step": 3359 }, { "epoch": 0.7042548731922029, "grad_norm": 0.03133592754602432, "learning_rate": 2.127098910583273e-06, "loss": 0.0547, "step": 3360 }, { "epoch": 0.7044644728568434, "grad_norm": 0.026572570204734802, "learning_rate": 2.124321065244298e-06, "loss": 0.0551, "step": 3361 }, { "epoch": 0.704674072521484, "grad_norm": 0.023017263039946556, "learning_rate": 2.121544545592715e-06, "loss": 0.0539, "step": 3362 }, { "epoch": 0.7048836721861245, "grad_norm": 0.021083930507302284, "learning_rate": 2.1187693529085e-06, "loss": 0.0557, "step": 3363 }, { "epoch": 0.7050932718507651, "grad_norm": 0.02307903952896595, "learning_rate": 2.1159954884710133e-06, "loss": 0.0571, "step": 3364 }, { "epoch": 0.7053028715154056, "grad_norm": 0.02702009305357933, "learning_rate": 2.1132229535590092e-06, "loss": 0.054, "step": 3365 }, { "epoch": 0.7055124711800461, "grad_norm": 0.029108747839927673, "learning_rate": 2.110451749450624e-06, "loss": 0.0525, "step": 3366 }, { "epoch": 0.7057220708446866, "grad_norm": 0.024553043767809868, "learning_rate": 2.1076818774233796e-06, "loss": 0.0525, "step": 3367 }, { "epoch": 0.7059316705093271, "grad_norm": 0.03205152973532677, "learning_rate": 2.104913338754189e-06, "loss": 0.0532, "step": 3368 }, { "epoch": 0.7061412701739678, "grad_norm": 0.04320382699370384, "learning_rate": 2.102146134719349e-06, "loss": 0.0527, "step": 3369 }, { "epoch": 0.7063508698386083, "grad_norm": 0.04006104916334152, "learning_rate": 2.0993802665945396e-06, "loss": 0.0532, "step": 3370 }, { "epoch": 0.7065604695032488, "grad_norm": 0.022940274327993393, "learning_rate": 2.0966157356548255e-06, "loss": 0.0541, "step": 3371 }, { "epoch": 0.7067700691678893, "grad_norm": 0.03261176869273186, "learning_rate": 2.093852543174652e-06, "loss": 0.057, "step": 3372 }, { "epoch": 0.7069796688325298, "grad_norm": 0.033332716673612595, "learning_rate": 2.0910906904278542e-06, "loss": 0.0528, "step": 3373 }, { "epoch": 0.7071892684971705, "grad_norm": 0.02474953606724739, "learning_rate": 2.088330178687642e-06, "loss": 0.0564, "step": 3374 }, { "epoch": 0.707398868161811, "grad_norm": 0.028840798884630203, "learning_rate": 2.085571009226613e-06, "loss": 0.0528, "step": 3375 }, { "epoch": 0.7076084678264515, "grad_norm": 0.03333436697721481, "learning_rate": 2.082813183316745e-06, "loss": 0.0579, "step": 3376 }, { "epoch": 0.707818067491092, "grad_norm": 0.03125330060720444, "learning_rate": 2.080056702229393e-06, "loss": 0.0536, "step": 3377 }, { "epoch": 0.7080276671557325, "grad_norm": 0.03265562653541565, "learning_rate": 2.0773015672352938e-06, "loss": 0.0561, "step": 3378 }, { "epoch": 0.7082372668203731, "grad_norm": 0.030047666281461716, "learning_rate": 2.0745477796045664e-06, "loss": 0.0535, "step": 3379 }, { "epoch": 0.7084468664850136, "grad_norm": 0.03281790018081665, "learning_rate": 2.0717953406067033e-06, "loss": 0.0539, "step": 3380 }, { "epoch": 0.7086564661496542, "grad_norm": 0.031788330525159836, "learning_rate": 2.0690442515105797e-06, "loss": 0.0548, "step": 3381 }, { "epoch": 0.7088660658142947, "grad_norm": 0.03671342507004738, "learning_rate": 2.0662945135844493e-06, "loss": 0.0547, "step": 3382 }, { "epoch": 0.7090756654789352, "grad_norm": 0.05137075111269951, "learning_rate": 2.0635461280959386e-06, "loss": 0.054, "step": 3383 }, { "epoch": 0.7092852651435758, "grad_norm": 0.025410549715161324, "learning_rate": 2.060799096312051e-06, "loss": 0.0519, "step": 3384 }, { "epoch": 0.7094948648082163, "grad_norm": 0.041060563176870346, "learning_rate": 2.0580534194991696e-06, "loss": 0.0533, "step": 3385 }, { "epoch": 0.7097044644728568, "grad_norm": 0.04743944853544235, "learning_rate": 2.0553090989230527e-06, "loss": 0.0556, "step": 3386 }, { "epoch": 0.7099140641374974, "grad_norm": 0.02690894342958927, "learning_rate": 2.052566135848828e-06, "loss": 0.0542, "step": 3387 }, { "epoch": 0.7101236638021379, "grad_norm": 0.028761066496372223, "learning_rate": 2.0498245315410037e-06, "loss": 0.0551, "step": 3388 }, { "epoch": 0.7103332634667785, "grad_norm": 0.027141369879245758, "learning_rate": 2.047084287263458e-06, "loss": 0.0542, "step": 3389 }, { "epoch": 0.710542863131419, "grad_norm": 0.02281133271753788, "learning_rate": 2.0443454042794404e-06, "loss": 0.0546, "step": 3390 }, { "epoch": 0.7107524627960595, "grad_norm": 0.025255044922232628, "learning_rate": 2.0416078838515773e-06, "loss": 0.0542, "step": 3391 }, { "epoch": 0.7109620624607, "grad_norm": 0.02403366006910801, "learning_rate": 2.0388717272418653e-06, "loss": 0.0549, "step": 3392 }, { "epoch": 0.7111716621253406, "grad_norm": 0.022493207827210426, "learning_rate": 2.036136935711674e-06, "loss": 0.054, "step": 3393 }, { "epoch": 0.7113812617899812, "grad_norm": 0.022795764729380608, "learning_rate": 2.0334035105217394e-06, "loss": 0.0582, "step": 3394 }, { "epoch": 0.7115908614546217, "grad_norm": 0.024398164823651314, "learning_rate": 2.0306714529321687e-06, "loss": 0.0541, "step": 3395 }, { "epoch": 0.7118004611192622, "grad_norm": 0.022561099380254745, "learning_rate": 2.0279407642024427e-06, "loss": 0.0557, "step": 3396 }, { "epoch": 0.7120100607839027, "grad_norm": 0.02206028625369072, "learning_rate": 2.0252114455914056e-06, "loss": 0.0549, "step": 3397 }, { "epoch": 0.7122196604485432, "grad_norm": 0.021681906655430794, "learning_rate": 2.0224834983572766e-06, "loss": 0.0546, "step": 3398 }, { "epoch": 0.7124292601131839, "grad_norm": 0.02355903573334217, "learning_rate": 2.0197569237576352e-06, "loss": 0.0529, "step": 3399 }, { "epoch": 0.7126388597778244, "grad_norm": 0.02560109831392765, "learning_rate": 2.017031723049432e-06, "loss": 0.0537, "step": 3400 }, { "epoch": 0.7128484594424649, "grad_norm": 0.021792568266391754, "learning_rate": 2.0143078974889846e-06, "loss": 0.0549, "step": 3401 }, { "epoch": 0.7130580591071054, "grad_norm": 0.022112544625997543, "learning_rate": 2.011585448331978e-06, "loss": 0.0541, "step": 3402 }, { "epoch": 0.7132676587717459, "grad_norm": 0.02488597482442856, "learning_rate": 2.008864376833457e-06, "loss": 0.0531, "step": 3403 }, { "epoch": 0.7134772584363865, "grad_norm": 0.022757617756724358, "learning_rate": 2.0061446842478393e-06, "loss": 0.0544, "step": 3404 }, { "epoch": 0.713686858101027, "grad_norm": 0.028682250529527664, "learning_rate": 2.003426371828898e-06, "loss": 0.0521, "step": 3405 }, { "epoch": 0.7138964577656676, "grad_norm": 0.026006808504462242, "learning_rate": 2.000709440829779e-06, "loss": 0.055, "step": 3406 }, { "epoch": 0.7141060574303081, "grad_norm": 0.020952891558408737, "learning_rate": 1.997993892502983e-06, "loss": 0.0541, "step": 3407 }, { "epoch": 0.7143156570949486, "grad_norm": 0.030959462746977806, "learning_rate": 1.9952797281003784e-06, "loss": 0.0533, "step": 3408 }, { "epoch": 0.7145252567595892, "grad_norm": 0.022278886288404465, "learning_rate": 1.9925669488731975e-06, "loss": 0.0538, "step": 3409 }, { "epoch": 0.7147348564242297, "grad_norm": 0.02709982544183731, "learning_rate": 1.989855556072028e-06, "loss": 0.0511, "step": 3410 }, { "epoch": 0.7149444560888703, "grad_norm": 0.02348734438419342, "learning_rate": 1.98714555094682e-06, "loss": 0.0545, "step": 3411 }, { "epoch": 0.7151540557535108, "grad_norm": 0.02444126456975937, "learning_rate": 1.9844369347468895e-06, "loss": 0.0546, "step": 3412 }, { "epoch": 0.7153636554181513, "grad_norm": 0.020223500207066536, "learning_rate": 1.981729708720903e-06, "loss": 0.0556, "step": 3413 }, { "epoch": 0.7155732550827919, "grad_norm": 0.028548697009682655, "learning_rate": 1.979023874116895e-06, "loss": 0.0556, "step": 3414 }, { "epoch": 0.7157828547474324, "grad_norm": 0.021214155480265617, "learning_rate": 1.976319432182254e-06, "loss": 0.0555, "step": 3415 }, { "epoch": 0.7159924544120729, "grad_norm": 0.022887524217367172, "learning_rate": 1.973616384163728e-06, "loss": 0.0523, "step": 3416 }, { "epoch": 0.7162020540767134, "grad_norm": 0.024027662351727486, "learning_rate": 1.9709147313074177e-06, "loss": 0.0544, "step": 3417 }, { "epoch": 0.716411653741354, "grad_norm": 0.028380177915096283, "learning_rate": 1.968214474858787e-06, "loss": 0.053, "step": 3418 }, { "epoch": 0.7166212534059946, "grad_norm": 0.025715503841638565, "learning_rate": 1.965515616062656e-06, "loss": 0.0545, "step": 3419 }, { "epoch": 0.7168308530706351, "grad_norm": 0.03151887282729149, "learning_rate": 1.962818156163194e-06, "loss": 0.0551, "step": 3420 }, { "epoch": 0.7170404527352756, "grad_norm": 0.04665770381689072, "learning_rate": 1.9601220964039324e-06, "loss": 0.056, "step": 3421 }, { "epoch": 0.7172500523999161, "grad_norm": 0.02261737547814846, "learning_rate": 1.957427438027753e-06, "loss": 0.0548, "step": 3422 }, { "epoch": 0.7174596520645568, "grad_norm": 0.03728951886296272, "learning_rate": 1.9547341822768906e-06, "loss": 0.0542, "step": 3423 }, { "epoch": 0.7176692517291973, "grad_norm": 0.032367952167987823, "learning_rate": 1.9520423303929383e-06, "loss": 0.0532, "step": 3424 }, { "epoch": 0.7178788513938378, "grad_norm": 0.028157230466604233, "learning_rate": 1.9493518836168403e-06, "loss": 0.0551, "step": 3425 }, { "epoch": 0.7180884510584783, "grad_norm": 0.029264342039823532, "learning_rate": 1.946662843188888e-06, "loss": 0.0559, "step": 3426 }, { "epoch": 0.7182980507231188, "grad_norm": 0.02802872471511364, "learning_rate": 1.9439752103487324e-06, "loss": 0.0525, "step": 3427 }, { "epoch": 0.7185076503877594, "grad_norm": 0.02839348092675209, "learning_rate": 1.9412889863353683e-06, "loss": 0.0532, "step": 3428 }, { "epoch": 0.7187172500524, "grad_norm": 0.026173194870352745, "learning_rate": 1.9386041723871485e-06, "loss": 0.0528, "step": 3429 }, { "epoch": 0.7189268497170405, "grad_norm": 0.028481315821409225, "learning_rate": 1.9359207697417677e-06, "loss": 0.0537, "step": 3430 }, { "epoch": 0.719136449381681, "grad_norm": 0.028069039806723595, "learning_rate": 1.9332387796362744e-06, "loss": 0.0566, "step": 3431 }, { "epoch": 0.7193460490463215, "grad_norm": 0.031697165220975876, "learning_rate": 1.9305582033070714e-06, "loss": 0.0542, "step": 3432 }, { "epoch": 0.7195556487109621, "grad_norm": 0.029638856649398804, "learning_rate": 1.927879041989895e-06, "loss": 0.054, "step": 3433 }, { "epoch": 0.7197652483756026, "grad_norm": 0.022818326950073242, "learning_rate": 1.925201296919842e-06, "loss": 0.0546, "step": 3434 }, { "epoch": 0.7199748480402431, "grad_norm": 0.03616581857204437, "learning_rate": 1.9225249693313547e-06, "loss": 0.0542, "step": 3435 }, { "epoch": 0.7201844477048837, "grad_norm": 0.028139004483819008, "learning_rate": 1.919850060458215e-06, "loss": 0.053, "step": 3436 }, { "epoch": 0.7203940473695242, "grad_norm": 0.028990762308239937, "learning_rate": 1.91717657153356e-06, "loss": 0.0549, "step": 3437 }, { "epoch": 0.7206036470341648, "grad_norm": 0.022995000705122948, "learning_rate": 1.914504503789863e-06, "loss": 0.0554, "step": 3438 }, { "epoch": 0.7208132466988053, "grad_norm": 0.026573555544018745, "learning_rate": 1.9118338584589503e-06, "loss": 0.0556, "step": 3439 }, { "epoch": 0.7210228463634458, "grad_norm": 0.024811234325170517, "learning_rate": 1.909164636771986e-06, "loss": 0.0537, "step": 3440 }, { "epoch": 0.7212324460280863, "grad_norm": 0.02453632839024067, "learning_rate": 1.9064968399594818e-06, "loss": 0.0517, "step": 3441 }, { "epoch": 0.7214420456927269, "grad_norm": 0.024129144847393036, "learning_rate": 1.9038304692512943e-06, "loss": 0.0534, "step": 3442 }, { "epoch": 0.7216516453573675, "grad_norm": 0.024486854672431946, "learning_rate": 1.9011655258766165e-06, "loss": 0.0531, "step": 3443 }, { "epoch": 0.721861245022008, "grad_norm": 0.022883350029587746, "learning_rate": 1.8985020110639862e-06, "loss": 0.0555, "step": 3444 }, { "epoch": 0.7220708446866485, "grad_norm": 0.02430296316742897, "learning_rate": 1.8958399260412864e-06, "loss": 0.0543, "step": 3445 }, { "epoch": 0.722280444351289, "grad_norm": 0.023016586899757385, "learning_rate": 1.893179272035734e-06, "loss": 0.0543, "step": 3446 }, { "epoch": 0.7224900440159295, "grad_norm": 0.022667940706014633, "learning_rate": 1.890520050273892e-06, "loss": 0.0557, "step": 3447 }, { "epoch": 0.7226996436805702, "grad_norm": 0.025942491367459297, "learning_rate": 1.8878622619816629e-06, "loss": 0.0539, "step": 3448 }, { "epoch": 0.7229092433452107, "grad_norm": 0.027111142873764038, "learning_rate": 1.8852059083842838e-06, "loss": 0.0553, "step": 3449 }, { "epoch": 0.7231188430098512, "grad_norm": 0.024661056697368622, "learning_rate": 1.8825509907063328e-06, "loss": 0.055, "step": 3450 }, { "epoch": 0.7233284426744917, "grad_norm": 0.028169486671686172, "learning_rate": 1.8798975101717275e-06, "loss": 0.0546, "step": 3451 }, { "epoch": 0.7235380423391322, "grad_norm": 0.02489306405186653, "learning_rate": 1.8772454680037233e-06, "loss": 0.0551, "step": 3452 }, { "epoch": 0.7237476420037728, "grad_norm": 0.024785684421658516, "learning_rate": 1.8745948654249085e-06, "loss": 0.0548, "step": 3453 }, { "epoch": 0.7239572416684134, "grad_norm": 0.021179838106036186, "learning_rate": 1.871945703657213e-06, "loss": 0.0556, "step": 3454 }, { "epoch": 0.7241668413330539, "grad_norm": 0.02177123725414276, "learning_rate": 1.8692979839218984e-06, "loss": 0.0575, "step": 3455 }, { "epoch": 0.7243764409976944, "grad_norm": 0.025510409846901894, "learning_rate": 1.8666517074395607e-06, "loss": 0.0527, "step": 3456 }, { "epoch": 0.7245860406623349, "grad_norm": 0.01962488703429699, "learning_rate": 1.8640068754301355e-06, "loss": 0.0554, "step": 3457 }, { "epoch": 0.7247956403269755, "grad_norm": 0.023709991946816444, "learning_rate": 1.8613634891128912e-06, "loss": 0.0541, "step": 3458 }, { "epoch": 0.725005239991616, "grad_norm": 0.02632421813905239, "learning_rate": 1.8587215497064242e-06, "loss": 0.0557, "step": 3459 }, { "epoch": 0.7252148396562565, "grad_norm": 0.0217702928930521, "learning_rate": 1.8560810584286726e-06, "loss": 0.0557, "step": 3460 }, { "epoch": 0.7254244393208971, "grad_norm": 0.02565830387175083, "learning_rate": 1.853442016496898e-06, "loss": 0.0528, "step": 3461 }, { "epoch": 0.7256340389855376, "grad_norm": 0.034673016518354416, "learning_rate": 1.8508044251277019e-06, "loss": 0.0535, "step": 3462 }, { "epoch": 0.7258436386501782, "grad_norm": 0.02693593129515648, "learning_rate": 1.8481682855370098e-06, "loss": 0.0535, "step": 3463 }, { "epoch": 0.7260532383148187, "grad_norm": 0.018855255097150803, "learning_rate": 1.8455335989400836e-06, "loss": 0.0527, "step": 3464 }, { "epoch": 0.7262628379794592, "grad_norm": 0.039858147501945496, "learning_rate": 1.8429003665515165e-06, "loss": 0.0535, "step": 3465 }, { "epoch": 0.7264724376440997, "grad_norm": 0.03316653147339821, "learning_rate": 1.8402685895852213e-06, "loss": 0.0561, "step": 3466 }, { "epoch": 0.7266820373087403, "grad_norm": 0.018629280850291252, "learning_rate": 1.8376382692544498e-06, "loss": 0.0525, "step": 3467 }, { "epoch": 0.7268916369733809, "grad_norm": 0.024704501032829285, "learning_rate": 1.835009406771781e-06, "loss": 0.0541, "step": 3468 }, { "epoch": 0.7271012366380214, "grad_norm": 0.020816821604967117, "learning_rate": 1.8323820033491163e-06, "loss": 0.0552, "step": 3469 }, { "epoch": 0.7273108363026619, "grad_norm": 0.021062025800347328, "learning_rate": 1.829756060197692e-06, "loss": 0.0548, "step": 3470 }, { "epoch": 0.7275204359673024, "grad_norm": 0.026151493191719055, "learning_rate": 1.8271315785280658e-06, "loss": 0.0545, "step": 3471 }, { "epoch": 0.7277300356319429, "grad_norm": 0.019930191338062286, "learning_rate": 1.8245085595501205e-06, "loss": 0.0541, "step": 3472 }, { "epoch": 0.7279396352965836, "grad_norm": 0.028385937213897705, "learning_rate": 1.8218870044730702e-06, "loss": 0.0535, "step": 3473 }, { "epoch": 0.7281492349612241, "grad_norm": 0.021223368123173714, "learning_rate": 1.8192669145054503e-06, "loss": 0.0507, "step": 3474 }, { "epoch": 0.7283588346258646, "grad_norm": 0.02485741302371025, "learning_rate": 1.8166482908551242e-06, "loss": 0.0555, "step": 3475 }, { "epoch": 0.7285684342905051, "grad_norm": 0.022934934124350548, "learning_rate": 1.8140311347292744e-06, "loss": 0.0534, "step": 3476 }, { "epoch": 0.7287780339551456, "grad_norm": 0.020589333027601242, "learning_rate": 1.8114154473344081e-06, "loss": 0.0537, "step": 3477 }, { "epoch": 0.7289876336197862, "grad_norm": 0.021330052986741066, "learning_rate": 1.8088012298763603e-06, "loss": 0.0529, "step": 3478 }, { "epoch": 0.7291972332844268, "grad_norm": 0.025633566081523895, "learning_rate": 1.8061884835602805e-06, "loss": 0.0564, "step": 3479 }, { "epoch": 0.7294068329490673, "grad_norm": 0.021011043339967728, "learning_rate": 1.8035772095906462e-06, "loss": 0.0529, "step": 3480 }, { "epoch": 0.7296164326137078, "grad_norm": 0.023229733109474182, "learning_rate": 1.8009674091712565e-06, "loss": 0.0541, "step": 3481 }, { "epoch": 0.7298260322783483, "grad_norm": 0.02509014680981636, "learning_rate": 1.7983590835052267e-06, "loss": 0.054, "step": 3482 }, { "epoch": 0.7300356319429889, "grad_norm": 0.021912094205617905, "learning_rate": 1.7957522337949924e-06, "loss": 0.0527, "step": 3483 }, { "epoch": 0.7302452316076294, "grad_norm": 0.024635696783661842, "learning_rate": 1.7931468612423142e-06, "loss": 0.0531, "step": 3484 }, { "epoch": 0.73045483127227, "grad_norm": 0.018132206052541733, "learning_rate": 1.7905429670482654e-06, "loss": 0.0552, "step": 3485 }, { "epoch": 0.7306644309369105, "grad_norm": 0.023826826363801956, "learning_rate": 1.7879405524132426e-06, "loss": 0.0526, "step": 3486 }, { "epoch": 0.730874030601551, "grad_norm": 0.020080413669347763, "learning_rate": 1.7853396185369592e-06, "loss": 0.0542, "step": 3487 }, { "epoch": 0.7310836302661916, "grad_norm": 0.02045084349811077, "learning_rate": 1.7827401666184434e-06, "loss": 0.0543, "step": 3488 }, { "epoch": 0.7312932299308321, "grad_norm": 0.023532617837190628, "learning_rate": 1.7801421978560418e-06, "loss": 0.0532, "step": 3489 }, { "epoch": 0.7315028295954726, "grad_norm": 0.02120790258049965, "learning_rate": 1.7775457134474177e-06, "loss": 0.0526, "step": 3490 }, { "epoch": 0.7317124292601132, "grad_norm": 0.024196181446313858, "learning_rate": 1.7749507145895518e-06, "loss": 0.0563, "step": 3491 }, { "epoch": 0.7319220289247538, "grad_norm": 0.023171545937657356, "learning_rate": 1.772357202478735e-06, "loss": 0.0559, "step": 3492 }, { "epoch": 0.7321316285893943, "grad_norm": 0.02514570765197277, "learning_rate": 1.7697651783105795e-06, "loss": 0.054, "step": 3493 }, { "epoch": 0.7323412282540348, "grad_norm": 0.02238095924258232, "learning_rate": 1.7671746432800058e-06, "loss": 0.0535, "step": 3494 }, { "epoch": 0.7325508279186753, "grad_norm": 0.030948419123888016, "learning_rate": 1.7645855985812476e-06, "loss": 0.0521, "step": 3495 }, { "epoch": 0.7327604275833158, "grad_norm": 0.02502829022705555, "learning_rate": 1.7619980454078572e-06, "loss": 0.0525, "step": 3496 }, { "epoch": 0.7329700272479565, "grad_norm": 0.02190042845904827, "learning_rate": 1.7594119849526947e-06, "loss": 0.0547, "step": 3497 }, { "epoch": 0.733179626912597, "grad_norm": 0.031756095588207245, "learning_rate": 1.756827418407936e-06, "loss": 0.0517, "step": 3498 }, { "epoch": 0.7333892265772375, "grad_norm": 0.03542139008641243, "learning_rate": 1.7542443469650638e-06, "loss": 0.0533, "step": 3499 }, { "epoch": 0.733598826241878, "grad_norm": 0.024449272081255913, "learning_rate": 1.7516627718148716e-06, "loss": 0.0501, "step": 3500 }, { "epoch": 0.7338084259065185, "grad_norm": 0.0264756977558136, "learning_rate": 1.7490826941474687e-06, "loss": 0.0556, "step": 3501 }, { "epoch": 0.7340180255711591, "grad_norm": 0.026716774329543114, "learning_rate": 1.7465041151522666e-06, "loss": 0.0549, "step": 3502 }, { "epoch": 0.7342276252357997, "grad_norm": 0.03247566148638725, "learning_rate": 1.7439270360179933e-06, "loss": 0.0522, "step": 3503 }, { "epoch": 0.7344372249004402, "grad_norm": 0.04316284507513046, "learning_rate": 1.7413514579326806e-06, "loss": 0.0556, "step": 3504 }, { "epoch": 0.7346468245650807, "grad_norm": 0.021153295412659645, "learning_rate": 1.7387773820836668e-06, "loss": 0.055, "step": 3505 }, { "epoch": 0.7348564242297212, "grad_norm": 0.045009203255176544, "learning_rate": 1.7362048096576023e-06, "loss": 0.0544, "step": 3506 }, { "epoch": 0.7350660238943618, "grad_norm": 0.040110033005476, "learning_rate": 1.7336337418404442e-06, "loss": 0.0585, "step": 3507 }, { "epoch": 0.7352756235590023, "grad_norm": 0.024850115180015564, "learning_rate": 1.7310641798174516e-06, "loss": 0.0576, "step": 3508 }, { "epoch": 0.7354852232236428, "grad_norm": 0.03829395771026611, "learning_rate": 1.728496124773194e-06, "loss": 0.0543, "step": 3509 }, { "epoch": 0.7356948228882834, "grad_norm": 0.02593119814991951, "learning_rate": 1.7259295778915419e-06, "loss": 0.0555, "step": 3510 }, { "epoch": 0.7359044225529239, "grad_norm": 0.027632324025034904, "learning_rate": 1.7233645403556765e-06, "loss": 0.0559, "step": 3511 }, { "epoch": 0.7361140222175645, "grad_norm": 0.03341587260365486, "learning_rate": 1.7208010133480751e-06, "loss": 0.0545, "step": 3512 }, { "epoch": 0.736323621882205, "grad_norm": 0.023744579404592514, "learning_rate": 1.7182389980505254e-06, "loss": 0.0524, "step": 3513 }, { "epoch": 0.7365332215468455, "grad_norm": 0.02730092778801918, "learning_rate": 1.7156784956441181e-06, "loss": 0.058, "step": 3514 }, { "epoch": 0.736742821211486, "grad_norm": 0.04322980344295502, "learning_rate": 1.7131195073092417e-06, "loss": 0.0545, "step": 3515 }, { "epoch": 0.7369524208761266, "grad_norm": 0.02667373977601528, "learning_rate": 1.710562034225588e-06, "loss": 0.0557, "step": 3516 }, { "epoch": 0.7371620205407672, "grad_norm": 0.03887426108121872, "learning_rate": 1.7080060775721546e-06, "loss": 0.0514, "step": 3517 }, { "epoch": 0.7373716202054077, "grad_norm": 0.06282377243041992, "learning_rate": 1.7054516385272345e-06, "loss": 0.0567, "step": 3518 }, { "epoch": 0.7375812198700482, "grad_norm": 0.041183438152074814, "learning_rate": 1.7028987182684248e-06, "loss": 0.0551, "step": 3519 }, { "epoch": 0.7377908195346887, "grad_norm": 0.026868589222431183, "learning_rate": 1.7003473179726226e-06, "loss": 0.0558, "step": 3520 }, { "epoch": 0.7380004191993292, "grad_norm": 0.061398621648550034, "learning_rate": 1.6977974388160213e-06, "loss": 0.0527, "step": 3521 }, { "epoch": 0.7382100188639699, "grad_norm": 0.05005694553256035, "learning_rate": 1.695249081974113e-06, "loss": 0.0526, "step": 3522 }, { "epoch": 0.7384196185286104, "grad_norm": 0.01858423836529255, "learning_rate": 1.6927022486216915e-06, "loss": 0.0522, "step": 3523 }, { "epoch": 0.7386292181932509, "grad_norm": 0.045225538313388824, "learning_rate": 1.6901569399328483e-06, "loss": 0.0529, "step": 3524 }, { "epoch": 0.7388388178578914, "grad_norm": 0.03772973269224167, "learning_rate": 1.6876131570809667e-06, "loss": 0.0558, "step": 3525 }, { "epoch": 0.7390484175225319, "grad_norm": 0.029959680512547493, "learning_rate": 1.6850709012387328e-06, "loss": 0.0574, "step": 3526 }, { "epoch": 0.7392580171871725, "grad_norm": 0.06689772009849548, "learning_rate": 1.6825301735781252e-06, "loss": 0.0507, "step": 3527 }, { "epoch": 0.7394676168518131, "grad_norm": 0.04782269150018692, "learning_rate": 1.679990975270417e-06, "loss": 0.0543, "step": 3528 }, { "epoch": 0.7396772165164536, "grad_norm": 0.035698726773262024, "learning_rate": 1.6774533074861793e-06, "loss": 0.053, "step": 3529 }, { "epoch": 0.7398868161810941, "grad_norm": 0.07200644910335541, "learning_rate": 1.6749171713952783e-06, "loss": 0.0537, "step": 3530 }, { "epoch": 0.7400964158457346, "grad_norm": 0.05012660101056099, "learning_rate": 1.6723825681668692e-06, "loss": 0.0529, "step": 3531 }, { "epoch": 0.7403060155103752, "grad_norm": 0.031019916757941246, "learning_rate": 1.6698494989694064e-06, "loss": 0.0542, "step": 3532 }, { "epoch": 0.7405156151750157, "grad_norm": 0.07774803787469864, "learning_rate": 1.6673179649706312e-06, "loss": 0.0546, "step": 3533 }, { "epoch": 0.7407252148396563, "grad_norm": 0.06452113389968872, "learning_rate": 1.664787967337584e-06, "loss": 0.053, "step": 3534 }, { "epoch": 0.7409348145042968, "grad_norm": 0.024198826402425766, "learning_rate": 1.6622595072365887e-06, "loss": 0.0523, "step": 3535 }, { "epoch": 0.7411444141689373, "grad_norm": 0.07494359463453293, "learning_rate": 1.6597325858332675e-06, "loss": 0.0525, "step": 3536 }, { "epoch": 0.7413540138335779, "grad_norm": 0.060714419931173325, "learning_rate": 1.6572072042925335e-06, "loss": 0.0551, "step": 3537 }, { "epoch": 0.7415636134982184, "grad_norm": 0.028920724987983704, "learning_rate": 1.6546833637785814e-06, "loss": 0.055, "step": 3538 }, { "epoch": 0.7417732131628589, "grad_norm": 0.07191255688667297, "learning_rate": 1.652161065454903e-06, "loss": 0.0569, "step": 3539 }, { "epoch": 0.7419828128274994, "grad_norm": 0.045189227908849716, "learning_rate": 1.64964031048428e-06, "loss": 0.0538, "step": 3540 }, { "epoch": 0.74219241249214, "grad_norm": 0.034395113587379456, "learning_rate": 1.6471211000287762e-06, "loss": 0.0559, "step": 3541 }, { "epoch": 0.7424020121567806, "grad_norm": 0.062001634389162064, "learning_rate": 1.6446034352497504e-06, "loss": 0.0567, "step": 3542 }, { "epoch": 0.7426116118214211, "grad_norm": 0.03360583260655403, "learning_rate": 1.6420873173078422e-06, "loss": 0.0552, "step": 3543 }, { "epoch": 0.7428212114860616, "grad_norm": 0.0365772545337677, "learning_rate": 1.6395727473629852e-06, "loss": 0.051, "step": 3544 }, { "epoch": 0.7430308111507021, "grad_norm": 0.046734243631362915, "learning_rate": 1.637059726574392e-06, "loss": 0.0532, "step": 3545 }, { "epoch": 0.7432404108153426, "grad_norm": 0.021529680117964745, "learning_rate": 1.634548256100566e-06, "loss": 0.053, "step": 3546 }, { "epoch": 0.7434500104799833, "grad_norm": 0.04237625375390053, "learning_rate": 1.632038337099297e-06, "loss": 0.0539, "step": 3547 }, { "epoch": 0.7436596101446238, "grad_norm": 0.029763460159301758, "learning_rate": 1.6295299707276546e-06, "loss": 0.0511, "step": 3548 }, { "epoch": 0.7438692098092643, "grad_norm": 0.031175674870610237, "learning_rate": 1.6270231581419943e-06, "loss": 0.0551, "step": 3549 }, { "epoch": 0.7440788094739048, "grad_norm": 0.04852377995848656, "learning_rate": 1.6245179004979588e-06, "loss": 0.054, "step": 3550 }, { "epoch": 0.7442884091385453, "grad_norm": 0.027154749259352684, "learning_rate": 1.6220141989504683e-06, "loss": 0.0528, "step": 3551 }, { "epoch": 0.744498008803186, "grad_norm": 0.026955176144838333, "learning_rate": 1.6195120546537307e-06, "loss": 0.0542, "step": 3552 }, { "epoch": 0.7447076084678265, "grad_norm": 0.029728444293141365, "learning_rate": 1.6170114687612349e-06, "loss": 0.0566, "step": 3553 }, { "epoch": 0.744917208132467, "grad_norm": 0.018385691568255424, "learning_rate": 1.6145124424257497e-06, "loss": 0.0548, "step": 3554 }, { "epoch": 0.7451268077971075, "grad_norm": 0.02659742347896099, "learning_rate": 1.6120149767993237e-06, "loss": 0.0572, "step": 3555 }, { "epoch": 0.745336407461748, "grad_norm": 0.030509591102600098, "learning_rate": 1.6095190730332893e-06, "loss": 0.0547, "step": 3556 }, { "epoch": 0.7455460071263886, "grad_norm": 0.02252962999045849, "learning_rate": 1.60702473227826e-06, "loss": 0.0554, "step": 3557 }, { "epoch": 0.7457556067910291, "grad_norm": 0.029321059584617615, "learning_rate": 1.6045319556841227e-06, "loss": 0.0525, "step": 3558 }, { "epoch": 0.7459652064556697, "grad_norm": 0.026201212778687477, "learning_rate": 1.6020407444000497e-06, "loss": 0.0564, "step": 3559 }, { "epoch": 0.7461748061203102, "grad_norm": 0.028670430183410645, "learning_rate": 1.5995510995744879e-06, "loss": 0.0548, "step": 3560 }, { "epoch": 0.7463844057849508, "grad_norm": 0.028581839054822922, "learning_rate": 1.5970630223551614e-06, "loss": 0.0519, "step": 3561 }, { "epoch": 0.7465940054495913, "grad_norm": 0.025404004380106926, "learning_rate": 1.5945765138890746e-06, "loss": 0.0549, "step": 3562 }, { "epoch": 0.7468036051142318, "grad_norm": 0.04105531796813011, "learning_rate": 1.5920915753225097e-06, "loss": 0.0537, "step": 3563 }, { "epoch": 0.7470132047788723, "grad_norm": 0.02948727272450924, "learning_rate": 1.5896082078010183e-06, "loss": 0.0519, "step": 3564 }, { "epoch": 0.7472228044435129, "grad_norm": 0.02374359965324402, "learning_rate": 1.5871264124694368e-06, "loss": 0.0529, "step": 3565 }, { "epoch": 0.7474324041081535, "grad_norm": 0.02481861598789692, "learning_rate": 1.5846461904718686e-06, "loss": 0.0539, "step": 3566 }, { "epoch": 0.747642003772794, "grad_norm": 0.019914066419005394, "learning_rate": 1.5821675429516981e-06, "loss": 0.0535, "step": 3567 }, { "epoch": 0.7478516034374345, "grad_norm": 0.027355968952178955, "learning_rate": 1.5796904710515792e-06, "loss": 0.054, "step": 3568 }, { "epoch": 0.748061203102075, "grad_norm": 0.02385944128036499, "learning_rate": 1.577214975913443e-06, "loss": 0.0551, "step": 3569 }, { "epoch": 0.7482708027667155, "grad_norm": 0.022167477756738663, "learning_rate": 1.574741058678495e-06, "loss": 0.0533, "step": 3570 }, { "epoch": 0.7484804024313562, "grad_norm": 0.024447597563266754, "learning_rate": 1.5722687204872038e-06, "loss": 0.0569, "step": 3571 }, { "epoch": 0.7486900020959967, "grad_norm": 0.019285105168819427, "learning_rate": 1.569797962479321e-06, "loss": 0.0547, "step": 3572 }, { "epoch": 0.7488996017606372, "grad_norm": 0.026579929515719414, "learning_rate": 1.5673287857938663e-06, "loss": 0.0544, "step": 3573 }, { "epoch": 0.7491092014252777, "grad_norm": 0.021767770871520042, "learning_rate": 1.564861191569127e-06, "loss": 0.0549, "step": 3574 }, { "epoch": 0.7493188010899182, "grad_norm": 0.022724144160747528, "learning_rate": 1.5623951809426663e-06, "loss": 0.0553, "step": 3575 }, { "epoch": 0.7495284007545588, "grad_norm": 0.02660524845123291, "learning_rate": 1.5599307550513132e-06, "loss": 0.0526, "step": 3576 }, { "epoch": 0.7497380004191994, "grad_norm": 0.02129141427576542, "learning_rate": 1.5574679150311656e-06, "loss": 0.0547, "step": 3577 }, { "epoch": 0.7499476000838399, "grad_norm": 0.02047175168991089, "learning_rate": 1.555006662017594e-06, "loss": 0.0529, "step": 3578 }, { "epoch": 0.7501571997484804, "grad_norm": 0.02189241163432598, "learning_rate": 1.5525469971452362e-06, "loss": 0.0533, "step": 3579 }, { "epoch": 0.7503667994131209, "grad_norm": 0.019183902069926262, "learning_rate": 1.5500889215479974e-06, "loss": 0.0535, "step": 3580 }, { "epoch": 0.7505763990777615, "grad_norm": 0.022731618955731392, "learning_rate": 1.54763243635905e-06, "loss": 0.054, "step": 3581 }, { "epoch": 0.750785998742402, "grad_norm": 0.02083863690495491, "learning_rate": 1.5451775427108302e-06, "loss": 0.0566, "step": 3582 }, { "epoch": 0.7509955984070426, "grad_norm": 0.021793080493807793, "learning_rate": 1.5427242417350474e-06, "loss": 0.0539, "step": 3583 }, { "epoch": 0.7512051980716831, "grad_norm": 0.024363208562135696, "learning_rate": 1.540272534562669e-06, "loss": 0.0542, "step": 3584 }, { "epoch": 0.7514147977363236, "grad_norm": 0.020569128915667534, "learning_rate": 1.5378224223239341e-06, "loss": 0.0554, "step": 3585 }, { "epoch": 0.7516243974009642, "grad_norm": 0.02453795075416565, "learning_rate": 1.5353739061483446e-06, "loss": 0.0513, "step": 3586 }, { "epoch": 0.7518339970656047, "grad_norm": 0.022484583780169487, "learning_rate": 1.5329269871646646e-06, "loss": 0.0539, "step": 3587 }, { "epoch": 0.7520435967302452, "grad_norm": 0.021373063325881958, "learning_rate": 1.530481666500922e-06, "loss": 0.0554, "step": 3588 }, { "epoch": 0.7522531963948857, "grad_norm": 0.026554672047495842, "learning_rate": 1.5280379452844124e-06, "loss": 0.0527, "step": 3589 }, { "epoch": 0.7524627960595263, "grad_norm": 0.023381482809782028, "learning_rate": 1.525595824641687e-06, "loss": 0.0568, "step": 3590 }, { "epoch": 0.7526723957241669, "grad_norm": 0.027381494641304016, "learning_rate": 1.5231553056985642e-06, "loss": 0.0522, "step": 3591 }, { "epoch": 0.7528819953888074, "grad_norm": 0.03211834654211998, "learning_rate": 1.5207163895801252e-06, "loss": 0.0554, "step": 3592 }, { "epoch": 0.7530915950534479, "grad_norm": 0.021453700959682465, "learning_rate": 1.5182790774107082e-06, "loss": 0.0549, "step": 3593 }, { "epoch": 0.7533011947180884, "grad_norm": 0.037080686539411545, "learning_rate": 1.5158433703139114e-06, "loss": 0.0532, "step": 3594 }, { "epoch": 0.7535107943827289, "grad_norm": 0.02473445050418377, "learning_rate": 1.5134092694125968e-06, "loss": 0.0549, "step": 3595 }, { "epoch": 0.7537203940473696, "grad_norm": 0.02276427671313286, "learning_rate": 1.510976775828887e-06, "loss": 0.0568, "step": 3596 }, { "epoch": 0.7539299937120101, "grad_norm": 0.020384134724736214, "learning_rate": 1.508545890684157e-06, "loss": 0.054, "step": 3597 }, { "epoch": 0.7541395933766506, "grad_norm": 0.025425709784030914, "learning_rate": 1.5061166150990475e-06, "loss": 0.0536, "step": 3598 }, { "epoch": 0.7543491930412911, "grad_norm": 0.022402847185730934, "learning_rate": 1.5036889501934533e-06, "loss": 0.0543, "step": 3599 }, { "epoch": 0.7545587927059316, "grad_norm": 0.02631525509059429, "learning_rate": 1.5012628970865245e-06, "loss": 0.0548, "step": 3600 }, { "epoch": 0.7547683923705722, "grad_norm": 0.019039105623960495, "learning_rate": 1.498838456896674e-06, "loss": 0.0555, "step": 3601 }, { "epoch": 0.7549779920352128, "grad_norm": 0.028720203787088394, "learning_rate": 1.4964156307415673e-06, "loss": 0.0541, "step": 3602 }, { "epoch": 0.7551875916998533, "grad_norm": 0.02376984804868698, "learning_rate": 1.493994419738129e-06, "loss": 0.053, "step": 3603 }, { "epoch": 0.7553971913644938, "grad_norm": 0.028366727754473686, "learning_rate": 1.4915748250025346e-06, "loss": 0.0518, "step": 3604 }, { "epoch": 0.7556067910291343, "grad_norm": 0.024544060230255127, "learning_rate": 1.4891568476502154e-06, "loss": 0.0542, "step": 3605 }, { "epoch": 0.7558163906937749, "grad_norm": 0.029393775388598442, "learning_rate": 1.486740488795862e-06, "loss": 0.0566, "step": 3606 }, { "epoch": 0.7560259903584154, "grad_norm": 0.030081739649176598, "learning_rate": 1.484325749553412e-06, "loss": 0.054, "step": 3607 }, { "epoch": 0.756235590023056, "grad_norm": 0.03067159280180931, "learning_rate": 1.4819126310360626e-06, "loss": 0.0546, "step": 3608 }, { "epoch": 0.7564451896876965, "grad_norm": 0.02665751427412033, "learning_rate": 1.4795011343562594e-06, "loss": 0.0545, "step": 3609 }, { "epoch": 0.756654789352337, "grad_norm": 0.025258727371692657, "learning_rate": 1.4770912606257003e-06, "loss": 0.0552, "step": 3610 }, { "epoch": 0.7568643890169776, "grad_norm": 0.028663890436291695, "learning_rate": 1.4746830109553388e-06, "loss": 0.0521, "step": 3611 }, { "epoch": 0.7570739886816181, "grad_norm": 0.02796069160103798, "learning_rate": 1.472276386455378e-06, "loss": 0.0549, "step": 3612 }, { "epoch": 0.7572835883462586, "grad_norm": 0.028017984703183174, "learning_rate": 1.4698713882352694e-06, "loss": 0.0551, "step": 3613 }, { "epoch": 0.7574931880108992, "grad_norm": 0.036044858396053314, "learning_rate": 1.4674680174037186e-06, "loss": 0.0549, "step": 3614 }, { "epoch": 0.7577027876755397, "grad_norm": 0.024651629850268364, "learning_rate": 1.465066275068676e-06, "loss": 0.0535, "step": 3615 }, { "epoch": 0.7579123873401803, "grad_norm": 0.02148495241999626, "learning_rate": 1.462666162337349e-06, "loss": 0.0529, "step": 3616 }, { "epoch": 0.7581219870048208, "grad_norm": 0.023804927244782448, "learning_rate": 1.4602676803161842e-06, "loss": 0.0557, "step": 3617 }, { "epoch": 0.7583315866694613, "grad_norm": 0.017874160781502724, "learning_rate": 1.4578708301108835e-06, "loss": 0.0548, "step": 3618 }, { "epoch": 0.7585411863341018, "grad_norm": 0.021595729514956474, "learning_rate": 1.4554756128263958e-06, "loss": 0.0567, "step": 3619 }, { "epoch": 0.7587507859987423, "grad_norm": 0.022326629608869553, "learning_rate": 1.4530820295669145e-06, "loss": 0.0534, "step": 3620 }, { "epoch": 0.758960385663383, "grad_norm": 0.021938448771834373, "learning_rate": 1.4506900814358794e-06, "loss": 0.0532, "step": 3621 }, { "epoch": 0.7591699853280235, "grad_norm": 0.02145432122051716, "learning_rate": 1.4482997695359807e-06, "loss": 0.0548, "step": 3622 }, { "epoch": 0.759379584992664, "grad_norm": 0.019530145451426506, "learning_rate": 1.445911094969149e-06, "loss": 0.0541, "step": 3623 }, { "epoch": 0.7595891846573045, "grad_norm": 0.01897241175174713, "learning_rate": 1.4435240588365645e-06, "loss": 0.0528, "step": 3624 }, { "epoch": 0.759798784321945, "grad_norm": 0.02352256327867508, "learning_rate": 1.4411386622386519e-06, "loss": 0.0556, "step": 3625 }, { "epoch": 0.7600083839865857, "grad_norm": 0.016778651624917984, "learning_rate": 1.4387549062750767e-06, "loss": 0.055, "step": 3626 }, { "epoch": 0.7602179836512262, "grad_norm": 0.02301890030503273, "learning_rate": 1.4363727920447478e-06, "loss": 0.053, "step": 3627 }, { "epoch": 0.7604275833158667, "grad_norm": 0.020533403381705284, "learning_rate": 1.4339923206458222e-06, "loss": 0.0549, "step": 3628 }, { "epoch": 0.7606371829805072, "grad_norm": 0.02557838149368763, "learning_rate": 1.431613493175697e-06, "loss": 0.0544, "step": 3629 }, { "epoch": 0.7608467826451477, "grad_norm": 0.026523873209953308, "learning_rate": 1.4292363107310091e-06, "loss": 0.0518, "step": 3630 }, { "epoch": 0.7610563823097883, "grad_norm": 0.02507561445236206, "learning_rate": 1.4268607744076419e-06, "loss": 0.0542, "step": 3631 }, { "epoch": 0.7612659819744289, "grad_norm": 0.034792933613061905, "learning_rate": 1.424486885300715e-06, "loss": 0.0514, "step": 3632 }, { "epoch": 0.7614755816390694, "grad_norm": 0.025153324007987976, "learning_rate": 1.4221146445045903e-06, "loss": 0.0534, "step": 3633 }, { "epoch": 0.7616851813037099, "grad_norm": 0.0239273589104414, "learning_rate": 1.419744053112871e-06, "loss": 0.0546, "step": 3634 }, { "epoch": 0.7618947809683505, "grad_norm": 0.027409644797444344, "learning_rate": 1.417375112218401e-06, "loss": 0.053, "step": 3635 }, { "epoch": 0.762104380632991, "grad_norm": 0.021910734474658966, "learning_rate": 1.4150078229132586e-06, "loss": 0.0506, "step": 3636 }, { "epoch": 0.7623139802976315, "grad_norm": 0.028264936059713364, "learning_rate": 1.4126421862887668e-06, "loss": 0.0526, "step": 3637 }, { "epoch": 0.762523579962272, "grad_norm": 0.017890270799398422, "learning_rate": 1.41027820343548e-06, "loss": 0.0564, "step": 3638 }, { "epoch": 0.7627331796269126, "grad_norm": 0.02689182385802269, "learning_rate": 1.4079158754431981e-06, "loss": 0.0544, "step": 3639 }, { "epoch": 0.7629427792915532, "grad_norm": 0.024087607860565186, "learning_rate": 1.4055552034009496e-06, "loss": 0.0558, "step": 3640 }, { "epoch": 0.7631523789561937, "grad_norm": 0.019843678921461105, "learning_rate": 1.4031961883970053e-06, "loss": 0.0523, "step": 3641 }, { "epoch": 0.7633619786208342, "grad_norm": 0.029412388801574707, "learning_rate": 1.4008388315188743e-06, "loss": 0.0544, "step": 3642 }, { "epoch": 0.7635715782854747, "grad_norm": 0.022888310253620148, "learning_rate": 1.3984831338532916e-06, "loss": 0.0546, "step": 3643 }, { "epoch": 0.7637811779501152, "grad_norm": 0.02272786572575569, "learning_rate": 1.3961290964862356e-06, "loss": 0.053, "step": 3644 }, { "epoch": 0.7639907776147559, "grad_norm": 0.028499163687229156, "learning_rate": 1.3937767205029196e-06, "loss": 0.0532, "step": 3645 }, { "epoch": 0.7642003772793964, "grad_norm": 0.02279391512274742, "learning_rate": 1.3914260069877844e-06, "loss": 0.0518, "step": 3646 }, { "epoch": 0.7644099769440369, "grad_norm": 0.03257404640316963, "learning_rate": 1.3890769570245122e-06, "loss": 0.056, "step": 3647 }, { "epoch": 0.7646195766086774, "grad_norm": 0.030943913385272026, "learning_rate": 1.386729571696011e-06, "loss": 0.0548, "step": 3648 }, { "epoch": 0.7648291762733179, "grad_norm": 0.017855705693364143, "learning_rate": 1.3843838520844288e-06, "loss": 0.0545, "step": 3649 }, { "epoch": 0.7650387759379585, "grad_norm": 0.029183411970734596, "learning_rate": 1.3820397992711377e-06, "loss": 0.0562, "step": 3650 }, { "epoch": 0.7652483756025991, "grad_norm": 0.02148706465959549, "learning_rate": 1.3796974143367475e-06, "loss": 0.0561, "step": 3651 }, { "epoch": 0.7654579752672396, "grad_norm": 0.018655741587281227, "learning_rate": 1.3773566983610992e-06, "loss": 0.0533, "step": 3652 }, { "epoch": 0.7656675749318801, "grad_norm": 0.022678757086396217, "learning_rate": 1.3750176524232605e-06, "loss": 0.0535, "step": 3653 }, { "epoch": 0.7658771745965206, "grad_norm": 0.02211586944758892, "learning_rate": 1.372680277601529e-06, "loss": 0.0545, "step": 3654 }, { "epoch": 0.7660867742611612, "grad_norm": 0.019549356773495674, "learning_rate": 1.3703445749734384e-06, "loss": 0.0548, "step": 3655 }, { "epoch": 0.7662963739258017, "grad_norm": 0.02576516941189766, "learning_rate": 1.3680105456157427e-06, "loss": 0.0552, "step": 3656 }, { "epoch": 0.7665059735904423, "grad_norm": 0.024852564558386803, "learning_rate": 1.3656781906044315e-06, "loss": 0.0538, "step": 3657 }, { "epoch": 0.7667155732550828, "grad_norm": 0.022067122161388397, "learning_rate": 1.3633475110147204e-06, "loss": 0.0551, "step": 3658 }, { "epoch": 0.7669251729197233, "grad_norm": 0.029038861393928528, "learning_rate": 1.3610185079210514e-06, "loss": 0.0531, "step": 3659 }, { "epoch": 0.7671347725843639, "grad_norm": 0.019402772188186646, "learning_rate": 1.3586911823970933e-06, "loss": 0.0537, "step": 3660 }, { "epoch": 0.7673443722490044, "grad_norm": 0.024672850966453552, "learning_rate": 1.3563655355157434e-06, "loss": 0.0522, "step": 3661 }, { "epoch": 0.7675539719136449, "grad_norm": 0.022120898589491844, "learning_rate": 1.3540415683491265e-06, "loss": 0.0542, "step": 3662 }, { "epoch": 0.7677635715782855, "grad_norm": 0.021284013986587524, "learning_rate": 1.3517192819685875e-06, "loss": 0.0516, "step": 3663 }, { "epoch": 0.767973171242926, "grad_norm": 0.024125682190060616, "learning_rate": 1.3493986774447032e-06, "loss": 0.0575, "step": 3664 }, { "epoch": 0.7681827709075666, "grad_norm": 0.022346889600157738, "learning_rate": 1.3470797558472709e-06, "loss": 0.0539, "step": 3665 }, { "epoch": 0.7683923705722071, "grad_norm": 0.022748976945877075, "learning_rate": 1.34476251824531e-06, "loss": 0.0563, "step": 3666 }, { "epoch": 0.7686019702368476, "grad_norm": 0.02310675010085106, "learning_rate": 1.3424469657070693e-06, "loss": 0.0534, "step": 3667 }, { "epoch": 0.7688115699014881, "grad_norm": 0.016234230250120163, "learning_rate": 1.3401330993000195e-06, "loss": 0.0568, "step": 3668 }, { "epoch": 0.7690211695661286, "grad_norm": 0.02276422083377838, "learning_rate": 1.3378209200908487e-06, "loss": 0.0533, "step": 3669 }, { "epoch": 0.7692307692307693, "grad_norm": 0.020701710134744644, "learning_rate": 1.3355104291454751e-06, "loss": 0.0522, "step": 3670 }, { "epoch": 0.7694403688954098, "grad_norm": 0.017248544842004776, "learning_rate": 1.3332016275290304e-06, "loss": 0.0541, "step": 3671 }, { "epoch": 0.7696499685600503, "grad_norm": 0.01811116933822632, "learning_rate": 1.3308945163058757e-06, "loss": 0.0532, "step": 3672 }, { "epoch": 0.7698595682246908, "grad_norm": 0.017278380692005157, "learning_rate": 1.3285890965395853e-06, "loss": 0.0548, "step": 3673 }, { "epoch": 0.7700691678893313, "grad_norm": 0.017872022464871407, "learning_rate": 1.3262853692929583e-06, "loss": 0.0554, "step": 3674 }, { "epoch": 0.770278767553972, "grad_norm": 0.019185813143849373, "learning_rate": 1.3239833356280152e-06, "loss": 0.0519, "step": 3675 }, { "epoch": 0.7704883672186125, "grad_norm": 0.0181709136813879, "learning_rate": 1.3216829966059902e-06, "loss": 0.0555, "step": 3676 }, { "epoch": 0.770697966883253, "grad_norm": 0.020711593329906464, "learning_rate": 1.3193843532873385e-06, "loss": 0.0529, "step": 3677 }, { "epoch": 0.7709075665478935, "grad_norm": 0.015872521325945854, "learning_rate": 1.3170874067317362e-06, "loss": 0.0539, "step": 3678 }, { "epoch": 0.771117166212534, "grad_norm": 0.02129794843494892, "learning_rate": 1.3147921579980739e-06, "loss": 0.0543, "step": 3679 }, { "epoch": 0.7713267658771746, "grad_norm": 0.015224761329591274, "learning_rate": 1.3124986081444625e-06, "loss": 0.0548, "step": 3680 }, { "epoch": 0.7715363655418152, "grad_norm": 0.016702933236956596, "learning_rate": 1.3102067582282264e-06, "loss": 0.0549, "step": 3681 }, { "epoch": 0.7717459652064557, "grad_norm": 0.01623140461742878, "learning_rate": 1.307916609305907e-06, "loss": 0.0532, "step": 3682 }, { "epoch": 0.7719555648710962, "grad_norm": 0.014785263687372208, "learning_rate": 1.305628162433264e-06, "loss": 0.055, "step": 3683 }, { "epoch": 0.7721651645357367, "grad_norm": 0.014699485152959824, "learning_rate": 1.3033414186652705e-06, "loss": 0.0561, "step": 3684 }, { "epoch": 0.7723747642003773, "grad_norm": 0.017208058387041092, "learning_rate": 1.3010563790561165e-06, "loss": 0.0548, "step": 3685 }, { "epoch": 0.7725843638650178, "grad_norm": 0.019409308210015297, "learning_rate": 1.298773044659204e-06, "loss": 0.0524, "step": 3686 }, { "epoch": 0.7727939635296583, "grad_norm": 0.015810973942279816, "learning_rate": 1.296491416527147e-06, "loss": 0.0546, "step": 3687 }, { "epoch": 0.7730035631942989, "grad_norm": 0.020034223794937134, "learning_rate": 1.2942114957117797e-06, "loss": 0.0545, "step": 3688 }, { "epoch": 0.7732131628589394, "grad_norm": 0.021472373977303505, "learning_rate": 1.2919332832641413e-06, "loss": 0.0534, "step": 3689 }, { "epoch": 0.77342276252358, "grad_norm": 0.012456726282835007, "learning_rate": 1.2896567802344888e-06, "loss": 0.0529, "step": 3690 }, { "epoch": 0.7736323621882205, "grad_norm": 0.018201405182480812, "learning_rate": 1.287381987672292e-06, "loss": 0.0526, "step": 3691 }, { "epoch": 0.773841961852861, "grad_norm": 0.012917966581881046, "learning_rate": 1.2851089066262272e-06, "loss": 0.0538, "step": 3692 }, { "epoch": 0.7740515615175015, "grad_norm": 0.019098002463579178, "learning_rate": 1.2828375381441837e-06, "loss": 0.055, "step": 3693 }, { "epoch": 0.774261161182142, "grad_norm": 0.015969131141901016, "learning_rate": 1.2805678832732627e-06, "loss": 0.0544, "step": 3694 }, { "epoch": 0.7744707608467827, "grad_norm": 0.016314983367919922, "learning_rate": 1.2782999430597764e-06, "loss": 0.0513, "step": 3695 }, { "epoch": 0.7746803605114232, "grad_norm": 0.021401531994342804, "learning_rate": 1.2760337185492423e-06, "loss": 0.0557, "step": 3696 }, { "epoch": 0.7748899601760637, "grad_norm": 0.015217507258057594, "learning_rate": 1.2737692107863914e-06, "loss": 0.0566, "step": 3697 }, { "epoch": 0.7750995598407042, "grad_norm": 0.013445749878883362, "learning_rate": 1.2715064208151606e-06, "loss": 0.0534, "step": 3698 }, { "epoch": 0.7753091595053447, "grad_norm": 0.01527884230017662, "learning_rate": 1.2692453496786933e-06, "loss": 0.0554, "step": 3699 }, { "epoch": 0.7755187591699854, "grad_norm": 0.013215204700827599, "learning_rate": 1.2669859984193456e-06, "loss": 0.05, "step": 3700 }, { "epoch": 0.7757283588346259, "grad_norm": 0.013395383022725582, "learning_rate": 1.264728368078678e-06, "loss": 0.0579, "step": 3701 }, { "epoch": 0.7759379584992664, "grad_norm": 0.013562247157096863, "learning_rate": 1.2624724596974557e-06, "loss": 0.0546, "step": 3702 }, { "epoch": 0.7761475581639069, "grad_norm": 0.0144569780677557, "learning_rate": 1.260218274315655e-06, "loss": 0.0544, "step": 3703 }, { "epoch": 0.7763571578285475, "grad_norm": 0.014783012680709362, "learning_rate": 1.2579658129724526e-06, "loss": 0.0539, "step": 3704 }, { "epoch": 0.776566757493188, "grad_norm": 0.014723209664225578, "learning_rate": 1.2557150767062315e-06, "loss": 0.0554, "step": 3705 }, { "epoch": 0.7767763571578286, "grad_norm": 0.016170332208275795, "learning_rate": 1.2534660665545822e-06, "loss": 0.0526, "step": 3706 }, { "epoch": 0.7769859568224691, "grad_norm": 0.019932882860302925, "learning_rate": 1.2512187835542982e-06, "loss": 0.0526, "step": 3707 }, { "epoch": 0.7771955564871096, "grad_norm": 0.016744259744882584, "learning_rate": 1.248973228741378e-06, "loss": 0.0535, "step": 3708 }, { "epoch": 0.7774051561517502, "grad_norm": 0.016560077667236328, "learning_rate": 1.2467294031510202e-06, "loss": 0.0526, "step": 3709 }, { "epoch": 0.7776147558163907, "grad_norm": 0.016798479482531548, "learning_rate": 1.2444873078176262e-06, "loss": 0.0535, "step": 3710 }, { "epoch": 0.7778243554810312, "grad_norm": 0.01939672976732254, "learning_rate": 1.2422469437748046e-06, "loss": 0.0531, "step": 3711 }, { "epoch": 0.7780339551456718, "grad_norm": 0.019812364131212234, "learning_rate": 1.2400083120553602e-06, "loss": 0.0537, "step": 3712 }, { "epoch": 0.7782435548103123, "grad_norm": 0.0169843677431345, "learning_rate": 1.2377714136913028e-06, "loss": 0.0537, "step": 3713 }, { "epoch": 0.7784531544749529, "grad_norm": 0.019338861107826233, "learning_rate": 1.235536249713845e-06, "loss": 0.0538, "step": 3714 }, { "epoch": 0.7786627541395934, "grad_norm": 0.018493298441171646, "learning_rate": 1.2333028211533916e-06, "loss": 0.0527, "step": 3715 }, { "epoch": 0.7788723538042339, "grad_norm": 0.0183496605604887, "learning_rate": 1.2310711290395545e-06, "loss": 0.0549, "step": 3716 }, { "epoch": 0.7790819534688744, "grad_norm": 0.019363224506378174, "learning_rate": 1.2288411744011464e-06, "loss": 0.0513, "step": 3717 }, { "epoch": 0.779291553133515, "grad_norm": 0.015527610667049885, "learning_rate": 1.2266129582661712e-06, "loss": 0.0545, "step": 3718 }, { "epoch": 0.7795011527981556, "grad_norm": 0.018898479640483856, "learning_rate": 1.2243864816618407e-06, "loss": 0.0539, "step": 3719 }, { "epoch": 0.7797107524627961, "grad_norm": 0.014554295688867569, "learning_rate": 1.2221617456145556e-06, "loss": 0.0532, "step": 3720 }, { "epoch": 0.7799203521274366, "grad_norm": 0.01795900985598564, "learning_rate": 1.2199387511499234e-06, "loss": 0.0542, "step": 3721 }, { "epoch": 0.7801299517920771, "grad_norm": 0.018716957420110703, "learning_rate": 1.21771749929274e-06, "loss": 0.0534, "step": 3722 }, { "epoch": 0.7803395514567176, "grad_norm": 0.015783434733748436, "learning_rate": 1.2154979910670033e-06, "loss": 0.055, "step": 3723 }, { "epoch": 0.7805491511213583, "grad_norm": 0.016636742278933525, "learning_rate": 1.2132802274959082e-06, "loss": 0.0527, "step": 3724 }, { "epoch": 0.7807587507859988, "grad_norm": 0.017774462699890137, "learning_rate": 1.2110642096018421e-06, "loss": 0.0556, "step": 3725 }, { "epoch": 0.7809683504506393, "grad_norm": 0.01745484583079815, "learning_rate": 1.2088499384063868e-06, "loss": 0.0521, "step": 3726 }, { "epoch": 0.7811779501152798, "grad_norm": 0.018969282507896423, "learning_rate": 1.2066374149303234e-06, "loss": 0.0524, "step": 3727 }, { "epoch": 0.7813875497799203, "grad_norm": 0.0174677986651659, "learning_rate": 1.2044266401936228e-06, "loss": 0.0541, "step": 3728 }, { "epoch": 0.7815971494445609, "grad_norm": 0.01889014057815075, "learning_rate": 1.2022176152154525e-06, "loss": 0.0532, "step": 3729 }, { "epoch": 0.7818067491092014, "grad_norm": 0.020782019942998886, "learning_rate": 1.200010341014174e-06, "loss": 0.0517, "step": 3730 }, { "epoch": 0.782016348773842, "grad_norm": 0.017878524959087372, "learning_rate": 1.1978048186073388e-06, "loss": 0.0548, "step": 3731 }, { "epoch": 0.7822259484384825, "grad_norm": 0.01691311225295067, "learning_rate": 1.1956010490116915e-06, "loss": 0.0516, "step": 3732 }, { "epoch": 0.782435548103123, "grad_norm": 0.014464441686868668, "learning_rate": 1.1933990332431699e-06, "loss": 0.055, "step": 3733 }, { "epoch": 0.7826451477677636, "grad_norm": 0.02182583324611187, "learning_rate": 1.191198772316905e-06, "loss": 0.0515, "step": 3734 }, { "epoch": 0.7828547474324041, "grad_norm": 0.01545246597379446, "learning_rate": 1.1890002672472133e-06, "loss": 0.0551, "step": 3735 }, { "epoch": 0.7830643470970446, "grad_norm": 0.022399727255105972, "learning_rate": 1.1868035190476085e-06, "loss": 0.0572, "step": 3736 }, { "epoch": 0.7832739467616852, "grad_norm": 0.01581631414592266, "learning_rate": 1.184608528730789e-06, "loss": 0.0539, "step": 3737 }, { "epoch": 0.7834835464263257, "grad_norm": 0.022165510803461075, "learning_rate": 1.1824152973086444e-06, "loss": 0.0579, "step": 3738 }, { "epoch": 0.7836931460909663, "grad_norm": 0.018284514546394348, "learning_rate": 1.1802238257922543e-06, "loss": 0.056, "step": 3739 }, { "epoch": 0.7839027457556068, "grad_norm": 0.017083924263715744, "learning_rate": 1.1780341151918883e-06, "loss": 0.0517, "step": 3740 }, { "epoch": 0.7841123454202473, "grad_norm": 0.01769246533513069, "learning_rate": 1.1758461665170001e-06, "loss": 0.0558, "step": 3741 }, { "epoch": 0.7843219450848878, "grad_norm": 0.018215294927358627, "learning_rate": 1.1736599807762366e-06, "loss": 0.0544, "step": 3742 }, { "epoch": 0.7845315447495284, "grad_norm": 0.018238717690110207, "learning_rate": 1.1714755589774252e-06, "loss": 0.0523, "step": 3743 }, { "epoch": 0.784741144414169, "grad_norm": 0.01696399226784706, "learning_rate": 1.1692929021275874e-06, "loss": 0.0542, "step": 3744 }, { "epoch": 0.7849507440788095, "grad_norm": 0.020423993468284607, "learning_rate": 1.1671120112329248e-06, "loss": 0.0547, "step": 3745 }, { "epoch": 0.78516034374345, "grad_norm": 0.016111532226204872, "learning_rate": 1.1649328872988286e-06, "loss": 0.0546, "step": 3746 }, { "epoch": 0.7853699434080905, "grad_norm": 0.023867281153798103, "learning_rate": 1.1627555313298777e-06, "loss": 0.0544, "step": 3747 }, { "epoch": 0.785579543072731, "grad_norm": 0.017127446830272675, "learning_rate": 1.160579944329827e-06, "loss": 0.0551, "step": 3748 }, { "epoch": 0.7857891427373717, "grad_norm": 0.0323978066444397, "learning_rate": 1.1584061273016245e-06, "loss": 0.0584, "step": 3749 }, { "epoch": 0.7859987424020122, "grad_norm": 0.015841031447052956, "learning_rate": 1.1562340812474004e-06, "loss": 0.0511, "step": 3750 }, { "epoch": 0.7862083420666527, "grad_norm": 0.03065073862671852, "learning_rate": 1.154063807168465e-06, "loss": 0.0526, "step": 3751 }, { "epoch": 0.7864179417312932, "grad_norm": 0.02133885584771633, "learning_rate": 1.1518953060653177e-06, "loss": 0.054, "step": 3752 }, { "epoch": 0.7866275413959337, "grad_norm": 0.02793465554714203, "learning_rate": 1.1497285789376327e-06, "loss": 0.0542, "step": 3753 }, { "epoch": 0.7868371410605743, "grad_norm": 0.024401402100920677, "learning_rate": 1.1475636267842754e-06, "loss": 0.0541, "step": 3754 }, { "epoch": 0.7870467407252149, "grad_norm": 0.02825610339641571, "learning_rate": 1.145400450603284e-06, "loss": 0.053, "step": 3755 }, { "epoch": 0.7872563403898554, "grad_norm": 0.031988829374313354, "learning_rate": 1.143239051391884e-06, "loss": 0.0519, "step": 3756 }, { "epoch": 0.7874659400544959, "grad_norm": 0.01913175918161869, "learning_rate": 1.1410794301464817e-06, "loss": 0.0559, "step": 3757 }, { "epoch": 0.7876755397191364, "grad_norm": 0.02814309112727642, "learning_rate": 1.1389215878626608e-06, "loss": 0.0569, "step": 3758 }, { "epoch": 0.787885139383777, "grad_norm": 0.019799359142780304, "learning_rate": 1.1367655255351845e-06, "loss": 0.0542, "step": 3759 }, { "epoch": 0.7880947390484175, "grad_norm": 0.023299338296055794, "learning_rate": 1.1346112441579998e-06, "loss": 0.0548, "step": 3760 }, { "epoch": 0.788304338713058, "grad_norm": 0.022608846426010132, "learning_rate": 1.132458744724227e-06, "loss": 0.0527, "step": 3761 }, { "epoch": 0.7885139383776986, "grad_norm": 0.03028971515595913, "learning_rate": 1.1303080282261698e-06, "loss": 0.0561, "step": 3762 }, { "epoch": 0.7887235380423391, "grad_norm": 0.019988486543297768, "learning_rate": 1.128159095655309e-06, "loss": 0.0526, "step": 3763 }, { "epoch": 0.7889331377069797, "grad_norm": 0.036664996296167374, "learning_rate": 1.1260119480023008e-06, "loss": 0.0525, "step": 3764 }, { "epoch": 0.7891427373716202, "grad_norm": 0.02409091591835022, "learning_rate": 1.1238665862569786e-06, "loss": 0.0527, "step": 3765 }, { "epoch": 0.7893523370362607, "grad_norm": 0.02567540481686592, "learning_rate": 1.121723011408355e-06, "loss": 0.054, "step": 3766 }, { "epoch": 0.7895619367009012, "grad_norm": 0.028370145708322525, "learning_rate": 1.1195812244446185e-06, "loss": 0.0561, "step": 3767 }, { "epoch": 0.7897715363655418, "grad_norm": 0.015024359337985516, "learning_rate": 1.117441226353131e-06, "loss": 0.0532, "step": 3768 }, { "epoch": 0.7899811360301824, "grad_norm": 0.02175975777208805, "learning_rate": 1.115303018120432e-06, "loss": 0.0534, "step": 3769 }, { "epoch": 0.7901907356948229, "grad_norm": 0.01616404764354229, "learning_rate": 1.1131666007322356e-06, "loss": 0.0545, "step": 3770 }, { "epoch": 0.7904003353594634, "grad_norm": 0.01787029393017292, "learning_rate": 1.1110319751734271e-06, "loss": 0.0541, "step": 3771 }, { "epoch": 0.7906099350241039, "grad_norm": 0.015742896124720573, "learning_rate": 1.1088991424280705e-06, "loss": 0.0542, "step": 3772 }, { "epoch": 0.7908195346887446, "grad_norm": 0.018519075587391853, "learning_rate": 1.106768103479402e-06, "loss": 0.0527, "step": 3773 }, { "epoch": 0.7910291343533851, "grad_norm": 0.012830471619963646, "learning_rate": 1.1046388593098284e-06, "loss": 0.0523, "step": 3774 }, { "epoch": 0.7912387340180256, "grad_norm": 0.018937064334750175, "learning_rate": 1.1025114109009321e-06, "loss": 0.0537, "step": 3775 }, { "epoch": 0.7914483336826661, "grad_norm": 0.016385626047849655, "learning_rate": 1.100385759233465e-06, "loss": 0.0537, "step": 3776 }, { "epoch": 0.7916579333473066, "grad_norm": 0.01458564680069685, "learning_rate": 1.098261905287354e-06, "loss": 0.0535, "step": 3777 }, { "epoch": 0.7918675330119472, "grad_norm": 0.01804785430431366, "learning_rate": 1.0961398500416926e-06, "loss": 0.0504, "step": 3778 }, { "epoch": 0.7920771326765877, "grad_norm": 0.015284632332623005, "learning_rate": 1.0940195944747494e-06, "loss": 0.0537, "step": 3779 }, { "epoch": 0.7922867323412283, "grad_norm": 0.013636418618261814, "learning_rate": 1.091901139563964e-06, "loss": 0.0531, "step": 3780 }, { "epoch": 0.7924963320058688, "grad_norm": 0.013568844646215439, "learning_rate": 1.0897844862859407e-06, "loss": 0.0519, "step": 3781 }, { "epoch": 0.7927059316705093, "grad_norm": 0.015443854033946991, "learning_rate": 1.0876696356164556e-06, "loss": 0.0566, "step": 3782 }, { "epoch": 0.7929155313351499, "grad_norm": 0.013935340568423271, "learning_rate": 1.0855565885304575e-06, "loss": 0.0503, "step": 3783 }, { "epoch": 0.7931251309997904, "grad_norm": 0.01774810254573822, "learning_rate": 1.0834453460020577e-06, "loss": 0.0548, "step": 3784 }, { "epoch": 0.7933347306644309, "grad_norm": 0.01960287056863308, "learning_rate": 1.0813359090045412e-06, "loss": 0.0535, "step": 3785 }, { "epoch": 0.7935443303290715, "grad_norm": 0.014412653632462025, "learning_rate": 1.0792282785103565e-06, "loss": 0.0538, "step": 3786 }, { "epoch": 0.793753929993712, "grad_norm": 0.01678672805428505, "learning_rate": 1.0771224554911197e-06, "loss": 0.0539, "step": 3787 }, { "epoch": 0.7939635296583526, "grad_norm": 0.017591232433915138, "learning_rate": 1.0750184409176156e-06, "loss": 0.0577, "step": 3788 }, { "epoch": 0.7941731293229931, "grad_norm": 0.025450995191931725, "learning_rate": 1.0729162357597956e-06, "loss": 0.0556, "step": 3789 }, { "epoch": 0.7943827289876336, "grad_norm": 0.01504075713455677, "learning_rate": 1.0708158409867763e-06, "loss": 0.0564, "step": 3790 }, { "epoch": 0.7945923286522741, "grad_norm": 0.029961617663502693, "learning_rate": 1.0687172575668381e-06, "loss": 0.0532, "step": 3791 }, { "epoch": 0.7948019283169147, "grad_norm": 0.01688455045223236, "learning_rate": 1.0666204864674263e-06, "loss": 0.0561, "step": 3792 }, { "epoch": 0.7950115279815553, "grad_norm": 0.022862810641527176, "learning_rate": 1.0645255286551548e-06, "loss": 0.0555, "step": 3793 }, { "epoch": 0.7952211276461958, "grad_norm": 0.016880135983228683, "learning_rate": 1.0624323850957952e-06, "loss": 0.0528, "step": 3794 }, { "epoch": 0.7954307273108363, "grad_norm": 0.02419593743979931, "learning_rate": 1.0603410567542882e-06, "loss": 0.054, "step": 3795 }, { "epoch": 0.7956403269754768, "grad_norm": 0.02624059095978737, "learning_rate": 1.0582515445947377e-06, "loss": 0.0534, "step": 3796 }, { "epoch": 0.7958499266401173, "grad_norm": 0.013864974491298199, "learning_rate": 1.056163849580406e-06, "loss": 0.0516, "step": 3797 }, { "epoch": 0.796059526304758, "grad_norm": 0.018372228369116783, "learning_rate": 1.0540779726737187e-06, "loss": 0.0544, "step": 3798 }, { "epoch": 0.7962691259693985, "grad_norm": 0.017514994367957115, "learning_rate": 1.0519939148362667e-06, "loss": 0.0556, "step": 3799 }, { "epoch": 0.796478725634039, "grad_norm": 0.026039427146315575, "learning_rate": 1.0499116770288015e-06, "loss": 0.0531, "step": 3800 }, { "epoch": 0.7966883252986795, "grad_norm": 0.019942620769143105, "learning_rate": 1.0478312602112312e-06, "loss": 0.0527, "step": 3801 }, { "epoch": 0.79689792496332, "grad_norm": 0.03383920341730118, "learning_rate": 1.0457526653426303e-06, "loss": 0.0536, "step": 3802 }, { "epoch": 0.7971075246279606, "grad_norm": 0.018814336508512497, "learning_rate": 1.0436758933812292e-06, "loss": 0.0547, "step": 3803 }, { "epoch": 0.7973171242926012, "grad_norm": 0.035695578902959824, "learning_rate": 1.0416009452844178e-06, "loss": 0.0525, "step": 3804 }, { "epoch": 0.7975267239572417, "grad_norm": 0.019596830010414124, "learning_rate": 1.039527822008749e-06, "loss": 0.0549, "step": 3805 }, { "epoch": 0.7977363236218822, "grad_norm": 0.030918627977371216, "learning_rate": 1.0374565245099328e-06, "loss": 0.0572, "step": 3806 }, { "epoch": 0.7979459232865227, "grad_norm": 0.020429056137800217, "learning_rate": 1.035387053742834e-06, "loss": 0.0537, "step": 3807 }, { "epoch": 0.7981555229511633, "grad_norm": 0.026732752099633217, "learning_rate": 1.0333194106614813e-06, "loss": 0.0556, "step": 3808 }, { "epoch": 0.7983651226158038, "grad_norm": 0.01719031110405922, "learning_rate": 1.0312535962190567e-06, "loss": 0.0563, "step": 3809 }, { "epoch": 0.7985747222804443, "grad_norm": 0.03157135099172592, "learning_rate": 1.0291896113678983e-06, "loss": 0.0525, "step": 3810 }, { "epoch": 0.7987843219450849, "grad_norm": 0.02437172830104828, "learning_rate": 1.0271274570595041e-06, "loss": 0.054, "step": 3811 }, { "epoch": 0.7989939216097254, "grad_norm": 0.017890898510813713, "learning_rate": 1.0250671342445273e-06, "loss": 0.0557, "step": 3812 }, { "epoch": 0.799203521274366, "grad_norm": 0.022037314251065254, "learning_rate": 1.0230086438727771e-06, "loss": 0.0532, "step": 3813 }, { "epoch": 0.7994131209390065, "grad_norm": 0.014611390419304371, "learning_rate": 1.020951986893216e-06, "loss": 0.056, "step": 3814 }, { "epoch": 0.799622720603647, "grad_norm": 0.02562752366065979, "learning_rate": 1.0188971642539614e-06, "loss": 0.0536, "step": 3815 }, { "epoch": 0.7998323202682875, "grad_norm": 0.015589050948619843, "learning_rate": 1.016844176902288e-06, "loss": 0.0522, "step": 3816 }, { "epoch": 0.8000419199329281, "grad_norm": 0.030634846538305283, "learning_rate": 1.0147930257846206e-06, "loss": 0.0525, "step": 3817 }, { "epoch": 0.8002515195975687, "grad_norm": 0.018007168546319008, "learning_rate": 1.0127437118465405e-06, "loss": 0.0567, "step": 3818 }, { "epoch": 0.8004611192622092, "grad_norm": 0.02537359483540058, "learning_rate": 1.0106962360327832e-06, "loss": 0.0564, "step": 3819 }, { "epoch": 0.8006707189268497, "grad_norm": 0.015520203858613968, "learning_rate": 1.0086505992872304e-06, "loss": 0.0566, "step": 3820 }, { "epoch": 0.8008803185914902, "grad_norm": 0.02692110277712345, "learning_rate": 1.0066068025529219e-06, "loss": 0.0548, "step": 3821 }, { "epoch": 0.8010899182561307, "grad_norm": 0.021093720570206642, "learning_rate": 1.0045648467720492e-06, "loss": 0.0524, "step": 3822 }, { "epoch": 0.8012995179207714, "grad_norm": 0.01891363225877285, "learning_rate": 1.002524732885951e-06, "loss": 0.053, "step": 3823 }, { "epoch": 0.8015091175854119, "grad_norm": 0.016529306769371033, "learning_rate": 1.0004864618351223e-06, "loss": 0.054, "step": 3824 }, { "epoch": 0.8017187172500524, "grad_norm": 0.01724756881594658, "learning_rate": 9.984500345592023e-07, "loss": 0.0582, "step": 3825 }, { "epoch": 0.8019283169146929, "grad_norm": 0.014622312039136887, "learning_rate": 9.964154519969865e-07, "loss": 0.0617, "step": 3826 }, { "epoch": 0.8021379165793334, "grad_norm": 0.016424983739852905, "learning_rate": 9.943827150864143e-07, "loss": 0.0527, "step": 3827 }, { "epoch": 0.802347516243974, "grad_norm": 0.016418717801570892, "learning_rate": 9.923518247645785e-07, "loss": 0.0525, "step": 3828 }, { "epoch": 0.8025571159086146, "grad_norm": 0.01681547239422798, "learning_rate": 9.903227819677203e-07, "loss": 0.0546, "step": 3829 }, { "epoch": 0.8027667155732551, "grad_norm": 0.016537809744477272, "learning_rate": 9.882955876312266e-07, "loss": 0.052, "step": 3830 }, { "epoch": 0.8029763152378956, "grad_norm": 0.018042949959635735, "learning_rate": 9.862702426896327e-07, "loss": 0.0549, "step": 3831 }, { "epoch": 0.8031859149025361, "grad_norm": 0.019126849249005318, "learning_rate": 9.842467480766243e-07, "loss": 0.0537, "step": 3832 }, { "epoch": 0.8033955145671767, "grad_norm": 0.01958855427801609, "learning_rate": 9.822251047250298e-07, "loss": 0.0544, "step": 3833 }, { "epoch": 0.8036051142318172, "grad_norm": 0.017055636271834373, "learning_rate": 9.80205313566827e-07, "loss": 0.0512, "step": 3834 }, { "epoch": 0.8038147138964578, "grad_norm": 0.0194768775254488, "learning_rate": 9.781873755331412e-07, "loss": 0.0541, "step": 3835 }, { "epoch": 0.8040243135610983, "grad_norm": 0.015344620682299137, "learning_rate": 9.7617129155424e-07, "loss": 0.0538, "step": 3836 }, { "epoch": 0.8042339132257388, "grad_norm": 0.016386136412620544, "learning_rate": 9.74157062559536e-07, "loss": 0.0537, "step": 3837 }, { "epoch": 0.8044435128903794, "grad_norm": 0.016445597633719444, "learning_rate": 9.7214468947759e-07, "loss": 0.0538, "step": 3838 }, { "epoch": 0.8046531125550199, "grad_norm": 0.017396828159689903, "learning_rate": 9.701341732361068e-07, "loss": 0.0533, "step": 3839 }, { "epoch": 0.8048627122196604, "grad_norm": 0.018001766875386238, "learning_rate": 9.681255147619317e-07, "loss": 0.0519, "step": 3840 }, { "epoch": 0.805072311884301, "grad_norm": 0.014202550053596497, "learning_rate": 9.66118714981058e-07, "loss": 0.0547, "step": 3841 }, { "epoch": 0.8052819115489416, "grad_norm": 0.01720808818936348, "learning_rate": 9.641137748186186e-07, "loss": 0.0543, "step": 3842 }, { "epoch": 0.8054915112135821, "grad_norm": 0.012487749569118023, "learning_rate": 9.62110695198889e-07, "loss": 0.0572, "step": 3843 }, { "epoch": 0.8057011108782226, "grad_norm": 0.014534911140799522, "learning_rate": 9.601094770452907e-07, "loss": 0.0555, "step": 3844 }, { "epoch": 0.8059107105428631, "grad_norm": 0.012713300064206123, "learning_rate": 9.581101212803857e-07, "loss": 0.0567, "step": 3845 }, { "epoch": 0.8061203102075036, "grad_norm": 0.01954994909465313, "learning_rate": 9.561126288258738e-07, "loss": 0.0562, "step": 3846 }, { "epoch": 0.8063299098721443, "grad_norm": 0.01227479986846447, "learning_rate": 9.541170006026012e-07, "loss": 0.0526, "step": 3847 }, { "epoch": 0.8065395095367848, "grad_norm": 0.013360395096242428, "learning_rate": 9.521232375305494e-07, "loss": 0.0531, "step": 3848 }, { "epoch": 0.8067491092014253, "grad_norm": 0.012572353705763817, "learning_rate": 9.50131340528846e-07, "loss": 0.0548, "step": 3849 }, { "epoch": 0.8069587088660658, "grad_norm": 0.012260310351848602, "learning_rate": 9.481413105157517e-07, "loss": 0.0508, "step": 3850 }, { "epoch": 0.8071683085307063, "grad_norm": 0.012266767211258411, "learning_rate": 9.461531484086722e-07, "loss": 0.0564, "step": 3851 }, { "epoch": 0.8073779081953469, "grad_norm": 0.013467447832226753, "learning_rate": 9.441668551241511e-07, "loss": 0.0564, "step": 3852 }, { "epoch": 0.8075875078599875, "grad_norm": 0.013181211426854134, "learning_rate": 9.421824315778649e-07, "loss": 0.0548, "step": 3853 }, { "epoch": 0.807797107524628, "grad_norm": 0.014856358990073204, "learning_rate": 9.401998786846356e-07, "loss": 0.0543, "step": 3854 }, { "epoch": 0.8080067071892685, "grad_norm": 0.013289416208863258, "learning_rate": 9.382191973584193e-07, "loss": 0.0524, "step": 3855 }, { "epoch": 0.808216306853909, "grad_norm": 0.017398502677679062, "learning_rate": 9.362403885123084e-07, "loss": 0.0515, "step": 3856 }, { "epoch": 0.8084259065185496, "grad_norm": 0.012360899709165096, "learning_rate": 9.342634530585354e-07, "loss": 0.0531, "step": 3857 }, { "epoch": 0.8086355061831901, "grad_norm": 0.014594539068639278, "learning_rate": 9.322883919084652e-07, "loss": 0.0537, "step": 3858 }, { "epoch": 0.8088451058478306, "grad_norm": 0.014027457684278488, "learning_rate": 9.303152059726023e-07, "loss": 0.0545, "step": 3859 }, { "epoch": 0.8090547055124712, "grad_norm": 0.015334793366491795, "learning_rate": 9.283438961605829e-07, "loss": 0.0515, "step": 3860 }, { "epoch": 0.8092643051771117, "grad_norm": 0.018460331484675407, "learning_rate": 9.263744633811816e-07, "loss": 0.0543, "step": 3861 }, { "epoch": 0.8094739048417523, "grad_norm": 0.017743943259119987, "learning_rate": 9.244069085423074e-07, "loss": 0.0559, "step": 3862 }, { "epoch": 0.8096835045063928, "grad_norm": 0.014662131667137146, "learning_rate": 9.224412325510024e-07, "loss": 0.055, "step": 3863 }, { "epoch": 0.8098931041710333, "grad_norm": 0.016809482127428055, "learning_rate": 9.204774363134405e-07, "loss": 0.0564, "step": 3864 }, { "epoch": 0.8101027038356738, "grad_norm": 0.022974979132413864, "learning_rate": 9.185155207349344e-07, "loss": 0.0545, "step": 3865 }, { "epoch": 0.8103123035003144, "grad_norm": 0.018833018839359283, "learning_rate": 9.165554867199245e-07, "loss": 0.0561, "step": 3866 }, { "epoch": 0.810521903164955, "grad_norm": 0.021687567234039307, "learning_rate": 9.145973351719867e-07, "loss": 0.0569, "step": 3867 }, { "epoch": 0.8107315028295955, "grad_norm": 0.016966206952929497, "learning_rate": 9.126410669938302e-07, "loss": 0.0518, "step": 3868 }, { "epoch": 0.810941102494236, "grad_norm": 0.023966066539287567, "learning_rate": 9.106866830872929e-07, "loss": 0.0569, "step": 3869 }, { "epoch": 0.8111507021588765, "grad_norm": 0.016374409198760986, "learning_rate": 9.087341843533437e-07, "loss": 0.0552, "step": 3870 }, { "epoch": 0.811360301823517, "grad_norm": 0.025269117206335068, "learning_rate": 9.067835716920859e-07, "loss": 0.0551, "step": 3871 }, { "epoch": 0.8115699014881577, "grad_norm": 0.01591191627085209, "learning_rate": 9.048348460027528e-07, "loss": 0.0532, "step": 3872 }, { "epoch": 0.8117795011527982, "grad_norm": 0.020468199625611305, "learning_rate": 9.028880081837032e-07, "loss": 0.0539, "step": 3873 }, { "epoch": 0.8119891008174387, "grad_norm": 0.0173234511166811, "learning_rate": 9.009430591324325e-07, "loss": 0.0537, "step": 3874 }, { "epoch": 0.8121987004820792, "grad_norm": 0.015647042542696, "learning_rate": 8.989999997455601e-07, "loss": 0.0563, "step": 3875 }, { "epoch": 0.8124083001467197, "grad_norm": 0.022868547588586807, "learning_rate": 8.970588309188343e-07, "loss": 0.0527, "step": 3876 }, { "epoch": 0.8126178998113603, "grad_norm": 0.018496761098504066, "learning_rate": 8.951195535471357e-07, "loss": 0.0546, "step": 3877 }, { "epoch": 0.8128274994760009, "grad_norm": 0.023296529427170753, "learning_rate": 8.931821685244712e-07, "loss": 0.0546, "step": 3878 }, { "epoch": 0.8130370991406414, "grad_norm": 0.019333289936184883, "learning_rate": 8.912466767439726e-07, "loss": 0.0532, "step": 3879 }, { "epoch": 0.8132466988052819, "grad_norm": 0.021376557648181915, "learning_rate": 8.893130790979038e-07, "loss": 0.0532, "step": 3880 }, { "epoch": 0.8134562984699224, "grad_norm": 0.015562393702566624, "learning_rate": 8.873813764776506e-07, "loss": 0.0536, "step": 3881 }, { "epoch": 0.813665898134563, "grad_norm": 0.026157313957810402, "learning_rate": 8.854515697737298e-07, "loss": 0.0531, "step": 3882 }, { "epoch": 0.8138754977992035, "grad_norm": 0.017569879069924355, "learning_rate": 8.835236598757796e-07, "loss": 0.0528, "step": 3883 }, { "epoch": 0.814085097463844, "grad_norm": 0.021584592759609222, "learning_rate": 8.815976476725668e-07, "loss": 0.053, "step": 3884 }, { "epoch": 0.8142946971284846, "grad_norm": 0.01839163526892662, "learning_rate": 8.796735340519847e-07, "loss": 0.0539, "step": 3885 }, { "epoch": 0.8145042967931251, "grad_norm": 0.01819518581032753, "learning_rate": 8.777513199010468e-07, "loss": 0.0558, "step": 3886 }, { "epoch": 0.8147138964577657, "grad_norm": 0.01771095208823681, "learning_rate": 8.758310061058934e-07, "loss": 0.0527, "step": 3887 }, { "epoch": 0.8149234961224062, "grad_norm": 0.01995546743273735, "learning_rate": 8.739125935517906e-07, "loss": 0.0531, "step": 3888 }, { "epoch": 0.8151330957870467, "grad_norm": 0.016256198287010193, "learning_rate": 8.719960831231239e-07, "loss": 0.0523, "step": 3889 }, { "epoch": 0.8153426954516872, "grad_norm": 0.020824657753109932, "learning_rate": 8.70081475703406e-07, "loss": 0.055, "step": 3890 }, { "epoch": 0.8155522951163278, "grad_norm": 0.0172143392264843, "learning_rate": 8.681687721752719e-07, "loss": 0.0564, "step": 3891 }, { "epoch": 0.8157618947809684, "grad_norm": 0.022133000195026398, "learning_rate": 8.66257973420473e-07, "loss": 0.053, "step": 3892 }, { "epoch": 0.8159714944456089, "grad_norm": 0.01741599105298519, "learning_rate": 8.643490803198895e-07, "loss": 0.0574, "step": 3893 }, { "epoch": 0.8161810941102494, "grad_norm": 0.01774427853524685, "learning_rate": 8.62442093753521e-07, "loss": 0.0556, "step": 3894 }, { "epoch": 0.8163906937748899, "grad_norm": 0.019820379093289375, "learning_rate": 8.605370146004894e-07, "loss": 0.0536, "step": 3895 }, { "epoch": 0.8166002934395304, "grad_norm": 0.017903191968798637, "learning_rate": 8.58633843739034e-07, "loss": 0.056, "step": 3896 }, { "epoch": 0.8168098931041711, "grad_norm": 0.02086867205798626, "learning_rate": 8.567325820465156e-07, "loss": 0.053, "step": 3897 }, { "epoch": 0.8170194927688116, "grad_norm": 0.02020520158112049, "learning_rate": 8.548332303994167e-07, "loss": 0.055, "step": 3898 }, { "epoch": 0.8172290924334521, "grad_norm": 0.015635685995221138, "learning_rate": 8.52935789673337e-07, "loss": 0.0539, "step": 3899 }, { "epoch": 0.8174386920980926, "grad_norm": 0.02270682342350483, "learning_rate": 8.510402607429963e-07, "loss": 0.056, "step": 3900 }, { "epoch": 0.8176482917627331, "grad_norm": 0.014987417496740818, "learning_rate": 8.491466444822355e-07, "loss": 0.0546, "step": 3901 }, { "epoch": 0.8178578914273738, "grad_norm": 0.0187685564160347, "learning_rate": 8.472549417640092e-07, "loss": 0.0521, "step": 3902 }, { "epoch": 0.8180674910920143, "grad_norm": 0.01810646429657936, "learning_rate": 8.453651534603901e-07, "loss": 0.0542, "step": 3903 }, { "epoch": 0.8182770907566548, "grad_norm": 0.01981244422495365, "learning_rate": 8.434772804425734e-07, "loss": 0.0551, "step": 3904 }, { "epoch": 0.8184866904212953, "grad_norm": 0.017939042299985886, "learning_rate": 8.415913235808675e-07, "loss": 0.0553, "step": 3905 }, { "epoch": 0.8186962900859358, "grad_norm": 0.02355090342462063, "learning_rate": 8.397072837446968e-07, "loss": 0.052, "step": 3906 }, { "epoch": 0.8189058897505764, "grad_norm": 0.01684572361409664, "learning_rate": 8.378251618026051e-07, "loss": 0.0561, "step": 3907 }, { "epoch": 0.819115489415217, "grad_norm": 0.024392616003751755, "learning_rate": 8.35944958622249e-07, "loss": 0.0536, "step": 3908 }, { "epoch": 0.8193250890798575, "grad_norm": 0.01632305234670639, "learning_rate": 8.340666750704013e-07, "loss": 0.0538, "step": 3909 }, { "epoch": 0.819534688744498, "grad_norm": 0.023946644738316536, "learning_rate": 8.32190312012951e-07, "loss": 0.0542, "step": 3910 }, { "epoch": 0.8197442884091386, "grad_norm": 0.016722572967410088, "learning_rate": 8.303158703149023e-07, "loss": 0.0546, "step": 3911 }, { "epoch": 0.8199538880737791, "grad_norm": 0.02079438418149948, "learning_rate": 8.2844335084037e-07, "loss": 0.0552, "step": 3912 }, { "epoch": 0.8201634877384196, "grad_norm": 0.016205811873078346, "learning_rate": 8.265727544525876e-07, "loss": 0.0574, "step": 3913 }, { "epoch": 0.8203730874030601, "grad_norm": 0.020107150077819824, "learning_rate": 8.247040820138985e-07, "loss": 0.0529, "step": 3914 }, { "epoch": 0.8205826870677007, "grad_norm": 0.02017829939723015, "learning_rate": 8.228373343857593e-07, "loss": 0.0559, "step": 3915 }, { "epoch": 0.8207922867323413, "grad_norm": 0.015335801057517529, "learning_rate": 8.20972512428741e-07, "loss": 0.0539, "step": 3916 }, { "epoch": 0.8210018863969818, "grad_norm": 0.01876046508550644, "learning_rate": 8.191096170025265e-07, "loss": 0.0531, "step": 3917 }, { "epoch": 0.8212114860616223, "grad_norm": 0.022027568891644478, "learning_rate": 8.172486489659115e-07, "loss": 0.0528, "step": 3918 }, { "epoch": 0.8214210857262628, "grad_norm": 0.013593902811408043, "learning_rate": 8.153896091768004e-07, "loss": 0.056, "step": 3919 }, { "epoch": 0.8216306853909033, "grad_norm": 0.02417590655386448, "learning_rate": 8.135324984922088e-07, "loss": 0.0555, "step": 3920 }, { "epoch": 0.821840285055544, "grad_norm": 0.016407081857323647, "learning_rate": 8.116773177682674e-07, "loss": 0.0532, "step": 3921 }, { "epoch": 0.8220498847201845, "grad_norm": 0.021131981164216995, "learning_rate": 8.09824067860211e-07, "loss": 0.0534, "step": 3922 }, { "epoch": 0.822259484384825, "grad_norm": 0.01895805634558201, "learning_rate": 8.079727496223894e-07, "loss": 0.0551, "step": 3923 }, { "epoch": 0.8224690840494655, "grad_norm": 0.027057627215981483, "learning_rate": 8.061233639082616e-07, "loss": 0.0553, "step": 3924 }, { "epoch": 0.822678683714106, "grad_norm": 0.016223613172769547, "learning_rate": 8.042759115703891e-07, "loss": 0.0534, "step": 3925 }, { "epoch": 0.8228882833787466, "grad_norm": 0.030798260122537613, "learning_rate": 8.024303934604505e-07, "loss": 0.0537, "step": 3926 }, { "epoch": 0.8230978830433872, "grad_norm": 0.01324604731053114, "learning_rate": 8.005868104292291e-07, "loss": 0.0519, "step": 3927 }, { "epoch": 0.8233074827080277, "grad_norm": 0.023022161796689034, "learning_rate": 7.987451633266153e-07, "loss": 0.0524, "step": 3928 }, { "epoch": 0.8235170823726682, "grad_norm": 0.015440089628100395, "learning_rate": 7.969054530016091e-07, "loss": 0.0538, "step": 3929 }, { "epoch": 0.8237266820373087, "grad_norm": 0.023025307804346085, "learning_rate": 7.950676803023149e-07, "loss": 0.0521, "step": 3930 }, { "epoch": 0.8239362817019493, "grad_norm": 0.01681152544915676, "learning_rate": 7.93231846075948e-07, "loss": 0.0525, "step": 3931 }, { "epoch": 0.8241458813665898, "grad_norm": 0.016274144873023033, "learning_rate": 7.913979511688252e-07, "loss": 0.0558, "step": 3932 }, { "epoch": 0.8243554810312304, "grad_norm": 0.016948441043496132, "learning_rate": 7.895659964263725e-07, "loss": 0.0526, "step": 3933 }, { "epoch": 0.8245650806958709, "grad_norm": 0.01407658401876688, "learning_rate": 7.877359826931225e-07, "loss": 0.055, "step": 3934 }, { "epoch": 0.8247746803605114, "grad_norm": 0.011790010146796703, "learning_rate": 7.859079108127088e-07, "loss": 0.0548, "step": 3935 }, { "epoch": 0.824984280025152, "grad_norm": 0.014163525775074959, "learning_rate": 7.840817816278723e-07, "loss": 0.055, "step": 3936 }, { "epoch": 0.8251938796897925, "grad_norm": 0.011464595794677734, "learning_rate": 7.822575959804596e-07, "loss": 0.0534, "step": 3937 }, { "epoch": 0.825403479354433, "grad_norm": 0.012643976137042046, "learning_rate": 7.80435354711418e-07, "loss": 0.0527, "step": 3938 }, { "epoch": 0.8256130790190735, "grad_norm": 0.012431315146386623, "learning_rate": 7.78615058660801e-07, "loss": 0.0529, "step": 3939 }, { "epoch": 0.8258226786837141, "grad_norm": 0.011443213559687138, "learning_rate": 7.767967086677669e-07, "loss": 0.0546, "step": 3940 }, { "epoch": 0.8260322783483547, "grad_norm": 0.012839680537581444, "learning_rate": 7.749803055705723e-07, "loss": 0.054, "step": 3941 }, { "epoch": 0.8262418780129952, "grad_norm": 0.010854445397853851, "learning_rate": 7.731658502065786e-07, "loss": 0.0539, "step": 3942 }, { "epoch": 0.8264514776776357, "grad_norm": 0.01064964011311531, "learning_rate": 7.713533434122494e-07, "loss": 0.0548, "step": 3943 }, { "epoch": 0.8266610773422762, "grad_norm": 0.012149804271757603, "learning_rate": 7.695427860231519e-07, "loss": 0.0553, "step": 3944 }, { "epoch": 0.8268706770069167, "grad_norm": 0.01205404382199049, "learning_rate": 7.677341788739507e-07, "loss": 0.0537, "step": 3945 }, { "epoch": 0.8270802766715574, "grad_norm": 0.014352019876241684, "learning_rate": 7.659275227984142e-07, "loss": 0.0514, "step": 3946 }, { "epoch": 0.8272898763361979, "grad_norm": 0.01178305596113205, "learning_rate": 7.641228186294108e-07, "loss": 0.054, "step": 3947 }, { "epoch": 0.8274994760008384, "grad_norm": 0.020204555243253708, "learning_rate": 7.623200671989067e-07, "loss": 0.0541, "step": 3948 }, { "epoch": 0.8277090756654789, "grad_norm": 0.011678210459649563, "learning_rate": 7.605192693379715e-07, "loss": 0.0558, "step": 3949 }, { "epoch": 0.8279186753301194, "grad_norm": 0.014891520142555237, "learning_rate": 7.587204258767733e-07, "loss": 0.0533, "step": 3950 }, { "epoch": 0.82812827499476, "grad_norm": 0.012660454027354717, "learning_rate": 7.569235376445772e-07, "loss": 0.0535, "step": 3951 }, { "epoch": 0.8283378746594006, "grad_norm": 0.01889774762094021, "learning_rate": 7.551286054697498e-07, "loss": 0.0552, "step": 3952 }, { "epoch": 0.8285474743240411, "grad_norm": 0.011206352151930332, "learning_rate": 7.533356301797523e-07, "loss": 0.0553, "step": 3953 }, { "epoch": 0.8287570739886816, "grad_norm": 0.015002798289060593, "learning_rate": 7.515446126011484e-07, "loss": 0.0552, "step": 3954 }, { "epoch": 0.8289666736533221, "grad_norm": 0.012600685469806194, "learning_rate": 7.497555535595946e-07, "loss": 0.0534, "step": 3955 }, { "epoch": 0.8291762733179627, "grad_norm": 0.01465271133929491, "learning_rate": 7.479684538798476e-07, "loss": 0.0585, "step": 3956 }, { "epoch": 0.8293858729826032, "grad_norm": 0.015729481354355812, "learning_rate": 7.461833143857611e-07, "loss": 0.0515, "step": 3957 }, { "epoch": 0.8295954726472438, "grad_norm": 0.012866144999861717, "learning_rate": 7.444001359002833e-07, "loss": 0.0558, "step": 3958 }, { "epoch": 0.8298050723118843, "grad_norm": 0.019521057605743408, "learning_rate": 7.426189192454575e-07, "loss": 0.0552, "step": 3959 }, { "epoch": 0.8300146719765248, "grad_norm": 0.018146377056837082, "learning_rate": 7.408396652424271e-07, "loss": 0.0508, "step": 3960 }, { "epoch": 0.8302242716411654, "grad_norm": 0.01978515274822712, "learning_rate": 7.39062374711425e-07, "loss": 0.0542, "step": 3961 }, { "epoch": 0.8304338713058059, "grad_norm": 0.015556586906313896, "learning_rate": 7.372870484717843e-07, "loss": 0.0571, "step": 3962 }, { "epoch": 0.8306434709704464, "grad_norm": 0.02219201996922493, "learning_rate": 7.355136873419277e-07, "loss": 0.0563, "step": 3963 }, { "epoch": 0.830853070635087, "grad_norm": 0.01369245070964098, "learning_rate": 7.337422921393767e-07, "loss": 0.0554, "step": 3964 }, { "epoch": 0.8310626702997275, "grad_norm": 0.021102454513311386, "learning_rate": 7.319728636807411e-07, "loss": 0.0518, "step": 3965 }, { "epoch": 0.8312722699643681, "grad_norm": 0.013064729049801826, "learning_rate": 7.302054027817291e-07, "loss": 0.0556, "step": 3966 }, { "epoch": 0.8314818696290086, "grad_norm": 0.015176662243902683, "learning_rate": 7.28439910257141e-07, "loss": 0.057, "step": 3967 }, { "epoch": 0.8316914692936491, "grad_norm": 0.014463878236711025, "learning_rate": 7.266763869208665e-07, "loss": 0.0547, "step": 3968 }, { "epoch": 0.8319010689582896, "grad_norm": 0.012370740063488483, "learning_rate": 7.249148335858891e-07, "loss": 0.0541, "step": 3969 }, { "epoch": 0.8321106686229301, "grad_norm": 0.020610513165593147, "learning_rate": 7.231552510642864e-07, "loss": 0.0552, "step": 3970 }, { "epoch": 0.8323202682875708, "grad_norm": 0.010926956310868263, "learning_rate": 7.213976401672235e-07, "loss": 0.0535, "step": 3971 }, { "epoch": 0.8325298679522113, "grad_norm": 0.021732453256845474, "learning_rate": 7.196420017049599e-07, "loss": 0.0549, "step": 3972 }, { "epoch": 0.8327394676168518, "grad_norm": 0.01048396248370409, "learning_rate": 7.178883364868455e-07, "loss": 0.0557, "step": 3973 }, { "epoch": 0.8329490672814923, "grad_norm": 0.02181028202176094, "learning_rate": 7.161366453213181e-07, "loss": 0.0557, "step": 3974 }, { "epoch": 0.8331586669461328, "grad_norm": 0.012369881384074688, "learning_rate": 7.143869290159067e-07, "loss": 0.0522, "step": 3975 }, { "epoch": 0.8333682666107735, "grad_norm": 0.02149048261344433, "learning_rate": 7.12639188377231e-07, "loss": 0.059, "step": 3976 }, { "epoch": 0.833577866275414, "grad_norm": 0.013264495879411697, "learning_rate": 7.108934242109994e-07, "loss": 0.0581, "step": 3977 }, { "epoch": 0.8337874659400545, "grad_norm": 0.019909605383872986, "learning_rate": 7.091496373220075e-07, "loss": 0.0549, "step": 3978 }, { "epoch": 0.833997065604695, "grad_norm": 0.02005838230252266, "learning_rate": 7.074078285141428e-07, "loss": 0.0534, "step": 3979 }, { "epoch": 0.8342066652693356, "grad_norm": 0.0182628370821476, "learning_rate": 7.056679985903774e-07, "loss": 0.0565, "step": 3980 }, { "epoch": 0.8344162649339761, "grad_norm": 0.01993626542389393, "learning_rate": 7.03930148352771e-07, "loss": 0.0518, "step": 3981 }, { "epoch": 0.8346258645986167, "grad_norm": 0.017620259895920753, "learning_rate": 7.021942786024743e-07, "loss": 0.0566, "step": 3982 }, { "epoch": 0.8348354642632572, "grad_norm": 0.012461712583899498, "learning_rate": 7.004603901397239e-07, "loss": 0.0549, "step": 3983 }, { "epoch": 0.8350450639278977, "grad_norm": 0.009824151173233986, "learning_rate": 6.987284837638391e-07, "loss": 0.0525, "step": 3984 }, { "epoch": 0.8352546635925383, "grad_norm": 0.018029648810625076, "learning_rate": 6.969985602732309e-07, "loss": 0.0559, "step": 3985 }, { "epoch": 0.8354642632571788, "grad_norm": 0.01331211719661951, "learning_rate": 6.952706204653914e-07, "loss": 0.0536, "step": 3986 }, { "epoch": 0.8356738629218193, "grad_norm": 0.0163306575268507, "learning_rate": 6.935446651369027e-07, "loss": 0.0557, "step": 3987 }, { "epoch": 0.8358834625864598, "grad_norm": 0.012105715461075306, "learning_rate": 6.918206950834283e-07, "loss": 0.054, "step": 3988 }, { "epoch": 0.8360930622511004, "grad_norm": 0.013904665596783161, "learning_rate": 6.900987110997182e-07, "loss": 0.0538, "step": 3989 }, { "epoch": 0.836302661915741, "grad_norm": 0.012371689081192017, "learning_rate": 6.883787139796078e-07, "loss": 0.0539, "step": 3990 }, { "epoch": 0.8365122615803815, "grad_norm": 0.012469745241105556, "learning_rate": 6.866607045160151e-07, "loss": 0.0568, "step": 3991 }, { "epoch": 0.836721861245022, "grad_norm": 0.012731354683637619, "learning_rate": 6.849446835009405e-07, "loss": 0.0524, "step": 3992 }, { "epoch": 0.8369314609096625, "grad_norm": 0.012761647813022137, "learning_rate": 6.832306517254716e-07, "loss": 0.0523, "step": 3993 }, { "epoch": 0.837141060574303, "grad_norm": 0.013718758709728718, "learning_rate": 6.815186099797744e-07, "loss": 0.0522, "step": 3994 }, { "epoch": 0.8373506602389437, "grad_norm": 0.015886375680565834, "learning_rate": 6.798085590531012e-07, "loss": 0.0548, "step": 3995 }, { "epoch": 0.8375602599035842, "grad_norm": 0.01095049548894167, "learning_rate": 6.78100499733787e-07, "loss": 0.0551, "step": 3996 }, { "epoch": 0.8377698595682247, "grad_norm": 0.015172593295574188, "learning_rate": 6.76394432809242e-07, "loss": 0.0546, "step": 3997 }, { "epoch": 0.8379794592328652, "grad_norm": 0.015550515614449978, "learning_rate": 6.746903590659659e-07, "loss": 0.0529, "step": 3998 }, { "epoch": 0.8381890588975057, "grad_norm": 0.014822781085968018, "learning_rate": 6.729882792895359e-07, "loss": 0.0542, "step": 3999 }, { "epoch": 0.8383986585621463, "grad_norm": 0.01649445667862892, "learning_rate": 6.71288194264611e-07, "loss": 0.0533, "step": 4000 }, { "epoch": 0.8386082582267869, "grad_norm": 0.015881575644016266, "learning_rate": 6.695901047749298e-07, "loss": 0.0535, "step": 4001 }, { "epoch": 0.8388178578914274, "grad_norm": 0.020809467881917953, "learning_rate": 6.678940116033095e-07, "loss": 0.0549, "step": 4002 }, { "epoch": 0.8390274575560679, "grad_norm": 0.013579810969531536, "learning_rate": 6.661999155316512e-07, "loss": 0.0518, "step": 4003 }, { "epoch": 0.8392370572207084, "grad_norm": 0.028101500123739243, "learning_rate": 6.645078173409303e-07, "loss": 0.052, "step": 4004 }, { "epoch": 0.839446656885349, "grad_norm": 0.013599547557532787, "learning_rate": 6.628177178112055e-07, "loss": 0.0546, "step": 4005 }, { "epoch": 0.8396562565499895, "grad_norm": 0.028915736824274063, "learning_rate": 6.611296177216125e-07, "loss": 0.0564, "step": 4006 }, { "epoch": 0.8398658562146301, "grad_norm": 0.016699662432074547, "learning_rate": 6.594435178503644e-07, "loss": 0.0523, "step": 4007 }, { "epoch": 0.8400754558792706, "grad_norm": 0.02233414351940155, "learning_rate": 6.577594189747521e-07, "loss": 0.0537, "step": 4008 }, { "epoch": 0.8402850555439111, "grad_norm": 0.021178172901272774, "learning_rate": 6.560773218711458e-07, "loss": 0.0539, "step": 4009 }, { "epoch": 0.8404946552085517, "grad_norm": 0.022526554763317108, "learning_rate": 6.543972273149928e-07, "loss": 0.0546, "step": 4010 }, { "epoch": 0.8407042548731922, "grad_norm": 0.02022298239171505, "learning_rate": 6.527191360808144e-07, "loss": 0.0568, "step": 4011 }, { "epoch": 0.8409138545378327, "grad_norm": 0.020070351660251617, "learning_rate": 6.51043048942212e-07, "loss": 0.0559, "step": 4012 }, { "epoch": 0.8411234542024733, "grad_norm": 0.019938215613365173, "learning_rate": 6.493689666718611e-07, "loss": 0.0538, "step": 4013 }, { "epoch": 0.8413330538671138, "grad_norm": 0.021108483895659447, "learning_rate": 6.476968900415115e-07, "loss": 0.053, "step": 4014 }, { "epoch": 0.8415426535317544, "grad_norm": 0.02614649012684822, "learning_rate": 6.460268198219916e-07, "loss": 0.0533, "step": 4015 }, { "epoch": 0.8417522531963949, "grad_norm": 0.021795378997921944, "learning_rate": 6.443587567832044e-07, "loss": 0.0542, "step": 4016 }, { "epoch": 0.8419618528610354, "grad_norm": 0.030953634530305862, "learning_rate": 6.426927016941248e-07, "loss": 0.0536, "step": 4017 }, { "epoch": 0.8421714525256759, "grad_norm": 0.0239299014210701, "learning_rate": 6.410286553228052e-07, "loss": 0.0575, "step": 4018 }, { "epoch": 0.8423810521903164, "grad_norm": 0.035018209367990494, "learning_rate": 6.393666184363701e-07, "loss": 0.0552, "step": 4019 }, { "epoch": 0.8425906518549571, "grad_norm": 0.01738305762410164, "learning_rate": 6.377065918010173e-07, "loss": 0.0514, "step": 4020 }, { "epoch": 0.8428002515195976, "grad_norm": 0.027201887220144272, "learning_rate": 6.360485761820195e-07, "loss": 0.0553, "step": 4021 }, { "epoch": 0.8430098511842381, "grad_norm": 0.01937497965991497, "learning_rate": 6.343925723437217e-07, "loss": 0.0526, "step": 4022 }, { "epoch": 0.8432194508488786, "grad_norm": 0.018232090398669243, "learning_rate": 6.327385810495423e-07, "loss": 0.0567, "step": 4023 }, { "epoch": 0.8434290505135191, "grad_norm": 0.01994934491813183, "learning_rate": 6.310866030619694e-07, "loss": 0.0548, "step": 4024 }, { "epoch": 0.8436386501781598, "grad_norm": 0.016245190054178238, "learning_rate": 6.294366391425643e-07, "loss": 0.0534, "step": 4025 }, { "epoch": 0.8438482498428003, "grad_norm": 0.018503889441490173, "learning_rate": 6.27788690051962e-07, "loss": 0.0547, "step": 4026 }, { "epoch": 0.8440578495074408, "grad_norm": 0.015852371230721474, "learning_rate": 6.26142756549864e-07, "loss": 0.0561, "step": 4027 }, { "epoch": 0.8442674491720813, "grad_norm": 0.012544246390461922, "learning_rate": 6.244988393950469e-07, "loss": 0.0558, "step": 4028 }, { "epoch": 0.8444770488367218, "grad_norm": 0.01918352209031582, "learning_rate": 6.228569393453582e-07, "loss": 0.052, "step": 4029 }, { "epoch": 0.8446866485013624, "grad_norm": 0.014716164208948612, "learning_rate": 6.212170571577087e-07, "loss": 0.0517, "step": 4030 }, { "epoch": 0.844896248166003, "grad_norm": 0.015544608235359192, "learning_rate": 6.195791935880868e-07, "loss": 0.0523, "step": 4031 }, { "epoch": 0.8451058478306435, "grad_norm": 0.016333509236574173, "learning_rate": 6.17943349391546e-07, "loss": 0.0548, "step": 4032 }, { "epoch": 0.845315447495284, "grad_norm": 0.018222004175186157, "learning_rate": 6.163095253222129e-07, "loss": 0.052, "step": 4033 }, { "epoch": 0.8455250471599245, "grad_norm": 0.022488001734018326, "learning_rate": 6.146777221332772e-07, "loss": 0.0538, "step": 4034 }, { "epoch": 0.8457346468245651, "grad_norm": 0.019987892359495163, "learning_rate": 6.130479405770002e-07, "loss": 0.0545, "step": 4035 }, { "epoch": 0.8459442464892056, "grad_norm": 0.01848592422902584, "learning_rate": 6.114201814047122e-07, "loss": 0.053, "step": 4036 }, { "epoch": 0.8461538461538461, "grad_norm": 0.011879503726959229, "learning_rate": 6.097944453668081e-07, "loss": 0.0537, "step": 4037 }, { "epoch": 0.8463634458184867, "grad_norm": 0.019580353051424026, "learning_rate": 6.081707332127523e-07, "loss": 0.0525, "step": 4038 }, { "epoch": 0.8465730454831272, "grad_norm": 0.015657026320695877, "learning_rate": 6.065490456910777e-07, "loss": 0.0554, "step": 4039 }, { "epoch": 0.8467826451477678, "grad_norm": 0.021646184846758842, "learning_rate": 6.049293835493803e-07, "loss": 0.0539, "step": 4040 }, { "epoch": 0.8469922448124083, "grad_norm": 0.019581638276576996, "learning_rate": 6.03311747534323e-07, "loss": 0.0546, "step": 4041 }, { "epoch": 0.8472018444770488, "grad_norm": 0.019119396805763245, "learning_rate": 6.016961383916381e-07, "loss": 0.0526, "step": 4042 }, { "epoch": 0.8474114441416893, "grad_norm": 0.018327955156564713, "learning_rate": 6.000825568661184e-07, "loss": 0.0533, "step": 4043 }, { "epoch": 0.8476210438063299, "grad_norm": 0.012688934803009033, "learning_rate": 5.984710037016267e-07, "loss": 0.0557, "step": 4044 }, { "epoch": 0.8478306434709705, "grad_norm": 0.01570408046245575, "learning_rate": 5.968614796410882e-07, "loss": 0.0533, "step": 4045 }, { "epoch": 0.848040243135611, "grad_norm": 0.013904701918363571, "learning_rate": 5.952539854264938e-07, "loss": 0.0549, "step": 4046 }, { "epoch": 0.8482498428002515, "grad_norm": 0.0192690659314394, "learning_rate": 5.936485217988958e-07, "loss": 0.0524, "step": 4047 }, { "epoch": 0.848459442464892, "grad_norm": 0.013410338200628757, "learning_rate": 5.920450894984137e-07, "loss": 0.0537, "step": 4048 }, { "epoch": 0.8486690421295326, "grad_norm": 0.014187265187501907, "learning_rate": 5.904436892642306e-07, "loss": 0.0548, "step": 4049 }, { "epoch": 0.8488786417941732, "grad_norm": 0.012049532495439053, "learning_rate": 5.888443218345907e-07, "loss": 0.0562, "step": 4050 }, { "epoch": 0.8490882414588137, "grad_norm": 0.012612142600119114, "learning_rate": 5.872469879468024e-07, "loss": 0.057, "step": 4051 }, { "epoch": 0.8492978411234542, "grad_norm": 0.016537649556994438, "learning_rate": 5.856516883372365e-07, "loss": 0.0538, "step": 4052 }, { "epoch": 0.8495074407880947, "grad_norm": 0.012568135745823383, "learning_rate": 5.840584237413239e-07, "loss": 0.0545, "step": 4053 }, { "epoch": 0.8497170404527353, "grad_norm": 0.013640418648719788, "learning_rate": 5.824671948935606e-07, "loss": 0.0567, "step": 4054 }, { "epoch": 0.8499266401173758, "grad_norm": 0.016170648857951164, "learning_rate": 5.808780025275045e-07, "loss": 0.054, "step": 4055 }, { "epoch": 0.8501362397820164, "grad_norm": 0.014185959473252296, "learning_rate": 5.792908473757697e-07, "loss": 0.0535, "step": 4056 }, { "epoch": 0.8503458394466569, "grad_norm": 0.01882059872150421, "learning_rate": 5.777057301700372e-07, "loss": 0.0532, "step": 4057 }, { "epoch": 0.8505554391112974, "grad_norm": 0.01410512626171112, "learning_rate": 5.761226516410434e-07, "loss": 0.0541, "step": 4058 }, { "epoch": 0.850765038775938, "grad_norm": 0.01758696883916855, "learning_rate": 5.745416125185898e-07, "loss": 0.0543, "step": 4059 }, { "epoch": 0.8509746384405785, "grad_norm": 0.015843121334910393, "learning_rate": 5.729626135315319e-07, "loss": 0.0518, "step": 4060 }, { "epoch": 0.851184238105219, "grad_norm": 0.015141872689127922, "learning_rate": 5.713856554077901e-07, "loss": 0.056, "step": 4061 }, { "epoch": 0.8513938377698596, "grad_norm": 0.016413336619734764, "learning_rate": 5.698107388743418e-07, "loss": 0.0559, "step": 4062 }, { "epoch": 0.8516034374345001, "grad_norm": 0.016290897503495216, "learning_rate": 5.682378646572229e-07, "loss": 0.0511, "step": 4063 }, { "epoch": 0.8518130370991407, "grad_norm": 0.01656182110309601, "learning_rate": 5.666670334815267e-07, "loss": 0.0546, "step": 4064 }, { "epoch": 0.8520226367637812, "grad_norm": 0.01612510345876217, "learning_rate": 5.650982460714083e-07, "loss": 0.0572, "step": 4065 }, { "epoch": 0.8522322364284217, "grad_norm": 0.01949678361415863, "learning_rate": 5.635315031500766e-07, "loss": 0.0533, "step": 4066 }, { "epoch": 0.8524418360930622, "grad_norm": 0.017564570531249046, "learning_rate": 5.619668054398008e-07, "loss": 0.0558, "step": 4067 }, { "epoch": 0.8526514357577027, "grad_norm": 0.024784216657280922, "learning_rate": 5.604041536619048e-07, "loss": 0.0532, "step": 4068 }, { "epoch": 0.8528610354223434, "grad_norm": 0.020440300926566124, "learning_rate": 5.588435485367733e-07, "loss": 0.0522, "step": 4069 }, { "epoch": 0.8530706350869839, "grad_norm": 0.024173369631171227, "learning_rate": 5.572849907838423e-07, "loss": 0.0533, "step": 4070 }, { "epoch": 0.8532802347516244, "grad_norm": 0.016502492129802704, "learning_rate": 5.557284811216074e-07, "loss": 0.0526, "step": 4071 }, { "epoch": 0.8534898344162649, "grad_norm": 0.020477591082453728, "learning_rate": 5.54174020267621e-07, "loss": 0.051, "step": 4072 }, { "epoch": 0.8536994340809054, "grad_norm": 0.01553434506058693, "learning_rate": 5.526216089384872e-07, "loss": 0.0532, "step": 4073 }, { "epoch": 0.853909033745546, "grad_norm": 0.016066189855337143, "learning_rate": 5.510712478498675e-07, "loss": 0.0515, "step": 4074 }, { "epoch": 0.8541186334101866, "grad_norm": 0.017956750467419624, "learning_rate": 5.49522937716479e-07, "loss": 0.054, "step": 4075 }, { "epoch": 0.8543282330748271, "grad_norm": 0.016211628913879395, "learning_rate": 5.479766792520908e-07, "loss": 0.0554, "step": 4076 }, { "epoch": 0.8545378327394676, "grad_norm": 0.02058112435042858, "learning_rate": 5.464324731695286e-07, "loss": 0.0564, "step": 4077 }, { "epoch": 0.8547474324041081, "grad_norm": 0.020162228494882584, "learning_rate": 5.448903201806727e-07, "loss": 0.0581, "step": 4078 }, { "epoch": 0.8549570320687487, "grad_norm": 0.017797335982322693, "learning_rate": 5.433502209964531e-07, "loss": 0.0549, "step": 4079 }, { "epoch": 0.8551666317333892, "grad_norm": 0.017947765067219734, "learning_rate": 5.418121763268553e-07, "loss": 0.0527, "step": 4080 }, { "epoch": 0.8553762313980298, "grad_norm": 0.02111784741282463, "learning_rate": 5.402761868809181e-07, "loss": 0.0541, "step": 4081 }, { "epoch": 0.8555858310626703, "grad_norm": 0.023570656776428223, "learning_rate": 5.387422533667336e-07, "loss": 0.0538, "step": 4082 }, { "epoch": 0.8557954307273108, "grad_norm": 0.023569107055664062, "learning_rate": 5.372103764914421e-07, "loss": 0.0557, "step": 4083 }, { "epoch": 0.8560050303919514, "grad_norm": 0.02265586517751217, "learning_rate": 5.356805569612417e-07, "loss": 0.0549, "step": 4084 }, { "epoch": 0.8562146300565919, "grad_norm": 0.03135437145829201, "learning_rate": 5.341527954813763e-07, "loss": 0.0536, "step": 4085 }, { "epoch": 0.8564242297212324, "grad_norm": 0.015480936504900455, "learning_rate": 5.326270927561444e-07, "loss": 0.0559, "step": 4086 }, { "epoch": 0.856633829385873, "grad_norm": 0.025915948674082756, "learning_rate": 5.311034494888945e-07, "loss": 0.0521, "step": 4087 }, { "epoch": 0.8568434290505135, "grad_norm": 0.025078527629375458, "learning_rate": 5.295818663820268e-07, "loss": 0.0523, "step": 4088 }, { "epoch": 0.8570530287151541, "grad_norm": 0.015537966974079609, "learning_rate": 5.280623441369897e-07, "loss": 0.0529, "step": 4089 }, { "epoch": 0.8572626283797946, "grad_norm": 0.028883641585707664, "learning_rate": 5.265448834542836e-07, "loss": 0.0587, "step": 4090 }, { "epoch": 0.8574722280444351, "grad_norm": 0.018950091674923897, "learning_rate": 5.250294850334564e-07, "loss": 0.0531, "step": 4091 }, { "epoch": 0.8576818277090756, "grad_norm": 0.019102994352579117, "learning_rate": 5.235161495731079e-07, "loss": 0.0513, "step": 4092 }, { "epoch": 0.8578914273737162, "grad_norm": 0.02294645458459854, "learning_rate": 5.22004877770883e-07, "loss": 0.0553, "step": 4093 }, { "epoch": 0.8581010270383568, "grad_norm": 0.018397770822048187, "learning_rate": 5.204956703234804e-07, "loss": 0.0538, "step": 4094 }, { "epoch": 0.8583106267029973, "grad_norm": 0.019318964332342148, "learning_rate": 5.189885279266433e-07, "loss": 0.0546, "step": 4095 }, { "epoch": 0.8585202263676378, "grad_norm": 0.018967201933264732, "learning_rate": 5.174834512751647e-07, "loss": 0.055, "step": 4096 }, { "epoch": 0.8587298260322783, "grad_norm": 0.016506996005773544, "learning_rate": 5.159804410628827e-07, "loss": 0.0524, "step": 4097 }, { "epoch": 0.8589394256969188, "grad_norm": 0.015648337081074715, "learning_rate": 5.144794979826867e-07, "loss": 0.0533, "step": 4098 }, { "epoch": 0.8591490253615595, "grad_norm": 0.016446251422166824, "learning_rate": 5.129806227265099e-07, "loss": 0.0533, "step": 4099 }, { "epoch": 0.8593586250262, "grad_norm": 0.020193303003907204, "learning_rate": 5.114838159853336e-07, "loss": 0.0549, "step": 4100 }, { "epoch": 0.8595682246908405, "grad_norm": 0.015607200562953949, "learning_rate": 5.099890784491879e-07, "loss": 0.0559, "step": 4101 }, { "epoch": 0.859777824355481, "grad_norm": 0.01757110469043255, "learning_rate": 5.084964108071428e-07, "loss": 0.0546, "step": 4102 }, { "epoch": 0.8599874240201215, "grad_norm": 0.021767864003777504, "learning_rate": 5.070058137473188e-07, "loss": 0.0536, "step": 4103 }, { "epoch": 0.8601970236847621, "grad_norm": 0.01381634920835495, "learning_rate": 5.055172879568815e-07, "loss": 0.0543, "step": 4104 }, { "epoch": 0.8604066233494027, "grad_norm": 0.016660235822200775, "learning_rate": 5.040308341220418e-07, "loss": 0.0551, "step": 4105 }, { "epoch": 0.8606162230140432, "grad_norm": 0.0208145659416914, "learning_rate": 5.025464529280538e-07, "loss": 0.0551, "step": 4106 }, { "epoch": 0.8608258226786837, "grad_norm": 0.013750380836427212, "learning_rate": 5.010641450592158e-07, "loss": 0.0538, "step": 4107 }, { "epoch": 0.8610354223433242, "grad_norm": 0.01817614957690239, "learning_rate": 4.995839111988737e-07, "loss": 0.0553, "step": 4108 }, { "epoch": 0.8612450220079648, "grad_norm": 0.019916629418730736, "learning_rate": 4.981057520294124e-07, "loss": 0.0522, "step": 4109 }, { "epoch": 0.8614546216726053, "grad_norm": 0.012952596880495548, "learning_rate": 4.966296682322641e-07, "loss": 0.0554, "step": 4110 }, { "epoch": 0.8616642213372459, "grad_norm": 0.01568775624036789, "learning_rate": 4.951556604879049e-07, "loss": 0.0523, "step": 4111 }, { "epoch": 0.8618738210018864, "grad_norm": 0.020983314141631126, "learning_rate": 4.936837294758501e-07, "loss": 0.0545, "step": 4112 }, { "epoch": 0.8620834206665269, "grad_norm": 0.017767738550901413, "learning_rate": 4.922138758746587e-07, "loss": 0.0548, "step": 4113 }, { "epoch": 0.8622930203311675, "grad_norm": 0.016531750559806824, "learning_rate": 4.907461003619346e-07, "loss": 0.0573, "step": 4114 }, { "epoch": 0.862502619995808, "grad_norm": 0.019049130380153656, "learning_rate": 4.892804036143223e-07, "loss": 0.0538, "step": 4115 }, { "epoch": 0.8627122196604485, "grad_norm": 0.01858980767428875, "learning_rate": 4.878167863075061e-07, "loss": 0.054, "step": 4116 }, { "epoch": 0.862921819325089, "grad_norm": 0.013377662748098373, "learning_rate": 4.863552491162149e-07, "loss": 0.0547, "step": 4117 }, { "epoch": 0.8631314189897297, "grad_norm": 0.022448167204856873, "learning_rate": 4.848957927142167e-07, "loss": 0.0553, "step": 4118 }, { "epoch": 0.8633410186543702, "grad_norm": 0.019559383392333984, "learning_rate": 4.83438417774319e-07, "loss": 0.0549, "step": 4119 }, { "epoch": 0.8635506183190107, "grad_norm": 0.017378434538841248, "learning_rate": 4.819831249683726e-07, "loss": 0.0533, "step": 4120 }, { "epoch": 0.8637602179836512, "grad_norm": 0.022112280130386353, "learning_rate": 4.805299149672682e-07, "loss": 0.0558, "step": 4121 }, { "epoch": 0.8639698176482917, "grad_norm": 0.02191154658794403, "learning_rate": 4.790787884409332e-07, "loss": 0.0577, "step": 4122 }, { "epoch": 0.8641794173129324, "grad_norm": 0.015262911096215248, "learning_rate": 4.776297460583384e-07, "loss": 0.057, "step": 4123 }, { "epoch": 0.8643890169775729, "grad_norm": 0.01982884109020233, "learning_rate": 4.7618278848749146e-07, "loss": 0.0536, "step": 4124 }, { "epoch": 0.8645986166422134, "grad_norm": 0.024044396355748177, "learning_rate": 4.7473791639543853e-07, "loss": 0.0529, "step": 4125 }, { "epoch": 0.8648082163068539, "grad_norm": 0.015614238567650318, "learning_rate": 4.732951304482658e-07, "loss": 0.0541, "step": 4126 }, { "epoch": 0.8650178159714944, "grad_norm": 0.02073797397315502, "learning_rate": 4.7185443131109785e-07, "loss": 0.0526, "step": 4127 }, { "epoch": 0.865227415636135, "grad_norm": 0.026310456916689873, "learning_rate": 4.7041581964809733e-07, "loss": 0.0539, "step": 4128 }, { "epoch": 0.8654370153007755, "grad_norm": 0.02114870585501194, "learning_rate": 4.6897929612246317e-07, "loss": 0.0532, "step": 4129 }, { "epoch": 0.8656466149654161, "grad_norm": 0.020745746791362762, "learning_rate": 4.675448613964317e-07, "loss": 0.0538, "step": 4130 }, { "epoch": 0.8658562146300566, "grad_norm": 0.02710759826004505, "learning_rate": 4.6611251613127793e-07, "loss": 0.0548, "step": 4131 }, { "epoch": 0.8660658142946971, "grad_norm": 0.020626511424779892, "learning_rate": 4.64682260987312e-07, "loss": 0.0534, "step": 4132 }, { "epoch": 0.8662754139593377, "grad_norm": 0.020137401297688484, "learning_rate": 4.6325409662388133e-07, "loss": 0.0547, "step": 4133 }, { "epoch": 0.8664850136239782, "grad_norm": 0.0274630356580019, "learning_rate": 4.618280236993711e-07, "loss": 0.0579, "step": 4134 }, { "epoch": 0.8666946132886187, "grad_norm": 0.02381911128759384, "learning_rate": 4.6040404287119924e-07, "loss": 0.0563, "step": 4135 }, { "epoch": 0.8669042129532593, "grad_norm": 0.014909719116985798, "learning_rate": 4.589821547958195e-07, "loss": 0.0531, "step": 4136 }, { "epoch": 0.8671138126178998, "grad_norm": 0.02371630258858204, "learning_rate": 4.5756236012872324e-07, "loss": 0.053, "step": 4137 }, { "epoch": 0.8673234122825404, "grad_norm": 0.028402652591466904, "learning_rate": 4.561446595244362e-07, "loss": 0.0536, "step": 4138 }, { "epoch": 0.8675330119471809, "grad_norm": 0.01664336584508419, "learning_rate": 4.547290536365173e-07, "loss": 0.0545, "step": 4139 }, { "epoch": 0.8677426116118214, "grad_norm": 0.022892151027917862, "learning_rate": 4.5331554311755956e-07, "loss": 0.0527, "step": 4140 }, { "epoch": 0.8679522112764619, "grad_norm": 0.027045048773288727, "learning_rate": 4.519041286191933e-07, "loss": 0.053, "step": 4141 }, { "epoch": 0.8681618109411025, "grad_norm": 0.021929344162344933, "learning_rate": 4.504948107920781e-07, "loss": 0.0518, "step": 4142 }, { "epoch": 0.8683714106057431, "grad_norm": 0.016940593719482422, "learning_rate": 4.490875902859099e-07, "loss": 0.0526, "step": 4143 }, { "epoch": 0.8685810102703836, "grad_norm": 0.03185591474175453, "learning_rate": 4.476824677494179e-07, "loss": 0.0529, "step": 4144 }, { "epoch": 0.8687906099350241, "grad_norm": 0.02771037444472313, "learning_rate": 4.46279443830363e-07, "loss": 0.0572, "step": 4145 }, { "epoch": 0.8690002095996646, "grad_norm": 0.017459120601415634, "learning_rate": 4.448785191755378e-07, "loss": 0.0521, "step": 4146 }, { "epoch": 0.8692098092643051, "grad_norm": 0.03185920789837837, "learning_rate": 4.4347969443076956e-07, "loss": 0.0549, "step": 4147 }, { "epoch": 0.8694194089289458, "grad_norm": 0.03139664977788925, "learning_rate": 4.420829702409152e-07, "loss": 0.053, "step": 4148 }, { "epoch": 0.8696290085935863, "grad_norm": 0.023265665397047997, "learning_rate": 4.4068834724986466e-07, "loss": 0.054, "step": 4149 }, { "epoch": 0.8698386082582268, "grad_norm": 0.026613622903823853, "learning_rate": 4.3929582610053976e-07, "loss": 0.0566, "step": 4150 }, { "epoch": 0.8700482079228673, "grad_norm": 0.03510505333542824, "learning_rate": 4.3790540743489207e-07, "loss": 0.0543, "step": 4151 }, { "epoch": 0.8702578075875078, "grad_norm": 0.03017215058207512, "learning_rate": 4.365170918939027e-07, "loss": 0.0546, "step": 4152 }, { "epoch": 0.8704674072521484, "grad_norm": 0.020732155069708824, "learning_rate": 4.351308801175863e-07, "loss": 0.0524, "step": 4153 }, { "epoch": 0.870677006916789, "grad_norm": 0.032381054013967514, "learning_rate": 4.337467727449862e-07, "loss": 0.0544, "step": 4154 }, { "epoch": 0.8708866065814295, "grad_norm": 0.03272484615445137, "learning_rate": 4.323647704141754e-07, "loss": 0.0546, "step": 4155 }, { "epoch": 0.87109620624607, "grad_norm": 0.019726725295186043, "learning_rate": 4.309848737622568e-07, "loss": 0.0547, "step": 4156 }, { "epoch": 0.8713058059107105, "grad_norm": 0.0256227757781744, "learning_rate": 4.2960708342536295e-07, "loss": 0.0544, "step": 4157 }, { "epoch": 0.8715154055753511, "grad_norm": 0.03493373841047287, "learning_rate": 4.2823140003865283e-07, "loss": 0.0541, "step": 4158 }, { "epoch": 0.8717250052399916, "grad_norm": 0.022653086110949516, "learning_rate": 4.2685782423631806e-07, "loss": 0.0534, "step": 4159 }, { "epoch": 0.8719346049046321, "grad_norm": 0.021094655618071556, "learning_rate": 4.2548635665157713e-07, "loss": 0.0501, "step": 4160 }, { "epoch": 0.8721442045692727, "grad_norm": 0.03143506497144699, "learning_rate": 4.241169979166748e-07, "loss": 0.0533, "step": 4161 }, { "epoch": 0.8723538042339132, "grad_norm": 0.029018806293606758, "learning_rate": 4.2274974866288675e-07, "loss": 0.0539, "step": 4162 }, { "epoch": 0.8725634038985538, "grad_norm": 0.01749360002577305, "learning_rate": 4.213846095205126e-07, "loss": 0.0518, "step": 4163 }, { "epoch": 0.8727730035631943, "grad_norm": 0.02830035425722599, "learning_rate": 4.2002158111888345e-07, "loss": 0.0551, "step": 4164 }, { "epoch": 0.8729826032278348, "grad_norm": 0.03221756964921951, "learning_rate": 4.186606640863533e-07, "loss": 0.054, "step": 4165 }, { "epoch": 0.8731922028924753, "grad_norm": 0.021834442391991615, "learning_rate": 4.1730185905030527e-07, "loss": 0.0535, "step": 4166 }, { "epoch": 0.8734018025571159, "grad_norm": 0.025496196001768112, "learning_rate": 4.1594516663714946e-07, "loss": 0.0545, "step": 4167 }, { "epoch": 0.8736114022217565, "grad_norm": 0.031498175114393234, "learning_rate": 4.145905874723194e-07, "loss": 0.0569, "step": 4168 }, { "epoch": 0.873821001886397, "grad_norm": 0.023885555565357208, "learning_rate": 4.1323812218027506e-07, "loss": 0.0548, "step": 4169 }, { "epoch": 0.8740306015510375, "grad_norm": 0.019355174154043198, "learning_rate": 4.1188777138450487e-07, "loss": 0.0546, "step": 4170 }, { "epoch": 0.874240201215678, "grad_norm": 0.029070964083075523, "learning_rate": 4.1053953570751813e-07, "loss": 0.055, "step": 4171 }, { "epoch": 0.8744498008803185, "grad_norm": 0.030413752421736717, "learning_rate": 4.0919341577085157e-07, "loss": 0.0537, "step": 4172 }, { "epoch": 0.8746594005449592, "grad_norm": 0.0210228580981493, "learning_rate": 4.078494121950682e-07, "loss": 0.0545, "step": 4173 }, { "epoch": 0.8748690002095997, "grad_norm": 0.026022691279649734, "learning_rate": 4.065075255997514e-07, "loss": 0.0524, "step": 4174 }, { "epoch": 0.8750785998742402, "grad_norm": 0.03338058665394783, "learning_rate": 4.051677566035106e-07, "loss": 0.0534, "step": 4175 }, { "epoch": 0.8752881995388807, "grad_norm": 0.024644313380122185, "learning_rate": 4.038301058239796e-07, "loss": 0.0516, "step": 4176 }, { "epoch": 0.8754977992035212, "grad_norm": 0.019061360508203506, "learning_rate": 4.024945738778163e-07, "loss": 0.0525, "step": 4177 }, { "epoch": 0.8757073988681618, "grad_norm": 0.035856083035469055, "learning_rate": 4.011611613806987e-07, "loss": 0.0563, "step": 4178 }, { "epoch": 0.8759169985328024, "grad_norm": 0.03140418231487274, "learning_rate": 3.998298689473301e-07, "loss": 0.0531, "step": 4179 }, { "epoch": 0.8761265981974429, "grad_norm": 0.01924203895032406, "learning_rate": 3.9850069719143735e-07, "loss": 0.0564, "step": 4180 }, { "epoch": 0.8763361978620834, "grad_norm": 0.028375735506415367, "learning_rate": 3.971736467257659e-07, "loss": 0.0528, "step": 4181 }, { "epoch": 0.8765457975267239, "grad_norm": 0.03424501046538353, "learning_rate": 3.958487181620879e-07, "loss": 0.0542, "step": 4182 }, { "epoch": 0.8767553971913645, "grad_norm": 0.02565137669444084, "learning_rate": 3.9452591211119496e-07, "loss": 0.0556, "step": 4183 }, { "epoch": 0.876964996856005, "grad_norm": 0.021563977003097534, "learning_rate": 3.9320522918289973e-07, "loss": 0.0527, "step": 4184 }, { "epoch": 0.8771745965206456, "grad_norm": 0.027671657502651215, "learning_rate": 3.918866699860363e-07, "loss": 0.0522, "step": 4185 }, { "epoch": 0.8773841961852861, "grad_norm": 0.032790180295705795, "learning_rate": 3.9057023512846127e-07, "loss": 0.056, "step": 4186 }, { "epoch": 0.8775937958499267, "grad_norm": 0.023974338546395302, "learning_rate": 3.892559252170508e-07, "loss": 0.0528, "step": 4187 }, { "epoch": 0.8778033955145672, "grad_norm": 0.02199658937752247, "learning_rate": 3.879437408576997e-07, "loss": 0.0536, "step": 4188 }, { "epoch": 0.8780129951792077, "grad_norm": 0.03704606741666794, "learning_rate": 3.866336826553274e-07, "loss": 0.0531, "step": 4189 }, { "epoch": 0.8782225948438482, "grad_norm": 0.027375850826501846, "learning_rate": 3.8532575121386916e-07, "loss": 0.0519, "step": 4190 }, { "epoch": 0.8784321945084888, "grad_norm": 0.01718190312385559, "learning_rate": 3.8401994713628044e-07, "loss": 0.0551, "step": 4191 }, { "epoch": 0.8786417941731294, "grad_norm": 0.031402431428432465, "learning_rate": 3.827162710245369e-07, "loss": 0.0557, "step": 4192 }, { "epoch": 0.8788513938377699, "grad_norm": 0.033082105219364166, "learning_rate": 3.814147234796345e-07, "loss": 0.0532, "step": 4193 }, { "epoch": 0.8790609935024104, "grad_norm": 0.02236991748213768, "learning_rate": 3.8011530510158377e-07, "loss": 0.0544, "step": 4194 }, { "epoch": 0.8792705931670509, "grad_norm": 0.02740001678466797, "learning_rate": 3.78818016489419e-07, "loss": 0.0549, "step": 4195 }, { "epoch": 0.8794801928316914, "grad_norm": 0.03492108732461929, "learning_rate": 3.775228582411877e-07, "loss": 0.0536, "step": 4196 }, { "epoch": 0.8796897924963321, "grad_norm": 0.029871255159378052, "learning_rate": 3.7622983095395973e-07, "loss": 0.0522, "step": 4197 }, { "epoch": 0.8798993921609726, "grad_norm": 0.02398385852575302, "learning_rate": 3.7493893522381866e-07, "loss": 0.0529, "step": 4198 }, { "epoch": 0.8801089918256131, "grad_norm": 0.028454309329390526, "learning_rate": 3.736501716458668e-07, "loss": 0.0572, "step": 4199 }, { "epoch": 0.8803185914902536, "grad_norm": 0.038288019597530365, "learning_rate": 3.723635408142262e-07, "loss": 0.0538, "step": 4200 }, { "epoch": 0.8805281911548941, "grad_norm": 0.030530139803886414, "learning_rate": 3.710790433220324e-07, "loss": 0.0522, "step": 4201 }, { "epoch": 0.8807377908195347, "grad_norm": 0.021036000922322273, "learning_rate": 3.6979667976143663e-07, "loss": 0.0532, "step": 4202 }, { "epoch": 0.8809473904841753, "grad_norm": 0.035266853868961334, "learning_rate": 3.685164507236111e-07, "loss": 0.0538, "step": 4203 }, { "epoch": 0.8811569901488158, "grad_norm": 0.03357855603098869, "learning_rate": 3.6723835679873864e-07, "loss": 0.0546, "step": 4204 }, { "epoch": 0.8813665898134563, "grad_norm": 0.025351261720061302, "learning_rate": 3.6596239857602136e-07, "loss": 0.0522, "step": 4205 }, { "epoch": 0.8815761894780968, "grad_norm": 0.027122756466269493, "learning_rate": 3.6468857664367753e-07, "loss": 0.0541, "step": 4206 }, { "epoch": 0.8817857891427374, "grad_norm": 0.03549753874540329, "learning_rate": 3.6341689158893524e-07, "loss": 0.0535, "step": 4207 }, { "epoch": 0.8819953888073779, "grad_norm": 0.03277155011892319, "learning_rate": 3.6214734399804277e-07, "loss": 0.0532, "step": 4208 }, { "epoch": 0.8822049884720184, "grad_norm": 0.019641457125544548, "learning_rate": 3.6087993445626135e-07, "loss": 0.0543, "step": 4209 }, { "epoch": 0.882414588136659, "grad_norm": 0.027548063546419144, "learning_rate": 3.596146635478676e-07, "loss": 0.0563, "step": 4210 }, { "epoch": 0.8826241878012995, "grad_norm": 0.03249534219503403, "learning_rate": 3.583515318561498e-07, "loss": 0.0534, "step": 4211 }, { "epoch": 0.8828337874659401, "grad_norm": 0.030830688774585724, "learning_rate": 3.570905399634111e-07, "loss": 0.0559, "step": 4212 }, { "epoch": 0.8830433871305806, "grad_norm": 0.019546935334801674, "learning_rate": 3.558316884509694e-07, "loss": 0.054, "step": 4213 }, { "epoch": 0.8832529867952211, "grad_norm": 0.025954263284802437, "learning_rate": 3.545749778991542e-07, "loss": 0.0538, "step": 4214 }, { "epoch": 0.8834625864598616, "grad_norm": 0.03318324312567711, "learning_rate": 3.5332040888730935e-07, "loss": 0.052, "step": 4215 }, { "epoch": 0.8836721861245022, "grad_norm": 0.0273283664137125, "learning_rate": 3.5206798199379166e-07, "loss": 0.0548, "step": 4216 }, { "epoch": 0.8838817857891428, "grad_norm": 0.02014850080013275, "learning_rate": 3.508176977959682e-07, "loss": 0.0536, "step": 4217 }, { "epoch": 0.8840913854537833, "grad_norm": 0.02724394015967846, "learning_rate": 3.495695568702201e-07, "loss": 0.0559, "step": 4218 }, { "epoch": 0.8843009851184238, "grad_norm": 0.028185635805130005, "learning_rate": 3.483235597919404e-07, "loss": 0.0514, "step": 4219 }, { "epoch": 0.8845105847830643, "grad_norm": 0.02001328580081463, "learning_rate": 3.4707970713553364e-07, "loss": 0.0539, "step": 4220 }, { "epoch": 0.8847201844477048, "grad_norm": 0.01998419128358364, "learning_rate": 3.4583799947441487e-07, "loss": 0.0523, "step": 4221 }, { "epoch": 0.8849297841123455, "grad_norm": 0.030080687254667282, "learning_rate": 3.445984373810124e-07, "loss": 0.0544, "step": 4222 }, { "epoch": 0.885139383776986, "grad_norm": 0.026855189353227615, "learning_rate": 3.4336102142676354e-07, "loss": 0.0522, "step": 4223 }, { "epoch": 0.8853489834416265, "grad_norm": 0.01764325052499771, "learning_rate": 3.421257521821153e-07, "loss": 0.0546, "step": 4224 }, { "epoch": 0.885558583106267, "grad_norm": 0.02428247220814228, "learning_rate": 3.408926302165283e-07, "loss": 0.0541, "step": 4225 }, { "epoch": 0.8857681827709075, "grad_norm": 0.025871314108371735, "learning_rate": 3.396616560984711e-07, "loss": 0.0529, "step": 4226 }, { "epoch": 0.8859777824355481, "grad_norm": 0.02489456534385681, "learning_rate": 3.384328303954221e-07, "loss": 0.0562, "step": 4227 }, { "epoch": 0.8861873821001887, "grad_norm": 0.017987683415412903, "learning_rate": 3.372061536738708e-07, "loss": 0.0529, "step": 4228 }, { "epoch": 0.8863969817648292, "grad_norm": 0.025899479165673256, "learning_rate": 3.3598162649931374e-07, "loss": 0.0577, "step": 4229 }, { "epoch": 0.8866065814294697, "grad_norm": 0.030789077281951904, "learning_rate": 3.347592494362578e-07, "loss": 0.0536, "step": 4230 }, { "epoch": 0.8868161810941102, "grad_norm": 0.024812959134578705, "learning_rate": 3.3353902304821826e-07, "loss": 0.0562, "step": 4231 }, { "epoch": 0.8870257807587508, "grad_norm": 0.01769872196018696, "learning_rate": 3.323209478977202e-07, "loss": 0.0534, "step": 4232 }, { "epoch": 0.8872353804233913, "grad_norm": 0.026812558993697166, "learning_rate": 3.31105024546296e-07, "loss": 0.054, "step": 4233 }, { "epoch": 0.8874449800880319, "grad_norm": 0.0294424407184124, "learning_rate": 3.2989125355448623e-07, "loss": 0.0524, "step": 4234 }, { "epoch": 0.8876545797526724, "grad_norm": 0.02184954471886158, "learning_rate": 3.2867963548183723e-07, "loss": 0.052, "step": 4235 }, { "epoch": 0.8878641794173129, "grad_norm": 0.01684270054101944, "learning_rate": 3.274701708869066e-07, "loss": 0.0565, "step": 4236 }, { "epoch": 0.8880737790819535, "grad_norm": 0.028860921040177345, "learning_rate": 3.2626286032725664e-07, "loss": 0.0527, "step": 4237 }, { "epoch": 0.888283378746594, "grad_norm": 0.02757331356406212, "learning_rate": 3.250577043594566e-07, "loss": 0.0569, "step": 4238 }, { "epoch": 0.8884929784112345, "grad_norm": 0.020717767998576164, "learning_rate": 3.238547035390843e-07, "loss": 0.0569, "step": 4239 }, { "epoch": 0.888702578075875, "grad_norm": 0.019048750400543213, "learning_rate": 3.226538584207228e-07, "loss": 0.0549, "step": 4240 }, { "epoch": 0.8889121777405156, "grad_norm": 0.030015893280506134, "learning_rate": 3.214551695579604e-07, "loss": 0.0528, "step": 4241 }, { "epoch": 0.8891217774051562, "grad_norm": 0.02680326998233795, "learning_rate": 3.202586375033928e-07, "loss": 0.0538, "step": 4242 }, { "epoch": 0.8893313770697967, "grad_norm": 0.020373962819576263, "learning_rate": 3.190642628086221e-07, "loss": 0.0539, "step": 4243 }, { "epoch": 0.8895409767344372, "grad_norm": 0.020322667434811592, "learning_rate": 3.178720460242535e-07, "loss": 0.0539, "step": 4244 }, { "epoch": 0.8897505763990777, "grad_norm": 0.025189222767949104, "learning_rate": 3.166819876998989e-07, "loss": 0.0532, "step": 4245 }, { "epoch": 0.8899601760637182, "grad_norm": 0.027642201632261276, "learning_rate": 3.154940883841756e-07, "loss": 0.0551, "step": 4246 }, { "epoch": 0.8901697757283589, "grad_norm": 0.023163793608546257, "learning_rate": 3.1430834862470395e-07, "loss": 0.0545, "step": 4247 }, { "epoch": 0.8903793753929994, "grad_norm": 0.01851850189268589, "learning_rate": 3.131247689681099e-07, "loss": 0.0553, "step": 4248 }, { "epoch": 0.8905889750576399, "grad_norm": 0.028064940124750137, "learning_rate": 3.1194334996002497e-07, "loss": 0.0544, "step": 4249 }, { "epoch": 0.8907985747222804, "grad_norm": 0.02554197795689106, "learning_rate": 3.1076409214508164e-07, "loss": 0.0533, "step": 4250 }, { "epoch": 0.8910081743869209, "grad_norm": 0.018633106723427773, "learning_rate": 3.095869960669173e-07, "loss": 0.0544, "step": 4251 }, { "epoch": 0.8912177740515616, "grad_norm": 0.019019601866602898, "learning_rate": 3.084120622681741e-07, "loss": 0.0535, "step": 4252 }, { "epoch": 0.8914273737162021, "grad_norm": 0.026201697066426277, "learning_rate": 3.072392912904948e-07, "loss": 0.0555, "step": 4253 }, { "epoch": 0.8916369733808426, "grad_norm": 0.027933111414313316, "learning_rate": 3.0606868367452746e-07, "loss": 0.0539, "step": 4254 }, { "epoch": 0.8918465730454831, "grad_norm": 0.018472185358405113, "learning_rate": 3.049002399599232e-07, "loss": 0.0544, "step": 4255 }, { "epoch": 0.8920561727101237, "grad_norm": 0.017080184072256088, "learning_rate": 3.0373396068533234e-07, "loss": 0.054, "step": 4256 }, { "epoch": 0.8922657723747642, "grad_norm": 0.02512308955192566, "learning_rate": 3.0256984638840967e-07, "loss": 0.0548, "step": 4257 }, { "epoch": 0.8924753720394047, "grad_norm": 0.02607639506459236, "learning_rate": 3.0140789760581214e-07, "loss": 0.0531, "step": 4258 }, { "epoch": 0.8926849717040453, "grad_norm": 0.01739831455051899, "learning_rate": 3.0024811487319837e-07, "loss": 0.0551, "step": 4259 }, { "epoch": 0.8928945713686858, "grad_norm": 0.017008259892463684, "learning_rate": 2.990904987252269e-07, "loss": 0.0544, "step": 4260 }, { "epoch": 0.8931041710333264, "grad_norm": 0.024107355624437332, "learning_rate": 2.9793504969555965e-07, "loss": 0.053, "step": 4261 }, { "epoch": 0.8933137706979669, "grad_norm": 0.026618480682373047, "learning_rate": 2.9678176831685776e-07, "loss": 0.0535, "step": 4262 }, { "epoch": 0.8935233703626074, "grad_norm": 0.019204149022698402, "learning_rate": 2.9563065512078325e-07, "loss": 0.053, "step": 4263 }, { "epoch": 0.8937329700272479, "grad_norm": 0.018961802124977112, "learning_rate": 2.9448171063799933e-07, "loss": 0.0525, "step": 4264 }, { "epoch": 0.8939425696918885, "grad_norm": 0.022955385968089104, "learning_rate": 2.933349353981701e-07, "loss": 0.0565, "step": 4265 }, { "epoch": 0.8941521693565291, "grad_norm": 0.023665906861424446, "learning_rate": 2.921903299299572e-07, "loss": 0.0532, "step": 4266 }, { "epoch": 0.8943617690211696, "grad_norm": 0.02021893300116062, "learning_rate": 2.9104789476102515e-07, "loss": 0.0554, "step": 4267 }, { "epoch": 0.8945713686858101, "grad_norm": 0.02034766785800457, "learning_rate": 2.899076304180348e-07, "loss": 0.0534, "step": 4268 }, { "epoch": 0.8947809683504506, "grad_norm": 0.019094116985797882, "learning_rate": 2.8876953742664914e-07, "loss": 0.0563, "step": 4269 }, { "epoch": 0.8949905680150911, "grad_norm": 0.022905079647898674, "learning_rate": 2.876336163115273e-07, "loss": 0.0533, "step": 4270 }, { "epoch": 0.8952001676797318, "grad_norm": 0.022755665704607964, "learning_rate": 2.8649986759632985e-07, "loss": 0.0598, "step": 4271 }, { "epoch": 0.8954097673443723, "grad_norm": 0.01669159345328808, "learning_rate": 2.8536829180371485e-07, "loss": 0.0517, "step": 4272 }, { "epoch": 0.8956193670090128, "grad_norm": 0.021552741527557373, "learning_rate": 2.842388894553377e-07, "loss": 0.0517, "step": 4273 }, { "epoch": 0.8958289666736533, "grad_norm": 0.0235520601272583, "learning_rate": 2.8311166107185263e-07, "loss": 0.0553, "step": 4274 }, { "epoch": 0.8960385663382938, "grad_norm": 0.019750304520130157, "learning_rate": 2.819866071729127e-07, "loss": 0.0529, "step": 4275 }, { "epoch": 0.8962481660029344, "grad_norm": 0.01832771860063076, "learning_rate": 2.808637282771659e-07, "loss": 0.0518, "step": 4276 }, { "epoch": 0.896457765667575, "grad_norm": 0.019599363207817078, "learning_rate": 2.7974302490226034e-07, "loss": 0.05, "step": 4277 }, { "epoch": 0.8966673653322155, "grad_norm": 0.02147594839334488, "learning_rate": 2.786244975648406e-07, "loss": 0.0559, "step": 4278 }, { "epoch": 0.896876964996856, "grad_norm": 0.020394155755639076, "learning_rate": 2.7750814678054626e-07, "loss": 0.0533, "step": 4279 }, { "epoch": 0.8970865646614965, "grad_norm": 0.014958519488573074, "learning_rate": 2.7639397306401527e-07, "loss": 0.052, "step": 4280 }, { "epoch": 0.8972961643261371, "grad_norm": 0.017371637746691704, "learning_rate": 2.7528197692888114e-07, "loss": 0.0491, "step": 4281 }, { "epoch": 0.8975057639907776, "grad_norm": 0.021931590512394905, "learning_rate": 2.7417215888777493e-07, "loss": 0.0539, "step": 4282 }, { "epoch": 0.8977153636554182, "grad_norm": 0.022836145013570786, "learning_rate": 2.730645194523218e-07, "loss": 0.0569, "step": 4283 }, { "epoch": 0.8979249633200587, "grad_norm": 0.01579204574227333, "learning_rate": 2.719590591331428e-07, "loss": 0.0532, "step": 4284 }, { "epoch": 0.8981345629846992, "grad_norm": 0.018433870747685432, "learning_rate": 2.7085577843985634e-07, "loss": 0.0533, "step": 4285 }, { "epoch": 0.8983441626493398, "grad_norm": 0.025535134598612785, "learning_rate": 2.697546778810728e-07, "loss": 0.0572, "step": 4286 }, { "epoch": 0.8985537623139803, "grad_norm": 0.022626137360930443, "learning_rate": 2.6865575796440004e-07, "loss": 0.0534, "step": 4287 }, { "epoch": 0.8987633619786208, "grad_norm": 0.01860016956925392, "learning_rate": 2.675590191964406e-07, "loss": 0.0553, "step": 4288 }, { "epoch": 0.8989729616432613, "grad_norm": 0.020209958776831627, "learning_rate": 2.6646446208279054e-07, "loss": 0.0535, "step": 4289 }, { "epoch": 0.8991825613079019, "grad_norm": 0.023047301918268204, "learning_rate": 2.653720871280396e-07, "loss": 0.0545, "step": 4290 }, { "epoch": 0.8993921609725425, "grad_norm": 0.022766290232539177, "learning_rate": 2.6428189483577283e-07, "loss": 0.0541, "step": 4291 }, { "epoch": 0.899601760637183, "grad_norm": 0.021814832463860512, "learning_rate": 2.631938857085697e-07, "loss": 0.0514, "step": 4292 }, { "epoch": 0.8998113603018235, "grad_norm": 0.017104798927903175, "learning_rate": 2.6210806024800083e-07, "loss": 0.055, "step": 4293 }, { "epoch": 0.900020959966464, "grad_norm": 0.019604802131652832, "learning_rate": 2.6102441895463237e-07, "loss": 0.0555, "step": 4294 }, { "epoch": 0.9002305596311045, "grad_norm": 0.022560935467481613, "learning_rate": 2.5994296232802254e-07, "loss": 0.0527, "step": 4295 }, { "epoch": 0.9004401592957452, "grad_norm": 0.017508791759610176, "learning_rate": 2.5886369086672193e-07, "loss": 0.0542, "step": 4296 }, { "epoch": 0.9006497589603857, "grad_norm": 0.010546859353780746, "learning_rate": 2.577866050682748e-07, "loss": 0.0537, "step": 4297 }, { "epoch": 0.9008593586250262, "grad_norm": 0.017833339050412178, "learning_rate": 2.567117054292184e-07, "loss": 0.0552, "step": 4298 }, { "epoch": 0.9010689582896667, "grad_norm": 0.02061723917722702, "learning_rate": 2.5563899244507974e-07, "loss": 0.0548, "step": 4299 }, { "epoch": 0.9012785579543072, "grad_norm": 0.016052190214395523, "learning_rate": 2.545684666103809e-07, "loss": 0.0593, "step": 4300 }, { "epoch": 0.9014881576189478, "grad_norm": 0.013846572488546371, "learning_rate": 2.5350012841863283e-07, "loss": 0.0579, "step": 4301 }, { "epoch": 0.9016977572835884, "grad_norm": 0.014581656083464622, "learning_rate": 2.5243397836233975e-07, "loss": 0.0558, "step": 4302 }, { "epoch": 0.9019073569482289, "grad_norm": 0.021665887907147408, "learning_rate": 2.513700169329963e-07, "loss": 0.0534, "step": 4303 }, { "epoch": 0.9021169566128694, "grad_norm": 0.020303577184677124, "learning_rate": 2.503082446210886e-07, "loss": 0.0511, "step": 4304 }, { "epoch": 0.9023265562775099, "grad_norm": 0.012806370854377747, "learning_rate": 2.4924866191609387e-07, "loss": 0.0514, "step": 4305 }, { "epoch": 0.9025361559421505, "grad_norm": 0.013476556167006493, "learning_rate": 2.4819126930647976e-07, "loss": 0.0545, "step": 4306 }, { "epoch": 0.902745755606791, "grad_norm": 0.019602788612246513, "learning_rate": 2.471360672797019e-07, "loss": 0.0532, "step": 4307 }, { "epoch": 0.9029553552714316, "grad_norm": 0.021197538822889328, "learning_rate": 2.460830563222111e-07, "loss": 0.0515, "step": 4308 }, { "epoch": 0.9031649549360721, "grad_norm": 0.014859777875244617, "learning_rate": 2.45032236919443e-07, "loss": 0.0555, "step": 4309 }, { "epoch": 0.9033745546007126, "grad_norm": 0.010106687434017658, "learning_rate": 2.439836095558262e-07, "loss": 0.053, "step": 4310 }, { "epoch": 0.9035841542653532, "grad_norm": 0.016903823241591454, "learning_rate": 2.429371747147785e-07, "loss": 0.0536, "step": 4311 }, { "epoch": 0.9037937539299937, "grad_norm": 0.018577901646494865, "learning_rate": 2.41892932878704e-07, "loss": 0.0544, "step": 4312 }, { "epoch": 0.9040033535946342, "grad_norm": 0.01648200862109661, "learning_rate": 2.408508845289992e-07, "loss": 0.054, "step": 4313 }, { "epoch": 0.9042129532592748, "grad_norm": 0.012114167213439941, "learning_rate": 2.3981103014604765e-07, "loss": 0.0536, "step": 4314 }, { "epoch": 0.9044225529239153, "grad_norm": 0.012820238247513771, "learning_rate": 2.387733702092232e-07, "loss": 0.0555, "step": 4315 }, { "epoch": 0.9046321525885559, "grad_norm": 0.018219808116555214, "learning_rate": 2.3773790519688644e-07, "loss": 0.0574, "step": 4316 }, { "epoch": 0.9048417522531964, "grad_norm": 0.022166509181261063, "learning_rate": 2.3670463558638556e-07, "loss": 0.0546, "step": 4317 }, { "epoch": 0.9050513519178369, "grad_norm": 0.013082697987556458, "learning_rate": 2.3567356185405844e-07, "loss": 0.0553, "step": 4318 }, { "epoch": 0.9052609515824774, "grad_norm": 0.0153275141492486, "learning_rate": 2.3464468447522925e-07, "loss": 0.0527, "step": 4319 }, { "epoch": 0.905470551247118, "grad_norm": 0.019890129566192627, "learning_rate": 2.3361800392421086e-07, "loss": 0.0537, "step": 4320 }, { "epoch": 0.9056801509117586, "grad_norm": 0.020730622112751007, "learning_rate": 2.3259352067430298e-07, "loss": 0.0548, "step": 4321 }, { "epoch": 0.9058897505763991, "grad_norm": 0.019399119541049004, "learning_rate": 2.3157123519779168e-07, "loss": 0.0541, "step": 4322 }, { "epoch": 0.9060993502410396, "grad_norm": 0.02034193091094494, "learning_rate": 2.3055114796595e-07, "loss": 0.0571, "step": 4323 }, { "epoch": 0.9063089499056801, "grad_norm": 0.02012833207845688, "learning_rate": 2.2953325944903848e-07, "loss": 0.0539, "step": 4324 }, { "epoch": 0.9065185495703207, "grad_norm": 0.020468810573220253, "learning_rate": 2.2851757011630393e-07, "loss": 0.057, "step": 4325 }, { "epoch": 0.9067281492349613, "grad_norm": 0.0244952030479908, "learning_rate": 2.2750408043597794e-07, "loss": 0.0518, "step": 4326 }, { "epoch": 0.9069377488996018, "grad_norm": 0.023987367749214172, "learning_rate": 2.264927908752801e-07, "loss": 0.0568, "step": 4327 }, { "epoch": 0.9071473485642423, "grad_norm": 0.017585035413503647, "learning_rate": 2.2548370190041414e-07, "loss": 0.0534, "step": 4328 }, { "epoch": 0.9073569482288828, "grad_norm": 0.017734866589307785, "learning_rate": 2.2447681397656908e-07, "loss": 0.0527, "step": 4329 }, { "epoch": 0.9075665478935234, "grad_norm": 0.023448040708899498, "learning_rate": 2.234721275679208e-07, "loss": 0.0557, "step": 4330 }, { "epoch": 0.9077761475581639, "grad_norm": 0.02103973738849163, "learning_rate": 2.2246964313763053e-07, "loss": 0.055, "step": 4331 }, { "epoch": 0.9079857472228045, "grad_norm": 0.019696585834026337, "learning_rate": 2.2146936114784134e-07, "loss": 0.0544, "step": 4332 }, { "epoch": 0.908195346887445, "grad_norm": 0.016549983993172646, "learning_rate": 2.2047128205968494e-07, "loss": 0.0539, "step": 4333 }, { "epoch": 0.9084049465520855, "grad_norm": 0.02317041903734207, "learning_rate": 2.1947540633327437e-07, "loss": 0.0528, "step": 4334 }, { "epoch": 0.9086145462167261, "grad_norm": 0.020171795040369034, "learning_rate": 2.184817344277085e-07, "loss": 0.0543, "step": 4335 }, { "epoch": 0.9088241458813666, "grad_norm": 0.019286734983325005, "learning_rate": 2.1749026680106922e-07, "loss": 0.0522, "step": 4336 }, { "epoch": 0.9090337455460071, "grad_norm": 0.011374424211680889, "learning_rate": 2.1650100391042373e-07, "loss": 0.0576, "step": 4337 }, { "epoch": 0.9092433452106476, "grad_norm": 0.022341901436448097, "learning_rate": 2.1551394621182277e-07, "loss": 0.0552, "step": 4338 }, { "epoch": 0.9094529448752882, "grad_norm": 0.026063280180096626, "learning_rate": 2.1452909416029844e-07, "loss": 0.0539, "step": 4339 }, { "epoch": 0.9096625445399288, "grad_norm": 0.01974085345864296, "learning_rate": 2.1354644820986703e-07, "loss": 0.0536, "step": 4340 }, { "epoch": 0.9098721442045693, "grad_norm": 0.011266632936894894, "learning_rate": 2.1256600881352951e-07, "loss": 0.0542, "step": 4341 }, { "epoch": 0.9100817438692098, "grad_norm": 0.02346237748861313, "learning_rate": 2.1158777642326656e-07, "loss": 0.0558, "step": 4342 }, { "epoch": 0.9102913435338503, "grad_norm": 0.020998260006308556, "learning_rate": 2.1061175149004464e-07, "loss": 0.0536, "step": 4343 }, { "epoch": 0.9105009431984908, "grad_norm": 0.01797637529671192, "learning_rate": 2.0963793446381053e-07, "loss": 0.0532, "step": 4344 }, { "epoch": 0.9107105428631315, "grad_norm": 0.016491252928972244, "learning_rate": 2.08666325793494e-07, "loss": 0.0522, "step": 4345 }, { "epoch": 0.910920142527772, "grad_norm": 0.011900043115019798, "learning_rate": 2.076969259270051e-07, "loss": 0.0581, "step": 4346 }, { "epoch": 0.9111297421924125, "grad_norm": 0.014170379377901554, "learning_rate": 2.0672973531123796e-07, "loss": 0.0534, "step": 4347 }, { "epoch": 0.911339341857053, "grad_norm": 0.01760711334645748, "learning_rate": 2.0576475439206767e-07, "loss": 0.0555, "step": 4348 }, { "epoch": 0.9115489415216935, "grad_norm": 0.017065905034542084, "learning_rate": 2.0480198361435e-07, "loss": 0.055, "step": 4349 }, { "epoch": 0.9117585411863341, "grad_norm": 0.012907272204756737, "learning_rate": 2.038414234219216e-07, "loss": 0.0532, "step": 4350 }, { "epoch": 0.9119681408509747, "grad_norm": 0.010160834528505802, "learning_rate": 2.0288307425760046e-07, "loss": 0.0543, "step": 4351 }, { "epoch": 0.9121777405156152, "grad_norm": 0.013258119113743305, "learning_rate": 2.0192693656318597e-07, "loss": 0.0518, "step": 4352 }, { "epoch": 0.9123873401802557, "grad_norm": 0.0151562774553895, "learning_rate": 2.0097301077945607e-07, "loss": 0.0527, "step": 4353 }, { "epoch": 0.9125969398448962, "grad_norm": 0.016773242503404617, "learning_rate": 2.0002129734617292e-07, "loss": 0.0534, "step": 4354 }, { "epoch": 0.9128065395095368, "grad_norm": 0.012901091016829014, "learning_rate": 1.9907179670207387e-07, "loss": 0.0518, "step": 4355 }, { "epoch": 0.9130161391741773, "grad_norm": 0.010573726147413254, "learning_rate": 1.9812450928487936e-07, "loss": 0.0518, "step": 4356 }, { "epoch": 0.9132257388388179, "grad_norm": 0.012841584160923958, "learning_rate": 1.9717943553128893e-07, "loss": 0.0556, "step": 4357 }, { "epoch": 0.9134353385034584, "grad_norm": 0.014390083961188793, "learning_rate": 1.962365758769802e-07, "loss": 0.0526, "step": 4358 }, { "epoch": 0.9136449381680989, "grad_norm": 0.014265509322285652, "learning_rate": 1.9529593075661267e-07, "loss": 0.0531, "step": 4359 }, { "epoch": 0.9138545378327395, "grad_norm": 0.013005812652409077, "learning_rate": 1.943575006038234e-07, "loss": 0.0535, "step": 4360 }, { "epoch": 0.91406413749738, "grad_norm": 0.009814724326133728, "learning_rate": 1.9342128585122798e-07, "loss": 0.0574, "step": 4361 }, { "epoch": 0.9142737371620205, "grad_norm": 0.011820555664598942, "learning_rate": 1.9248728693042117e-07, "loss": 0.0519, "step": 4362 }, { "epoch": 0.914483336826661, "grad_norm": 0.014412821270525455, "learning_rate": 1.9155550427197577e-07, "loss": 0.0579, "step": 4363 }, { "epoch": 0.9146929364913016, "grad_norm": 0.013178630731999874, "learning_rate": 1.906259383054454e-07, "loss": 0.0536, "step": 4364 }, { "epoch": 0.9149025361559422, "grad_norm": 0.01012821588665247, "learning_rate": 1.8969858945935783e-07, "loss": 0.0566, "step": 4365 }, { "epoch": 0.9151121358205827, "grad_norm": 0.009884363040328026, "learning_rate": 1.8877345816122162e-07, "loss": 0.0551, "step": 4366 }, { "epoch": 0.9153217354852232, "grad_norm": 0.011177174746990204, "learning_rate": 1.8785054483752174e-07, "loss": 0.0518, "step": 4367 }, { "epoch": 0.9155313351498637, "grad_norm": 0.012874050997197628, "learning_rate": 1.8692984991372065e-07, "loss": 0.0554, "step": 4368 }, { "epoch": 0.9157409348145042, "grad_norm": 0.012516334652900696, "learning_rate": 1.8601137381425938e-07, "loss": 0.0551, "step": 4369 }, { "epoch": 0.9159505344791449, "grad_norm": 0.010518589057028294, "learning_rate": 1.8509511696255421e-07, "loss": 0.0543, "step": 4370 }, { "epoch": 0.9161601341437854, "grad_norm": 0.00973634421825409, "learning_rate": 1.841810797810012e-07, "loss": 0.0542, "step": 4371 }, { "epoch": 0.9163697338084259, "grad_norm": 0.01056087389588356, "learning_rate": 1.8326926269096935e-07, "loss": 0.0525, "step": 4372 }, { "epoch": 0.9165793334730664, "grad_norm": 0.012748325243592262, "learning_rate": 1.8235966611280687e-07, "loss": 0.0534, "step": 4373 }, { "epoch": 0.9167889331377069, "grad_norm": 0.010445375926792622, "learning_rate": 1.8145229046583778e-07, "loss": 0.0553, "step": 4374 }, { "epoch": 0.9169985328023476, "grad_norm": 0.00820910930633545, "learning_rate": 1.805471361683614e-07, "loss": 0.0551, "step": 4375 }, { "epoch": 0.9172081324669881, "grad_norm": 0.008532201871275902, "learning_rate": 1.7964420363765444e-07, "loss": 0.0562, "step": 4376 }, { "epoch": 0.9174177321316286, "grad_norm": 0.01010076142847538, "learning_rate": 1.78743493289969e-07, "loss": 0.0537, "step": 4377 }, { "epoch": 0.9176273317962691, "grad_norm": 0.011571811512112617, "learning_rate": 1.778450055405312e-07, "loss": 0.0552, "step": 4378 }, { "epoch": 0.9178369314609096, "grad_norm": 0.01056272629648447, "learning_rate": 1.7694874080354362e-07, "loss": 0.056, "step": 4379 }, { "epoch": 0.9180465311255502, "grad_norm": 0.008779754862189293, "learning_rate": 1.760546994921858e-07, "loss": 0.0541, "step": 4380 }, { "epoch": 0.9182561307901907, "grad_norm": 0.008502528071403503, "learning_rate": 1.7516288201860853e-07, "loss": 0.0514, "step": 4381 }, { "epoch": 0.9184657304548313, "grad_norm": 0.009822116233408451, "learning_rate": 1.742732887939408e-07, "loss": 0.0528, "step": 4382 }, { "epoch": 0.9186753301194718, "grad_norm": 0.012791884131729603, "learning_rate": 1.733859202282845e-07, "loss": 0.0535, "step": 4383 }, { "epoch": 0.9188849297841123, "grad_norm": 0.010147576220333576, "learning_rate": 1.7250077673071685e-07, "loss": 0.0537, "step": 4384 }, { "epoch": 0.9190945294487529, "grad_norm": 0.009435161016881466, "learning_rate": 1.716178587092876e-07, "loss": 0.0526, "step": 4385 }, { "epoch": 0.9193041291133934, "grad_norm": 0.007458524778485298, "learning_rate": 1.7073716657102278e-07, "loss": 0.0523, "step": 4386 }, { "epoch": 0.919513728778034, "grad_norm": 0.008520993404090405, "learning_rate": 1.6985870072192156e-07, "loss": 0.0538, "step": 4387 }, { "epoch": 0.9197233284426745, "grad_norm": 0.01144067570567131, "learning_rate": 1.6898246156695552e-07, "loss": 0.0559, "step": 4388 }, { "epoch": 0.919932928107315, "grad_norm": 0.01067439466714859, "learning_rate": 1.6810844951007099e-07, "loss": 0.0563, "step": 4389 }, { "epoch": 0.9201425277719556, "grad_norm": 0.012738638557493687, "learning_rate": 1.6723666495418844e-07, "loss": 0.0553, "step": 4390 }, { "epoch": 0.9203521274365961, "grad_norm": 0.009737935848534107, "learning_rate": 1.6636710830119863e-07, "loss": 0.054, "step": 4391 }, { "epoch": 0.9205617271012366, "grad_norm": 0.0069859870709478855, "learning_rate": 1.6549977995196809e-07, "loss": 0.0557, "step": 4392 }, { "epoch": 0.9207713267658771, "grad_norm": 0.011632826179265976, "learning_rate": 1.6463468030633478e-07, "loss": 0.0526, "step": 4393 }, { "epoch": 0.9209809264305178, "grad_norm": 0.011854654178023338, "learning_rate": 1.6377180976310968e-07, "loss": 0.0561, "step": 4394 }, { "epoch": 0.9211905260951583, "grad_norm": 0.010901962406933308, "learning_rate": 1.6291116872007573e-07, "loss": 0.0511, "step": 4395 }, { "epoch": 0.9214001257597988, "grad_norm": 0.009396993555128574, "learning_rate": 1.6205275757398774e-07, "loss": 0.0547, "step": 4396 }, { "epoch": 0.9216097254244393, "grad_norm": 0.00782528892159462, "learning_rate": 1.611965767205742e-07, "loss": 0.0529, "step": 4397 }, { "epoch": 0.9218193250890798, "grad_norm": 0.00804727990180254, "learning_rate": 1.6034262655453269e-07, "loss": 0.0538, "step": 4398 }, { "epoch": 0.9220289247537204, "grad_norm": 0.011399338953197002, "learning_rate": 1.594909074695361e-07, "loss": 0.0523, "step": 4399 }, { "epoch": 0.922238524418361, "grad_norm": 0.013382563367486, "learning_rate": 1.586414198582259e-07, "loss": 0.0557, "step": 4400 }, { "epoch": 0.9224481240830015, "grad_norm": 0.010052556172013283, "learning_rate": 1.5779416411221437e-07, "loss": 0.0554, "step": 4401 }, { "epoch": 0.922657723747642, "grad_norm": 0.010469266213476658, "learning_rate": 1.5694914062208799e-07, "loss": 0.0544, "step": 4402 }, { "epoch": 0.9228673234122825, "grad_norm": 0.011628585867583752, "learning_rate": 1.561063497774018e-07, "loss": 0.0535, "step": 4403 }, { "epoch": 0.9230769230769231, "grad_norm": 0.016579635441303253, "learning_rate": 1.552657919666817e-07, "loss": 0.0547, "step": 4404 }, { "epoch": 0.9232865227415636, "grad_norm": 0.01323653757572174, "learning_rate": 1.54427467577426e-07, "loss": 0.0525, "step": 4405 }, { "epoch": 0.9234961224062042, "grad_norm": 0.01618046499788761, "learning_rate": 1.535913769961006e-07, "loss": 0.0535, "step": 4406 }, { "epoch": 0.9237057220708447, "grad_norm": 0.012092587538063526, "learning_rate": 1.527575206081444e-07, "loss": 0.0524, "step": 4407 }, { "epoch": 0.9239153217354852, "grad_norm": 0.014828270301222801, "learning_rate": 1.5192589879796383e-07, "loss": 0.0533, "step": 4408 }, { "epoch": 0.9241249214001258, "grad_norm": 0.008504010736942291, "learning_rate": 1.510965119489366e-07, "loss": 0.0541, "step": 4409 }, { "epoch": 0.9243345210647663, "grad_norm": 0.010867651551961899, "learning_rate": 1.5026936044341078e-07, "loss": 0.0546, "step": 4410 }, { "epoch": 0.9245441207294068, "grad_norm": 0.015342487022280693, "learning_rate": 1.4944444466270248e-07, "loss": 0.0543, "step": 4411 }, { "epoch": 0.9247537203940474, "grad_norm": 0.01415248867124319, "learning_rate": 1.486217649870969e-07, "loss": 0.0539, "step": 4412 }, { "epoch": 0.9249633200586879, "grad_norm": 0.009957171976566315, "learning_rate": 1.478013217958507e-07, "loss": 0.0545, "step": 4413 }, { "epoch": 0.9251729197233285, "grad_norm": 0.00818433053791523, "learning_rate": 1.4698311546718635e-07, "loss": 0.0556, "step": 4414 }, { "epoch": 0.925382519387969, "grad_norm": 0.010151896625757217, "learning_rate": 1.4616714637829822e-07, "loss": 0.0533, "step": 4415 }, { "epoch": 0.9255921190526095, "grad_norm": 0.012620055116713047, "learning_rate": 1.453534149053476e-07, "loss": 0.0556, "step": 4416 }, { "epoch": 0.92580171871725, "grad_norm": 0.011959872208535671, "learning_rate": 1.4454192142346446e-07, "loss": 0.0549, "step": 4417 }, { "epoch": 0.9260113183818905, "grad_norm": 0.010501948185265064, "learning_rate": 1.437326663067462e-07, "loss": 0.0526, "step": 4418 }, { "epoch": 0.9262209180465312, "grad_norm": 0.009827865287661552, "learning_rate": 1.429256499282605e-07, "loss": 0.0529, "step": 4419 }, { "epoch": 0.9264305177111717, "grad_norm": 0.009340137243270874, "learning_rate": 1.421208726600415e-07, "loss": 0.0524, "step": 4420 }, { "epoch": 0.9266401173758122, "grad_norm": 0.010111922398209572, "learning_rate": 1.4131833487309122e-07, "loss": 0.0547, "step": 4421 }, { "epoch": 0.9268497170404527, "grad_norm": 0.01139815803617239, "learning_rate": 1.4051803693737876e-07, "loss": 0.0537, "step": 4422 }, { "epoch": 0.9270593167050932, "grad_norm": 0.009999927133321762, "learning_rate": 1.3971997922184234e-07, "loss": 0.0532, "step": 4423 }, { "epoch": 0.9272689163697339, "grad_norm": 0.010446673259139061, "learning_rate": 1.3892416209438542e-07, "loss": 0.0524, "step": 4424 }, { "epoch": 0.9274785160343744, "grad_norm": 0.012159881182014942, "learning_rate": 1.381305859218801e-07, "loss": 0.051, "step": 4425 }, { "epoch": 0.9276881156990149, "grad_norm": 0.013391540385782719, "learning_rate": 1.3733925107016542e-07, "loss": 0.0519, "step": 4426 }, { "epoch": 0.9278977153636554, "grad_norm": 0.006831051781773567, "learning_rate": 1.3655015790404568e-07, "loss": 0.0536, "step": 4427 }, { "epoch": 0.9281073150282959, "grad_norm": 0.007939697243273258, "learning_rate": 1.3576330678729266e-07, "loss": 0.0535, "step": 4428 }, { "epoch": 0.9283169146929365, "grad_norm": 0.015715105459094048, "learning_rate": 1.3497869808264453e-07, "loss": 0.054, "step": 4429 }, { "epoch": 0.928526514357577, "grad_norm": 0.008145746774971485, "learning_rate": 1.341963321518064e-07, "loss": 0.057, "step": 4430 }, { "epoch": 0.9287361140222176, "grad_norm": 0.0074055916629731655, "learning_rate": 1.3341620935544864e-07, "loss": 0.0559, "step": 4431 }, { "epoch": 0.9289457136868581, "grad_norm": 0.00775395892560482, "learning_rate": 1.3263833005320747e-07, "loss": 0.0521, "step": 4432 }, { "epoch": 0.9291553133514986, "grad_norm": 0.008558698929846287, "learning_rate": 1.3186269460368496e-07, "loss": 0.0523, "step": 4433 }, { "epoch": 0.9293649130161392, "grad_norm": 0.009375239722430706, "learning_rate": 1.3108930336444893e-07, "loss": 0.0522, "step": 4434 }, { "epoch": 0.9295745126807797, "grad_norm": 0.007077595219016075, "learning_rate": 1.303181566920325e-07, "loss": 0.0538, "step": 4435 }, { "epoch": 0.9297841123454202, "grad_norm": 0.0054700132459402084, "learning_rate": 1.2954925494193472e-07, "loss": 0.0545, "step": 4436 }, { "epoch": 0.9299937120100608, "grad_norm": 0.008495796471834183, "learning_rate": 1.2878259846861862e-07, "loss": 0.0587, "step": 4437 }, { "epoch": 0.9302033116747013, "grad_norm": 0.008338769897818565, "learning_rate": 1.2801818762551265e-07, "loss": 0.051, "step": 4438 }, { "epoch": 0.9304129113393419, "grad_norm": 0.009145451709628105, "learning_rate": 1.2725602276501047e-07, "loss": 0.0544, "step": 4439 }, { "epoch": 0.9306225110039824, "grad_norm": 0.00860004685819149, "learning_rate": 1.2649610423846937e-07, "loss": 0.0549, "step": 4440 }, { "epoch": 0.9308321106686229, "grad_norm": 0.006629549898207188, "learning_rate": 1.2573843239621185e-07, "loss": 0.0548, "step": 4441 }, { "epoch": 0.9310417103332634, "grad_norm": 0.007576174568384886, "learning_rate": 1.2498300758752413e-07, "loss": 0.0517, "step": 4442 }, { "epoch": 0.931251309997904, "grad_norm": 0.007479529827833176, "learning_rate": 1.2422983016065816e-07, "loss": 0.0575, "step": 4443 }, { "epoch": 0.9314609096625446, "grad_norm": 0.006873736623674631, "learning_rate": 1.2347890046282785e-07, "loss": 0.0543, "step": 4444 }, { "epoch": 0.9316705093271851, "grad_norm": 0.008335386402904987, "learning_rate": 1.2273021884021074e-07, "loss": 0.0526, "step": 4445 }, { "epoch": 0.9318801089918256, "grad_norm": 0.007241726852953434, "learning_rate": 1.2198378563795023e-07, "loss": 0.0534, "step": 4446 }, { "epoch": 0.9320897086564661, "grad_norm": 0.008679443039000034, "learning_rate": 1.2123960120015045e-07, "loss": 0.0542, "step": 4447 }, { "epoch": 0.9322993083211066, "grad_norm": 0.0065467506647109985, "learning_rate": 1.2049766586988142e-07, "loss": 0.0506, "step": 4448 }, { "epoch": 0.9325089079857473, "grad_norm": 0.0077818529680371284, "learning_rate": 1.1975797998917514e-07, "loss": 0.0526, "step": 4449 }, { "epoch": 0.9327185076503878, "grad_norm": 0.008923080749809742, "learning_rate": 1.1902054389902662e-07, "loss": 0.0511, "step": 4450 }, { "epoch": 0.9329281073150283, "grad_norm": 0.009406541474163532, "learning_rate": 1.1828535793939277e-07, "loss": 0.0507, "step": 4451 }, { "epoch": 0.9331377069796688, "grad_norm": 0.008926275186240673, "learning_rate": 1.1755242244919528e-07, "loss": 0.0553, "step": 4452 }, { "epoch": 0.9333473066443093, "grad_norm": 0.007040916942059994, "learning_rate": 1.1682173776631722e-07, "loss": 0.054, "step": 4453 }, { "epoch": 0.9335569063089499, "grad_norm": 0.009015001356601715, "learning_rate": 1.16093304227603e-07, "loss": 0.0555, "step": 4454 }, { "epoch": 0.9337665059735905, "grad_norm": 0.010234599001705647, "learning_rate": 1.1536712216886181e-07, "loss": 0.0542, "step": 4455 }, { "epoch": 0.933976105638231, "grad_norm": 0.008125402964651585, "learning_rate": 1.1464319192486251e-07, "loss": 0.0515, "step": 4456 }, { "epoch": 0.9341857053028715, "grad_norm": 0.009704657830297947, "learning_rate": 1.1392151382933647e-07, "loss": 0.0555, "step": 4457 }, { "epoch": 0.934395304967512, "grad_norm": 0.009614041075110435, "learning_rate": 1.1320208821497758e-07, "loss": 0.0529, "step": 4458 }, { "epoch": 0.9346049046321526, "grad_norm": 0.007132851053029299, "learning_rate": 1.1248491541344164e-07, "loss": 0.0539, "step": 4459 }, { "epoch": 0.9348145042967931, "grad_norm": 0.008845999836921692, "learning_rate": 1.117699957553442e-07, "loss": 0.0558, "step": 4460 }, { "epoch": 0.9350241039614337, "grad_norm": 0.009133417159318924, "learning_rate": 1.1105732957026272e-07, "loss": 0.0528, "step": 4461 }, { "epoch": 0.9352337036260742, "grad_norm": 0.008758967742323875, "learning_rate": 1.1034691718673774e-07, "loss": 0.0551, "step": 4462 }, { "epoch": 0.9354433032907148, "grad_norm": 0.009887007996439934, "learning_rate": 1.0963875893226728e-07, "loss": 0.0545, "step": 4463 }, { "epoch": 0.9356529029553553, "grad_norm": 0.00951511599123478, "learning_rate": 1.0893285513331353e-07, "loss": 0.0566, "step": 4464 }, { "epoch": 0.9358625026199958, "grad_norm": 0.0072890762239694595, "learning_rate": 1.0822920611529786e-07, "loss": 0.0538, "step": 4465 }, { "epoch": 0.9360721022846363, "grad_norm": 0.008058588020503521, "learning_rate": 1.0752781220260245e-07, "loss": 0.0518, "step": 4466 }, { "epoch": 0.9362817019492768, "grad_norm": 0.008425997570157051, "learning_rate": 1.0682867371856864e-07, "loss": 0.0534, "step": 4467 }, { "epoch": 0.9364913016139175, "grad_norm": 0.008281690068542957, "learning_rate": 1.0613179098549975e-07, "loss": 0.055, "step": 4468 }, { "epoch": 0.936700901278558, "grad_norm": 0.009938735514879227, "learning_rate": 1.0543716432465933e-07, "loss": 0.0524, "step": 4469 }, { "epoch": 0.9369105009431985, "grad_norm": 0.009986302815377712, "learning_rate": 1.0474479405626847e-07, "loss": 0.0527, "step": 4470 }, { "epoch": 0.937120100607839, "grad_norm": 0.007245698943734169, "learning_rate": 1.0405468049951184e-07, "loss": 0.0539, "step": 4471 }, { "epoch": 0.9373297002724795, "grad_norm": 0.009867161512374878, "learning_rate": 1.0336682397252995e-07, "loss": 0.0551, "step": 4472 }, { "epoch": 0.9375392999371202, "grad_norm": 0.008164730854332447, "learning_rate": 1.0268122479242526e-07, "loss": 0.0547, "step": 4473 }, { "epoch": 0.9377488996017607, "grad_norm": 0.0104014091193676, "learning_rate": 1.0199788327525828e-07, "loss": 0.0508, "step": 4474 }, { "epoch": 0.9379584992664012, "grad_norm": 0.009538762271404266, "learning_rate": 1.013167997360498e-07, "loss": 0.0505, "step": 4475 }, { "epoch": 0.9381680989310417, "grad_norm": 0.008720876649022102, "learning_rate": 1.0063797448877977e-07, "loss": 0.0534, "step": 4476 }, { "epoch": 0.9383776985956822, "grad_norm": 0.010985384695231915, "learning_rate": 9.996140784638564e-08, "loss": 0.0572, "step": 4477 }, { "epoch": 0.9385872982603228, "grad_norm": 0.008751952089369297, "learning_rate": 9.928710012076404e-08, "loss": 0.0542, "step": 4478 }, { "epoch": 0.9387968979249633, "grad_norm": 0.007031100802123547, "learning_rate": 9.861505162277186e-08, "loss": 0.0552, "step": 4479 }, { "epoch": 0.9390064975896039, "grad_norm": 0.008988458663225174, "learning_rate": 9.794526266222237e-08, "loss": 0.0531, "step": 4480 }, { "epoch": 0.9392160972542444, "grad_norm": 0.007348395884037018, "learning_rate": 9.727773354788861e-08, "loss": 0.0547, "step": 4481 }, { "epoch": 0.9394256969188849, "grad_norm": 0.009616075083613396, "learning_rate": 9.661246458750162e-08, "loss": 0.053, "step": 4482 }, { "epoch": 0.9396352965835255, "grad_norm": 0.00715535506606102, "learning_rate": 9.594945608774997e-08, "loss": 0.0563, "step": 4483 }, { "epoch": 0.939844896248166, "grad_norm": 0.008326810784637928, "learning_rate": 9.528870835427972e-08, "loss": 0.0532, "step": 4484 }, { "epoch": 0.9400544959128065, "grad_norm": 0.009740813635289669, "learning_rate": 9.463022169169666e-08, "loss": 0.0514, "step": 4485 }, { "epoch": 0.9402640955774471, "grad_norm": 0.0066096787340939045, "learning_rate": 9.397399640356242e-08, "loss": 0.0543, "step": 4486 }, { "epoch": 0.9404736952420876, "grad_norm": 0.009233192540705204, "learning_rate": 9.332003279239665e-08, "loss": 0.0582, "step": 4487 }, { "epoch": 0.9406832949067282, "grad_norm": 0.01031689066439867, "learning_rate": 9.266833115967655e-08, "loss": 0.0556, "step": 4488 }, { "epoch": 0.9408928945713687, "grad_norm": 0.010228820145130157, "learning_rate": 9.201889180583679e-08, "loss": 0.0538, "step": 4489 }, { "epoch": 0.9411024942360092, "grad_norm": 0.0081089548766613, "learning_rate": 9.137171503026787e-08, "loss": 0.0541, "step": 4490 }, { "epoch": 0.9413120939006497, "grad_norm": 0.007212504744529724, "learning_rate": 9.072680113131893e-08, "loss": 0.0517, "step": 4491 }, { "epoch": 0.9415216935652903, "grad_norm": 0.006384396459907293, "learning_rate": 9.008415040629548e-08, "loss": 0.0528, "step": 4492 }, { "epoch": 0.9417312932299309, "grad_norm": 0.007916389964520931, "learning_rate": 8.944376315145831e-08, "loss": 0.0533, "step": 4493 }, { "epoch": 0.9419408928945714, "grad_norm": 0.009434894658625126, "learning_rate": 8.880563966202627e-08, "loss": 0.0538, "step": 4494 }, { "epoch": 0.9421504925592119, "grad_norm": 0.009904226288199425, "learning_rate": 8.816978023217404e-08, "loss": 0.0542, "step": 4495 }, { "epoch": 0.9423600922238524, "grad_norm": 0.008942014537751675, "learning_rate": 8.753618515503325e-08, "loss": 0.0543, "step": 4496 }, { "epoch": 0.9425696918884929, "grad_norm": 0.008763092570006847, "learning_rate": 8.690485472269028e-08, "loss": 0.0546, "step": 4497 }, { "epoch": 0.9427792915531336, "grad_norm": 0.006246887147426605, "learning_rate": 8.627578922618895e-08, "loss": 0.0525, "step": 4498 }, { "epoch": 0.9429888912177741, "grad_norm": 0.00860271230340004, "learning_rate": 8.564898895552843e-08, "loss": 0.0542, "step": 4499 }, { "epoch": 0.9431984908824146, "grad_norm": 0.005993400234729052, "learning_rate": 8.502445419966254e-08, "loss": 0.0541, "step": 4500 }, { "epoch": 0.9434080905470551, "grad_norm": 0.009251521900296211, "learning_rate": 8.440218524650268e-08, "loss": 0.0515, "step": 4501 }, { "epoch": 0.9436176902116956, "grad_norm": 0.01032980252057314, "learning_rate": 8.378218238291492e-08, "loss": 0.0538, "step": 4502 }, { "epoch": 0.9438272898763362, "grad_norm": 0.007855959236621857, "learning_rate": 8.316444589471895e-08, "loss": 0.0531, "step": 4503 }, { "epoch": 0.9440368895409768, "grad_norm": 0.008385205641388893, "learning_rate": 8.254897606669254e-08, "loss": 0.0547, "step": 4504 }, { "epoch": 0.9442464892056173, "grad_norm": 0.0065010362304747105, "learning_rate": 8.193577318256707e-08, "loss": 0.056, "step": 4505 }, { "epoch": 0.9444560888702578, "grad_norm": 0.007750457618385553, "learning_rate": 8.132483752502806e-08, "loss": 0.0555, "step": 4506 }, { "epoch": 0.9446656885348983, "grad_norm": 0.007757239043712616, "learning_rate": 8.071616937571692e-08, "loss": 0.0538, "step": 4507 }, { "epoch": 0.9448752881995389, "grad_norm": 0.006676631513983011, "learning_rate": 8.010976901523082e-08, "loss": 0.0536, "step": 4508 }, { "epoch": 0.9450848878641794, "grad_norm": 0.007698145695030689, "learning_rate": 7.95056367231184e-08, "loss": 0.0557, "step": 4509 }, { "epoch": 0.94529448752882, "grad_norm": 0.009001096710562706, "learning_rate": 7.89037727778863e-08, "loss": 0.0539, "step": 4510 }, { "epoch": 0.9455040871934605, "grad_norm": 0.008882983587682247, "learning_rate": 7.830417745699204e-08, "loss": 0.0516, "step": 4511 }, { "epoch": 0.945713686858101, "grad_norm": 0.006819653324782848, "learning_rate": 7.770685103685005e-08, "loss": 0.0526, "step": 4512 }, { "epoch": 0.9459232865227416, "grad_norm": 0.006686016917228699, "learning_rate": 7.711179379282674e-08, "loss": 0.0529, "step": 4513 }, { "epoch": 0.9461328861873821, "grad_norm": 0.0065452903509140015, "learning_rate": 7.651900599924378e-08, "loss": 0.0526, "step": 4514 }, { "epoch": 0.9463424858520226, "grad_norm": 0.006739321630448103, "learning_rate": 7.592848792937701e-08, "loss": 0.0535, "step": 4515 }, { "epoch": 0.9465520855166631, "grad_norm": 0.009856310673058033, "learning_rate": 7.53402398554548e-08, "loss": 0.0527, "step": 4516 }, { "epoch": 0.9467616851813037, "grad_norm": 0.00838090106844902, "learning_rate": 7.475426204865854e-08, "loss": 0.0539, "step": 4517 }, { "epoch": 0.9469712848459443, "grad_norm": 0.0068512773141264915, "learning_rate": 7.417055477912438e-08, "loss": 0.0536, "step": 4518 }, { "epoch": 0.9471808845105848, "grad_norm": 0.006120042875409126, "learning_rate": 7.358911831594095e-08, "loss": 0.0537, "step": 4519 }, { "epoch": 0.9473904841752253, "grad_norm": 0.008014840073883533, "learning_rate": 7.300995292715107e-08, "loss": 0.0535, "step": 4520 }, { "epoch": 0.9476000838398658, "grad_norm": 0.006846890784800053, "learning_rate": 7.24330588797495e-08, "loss": 0.0555, "step": 4521 }, { "epoch": 0.9478096835045063, "grad_norm": 0.006406300701200962, "learning_rate": 7.18584364396846e-08, "loss": 0.0551, "step": 4522 }, { "epoch": 0.948019283169147, "grad_norm": 0.006220379378646612, "learning_rate": 7.128608587185615e-08, "loss": 0.0531, "step": 4523 }, { "epoch": 0.9482288828337875, "grad_norm": 0.006792591419070959, "learning_rate": 7.071600744011865e-08, "loss": 0.0516, "step": 4524 }, { "epoch": 0.948438482498428, "grad_norm": 0.0063816942274570465, "learning_rate": 7.014820140727797e-08, "loss": 0.0542, "step": 4525 }, { "epoch": 0.9486480821630685, "grad_norm": 0.006532501429319382, "learning_rate": 6.958266803509195e-08, "loss": 0.0515, "step": 4526 }, { "epoch": 0.948857681827709, "grad_norm": 0.005698981694877148, "learning_rate": 6.901940758427206e-08, "loss": 0.0547, "step": 4527 }, { "epoch": 0.9490672814923496, "grad_norm": 0.007572156842797995, "learning_rate": 6.845842031448113e-08, "loss": 0.0508, "step": 4528 }, { "epoch": 0.9492768811569902, "grad_norm": 0.005259588360786438, "learning_rate": 6.789970648433397e-08, "loss": 0.0532, "step": 4529 }, { "epoch": 0.9494864808216307, "grad_norm": 0.00802881084382534, "learning_rate": 6.734326635139732e-08, "loss": 0.0535, "step": 4530 }, { "epoch": 0.9496960804862712, "grad_norm": 0.008471885696053505, "learning_rate": 6.678910017219098e-08, "loss": 0.0531, "step": 4531 }, { "epoch": 0.9499056801509118, "grad_norm": 0.006917075254023075, "learning_rate": 6.623720820218449e-08, "loss": 0.0513, "step": 4532 }, { "epoch": 0.9501152798155523, "grad_norm": 0.006756001152098179, "learning_rate": 6.568759069579988e-08, "loss": 0.0524, "step": 4533 }, { "epoch": 0.9503248794801928, "grad_norm": 0.006958952639251947, "learning_rate": 6.514024790641116e-08, "loss": 0.054, "step": 4534 }, { "epoch": 0.9505344791448334, "grad_norm": 0.006947326939553022, "learning_rate": 6.459518008634313e-08, "loss": 0.0548, "step": 4535 }, { "epoch": 0.9507440788094739, "grad_norm": 0.006160185672342777, "learning_rate": 6.405238748687203e-08, "loss": 0.0538, "step": 4536 }, { "epoch": 0.9509536784741145, "grad_norm": 0.007486341055482626, "learning_rate": 6.351187035822492e-08, "loss": 0.054, "step": 4537 }, { "epoch": 0.951163278138755, "grad_norm": 0.006339498329907656, "learning_rate": 6.297362894958025e-08, "loss": 0.0563, "step": 4538 }, { "epoch": 0.9513728778033955, "grad_norm": 0.006274237297475338, "learning_rate": 6.243766350906733e-08, "loss": 0.0538, "step": 4539 }, { "epoch": 0.951582477468036, "grad_norm": 0.005623075179755688, "learning_rate": 6.190397428376515e-08, "loss": 0.0534, "step": 4540 }, { "epoch": 0.9517920771326766, "grad_norm": 0.0076430076733231544, "learning_rate": 6.137256151970583e-08, "loss": 0.0539, "step": 4541 }, { "epoch": 0.9520016767973172, "grad_norm": 0.006511925719678402, "learning_rate": 6.08434254618695e-08, "loss": 0.0525, "step": 4542 }, { "epoch": 0.9522112764619577, "grad_norm": 0.0067582023330032825, "learning_rate": 6.031656635418825e-08, "loss": 0.0536, "step": 4543 }, { "epoch": 0.9524208761265982, "grad_norm": 0.006065226625651121, "learning_rate": 5.979198443954393e-08, "loss": 0.0538, "step": 4544 }, { "epoch": 0.9526304757912387, "grad_norm": 0.005706054624170065, "learning_rate": 5.926967995976807e-08, "loss": 0.0517, "step": 4545 }, { "epoch": 0.9528400754558792, "grad_norm": 0.005716789048165083, "learning_rate": 5.8749653155643626e-08, "loss": 0.0539, "step": 4546 }, { "epoch": 0.9530496751205199, "grad_norm": 0.0065559083595871925, "learning_rate": 5.8231904266902726e-08, "loss": 0.0522, "step": 4547 }, { "epoch": 0.9532592747851604, "grad_norm": 0.005127377342432737, "learning_rate": 5.771643353222778e-08, "loss": 0.0544, "step": 4548 }, { "epoch": 0.9534688744498009, "grad_norm": 0.005965971853584051, "learning_rate": 5.720324118925036e-08, "loss": 0.0529, "step": 4549 }, { "epoch": 0.9536784741144414, "grad_norm": 0.0051533933728933334, "learning_rate": 5.669232747455178e-08, "loss": 0.0544, "step": 4550 }, { "epoch": 0.9538880737790819, "grad_norm": 0.007161029148846865, "learning_rate": 5.618369262366363e-08, "loss": 0.0548, "step": 4551 }, { "epoch": 0.9540976734437225, "grad_norm": 0.006360564846545458, "learning_rate": 5.567733687106558e-08, "loss": 0.0558, "step": 4552 }, { "epoch": 0.954307273108363, "grad_norm": 0.006422116421163082, "learning_rate": 5.517326045018867e-08, "loss": 0.0535, "step": 4553 }, { "epoch": 0.9545168727730036, "grad_norm": 0.00639104750007391, "learning_rate": 5.4671463593412025e-08, "loss": 0.0536, "step": 4554 }, { "epoch": 0.9547264724376441, "grad_norm": 0.005469374358654022, "learning_rate": 5.417194653206337e-08, "loss": 0.0555, "step": 4555 }, { "epoch": 0.9549360721022846, "grad_norm": 0.006682547274976969, "learning_rate": 5.367470949641906e-08, "loss": 0.0536, "step": 4556 }, { "epoch": 0.9551456717669252, "grad_norm": 0.008950907737016678, "learning_rate": 5.317975271570686e-08, "loss": 0.0548, "step": 4557 }, { "epoch": 0.9553552714315657, "grad_norm": 0.00793489534407854, "learning_rate": 5.268707641810144e-08, "loss": 0.0519, "step": 4558 }, { "epoch": 0.9555648710962062, "grad_norm": 0.0063106887973845005, "learning_rate": 5.2196680830725596e-08, "loss": 0.0545, "step": 4559 }, { "epoch": 0.9557744707608468, "grad_norm": 0.006787710823118687, "learning_rate": 5.1708566179652363e-08, "loss": 0.0508, "step": 4560 }, { "epoch": 0.9559840704254873, "grad_norm": 0.008157514967024326, "learning_rate": 5.122273268990285e-08, "loss": 0.0546, "step": 4561 }, { "epoch": 0.9561936700901279, "grad_norm": 0.00778998015448451, "learning_rate": 5.073918058544458e-08, "loss": 0.0536, "step": 4562 }, { "epoch": 0.9564032697547684, "grad_norm": 0.009132279083132744, "learning_rate": 5.025791008919645e-08, "loss": 0.0527, "step": 4563 }, { "epoch": 0.9566128694194089, "grad_norm": 0.006497305817902088, "learning_rate": 4.977892142302376e-08, "loss": 0.0539, "step": 4564 }, { "epoch": 0.9568224690840494, "grad_norm": 0.005645697936415672, "learning_rate": 4.930221480773989e-08, "loss": 0.0531, "step": 4565 }, { "epoch": 0.95703206874869, "grad_norm": 0.007060209289193153, "learning_rate": 4.882779046310682e-08, "loss": 0.053, "step": 4566 }, { "epoch": 0.9572416684133306, "grad_norm": 0.005475896876305342, "learning_rate": 4.835564860783404e-08, "loss": 0.054, "step": 4567 }, { "epoch": 0.9574512680779711, "grad_norm": 0.007286901585757732, "learning_rate": 4.7885789459578e-08, "loss": 0.0506, "step": 4568 }, { "epoch": 0.9576608677426116, "grad_norm": 0.007155861239880323, "learning_rate": 4.741821323494489e-08, "loss": 0.0533, "step": 4569 }, { "epoch": 0.9578704674072521, "grad_norm": 0.005204516928642988, "learning_rate": 4.6952920149486715e-08, "loss": 0.054, "step": 4570 }, { "epoch": 0.9580800670718926, "grad_norm": 0.006855521816760302, "learning_rate": 4.6489910417703564e-08, "loss": 0.054, "step": 4571 }, { "epoch": 0.9582896667365333, "grad_norm": 0.005472875200212002, "learning_rate": 4.602918425304248e-08, "loss": 0.0533, "step": 4572 }, { "epoch": 0.9584992664011738, "grad_norm": 0.005690296180546284, "learning_rate": 4.5570741867898563e-08, "loss": 0.0535, "step": 4573 }, { "epoch": 0.9587088660658143, "grad_norm": 0.0066895815543830395, "learning_rate": 4.511458347361386e-08, "loss": 0.0539, "step": 4574 }, { "epoch": 0.9589184657304548, "grad_norm": 0.008229286409914494, "learning_rate": 4.4660709280476275e-08, "loss": 0.0512, "step": 4575 }, { "epoch": 0.9591280653950953, "grad_norm": 0.0072631631046533585, "learning_rate": 4.4209119497722883e-08, "loss": 0.0546, "step": 4576 }, { "epoch": 0.959337665059736, "grad_norm": 0.006190068554133177, "learning_rate": 4.375981433353604e-08, "loss": 0.0516, "step": 4577 }, { "epoch": 0.9595472647243765, "grad_norm": 0.0051482743583619595, "learning_rate": 4.331279399504507e-08, "loss": 0.0529, "step": 4578 }, { "epoch": 0.959756864389017, "grad_norm": 0.005935709923505783, "learning_rate": 4.286805868832622e-08, "loss": 0.0524, "step": 4579 }, { "epoch": 0.9599664640536575, "grad_norm": 0.005591293331235647, "learning_rate": 4.242560861840273e-08, "loss": 0.0534, "step": 4580 }, { "epoch": 0.960176063718298, "grad_norm": 0.006417642813175917, "learning_rate": 4.19854439892442e-08, "loss": 0.0546, "step": 4581 }, { "epoch": 0.9603856633829386, "grad_norm": 0.006241221912205219, "learning_rate": 4.154756500376611e-08, "loss": 0.0545, "step": 4582 }, { "epoch": 0.9605952630475791, "grad_norm": 0.004910702351480722, "learning_rate": 4.1111971863830866e-08, "loss": 0.054, "step": 4583 }, { "epoch": 0.9608048627122197, "grad_norm": 0.005474326200783253, "learning_rate": 4.0678664770246177e-08, "loss": 0.0539, "step": 4584 }, { "epoch": 0.9610144623768602, "grad_norm": 0.006352483760565519, "learning_rate": 4.02476439227667e-08, "loss": 0.0556, "step": 4585 }, { "epoch": 0.9612240620415007, "grad_norm": 0.006457141134887934, "learning_rate": 3.981890952009293e-08, "loss": 0.0547, "step": 4586 }, { "epoch": 0.9614336617061413, "grad_norm": 0.00615812698379159, "learning_rate": 3.939246175987232e-08, "loss": 0.0556, "step": 4587 }, { "epoch": 0.9616432613707818, "grad_norm": 0.006070506758987904, "learning_rate": 3.896830083869596e-08, "loss": 0.0511, "step": 4588 }, { "epoch": 0.9618528610354223, "grad_norm": 0.00562552735209465, "learning_rate": 3.8546426952102425e-08, "loss": 0.0568, "step": 4589 }, { "epoch": 0.9620624607000628, "grad_norm": 0.006280606612563133, "learning_rate": 3.812684029457614e-08, "loss": 0.0551, "step": 4590 }, { "epoch": 0.9622720603647034, "grad_norm": 0.006904164794832468, "learning_rate": 3.770954105954461e-08, "loss": 0.0524, "step": 4591 }, { "epoch": 0.962481660029344, "grad_norm": 0.007641474716365337, "learning_rate": 3.7294529439384494e-08, "loss": 0.0559, "step": 4592 }, { "epoch": 0.9626912596939845, "grad_norm": 0.005529316142201424, "learning_rate": 3.6881805625415544e-08, "loss": 0.0517, "step": 4593 }, { "epoch": 0.962900859358625, "grad_norm": 0.004727725870907307, "learning_rate": 3.647136980790333e-08, "loss": 0.0551, "step": 4594 }, { "epoch": 0.9631104590232655, "grad_norm": 0.00712125189602375, "learning_rate": 3.606322217605873e-08, "loss": 0.0546, "step": 4595 }, { "epoch": 0.963320058687906, "grad_norm": 0.006122817751020193, "learning_rate": 3.565736291803734e-08, "loss": 0.0553, "step": 4596 }, { "epoch": 0.9635296583525467, "grad_norm": 0.004824989475309849, "learning_rate": 3.525379222094061e-08, "loss": 0.0539, "step": 4597 }, { "epoch": 0.9637392580171872, "grad_norm": 0.007551755756139755, "learning_rate": 3.485251027081415e-08, "loss": 0.0536, "step": 4598 }, { "epoch": 0.9639488576818277, "grad_norm": 0.005360460840165615, "learning_rate": 3.445351725264945e-08, "loss": 0.0547, "step": 4599 }, { "epoch": 0.9641584573464682, "grad_norm": 0.0053915646858513355, "learning_rate": 3.405681335038158e-08, "loss": 0.0536, "step": 4600 }, { "epoch": 0.9643680570111088, "grad_norm": 0.0051850867457687855, "learning_rate": 3.3662398746890924e-08, "loss": 0.0531, "step": 4601 }, { "epoch": 0.9645776566757494, "grad_norm": 0.0051136258989572525, "learning_rate": 3.327027362400315e-08, "loss": 0.0537, "step": 4602 }, { "epoch": 0.9647872563403899, "grad_norm": 0.008604098111391068, "learning_rate": 3.288043816248809e-08, "loss": 0.0521, "step": 4603 }, { "epoch": 0.9649968560050304, "grad_norm": 0.00728649040684104, "learning_rate": 3.249289254205867e-08, "loss": 0.0521, "step": 4604 }, { "epoch": 0.9652064556696709, "grad_norm": 0.005971864331513643, "learning_rate": 3.21076369413742e-08, "loss": 0.0518, "step": 4605 }, { "epoch": 0.9654160553343115, "grad_norm": 0.004847438540309668, "learning_rate": 3.172467153803704e-08, "loss": 0.0536, "step": 4606 }, { "epoch": 0.965625654998952, "grad_norm": 0.004828069359064102, "learning_rate": 3.134399650859432e-08, "loss": 0.0529, "step": 4607 }, { "epoch": 0.9658352546635925, "grad_norm": 0.004983065649867058, "learning_rate": 3.096561202853676e-08, "loss": 0.0563, "step": 4608 }, { "epoch": 0.9660448543282331, "grad_norm": 0.005727593321353197, "learning_rate": 3.0589518272300946e-08, "loss": 0.052, "step": 4609 }, { "epoch": 0.9662544539928736, "grad_norm": 0.005493259988725185, "learning_rate": 3.0215715413264294e-08, "loss": 0.0526, "step": 4610 }, { "epoch": 0.9664640536575142, "grad_norm": 0.005342698656022549, "learning_rate": 2.984420362375007e-08, "loss": 0.0517, "step": 4611 }, { "epoch": 0.9666736533221547, "grad_norm": 0.0063081239350140095, "learning_rate": 2.9474983075026276e-08, "loss": 0.0536, "step": 4612 }, { "epoch": 0.9668832529867952, "grad_norm": 0.005974064581096172, "learning_rate": 2.9108053937302316e-08, "loss": 0.0562, "step": 4613 }, { "epoch": 0.9670928526514357, "grad_norm": 0.005807976704090834, "learning_rate": 2.8743416379733435e-08, "loss": 0.0499, "step": 4614 }, { "epoch": 0.9673024523160763, "grad_norm": 0.005039132200181484, "learning_rate": 2.8381070570416835e-08, "loss": 0.0521, "step": 4615 }, { "epoch": 0.9675120519807169, "grad_norm": 0.007256725803017616, "learning_rate": 2.8021016676393897e-08, "loss": 0.0539, "step": 4616 }, { "epoch": 0.9677216516453574, "grad_norm": 0.006315998733043671, "learning_rate": 2.7663254863649625e-08, "loss": 0.0531, "step": 4617 }, { "epoch": 0.9679312513099979, "grad_norm": 0.006833828054368496, "learning_rate": 2.7307785297111533e-08, "loss": 0.0571, "step": 4618 }, { "epoch": 0.9681408509746384, "grad_norm": 0.007867738604545593, "learning_rate": 2.6954608140651872e-08, "loss": 0.0566, "step": 4619 }, { "epoch": 0.9683504506392789, "grad_norm": 0.005606554448604584, "learning_rate": 2.6603723557085402e-08, "loss": 0.0547, "step": 4620 }, { "epoch": 0.9685600503039196, "grad_norm": 0.00556945102289319, "learning_rate": 2.6255131708168845e-08, "loss": 0.0523, "step": 4621 }, { "epoch": 0.9687696499685601, "grad_norm": 0.006726409774273634, "learning_rate": 2.5908832754603097e-08, "loss": 0.0541, "step": 4622 }, { "epoch": 0.9689792496332006, "grad_norm": 0.0066106924787163734, "learning_rate": 2.5564826856032677e-08, "loss": 0.054, "step": 4623 }, { "epoch": 0.9691888492978411, "grad_norm": 0.004752601031213999, "learning_rate": 2.5223114171043507e-08, "loss": 0.0539, "step": 4624 }, { "epoch": 0.9693984489624816, "grad_norm": 0.006882491055876017, "learning_rate": 2.488369485716513e-08, "loss": 0.0553, "step": 4625 }, { "epoch": 0.9696080486271222, "grad_norm": 0.008670981973409653, "learning_rate": 2.4546569070870717e-08, "loss": 0.0556, "step": 4626 }, { "epoch": 0.9698176482917628, "grad_norm": 0.006038348656147718, "learning_rate": 2.4211736967574283e-08, "loss": 0.0529, "step": 4627 }, { "epoch": 0.9700272479564033, "grad_norm": 0.00571649894118309, "learning_rate": 2.387919870163291e-08, "loss": 0.054, "step": 4628 }, { "epoch": 0.9702368476210438, "grad_norm": 0.004810945596545935, "learning_rate": 2.354895442634786e-08, "loss": 0.054, "step": 4629 }, { "epoch": 0.9704464472856843, "grad_norm": 0.00811366643756628, "learning_rate": 2.3221004293961237e-08, "loss": 0.0542, "step": 4630 }, { "epoch": 0.9706560469503249, "grad_norm": 0.005908562336117029, "learning_rate": 2.289534845565766e-08, "loss": 0.0528, "step": 4631 }, { "epoch": 0.9708656466149654, "grad_norm": 0.00523727061226964, "learning_rate": 2.2571987061564827e-08, "loss": 0.0542, "step": 4632 }, { "epoch": 0.971075246279606, "grad_norm": 0.006445094011723995, "learning_rate": 2.225092026075182e-08, "loss": 0.0511, "step": 4633 }, { "epoch": 0.9712848459442465, "grad_norm": 0.006017652805894613, "learning_rate": 2.193214820123024e-08, "loss": 0.0585, "step": 4634 }, { "epoch": 0.971494445608887, "grad_norm": 0.00453847274184227, "learning_rate": 2.1615671029954765e-08, "loss": 0.0548, "step": 4635 }, { "epoch": 0.9717040452735276, "grad_norm": 0.0046917712315917015, "learning_rate": 2.1301488892820908e-08, "loss": 0.0533, "step": 4636 }, { "epoch": 0.9719136449381681, "grad_norm": 0.004841329529881477, "learning_rate": 2.098960193466615e-08, "loss": 0.0536, "step": 4637 }, { "epoch": 0.9721232446028086, "grad_norm": 0.0073344954289495945, "learning_rate": 2.0680010299271024e-08, "loss": 0.0557, "step": 4638 }, { "epoch": 0.9723328442674491, "grad_norm": 0.006531344726681709, "learning_rate": 2.0372714129356375e-08, "loss": 0.0546, "step": 4639 }, { "epoch": 0.9725424439320897, "grad_norm": 0.00542111974209547, "learning_rate": 2.0067713566586654e-08, "loss": 0.0539, "step": 4640 }, { "epoch": 0.9727520435967303, "grad_norm": 0.004822442773729563, "learning_rate": 1.9765008751566618e-08, "loss": 0.0562, "step": 4641 }, { "epoch": 0.9729616432613708, "grad_norm": 0.004666461609303951, "learning_rate": 1.9464599823842966e-08, "loss": 0.0558, "step": 4642 }, { "epoch": 0.9731712429260113, "grad_norm": 0.005237584933638573, "learning_rate": 1.9166486921903814e-08, "loss": 0.0542, "step": 4643 }, { "epoch": 0.9733808425906518, "grad_norm": 0.005628027021884918, "learning_rate": 1.8870670183179783e-08, "loss": 0.052, "step": 4644 }, { "epoch": 0.9735904422552923, "grad_norm": 0.005040889140218496, "learning_rate": 1.8577149744042343e-08, "loss": 0.0522, "step": 4645 }, { "epoch": 0.973800041919933, "grad_norm": 0.004789900500327349, "learning_rate": 1.8285925739803812e-08, "loss": 0.0521, "step": 4646 }, { "epoch": 0.9740096415845735, "grad_norm": 0.00599799444898963, "learning_rate": 1.7996998304719016e-08, "loss": 0.0553, "step": 4647 }, { "epoch": 0.974219241249214, "grad_norm": 0.004553031176328659, "learning_rate": 1.7710367571983077e-08, "loss": 0.0505, "step": 4648 }, { "epoch": 0.9744288409138545, "grad_norm": 0.00728783430531621, "learning_rate": 1.7426033673733077e-08, "loss": 0.0537, "step": 4649 }, { "epoch": 0.974638440578495, "grad_norm": 0.004845589864999056, "learning_rate": 1.7143996741045832e-08, "loss": 0.0554, "step": 4650 }, { "epoch": 0.9748480402431356, "grad_norm": 0.005394340958446264, "learning_rate": 1.686425690394178e-08, "loss": 0.0516, "step": 4651 }, { "epoch": 0.9750576399077762, "grad_norm": 0.006395083852112293, "learning_rate": 1.6586814291379428e-08, "loss": 0.0553, "step": 4652 }, { "epoch": 0.9752672395724167, "grad_norm": 0.005952565465122461, "learning_rate": 1.631166903126147e-08, "loss": 0.0554, "step": 4653 }, { "epoch": 0.9754768392370572, "grad_norm": 0.004673383664339781, "learning_rate": 1.603882125042866e-08, "loss": 0.0555, "step": 4654 }, { "epoch": 0.9756864389016977, "grad_norm": 0.005786755122244358, "learning_rate": 1.576827107466372e-08, "loss": 0.0529, "step": 4655 }, { "epoch": 0.9758960385663383, "grad_norm": 0.006783796474337578, "learning_rate": 1.5500018628690216e-08, "loss": 0.0526, "step": 4656 }, { "epoch": 0.9761056382309788, "grad_norm": 0.006686339620500803, "learning_rate": 1.5234064036173114e-08, "loss": 0.0541, "step": 4657 }, { "epoch": 0.9763152378956194, "grad_norm": 0.006544138304889202, "learning_rate": 1.4970407419717116e-08, "loss": 0.0517, "step": 4658 }, { "epoch": 0.9765248375602599, "grad_norm": 0.004879649728536606, "learning_rate": 1.4709048900867772e-08, "loss": 0.0521, "step": 4659 }, { "epoch": 0.9767344372249004, "grad_norm": 0.006231387611478567, "learning_rate": 1.4449988600111486e-08, "loss": 0.0551, "step": 4660 }, { "epoch": 0.976944036889541, "grad_norm": 0.005122179165482521, "learning_rate": 1.4193226636874391e-08, "loss": 0.0516, "step": 4661 }, { "epoch": 0.9771536365541815, "grad_norm": 0.004648258443921804, "learning_rate": 1.393876312952458e-08, "loss": 0.0523, "step": 4662 }, { "epoch": 0.977363236218822, "grad_norm": 0.005532658658921719, "learning_rate": 1.3686598195369327e-08, "loss": 0.0589, "step": 4663 }, { "epoch": 0.9775728358834626, "grad_norm": 0.00559549406170845, "learning_rate": 1.34367319506562e-08, "loss": 0.0499, "step": 4664 }, { "epoch": 0.9777824355481031, "grad_norm": 0.004705341067165136, "learning_rate": 1.318916451057417e-08, "loss": 0.0535, "step": 4665 }, { "epoch": 0.9779920352127437, "grad_norm": 0.005659153684973717, "learning_rate": 1.2943895989251387e-08, "loss": 0.0543, "step": 4666 }, { "epoch": 0.9782016348773842, "grad_norm": 0.004608216229826212, "learning_rate": 1.2700926499756295e-08, "loss": 0.058, "step": 4667 }, { "epoch": 0.9784112345420247, "grad_norm": 0.005427349358797073, "learning_rate": 1.2460256154098738e-08, "loss": 0.0566, "step": 4668 }, { "epoch": 0.9786208342066652, "grad_norm": 0.0053130644373595715, "learning_rate": 1.2221885063226635e-08, "loss": 0.0516, "step": 4669 }, { "epoch": 0.9788304338713059, "grad_norm": 0.004453903995454311, "learning_rate": 1.198581333702986e-08, "loss": 0.053, "step": 4670 }, { "epoch": 0.9790400335359464, "grad_norm": 0.0048965164460241795, "learning_rate": 1.1752041084336364e-08, "loss": 0.0536, "step": 4671 }, { "epoch": 0.9792496332005869, "grad_norm": 0.008032665587961674, "learning_rate": 1.15205684129166e-08, "loss": 0.0548, "step": 4672 }, { "epoch": 0.9794592328652274, "grad_norm": 0.004445253871381283, "learning_rate": 1.1291395429477991e-08, "loss": 0.052, "step": 4673 }, { "epoch": 0.9796688325298679, "grad_norm": 0.004714785609394312, "learning_rate": 1.1064522239669916e-08, "loss": 0.0553, "step": 4674 }, { "epoch": 0.9798784321945085, "grad_norm": 0.004439793061465025, "learning_rate": 1.0839948948080937e-08, "loss": 0.0509, "step": 4675 }, { "epoch": 0.9800880318591491, "grad_norm": 0.004678604193031788, "learning_rate": 1.0617675658239345e-08, "loss": 0.0568, "step": 4676 }, { "epoch": 0.9802976315237896, "grad_norm": 0.007188865914940834, "learning_rate": 1.0397702472612625e-08, "loss": 0.053, "step": 4677 }, { "epoch": 0.9805072311884301, "grad_norm": 0.006724335718899965, "learning_rate": 1.0180029492608546e-08, "loss": 0.0556, "step": 4678 }, { "epoch": 0.9807168308530706, "grad_norm": 0.006979641038924456, "learning_rate": 9.964656818574614e-09, "loss": 0.0528, "step": 4679 }, { "epoch": 0.9809264305177112, "grad_norm": 0.0065865106880664825, "learning_rate": 9.751584549796966e-09, "loss": 0.0538, "step": 4680 }, { "epoch": 0.9811360301823517, "grad_norm": 0.005535759497433901, "learning_rate": 9.54081278450314e-09, "loss": 0.0547, "step": 4681 }, { "epoch": 0.9813456298469923, "grad_norm": 0.006264574825763702, "learning_rate": 9.332341619857078e-09, "loss": 0.0541, "step": 4682 }, { "epoch": 0.9815552295116328, "grad_norm": 0.006483092904090881, "learning_rate": 9.126171151965235e-09, "loss": 0.0536, "step": 4683 }, { "epoch": 0.9817648291762733, "grad_norm": 0.005037569906562567, "learning_rate": 8.922301475872141e-09, "loss": 0.0548, "step": 4684 }, { "epoch": 0.9819744288409139, "grad_norm": 0.005114252213388681, "learning_rate": 8.72073268556095e-09, "loss": 0.055, "step": 4685 }, { "epoch": 0.9821840285055544, "grad_norm": 0.006643661763519049, "learning_rate": 8.52146487395511e-09, "loss": 0.0564, "step": 4686 }, { "epoch": 0.9823936281701949, "grad_norm": 0.007684056647121906, "learning_rate": 8.324498132917248e-09, "loss": 0.055, "step": 4687 }, { "epoch": 0.9826032278348354, "grad_norm": 0.005348839331418276, "learning_rate": 8.129832553249173e-09, "loss": 0.0527, "step": 4688 }, { "epoch": 0.982812827499476, "grad_norm": 0.006276331376284361, "learning_rate": 7.937468224691325e-09, "loss": 0.0553, "step": 4689 }, { "epoch": 0.9830224271641166, "grad_norm": 0.005827800370752811, "learning_rate": 7.747405235923322e-09, "loss": 0.0534, "step": 4690 }, { "epoch": 0.9832320268287571, "grad_norm": 0.005027239676564932, "learning_rate": 7.55964367456452e-09, "loss": 0.0505, "step": 4691 }, { "epoch": 0.9834416264933976, "grad_norm": 0.004755694884806871, "learning_rate": 7.374183627173459e-09, "loss": 0.0544, "step": 4692 }, { "epoch": 0.9836512261580381, "grad_norm": 0.004315654281526804, "learning_rate": 7.191025179246192e-09, "loss": 0.0505, "step": 4693 }, { "epoch": 0.9838608258226786, "grad_norm": 0.006336344871670008, "learning_rate": 7.010168415219621e-09, "loss": 0.0546, "step": 4694 }, { "epoch": 0.9840704254873193, "grad_norm": 0.004240935202687979, "learning_rate": 6.831613418468163e-09, "loss": 0.0529, "step": 4695 }, { "epoch": 0.9842800251519598, "grad_norm": 0.00471118651330471, "learning_rate": 6.655360271305422e-09, "loss": 0.0562, "step": 4696 }, { "epoch": 0.9844896248166003, "grad_norm": 0.005362183786928654, "learning_rate": 6.4814090549847334e-09, "loss": 0.0535, "step": 4697 }, { "epoch": 0.9846992244812408, "grad_norm": 0.003966295626014471, "learning_rate": 6.30975984969695e-09, "loss": 0.0536, "step": 4698 }, { "epoch": 0.9849088241458813, "grad_norm": 0.005599298048764467, "learning_rate": 6.140412734572665e-09, "loss": 0.0529, "step": 4699 }, { "epoch": 0.985118423810522, "grad_norm": 0.00638514244928956, "learning_rate": 5.973367787681095e-09, "loss": 0.0514, "step": 4700 }, { "epoch": 0.9853280234751625, "grad_norm": 0.004646534100174904, "learning_rate": 5.808625086029529e-09, "loss": 0.0538, "step": 4701 }, { "epoch": 0.985537623139803, "grad_norm": 0.0049830214120447636, "learning_rate": 5.646184705563884e-09, "loss": 0.0535, "step": 4702 }, { "epoch": 0.9857472228044435, "grad_norm": 0.005619076080620289, "learning_rate": 5.486046721170368e-09, "loss": 0.0554, "step": 4703 }, { "epoch": 0.985956822469084, "grad_norm": 0.004761868622153997, "learning_rate": 5.328211206671596e-09, "loss": 0.0546, "step": 4704 }, { "epoch": 0.9861664221337246, "grad_norm": 0.004782952833920717, "learning_rate": 5.17267823482992e-09, "loss": 0.0549, "step": 4705 }, { "epoch": 0.9863760217983651, "grad_norm": 0.005984222050756216, "learning_rate": 5.019447877346317e-09, "loss": 0.055, "step": 4706 }, { "epoch": 0.9865856214630057, "grad_norm": 0.004718398675322533, "learning_rate": 4.868520204859284e-09, "loss": 0.0522, "step": 4707 }, { "epoch": 0.9867952211276462, "grad_norm": 0.005698632914572954, "learning_rate": 4.719895286947052e-09, "loss": 0.0547, "step": 4708 }, { "epoch": 0.9870048207922867, "grad_norm": 0.005634862929582596, "learning_rate": 4.573573192125369e-09, "loss": 0.0523, "step": 4709 }, { "epoch": 0.9872144204569273, "grad_norm": 0.0050827860832214355, "learning_rate": 4.429553987849167e-09, "loss": 0.0547, "step": 4710 }, { "epoch": 0.9874240201215678, "grad_norm": 0.005555478390306234, "learning_rate": 4.287837740510336e-09, "loss": 0.0524, "step": 4711 }, { "epoch": 0.9876336197862083, "grad_norm": 0.0045521133579313755, "learning_rate": 4.148424515441063e-09, "loss": 0.0535, "step": 4712 }, { "epoch": 0.9878432194508489, "grad_norm": 0.005545974709093571, "learning_rate": 4.011314376909936e-09, "loss": 0.0513, "step": 4713 }, { "epoch": 0.9880528191154894, "grad_norm": 0.005865436978638172, "learning_rate": 3.876507388125839e-09, "loss": 0.0522, "step": 4714 }, { "epoch": 0.98826241878013, "grad_norm": 0.004759861622005701, "learning_rate": 3.744003611233505e-09, "loss": 0.0529, "step": 4715 }, { "epoch": 0.9884720184447705, "grad_norm": 0.004680339712649584, "learning_rate": 3.613803107317959e-09, "loss": 0.0536, "step": 4716 }, { "epoch": 0.988681618109411, "grad_norm": 0.005101062823086977, "learning_rate": 3.4859059364006354e-09, "loss": 0.0506, "step": 4717 }, { "epoch": 0.9888912177740515, "grad_norm": 0.005664270371198654, "learning_rate": 3.3603121574438126e-09, "loss": 0.0565, "step": 4718 }, { "epoch": 0.989100817438692, "grad_norm": 0.005366276018321514, "learning_rate": 3.237021828344511e-09, "loss": 0.0542, "step": 4719 }, { "epoch": 0.9893104171033327, "grad_norm": 0.004395569209009409, "learning_rate": 3.1160350059405986e-09, "loss": 0.0537, "step": 4720 }, { "epoch": 0.9895200167679732, "grad_norm": 0.00447695842012763, "learning_rate": 2.9973517460063496e-09, "loss": 0.0532, "step": 4721 }, { "epoch": 0.9897296164326137, "grad_norm": 0.00491610262542963, "learning_rate": 2.8809721032552197e-09, "loss": 0.054, "step": 4722 }, { "epoch": 0.9899392160972542, "grad_norm": 0.0043014525435864925, "learning_rate": 2.7668961313376263e-09, "loss": 0.0561, "step": 4723 }, { "epoch": 0.9901488157618947, "grad_norm": 0.007249589078128338, "learning_rate": 2.6551238828431692e-09, "loss": 0.0538, "step": 4724 }, { "epoch": 0.9903584154265354, "grad_norm": 0.005701399873942137, "learning_rate": 2.5456554092984087e-09, "loss": 0.0568, "step": 4725 }, { "epoch": 0.9905680150911759, "grad_norm": 0.00624644011259079, "learning_rate": 2.438490761168533e-09, "loss": 0.0554, "step": 4726 }, { "epoch": 0.9907776147558164, "grad_norm": 0.005006265826523304, "learning_rate": 2.3336299878562453e-09, "loss": 0.0531, "step": 4727 }, { "epoch": 0.9909872144204569, "grad_norm": 0.005804962012916803, "learning_rate": 2.231073137702877e-09, "loss": 0.0542, "step": 4728 }, { "epoch": 0.9911968140850974, "grad_norm": 0.005389668978750706, "learning_rate": 2.1308202579861657e-09, "loss": 0.0529, "step": 4729 }, { "epoch": 0.991406413749738, "grad_norm": 0.005474920384585857, "learning_rate": 2.0328713949230304e-09, "loss": 0.0545, "step": 4730 }, { "epoch": 0.9916160134143785, "grad_norm": 0.0048176608979702, "learning_rate": 1.937226593668462e-09, "loss": 0.0534, "step": 4731 }, { "epoch": 0.9918256130790191, "grad_norm": 0.004726291634142399, "learning_rate": 1.8438858983138575e-09, "loss": 0.0535, "step": 4732 }, { "epoch": 0.9920352127436596, "grad_norm": 0.0048382277600467205, "learning_rate": 1.752849351889796e-09, "loss": 0.0548, "step": 4733 }, { "epoch": 0.9922448124083001, "grad_norm": 0.006312186364084482, "learning_rate": 1.6641169963638182e-09, "loss": 0.0554, "step": 4734 }, { "epoch": 0.9924544120729407, "grad_norm": 0.005486776586622, "learning_rate": 1.5776888726420913e-09, "loss": 0.0535, "step": 4735 }, { "epoch": 0.9926640117375812, "grad_norm": 0.005428542383015156, "learning_rate": 1.4935650205671893e-09, "loss": 0.0557, "step": 4736 }, { "epoch": 0.9928736114022217, "grad_norm": 0.0053994497284293175, "learning_rate": 1.4117454789208673e-09, "loss": 0.0514, "step": 4737 }, { "epoch": 0.9930832110668623, "grad_norm": 0.005145637784153223, "learning_rate": 1.3322302854212876e-09, "loss": 0.0544, "step": 4738 }, { "epoch": 0.9932928107315029, "grad_norm": 0.005606017541140318, "learning_rate": 1.2550194767252387e-09, "loss": 0.054, "step": 4739 }, { "epoch": 0.9935024103961434, "grad_norm": 0.004900652449578047, "learning_rate": 1.1801130884270262e-09, "loss": 0.0527, "step": 4740 }, { "epoch": 0.9937120100607839, "grad_norm": 0.0050704628229141235, "learning_rate": 1.1075111550579166e-09, "loss": 0.0548, "step": 4741 }, { "epoch": 0.9939216097254244, "grad_norm": 0.004520857241004705, "learning_rate": 1.0372137100883584e-09, "loss": 0.0558, "step": 4742 }, { "epoch": 0.9941312093900649, "grad_norm": 0.004727307241410017, "learning_rate": 9.692207859246516e-10, "loss": 0.0543, "step": 4743 }, { "epoch": 0.9943408090547056, "grad_norm": 0.0052885450422763824, "learning_rate": 9.03532413911723e-10, "loss": 0.0537, "step": 4744 }, { "epoch": 0.9945504087193461, "grad_norm": 0.00510826800018549, "learning_rate": 8.401486243320156e-10, "loss": 0.0523, "step": 4745 }, { "epoch": 0.9947600083839866, "grad_norm": 0.005960457026958466, "learning_rate": 7.790694464054893e-10, "loss": 0.0528, "step": 4746 }, { "epoch": 0.9949696080486271, "grad_norm": 0.006908085662871599, "learning_rate": 7.202949082890654e-10, "loss": 0.054, "step": 4747 }, { "epoch": 0.9951792077132676, "grad_norm": 0.004360835067927837, "learning_rate": 6.638250370788468e-10, "loss": 0.0512, "step": 4748 }, { "epoch": 0.9953888073779082, "grad_norm": 0.00422016391530633, "learning_rate": 6.09659858806233e-10, "loss": 0.0539, "step": 4749 }, { "epoch": 0.9955984070425488, "grad_norm": 0.0072461930103600025, "learning_rate": 5.577993984423602e-10, "loss": 0.0521, "step": 4750 }, { "epoch": 0.9958080067071893, "grad_norm": 0.0055481684394180775, "learning_rate": 5.08243679894771e-10, "loss": 0.054, "step": 4751 }, { "epoch": 0.9960176063718298, "grad_norm": 0.004346661269664764, "learning_rate": 4.609927260079694e-10, "loss": 0.0544, "step": 4752 }, { "epoch": 0.9962272060364703, "grad_norm": 0.004471504595130682, "learning_rate": 4.1604655856508633e-10, "loss": 0.0528, "step": 4753 }, { "epoch": 0.9964368057011109, "grad_norm": 0.004563014954328537, "learning_rate": 3.7340519828621415e-10, "loss": 0.0531, "step": 4754 }, { "epoch": 0.9966464053657514, "grad_norm": 0.005326093640178442, "learning_rate": 3.330686648289616e-10, "loss": 0.0527, "step": 4755 }, { "epoch": 0.996856005030392, "grad_norm": 0.005793221294879913, "learning_rate": 2.950369767884542e-10, "loss": 0.0533, "step": 4756 }, { "epoch": 0.9970656046950325, "grad_norm": 0.005273030139505863, "learning_rate": 2.59310151697334e-10, "loss": 0.0521, "step": 4757 }, { "epoch": 0.997275204359673, "grad_norm": 0.004422128200531006, "learning_rate": 2.2588820602631457e-10, "loss": 0.0517, "step": 4758 }, { "epoch": 0.9974848040243136, "grad_norm": 0.006338967010378838, "learning_rate": 1.9477115518140577e-10, "loss": 0.0555, "step": 4759 }, { "epoch": 0.9976944036889541, "grad_norm": 0.004579038359224796, "learning_rate": 1.6595901350890954e-10, "loss": 0.0539, "step": 4760 }, { "epoch": 0.9979040033535946, "grad_norm": 0.005021790973842144, "learning_rate": 1.39451794290979e-10, "loss": 0.0528, "step": 4761 }, { "epoch": 0.9981136030182352, "grad_norm": 0.004821773152798414, "learning_rate": 1.1524950974672878e-10, "loss": 0.0525, "step": 4762 }, { "epoch": 0.9983232026828757, "grad_norm": 0.004908620845526457, "learning_rate": 9.335217103445538e-11, "loss": 0.0524, "step": 4763 }, { "epoch": 0.9985328023475163, "grad_norm": 0.007371780462563038, "learning_rate": 7.375978824775143e-11, "loss": 0.0526, "step": 4764 }, { "epoch": 0.9987424020121568, "grad_norm": 0.005421594250947237, "learning_rate": 5.6472370419391464e-11, "loss": 0.0543, "step": 4765 }, { "epoch": 0.9989520016767973, "grad_norm": 0.0057752360589802265, "learning_rate": 4.148992551855635e-11, "loss": 0.053, "step": 4766 }, { "epoch": 0.9991616013414378, "grad_norm": 0.007910934276878834, "learning_rate": 2.8812460452498637e-11, "loss": 0.0517, "step": 4767 }, { "epoch": 0.9993712010060783, "grad_norm": 0.005225565284490585, "learning_rate": 1.843998106543232e-11, "loss": 0.0547, "step": 4768 }, { "epoch": 0.999580800670719, "grad_norm": 0.005672250874340534, "learning_rate": 1.0372492138532864e-11, "loss": 0.0564, "step": 4769 }, { "epoch": 0.9997904003353595, "grad_norm": 0.005982316564768553, "learning_rate": 4.609997391602505e-12, "loss": 0.0538, "step": 4770 }, { "epoch": 1.0, "grad_norm": 0.005909937433898449, "learning_rate": 1.1524994808498335e-12, "loss": 0.0547, "step": 4771 }, { "epoch": 1.0, "step": 4771, "total_flos": 0.0, "train_loss": 0.06302105758305591, "train_runtime": 301007.0841, "train_samples_per_second": 3.043, "train_steps_per_second": 0.016 } ], "logging_steps": 1.0, "max_steps": 4771, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }