{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9964556962025316,
  "eval_steps": 500,
  "global_step": 246,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004050632911392405,
      "grad_norm": 13.497359275817871,
      "learning_rate": 2.5e-06,
      "loss": 1.2803,
      "step": 1
    },
    {
      "epoch": 0.00810126582278481,
      "grad_norm": 11.971280097961426,
      "learning_rate": 5e-06,
      "loss": 1.2781,
      "step": 2
    },
    {
      "epoch": 0.012151898734177215,
      "grad_norm": 10.184432029724121,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.2722,
      "step": 3
    },
    {
      "epoch": 0.01620253164556962,
      "grad_norm": 6.801725387573242,
      "learning_rate": 1e-05,
      "loss": 1.1279,
      "step": 4
    },
    {
      "epoch": 0.020253164556962026,
      "grad_norm": 4.426397800445557,
      "learning_rate": 1.25e-05,
      "loss": 1.1123,
      "step": 5
    },
    {
      "epoch": 0.02430379746835443,
      "grad_norm": 5.871100425720215,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 1.1242,
      "step": 6
    },
    {
      "epoch": 0.028354430379746835,
      "grad_norm": 4.175433158874512,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 1.0718,
      "step": 7
    },
    {
      "epoch": 0.03240506329113924,
      "grad_norm": 5.5522990226745605,
      "learning_rate": 2e-05,
      "loss": 1.0707,
      "step": 8
    },
    {
      "epoch": 0.03645569620253165,
      "grad_norm": 5.652365207672119,
      "learning_rate": 1.999912881672411e-05,
      "loss": 1.1061,
      "step": 9
    },
    {
      "epoch": 0.04050632911392405,
      "grad_norm": 5.447442531585693,
      "learning_rate": 1.9996515418688493e-05,
      "loss": 1.0846,
      "step": 10
    },
    {
      "epoch": 0.044556962025316456,
      "grad_norm": 2.7070164680480957,
      "learning_rate": 1.999216026124288e-05,
      "loss": 1.086,
      "step": 11
    },
    {
      "epoch": 0.04860759493670886,
      "grad_norm": 2.1301090717315674,
      "learning_rate": 1.998606410321534e-05,
      "loss": 1.0542,
      "step": 12
    },
    {
      "epoch": 0.052658227848101265,
      "grad_norm": 2.466780185699463,
      "learning_rate": 1.9978228006780056e-05,
      "loss": 1.0618,
      "step": 13
    },
    {
      "epoch": 0.05670886075949367,
      "grad_norm": 3.075913667678833,
      "learning_rate": 1.9968653337272262e-05,
      "loss": 1.0487,
      "step": 14
    },
    {
      "epoch": 0.060759493670886074,
      "grad_norm": 2.206223249435425,
      "learning_rate": 1.9957341762950346e-05,
      "loss": 1.0237,
      "step": 15
    },
    {
      "epoch": 0.06481012658227848,
      "grad_norm": 1.7991361618041992,
      "learning_rate": 1.9944295254705187e-05,
      "loss": 1.0212,
      "step": 16
    },
    {
      "epoch": 0.06886075949367089,
      "grad_norm": 1.9748821258544922,
      "learning_rate": 1.9929516085716736e-05,
      "loss": 1.0243,
      "step": 17
    },
    {
      "epoch": 0.0729113924050633,
      "grad_norm": 2.207519054412842,
      "learning_rate": 1.9913006831057967e-05,
      "loss": 1.0105,
      "step": 18
    },
    {
      "epoch": 0.0769620253164557,
      "grad_norm": 1.6344636678695679,
      "learning_rate": 1.9894770367246197e-05,
      "loss": 1.002,
      "step": 19
    },
    {
      "epoch": 0.0810126582278481,
      "grad_norm": 1.5756722688674927,
      "learning_rate": 1.9874809871741877e-05,
      "loss": 0.9825,
      "step": 20
    },
    {
      "epoch": 0.08506329113924051,
      "grad_norm": 1.6223676204681396,
      "learning_rate": 1.9853128822394976e-05,
      "loss": 0.982,
      "step": 21
    },
    {
      "epoch": 0.08911392405063291,
      "grad_norm": 1.5844409465789795,
      "learning_rate": 1.982973099683902e-05,
      "loss": 1.024,
      "step": 22
    },
    {
      "epoch": 0.09316455696202532,
      "grad_norm": 1.8773488998413086,
      "learning_rate": 1.980462047183287e-05,
      "loss": 0.9737,
      "step": 23
    },
    {
      "epoch": 0.09721518987341772,
      "grad_norm": 1.8542094230651855,
      "learning_rate": 1.977780162255041e-05,
      "loss": 1.0221,
      "step": 24
    },
    {
      "epoch": 0.10126582278481013,
      "grad_norm": 2.02828049659729,
      "learning_rate": 1.9749279121818235e-05,
      "loss": 0.996,
      "step": 25
    },
    {
      "epoch": 0.10531645569620253,
      "grad_norm": 1.464856743812561,
      "learning_rate": 1.9719057939301477e-05,
      "loss": 0.9812,
      "step": 26
    },
    {
      "epoch": 0.10936708860759493,
      "grad_norm": 1.8954442739486694,
      "learning_rate": 1.9687143340637885e-05,
      "loss": 0.9721,
      "step": 27
    },
    {
      "epoch": 0.11341772151898734,
      "grad_norm": 1.6294549703598022,
      "learning_rate": 1.9653540886520387e-05,
      "loss": 1.0003,
      "step": 28
    },
    {
      "epoch": 0.11746835443037974,
      "grad_norm": 1.55502188205719,
      "learning_rate": 1.961825643172819e-05,
      "loss": 0.9727,
      "step": 29
    },
    {
      "epoch": 0.12151898734177215,
      "grad_norm": 1.4686181545257568,
      "learning_rate": 1.9581296124106682e-05,
      "loss": 0.9658,
      "step": 30
    },
    {
      "epoch": 0.12556962025316457,
      "grad_norm": 1.47079598903656,
      "learning_rate": 1.9542666403496232e-05,
      "loss": 0.9884,
      "step": 31
    },
    {
      "epoch": 0.12962025316455697,
      "grad_norm": 1.6131620407104492,
      "learning_rate": 1.9502374000610152e-05,
      "loss": 1.0191,
      "step": 32
    },
    {
      "epoch": 0.13367088607594937,
      "grad_norm": 1.5124179124832153,
      "learning_rate": 1.946042593586195e-05,
      "loss": 0.9668,
      "step": 33
    },
    {
      "epoch": 0.13772151898734178,
      "grad_norm": 1.4320518970489502,
      "learning_rate": 1.941682951814212e-05,
      "loss": 0.9631,
      "step": 34
    },
    {
      "epoch": 0.14177215189873418,
      "grad_norm": 1.4760117530822754,
      "learning_rate": 1.9371592343544655e-05,
      "loss": 0.9843,
      "step": 35
    },
    {
      "epoch": 0.1458227848101266,
      "grad_norm": 1.2786954641342163,
      "learning_rate": 1.932472229404356e-05,
      "loss": 0.9281,
      "step": 36
    },
    {
      "epoch": 0.149873417721519,
      "grad_norm": 1.5036900043487549,
      "learning_rate": 1.927622753611948e-05,
      "loss": 0.9629,
      "step": 37
    },
    {
      "epoch": 0.1539240506329114,
      "grad_norm": 1.4190330505371094,
      "learning_rate": 1.922611651933683e-05,
      "loss": 0.9404,
      "step": 38
    },
    {
      "epoch": 0.1579746835443038,
      "grad_norm": 1.4339755773544312,
      "learning_rate": 1.9174397974871563e-05,
      "loss": 0.9955,
      "step": 39
    },
    {
      "epoch": 0.1620253164556962,
      "grad_norm": 1.4856185913085938,
      "learning_rate": 1.912108091398988e-05,
      "loss": 0.9969,
      "step": 40
    },
    {
      "epoch": 0.1660759493670886,
      "grad_norm": 1.5168850421905518,
      "learning_rate": 1.906617462647813e-05,
      "loss": 0.9656,
      "step": 41
    },
    {
      "epoch": 0.17012658227848101,
      "grad_norm": 1.40033757686615,
      "learning_rate": 1.900968867902419e-05,
      "loss": 1.0061,
      "step": 42
    },
    {
      "epoch": 0.17417721518987342,
      "grad_norm": 1.3259391784667969,
      "learning_rate": 1.8951632913550625e-05,
      "loss": 0.9666,
      "step": 43
    },
    {
      "epoch": 0.17822784810126582,
      "grad_norm": 1.432612657546997,
      "learning_rate": 1.8892017445499812e-05,
      "loss": 0.9565,
      "step": 44
    },
    {
      "epoch": 0.18227848101265823,
      "grad_norm": 1.4262988567352295,
      "learning_rate": 1.8830852662071507e-05,
      "loss": 0.939,
      "step": 45
    },
    {
      "epoch": 0.18632911392405063,
      "grad_norm": 1.369807243347168,
      "learning_rate": 1.876814922041299e-05,
      "loss": 0.9547,
      "step": 46
    },
    {
      "epoch": 0.19037974683544304,
      "grad_norm": 1.2927881479263306,
      "learning_rate": 1.8703918045762197e-05,
      "loss": 0.9698,
      "step": 47
    },
    {
      "epoch": 0.19443037974683544,
      "grad_norm": 1.3403329849243164,
      "learning_rate": 1.8638170329544164e-05,
      "loss": 0.9405,
      "step": 48
    },
    {
      "epoch": 0.19848101265822785,
      "grad_norm": 1.388960599899292,
      "learning_rate": 1.857091752742105e-05,
      "loss": 1.0083,
      "step": 49
    },
    {
      "epoch": 0.20253164556962025,
      "grad_norm": 1.2685046195983887,
      "learning_rate": 1.8502171357296144e-05,
      "loss": 0.9702,
      "step": 50
    },
    {
      "epoch": 0.20658227848101265,
      "grad_norm": 1.3383150100708008,
      "learning_rate": 1.8431943797272187e-05,
      "loss": 0.9709,
      "step": 51
    },
    {
      "epoch": 0.21063291139240506,
      "grad_norm": 1.3614344596862793,
      "learning_rate": 1.8360247083564343e-05,
      "loss": 0.977,
      "step": 52
    },
    {
      "epoch": 0.21468354430379746,
      "grad_norm": 1.3981597423553467,
      "learning_rate": 1.828709370836819e-05,
      "loss": 0.9734,
      "step": 53
    },
    {
      "epoch": 0.21873417721518987,
      "grad_norm": 1.3510750532150269,
      "learning_rate": 1.8212496417683135e-05,
      "loss": 0.9746,
      "step": 54
    },
    {
      "epoch": 0.22278481012658227,
      "grad_norm": 1.703658103942871,
      "learning_rate": 1.81364682090916e-05,
      "loss": 0.9822,
      "step": 55
    },
    {
      "epoch": 0.22683544303797468,
      "grad_norm": 1.1477118730545044,
      "learning_rate": 1.805902232949435e-05,
      "loss": 0.9598,
      "step": 56
    },
    {
      "epoch": 0.23088607594936708,
      "grad_norm": 1.4172189235687256,
      "learning_rate": 1.7980172272802398e-05,
      "loss": 0.917,
      "step": 57
    },
    {
      "epoch": 0.23493670886075949,
      "grad_norm": 1.4359899759292603,
      "learning_rate": 1.789993177758588e-05,
      "loss": 0.9716,
      "step": 58
    },
    {
      "epoch": 0.2389873417721519,
      "grad_norm": 1.2954188585281372,
      "learning_rate": 1.78183148246803e-05,
      "loss": 0.9889,
      "step": 59
    },
    {
      "epoch": 0.2430379746835443,
      "grad_norm": 1.4424415826797485,
      "learning_rate": 1.773533563475053e-05,
      "loss": 1.0034,
      "step": 60
    },
    {
      "epoch": 0.2470886075949367,
      "grad_norm": 1.2044004201889038,
      "learning_rate": 1.7651008665813083e-05,
      "loss": 0.9816,
      "step": 61
    },
    {
      "epoch": 0.25113924050632913,
      "grad_norm": 1.3561129570007324,
      "learning_rate": 1.7565348610716963e-05,
      "loss": 0.9711,
      "step": 62
    },
    {
      "epoch": 0.25518987341772154,
      "grad_norm": 1.4384827613830566,
      "learning_rate": 1.7478370394583647e-05,
      "loss": 1.0251,
      "step": 63
    },
    {
      "epoch": 0.25924050632911394,
      "grad_norm": 1.306980848312378,
      "learning_rate": 1.7390089172206594e-05,
      "loss": 0.9603,
      "step": 64
    },
    {
      "epoch": 0.26329113924050634,
      "grad_norm": 1.357668399810791,
      "learning_rate": 1.73005203254107e-05,
      "loss": 0.9714,
      "step": 65
    },
    {
      "epoch": 0.26734177215189875,
      "grad_norm": 1.4257886409759521,
      "learning_rate": 1.720967946037225e-05,
      "loss": 1.0092,
      "step": 66
    },
    {
      "epoch": 0.27139240506329115,
      "grad_norm": 1.218770980834961,
      "learning_rate": 1.7117582404899714e-05,
      "loss": 0.9515,
      "step": 67
    },
    {
      "epoch": 0.27544303797468356,
      "grad_norm": 1.0931731462478638,
      "learning_rate": 1.7024245205675986e-05,
      "loss": 0.9759,
      "step": 68
    },
    {
      "epoch": 0.27949367088607596,
      "grad_norm": 1.2003381252288818,
      "learning_rate": 1.692968412546247e-05,
      "loss": 0.9399,
      "step": 69
    },
    {
      "epoch": 0.28354430379746837,
      "grad_norm": 1.1908068656921387,
      "learning_rate": 1.6833915640265485e-05,
      "loss": 0.9535,
      "step": 70
    },
    {
      "epoch": 0.28759493670886077,
      "grad_norm": 1.533835530281067,
      "learning_rate": 1.6736956436465573e-05,
      "loss": 0.9673,
      "step": 71
    },
    {
      "epoch": 0.2916455696202532,
      "grad_norm": 1.2078437805175781,
      "learning_rate": 1.6638823407910085e-05,
      "loss": 0.9868,
      "step": 72
    },
    {
      "epoch": 0.2956962025316456,
      "grad_norm": 1.244404673576355,
      "learning_rate": 1.6539533652969683e-05,
      "loss": 0.9662,
      "step": 73
    },
    {
      "epoch": 0.299746835443038,
      "grad_norm": 1.4124116897583008,
      "learning_rate": 1.6439104471559157e-05,
      "loss": 1.0016,
      "step": 74
    },
    {
      "epoch": 0.3037974683544304,
      "grad_norm": 1.2600699663162231,
      "learning_rate": 1.6337553362123165e-05,
      "loss": 0.9885,
      "step": 75
    },
    {
      "epoch": 0.3078481012658228,
      "grad_norm": 1.258617639541626,
      "learning_rate": 1.6234898018587336e-05,
      "loss": 0.9284,
      "step": 76
    },
    {
      "epoch": 0.3118987341772152,
      "grad_norm": 1.3986912965774536,
      "learning_rate": 1.6131156327275372e-05,
      "loss": 0.9733,
      "step": 77
    },
    {
      "epoch": 0.3159493670886076,
      "grad_norm": 1.2392029762268066,
      "learning_rate": 1.6026346363792565e-05,
      "loss": 0.9788,
      "step": 78
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2097805738449097,
      "learning_rate": 1.5920486389876383e-05,
      "loss": 0.9405,
      "step": 79
    },
    {
      "epoch": 0.3240506329113924,
      "grad_norm": 1.2511652708053589,
      "learning_rate": 1.58135948502146e-05,
      "loss": 0.942,
      "step": 80
    },
    {
      "epoch": 0.3281012658227848,
      "grad_norm": 1.1774235963821411,
      "learning_rate": 1.5705690369231552e-05,
      "loss": 0.9778,
      "step": 81
    },
    {
      "epoch": 0.3321518987341772,
      "grad_norm": 1.2575836181640625,
      "learning_rate": 1.5596791747843083e-05,
      "loss": 0.9466,
      "step": 82
    },
    {
      "epoch": 0.3362025316455696,
      "grad_norm": 1.2601759433746338,
      "learning_rate": 1.5486917960180742e-05,
      "loss": 0.9295,
      "step": 83
    },
    {
      "epoch": 0.34025316455696203,
      "grad_norm": 1.4311736822128296,
      "learning_rate": 1.5376088150285777e-05,
      "loss": 0.9894,
      "step": 84
    },
    {
      "epoch": 0.34430379746835443,
      "grad_norm": 1.393281102180481,
      "learning_rate": 1.526432162877356e-05,
      "loss": 0.9497,
      "step": 85
    },
    {
      "epoch": 0.34835443037974684,
      "grad_norm": 1.2661269903182983,
      "learning_rate": 1.515163786946896e-05,
      "loss": 0.9634,
      "step": 86
    },
    {
      "epoch": 0.35240506329113924,
      "grad_norm": 1.4130759239196777,
      "learning_rate": 1.5038056506013297e-05,
      "loss": 0.9384,
      "step": 87
    },
    {
      "epoch": 0.35645569620253165,
      "grad_norm": 1.3350815773010254,
      "learning_rate": 1.4923597328443423e-05,
      "loss": 0.9385,
      "step": 88
    },
    {
      "epoch": 0.36050632911392405,
      "grad_norm": 1.3958266973495483,
      "learning_rate": 1.4808280279743594e-05,
      "loss": 0.9596,
      "step": 89
    },
    {
      "epoch": 0.36455696202531646,
      "grad_norm": 1.2324460744857788,
      "learning_rate": 1.4692125452370664e-05,
      "loss": 0.9333,
      "step": 90
    },
    {
      "epoch": 0.36860759493670886,
      "grad_norm": 1.3735852241516113,
      "learning_rate": 1.4575153084753233e-05,
      "loss": 0.9148,
      "step": 91
    },
    {
      "epoch": 0.37265822784810126,
      "grad_norm": 1.1630451679229736,
      "learning_rate": 1.4457383557765385e-05,
      "loss": 0.9479,
      "step": 92
    },
    {
      "epoch": 0.37670886075949367,
      "grad_norm": 1.1744204759597778,
      "learning_rate": 1.4338837391175582e-05,
      "loss": 0.9433,
      "step": 93
    },
    {
      "epoch": 0.3807594936708861,
      "grad_norm": 1.1749955415725708,
      "learning_rate": 1.4219535240071378e-05,
      "loss": 0.9097,
      "step": 94
    },
    {
      "epoch": 0.3848101265822785,
      "grad_norm": 1.2120801210403442,
      "learning_rate": 1.4099497891260538e-05,
      "loss": 0.9623,
      "step": 95
    },
    {
      "epoch": 0.3888607594936709,
      "grad_norm": 1.3005883693695068,
      "learning_rate": 1.397874625964921e-05,
      "loss": 0.9562,
      "step": 96
    },
    {
      "epoch": 0.3929113924050633,
      "grad_norm": 1.2138869762420654,
      "learning_rate": 1.3857301384597796e-05,
      "loss": 0.9524,
      "step": 97
    },
    {
      "epoch": 0.3969620253164557,
      "grad_norm": 1.355625033378601,
      "learning_rate": 1.3735184426255117e-05,
      "loss": 0.9434,
      "step": 98
    },
    {
      "epoch": 0.4010126582278481,
      "grad_norm": 1.1441434621810913,
      "learning_rate": 1.3612416661871532e-05,
      "loss": 0.9922,
      "step": 99
    },
    {
      "epoch": 0.4050632911392405,
      "grad_norm": 1.3381670713424683,
      "learning_rate": 1.348901948209167e-05,
      "loss": 0.9179,
      "step": 100
    },
    {
      "epoch": 0.4091139240506329,
      "grad_norm": 1.169856071472168,
      "learning_rate": 1.3365014387227393e-05,
      "loss": 0.9251,
      "step": 101
    },
    {
      "epoch": 0.4131645569620253,
      "grad_norm": 1.215080976486206,
      "learning_rate": 1.324042298351166e-05,
      "loss": 0.9315,
      "step": 102
    },
    {
      "epoch": 0.4172151898734177,
      "grad_norm": 1.1357917785644531,
      "learning_rate": 1.3115266979333917e-05,
      "loss": 0.9354,
      "step": 103
    },
    {
      "epoch": 0.4212658227848101,
      "grad_norm": 1.1942713260650635,
      "learning_rate": 1.2989568181457704e-05,
      "loss": 0.9249,
      "step": 104
    },
    {
      "epoch": 0.4253164556962025,
      "grad_norm": 1.1736984252929688,
      "learning_rate": 1.2863348491221129e-05,
      "loss": 0.9138,
      "step": 105
    },
    {
      "epoch": 0.4293670886075949,
      "grad_norm": 1.187155842781067,
      "learning_rate": 1.2736629900720832e-05,
      "loss": 0.9255,
      "step": 106
    },
    {
      "epoch": 0.43341772151898733,
      "grad_norm": 1.084222674369812,
      "learning_rate": 1.2609434488980168e-05,
      "loss": 0.9086,
      "step": 107
    },
    {
      "epoch": 0.43746835443037974,
      "grad_norm": 1.1785520315170288,
      "learning_rate": 1.248178441810224e-05,
      "loss": 0.9428,
      "step": 108
    },
    {
      "epoch": 0.44151898734177214,
      "grad_norm": 1.1602153778076172,
      "learning_rate": 1.2353701929408425e-05,
      "loss": 0.9178,
      "step": 109
    },
    {
      "epoch": 0.44556962025316454,
      "grad_norm": 1.2142413854599,
      "learning_rate": 1.2225209339563144e-05,
      "loss": 0.914,
      "step": 110
    },
    {
      "epoch": 0.44962025316455695,
      "grad_norm": 1.134624719619751,
      "learning_rate": 1.2096329036685469e-05,
      "loss": 0.9093,
      "step": 111
    },
    {
      "epoch": 0.45367088607594935,
      "grad_norm": 1.1606262922286987,
      "learning_rate": 1.1967083476448282e-05,
      "loss": 0.9109,
      "step": 112
    },
    {
      "epoch": 0.45772151898734176,
      "grad_norm": 1.1459376811981201,
      "learning_rate": 1.1837495178165706e-05,
      "loss": 0.9197,
      "step": 113
    },
    {
      "epoch": 0.46177215189873416,
      "grad_norm": 1.2801434993743896,
      "learning_rate": 1.1707586720869375e-05,
      "loss": 0.9557,
      "step": 114
    },
    {
      "epoch": 0.46582278481012657,
      "grad_norm": 1.2383016347885132,
      "learning_rate": 1.1577380739374376e-05,
      "loss": 0.9287,
      "step": 115
    },
    {
      "epoch": 0.46987341772151897,
      "grad_norm": 1.2320594787597656,
      "learning_rate": 1.1446899920335407e-05,
      "loss": 0.9559,
      "step": 116
    },
    {
      "epoch": 0.4739240506329114,
      "grad_norm": 1.0938814878463745,
      "learning_rate": 1.1316166998293937e-05,
      "loss": 0.969,
      "step": 117
    },
    {
      "epoch": 0.4779746835443038,
      "grad_norm": 1.168281078338623,
      "learning_rate": 1.118520475171703e-05,
      "loss": 0.9374,
      "step": 118
    },
    {
      "epoch": 0.4820253164556962,
      "grad_norm": 1.163573145866394,
      "learning_rate": 1.1054035999028478e-05,
      "loss": 0.9503,
      "step": 119
    },
    {
      "epoch": 0.4860759493670886,
      "grad_norm": 1.064112901687622,
      "learning_rate": 1.092268359463302e-05,
      "loss": 0.9221,
      "step": 120
    },
    {
      "epoch": 0.490126582278481,
      "grad_norm": 1.1465892791748047,
      "learning_rate": 1.0791170424934248e-05,
      "loss": 0.9178,
      "step": 121
    },
    {
      "epoch": 0.4941772151898734,
      "grad_norm": 1.2368698120117188,
      "learning_rate": 1.0659519404346955e-05,
      "loss": 0.9571,
      "step": 122
    },
    {
      "epoch": 0.4982278481012658,
      "grad_norm": 1.028232455253601,
      "learning_rate": 1.0527753471304625e-05,
      "loss": 0.9567,
      "step": 123
    },
    {
      "epoch": 0.5022784810126583,
      "grad_norm": 1.2025481462478638,
      "learning_rate": 1.0395895584262696e-05,
      "loss": 0.9406,
      "step": 124
    },
    {
      "epoch": 0.5063291139240507,
      "grad_norm": 1.0962940454483032,
      "learning_rate": 1.0263968717698365e-05,
      "loss": 0.9053,
      "step": 125
    },
    {
      "epoch": 0.5103797468354431,
      "grad_norm": 1.102229356765747,
      "learning_rate": 1.013199585810759e-05,
      "loss": 0.9523,
      "step": 126
    },
    {
      "epoch": 0.5144303797468355,
      "grad_norm": 1.2116892337799072,
      "learning_rate": 1e-05,
      "loss": 0.898,
      "step": 127
    },
    {
      "epoch": 0.5184810126582279,
      "grad_norm": 1.0647423267364502,
      "learning_rate": 9.868004141892412e-06,
      "loss": 0.946,
      "step": 128
    },
    {
      "epoch": 0.5225316455696203,
      "grad_norm": 1.1740139722824097,
      "learning_rate": 9.73603128230164e-06,
      "loss": 0.9145,
      "step": 129
    },
    {
      "epoch": 0.5265822784810127,
      "grad_norm": 1.168250560760498,
      "learning_rate": 9.604104415737309e-06,
      "loss": 0.9757,
      "step": 130
    },
    {
      "epoch": 0.5306329113924051,
      "grad_norm": 1.1168098449707031,
      "learning_rate": 9.472246528695377e-06,
      "loss": 0.9328,
      "step": 131
    },
    {
      "epoch": 0.5346835443037975,
      "grad_norm": 1.1775215864181519,
      "learning_rate": 9.340480595653047e-06,
      "loss": 0.9239,
      "step": 132
    },
    {
      "epoch": 0.5387341772151899,
      "grad_norm": 1.2135863304138184,
      "learning_rate": 9.208829575065754e-06,
      "loss": 0.908,
      "step": 133
    },
    {
      "epoch": 0.5427848101265823,
      "grad_norm": 1.0800964832305908,
      "learning_rate": 9.07731640536698e-06,
      "loss": 0.9478,
      "step": 134
    },
    {
      "epoch": 0.5468354430379747,
      "grad_norm": 1.0159581899642944,
      "learning_rate": 8.945964000971525e-06,
      "loss": 0.9377,
      "step": 135
    },
    {
      "epoch": 0.5508860759493671,
      "grad_norm": 1.1772390604019165,
      "learning_rate": 8.814795248282974e-06,
      "loss": 0.9329,
      "step": 136
    },
    {
      "epoch": 0.5549367088607595,
      "grad_norm": 1.163148283958435,
      "learning_rate": 8.683833001706068e-06,
      "loss": 0.9317,
      "step": 137
    },
    {
      "epoch": 0.5589873417721519,
      "grad_norm": 1.167102575302124,
      "learning_rate": 8.553100079664598e-06,
      "loss": 0.9015,
      "step": 138
    },
    {
      "epoch": 0.5630379746835443,
      "grad_norm": 1.2055310010910034,
      "learning_rate": 8.422619260625626e-06,
      "loss": 0.9573,
      "step": 139
    },
    {
      "epoch": 0.5670886075949367,
      "grad_norm": 1.287394642829895,
      "learning_rate": 8.292413279130625e-06,
      "loss": 0.9177,
      "step": 140
    },
    {
      "epoch": 0.5711392405063291,
      "grad_norm": 1.08267343044281,
      "learning_rate": 8.162504821834296e-06,
      "loss": 0.9502,
      "step": 141
    },
    {
      "epoch": 0.5751898734177215,
      "grad_norm": 1.1666055917739868,
      "learning_rate": 8.03291652355172e-06,
      "loss": 0.8894,
      "step": 142
    },
    {
      "epoch": 0.579240506329114,
      "grad_norm": 1.1575030088424683,
      "learning_rate": 7.903670963314536e-06,
      "loss": 0.9206,
      "step": 143
    },
    {
      "epoch": 0.5832911392405064,
      "grad_norm": 1.148395299911499,
      "learning_rate": 7.774790660436857e-06,
      "loss": 0.9333,
      "step": 144
    },
    {
      "epoch": 0.5873417721518988,
      "grad_norm": 1.0939373970031738,
      "learning_rate": 7.646298070591578e-06,
      "loss": 0.9334,
      "step": 145
    },
    {
      "epoch": 0.5913924050632912,
      "grad_norm": 1.0633858442306519,
      "learning_rate": 7.518215581897763e-06,
      "loss": 0.911,
      "step": 146
    },
    {
      "epoch": 0.5954430379746836,
      "grad_norm": 1.0724154710769653,
      "learning_rate": 7.390565511019834e-06,
      "loss": 0.94,
      "step": 147
    },
    {
      "epoch": 0.599493670886076,
      "grad_norm": 1.059989333152771,
      "learning_rate": 7.263370099279173e-06,
      "loss": 0.9125,
      "step": 148
    },
    {
      "epoch": 0.6035443037974684,
      "grad_norm": 1.0825798511505127,
      "learning_rate": 7.136651508778876e-06,
      "loss": 0.9218,
      "step": 149
    },
    {
      "epoch": 0.6075949367088608,
      "grad_norm": 1.1126887798309326,
      "learning_rate": 7.010431818542298e-06,
      "loss": 0.9038,
      "step": 150
    },
    {
      "epoch": 0.6116455696202532,
      "grad_norm": 1.0414491891860962,
      "learning_rate": 6.884733020666086e-06,
      "loss": 0.9031,
      "step": 151
    },
    {
      "epoch": 0.6156962025316456,
      "grad_norm": 1.1232143640518188,
      "learning_rate": 6.759577016488343e-06,
      "loss": 0.8803,
      "step": 152
    },
    {
      "epoch": 0.619746835443038,
      "grad_norm": 1.111212968826294,
      "learning_rate": 6.634985612772611e-06,
      "loss": 0.9066,
      "step": 153
    },
    {
      "epoch": 0.6237974683544304,
      "grad_norm": 1.0482438802719116,
      "learning_rate": 6.510980517908334e-06,
      "loss": 0.9245,
      "step": 154
    },
    {
      "epoch": 0.6278481012658228,
      "grad_norm": 1.1926794052124023,
      "learning_rate": 6.387583338128471e-06,
      "loss": 0.8985,
      "step": 155
    },
    {
      "epoch": 0.6318987341772152,
      "grad_norm": 1.089269995689392,
      "learning_rate": 6.264815573744884e-06,
      "loss": 0.9339,
      "step": 156
    },
    {
      "epoch": 0.6359493670886076,
      "grad_norm": 1.158069372177124,
      "learning_rate": 6.142698615402205e-06,
      "loss": 0.9193,
      "step": 157
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.1409528255462646,
      "learning_rate": 6.021253740350793e-06,
      "loss": 0.9499,
      "step": 158
    },
    {
      "epoch": 0.6440506329113924,
      "grad_norm": 1.042941689491272,
      "learning_rate": 5.900502108739466e-06,
      "loss": 0.9067,
      "step": 159
    },
    {
      "epoch": 0.6481012658227848,
      "grad_norm": 1.202627420425415,
      "learning_rate": 5.780464759928623e-06,
      "loss": 0.8872,
      "step": 160
    },
    {
      "epoch": 0.6521518987341772,
      "grad_norm": 1.089460015296936,
      "learning_rate": 5.66116260882442e-06,
      "loss": 0.8848,
      "step": 161
    },
    {
      "epoch": 0.6562025316455696,
      "grad_norm": 1.09579336643219,
      "learning_rate": 5.542616442234618e-06,
      "loss": 0.8871,
      "step": 162
    },
    {
      "epoch": 0.660253164556962,
      "grad_norm": 1.073123574256897,
      "learning_rate": 5.42484691524677e-06,
      "loss": 0.9174,
      "step": 163
    },
    {
      "epoch": 0.6643037974683544,
      "grad_norm": 1.0642234086990356,
      "learning_rate": 5.307874547629339e-06,
      "loss": 0.9088,
      "step": 164
    },
    {
      "epoch": 0.6683544303797468,
      "grad_norm": 1.055198073387146,
      "learning_rate": 5.191719720256407e-06,
      "loss": 0.8979,
      "step": 165
    },
    {
      "epoch": 0.6724050632911392,
      "grad_norm": 1.0669524669647217,
      "learning_rate": 5.076402671556578e-06,
      "loss": 0.9151,
      "step": 166
    },
    {
      "epoch": 0.6764556962025317,
      "grad_norm": 1.0371671915054321,
      "learning_rate": 4.961943493986709e-06,
      "loss": 0.8848,
      "step": 167
    },
    {
      "epoch": 0.6805063291139241,
      "grad_norm": 1.0839803218841553,
      "learning_rate": 4.848362130531039e-06,
      "loss": 0.9173,
      "step": 168
    },
    {
      "epoch": 0.6845569620253165,
      "grad_norm": 1.0065879821777344,
      "learning_rate": 4.7356783712264405e-06,
      "loss": 0.8986,
      "step": 169
    },
    {
      "epoch": 0.6886075949367089,
      "grad_norm": 1.0394333600997925,
      "learning_rate": 4.623911849714226e-06,
      "loss": 0.8955,
      "step": 170
    },
    {
      "epoch": 0.6926582278481013,
      "grad_norm": 1.0068820714950562,
      "learning_rate": 4.5130820398192645e-06,
      "loss": 0.8658,
      "step": 171
    },
    {
      "epoch": 0.6967088607594937,
      "grad_norm": 1.0753767490386963,
      "learning_rate": 4.403208252156921e-06,
      "loss": 0.8855,
      "step": 172
    },
    {
      "epoch": 0.7007594936708861,
      "grad_norm": 1.0549858808517456,
      "learning_rate": 4.294309630768452e-06,
      "loss": 0.9062,
      "step": 173
    },
    {
      "epoch": 0.7048101265822785,
      "grad_norm": 1.1192678213119507,
      "learning_rate": 4.186405149785403e-06,
      "loss": 0.8991,
      "step": 174
    },
    {
      "epoch": 0.7088607594936709,
      "grad_norm": 1.0334043502807617,
      "learning_rate": 4.079513610123619e-06,
      "loss": 0.9067,
      "step": 175
    },
    {
      "epoch": 0.7129113924050633,
      "grad_norm": 1.1050986051559448,
      "learning_rate": 3.973653636207437e-06,
      "loss": 0.8982,
      "step": 176
    },
    {
      "epoch": 0.7169620253164557,
      "grad_norm": 1.1419743299484253,
      "learning_rate": 3.86884367272463e-06,
      "loss": 0.9532,
      "step": 177
    },
    {
      "epoch": 0.7210126582278481,
      "grad_norm": 1.052414894104004,
      "learning_rate": 3.7651019814126656e-06,
      "loss": 0.87,
      "step": 178
    },
    {
      "epoch": 0.7250632911392405,
      "grad_norm": 1.1054644584655762,
      "learning_rate": 3.6624466378768387e-06,
      "loss": 0.9101,
      "step": 179
    },
    {
      "epoch": 0.7291139240506329,
      "grad_norm": 1.0414938926696777,
      "learning_rate": 3.560895528440844e-06,
      "loss": 0.882,
      "step": 180
    },
    {
      "epoch": 0.7331645569620253,
      "grad_norm": 1.041466474533081,
      "learning_rate": 3.460466347030319e-06,
      "loss": 0.8985,
      "step": 181
    },
    {
      "epoch": 0.7372151898734177,
      "grad_norm": 1.0860273838043213,
      "learning_rate": 3.361176592089919e-06,
      "loss": 0.9046,
      "step": 182
    },
    {
      "epoch": 0.7412658227848101,
      "grad_norm": 1.0401769876480103,
      "learning_rate": 3.2630435635344283e-06,
      "loss": 0.915,
      "step": 183
    },
    {
      "epoch": 0.7453164556962025,
      "grad_norm": 1.0929770469665527,
      "learning_rate": 3.1660843597345137e-06,
      "loss": 0.8906,
      "step": 184
    },
    {
      "epoch": 0.7493670886075949,
      "grad_norm": 1.0812665224075317,
      "learning_rate": 3.0703158745375316e-06,
      "loss": 0.9205,
      "step": 185
    },
    {
      "epoch": 0.7534177215189873,
      "grad_norm": 1.010650873184204,
      "learning_rate": 2.975754794324015e-06,
      "loss": 0.8729,
      "step": 186
    },
    {
      "epoch": 0.7574683544303797,
      "grad_norm": 1.110718846321106,
      "learning_rate": 2.8824175951002918e-06,
      "loss": 0.8899,
      "step": 187
    },
    {
      "epoch": 0.7615189873417721,
      "grad_norm": 1.1400407552719116,
      "learning_rate": 2.7903205396277546e-06,
      "loss": 0.904,
      "step": 188
    },
    {
      "epoch": 0.7655696202531646,
      "grad_norm": 0.9677958488464355,
      "learning_rate": 2.6994796745893e-06,
      "loss": 0.9165,
      "step": 189
    },
    {
      "epoch": 0.769620253164557,
      "grad_norm": 1.0478403568267822,
      "learning_rate": 2.6099108277934105e-06,
      "loss": 0.8733,
      "step": 190
    },
    {
      "epoch": 0.7736708860759494,
      "grad_norm": 1.0226112604141235,
      "learning_rate": 2.5216296054163547e-06,
      "loss": 0.9155,
      "step": 191
    },
    {
      "epoch": 0.7777215189873418,
      "grad_norm": 1.0125083923339844,
      "learning_rate": 2.4346513892830427e-06,
      "loss": 0.9454,
      "step": 192
    },
    {
      "epoch": 0.7817721518987342,
      "grad_norm": 1.0066049098968506,
      "learning_rate": 2.34899133418692e-06,
      "loss": 0.9016,
      "step": 193
    },
    {
      "epoch": 0.7858227848101266,
      "grad_norm": 1.0303598642349243,
      "learning_rate": 2.2646643652494693e-06,
      "loss": 0.8988,
      "step": 194
    },
    {
      "epoch": 0.789873417721519,
      "grad_norm": 0.9669736623764038,
      "learning_rate": 2.1816851753197023e-06,
      "loss": 0.8754,
      "step": 195
    },
    {
      "epoch": 0.7939240506329114,
      "grad_norm": 1.039135217666626,
      "learning_rate": 2.100068222414121e-06,
      "loss": 0.8954,
      "step": 196
    },
    {
      "epoch": 0.7979746835443038,
      "grad_norm": 1.032785415649414,
      "learning_rate": 2.019827727197605e-06,
      "loss": 0.8981,
      "step": 197
    },
    {
      "epoch": 0.8020253164556962,
      "grad_norm": 0.9627246260643005,
      "learning_rate": 1.9409776705056514e-06,
      "loss": 0.8895,
      "step": 198
    },
    {
      "epoch": 0.8060759493670886,
      "grad_norm": 0.9815201163291931,
      "learning_rate": 1.8635317909083983e-06,
      "loss": 0.903,
      "step": 199
    },
    {
      "epoch": 0.810126582278481,
      "grad_norm": 1.0689142942428589,
      "learning_rate": 1.7875035823168641e-06,
      "loss": 0.9047,
      "step": 200
    },
    {
      "epoch": 0.8141772151898734,
      "grad_norm": 1.0282758474349976,
      "learning_rate": 1.712906291631814e-06,
      "loss": 0.8979,
      "step": 201
    },
    {
      "epoch": 0.8182278481012658,
      "grad_norm": 1.0624666213989258,
      "learning_rate": 1.6397529164356606e-06,
      "loss": 0.8986,
      "step": 202
    },
    {
      "epoch": 0.8222784810126582,
      "grad_norm": 0.9882222414016724,
      "learning_rate": 1.5680562027278156e-06,
      "loss": 0.9011,
      "step": 203
    },
    {
      "epoch": 0.8263291139240506,
      "grad_norm": 1.0419288873672485,
      "learning_rate": 1.4978286427038602e-06,
      "loss": 0.8815,
      "step": 204
    },
    {
      "epoch": 0.830379746835443,
      "grad_norm": 1.0580886602401733,
      "learning_rate": 1.4290824725789542e-06,
      "loss": 0.8613,
      "step": 205
    },
    {
      "epoch": 0.8344303797468354,
      "grad_norm": 0.9857713580131531,
      "learning_rate": 1.3618296704558364e-06,
      "loss": 0.8646,
      "step": 206
    },
    {
      "epoch": 0.8384810126582278,
      "grad_norm": 1.0579966306686401,
      "learning_rate": 1.2960819542378055e-06,
      "loss": 0.9152,
      "step": 207
    },
    {
      "epoch": 0.8425316455696202,
      "grad_norm": 1.0483579635620117,
      "learning_rate": 1.2318507795870138e-06,
      "loss": 0.9258,
      "step": 208
    },
    {
      "epoch": 0.8465822784810126,
      "grad_norm": 1.0411852598190308,
      "learning_rate": 1.1691473379284945e-06,
      "loss": 0.8713,
      "step": 209
    },
    {
      "epoch": 0.850632911392405,
      "grad_norm": 0.9969512820243835,
      "learning_rate": 1.1079825545001887e-06,
      "loss": 0.8882,
      "step": 210
    },
    {
      "epoch": 0.8546835443037974,
      "grad_norm": 0.9896324872970581,
      "learning_rate": 1.0483670864493777e-06,
      "loss": 0.8773,
      "step": 211
    },
    {
      "epoch": 0.8587341772151899,
      "grad_norm": 0.9491327404975891,
      "learning_rate": 9.903113209758098e-07,
      "loss": 0.8902,
      "step": 212
    },
    {
      "epoch": 0.8627848101265823,
      "grad_norm": 1.1105666160583496,
      "learning_rate": 9.33825373521875e-07,
      "loss": 0.8639,
      "step": 213
    },
    {
      "epoch": 0.8668354430379747,
      "grad_norm": 1.0579783916473389,
      "learning_rate": 8.789190860101226e-07,
      "loss": 0.8608,
      "step": 214
    },
    {
      "epoch": 0.8708860759493671,
      "grad_norm": 1.0631372928619385,
      "learning_rate": 8.256020251284381e-07,
      "loss": 0.8739,
      "step": 215
    },
    {
      "epoch": 0.8749367088607595,
      "grad_norm": 1.0193462371826172,
      "learning_rate": 7.738834806631712e-07,
      "loss": 0.8666,
      "step": 216
    },
    {
      "epoch": 0.8789873417721519,
      "grad_norm": 0.9283171892166138,
      "learning_rate": 7.237724638805221e-07,
      "loss": 0.9192,
      "step": 217
    },
    {
      "epoch": 0.8830379746835443,
      "grad_norm": 0.9990526437759399,
      "learning_rate": 6.752777059564431e-07,
      "loss": 0.8985,
      "step": 218
    },
    {
      "epoch": 0.8870886075949367,
      "grad_norm": 0.9724840521812439,
      "learning_rate": 6.284076564553465e-07,
      "loss": 0.8909,
      "step": 219
    },
    {
      "epoch": 0.8911392405063291,
      "grad_norm": 0.9561246633529663,
      "learning_rate": 5.831704818578842e-07,
      "loss": 0.8756,
      "step": 220
    },
    {
      "epoch": 0.8951898734177215,
      "grad_norm": 0.9968512654304504,
      "learning_rate": 5.395740641380532e-07,
      "loss": 0.924,
      "step": 221
    },
    {
      "epoch": 0.8992405063291139,
      "grad_norm": 1.0275803804397583,
      "learning_rate": 4.976259993898503e-07,
      "loss": 0.8724,
      "step": 222
    },
    {
      "epoch": 0.9032911392405063,
      "grad_norm": 1.1665242910385132,
      "learning_rate": 4.573335965037706e-07,
      "loss": 0.8611,
      "step": 223
    },
    {
      "epoch": 0.9073417721518987,
      "grad_norm": 1.0668385028839111,
      "learning_rate": 4.187038758933204e-07,
      "loss": 0.8902,
      "step": 224
    },
    {
      "epoch": 0.9113924050632911,
      "grad_norm": 1.0450729131698608,
      "learning_rate": 3.817435682718096e-07,
      "loss": 0.8827,
      "step": 225
    },
    {
      "epoch": 0.9154430379746835,
      "grad_norm": 0.950907826423645,
      "learning_rate": 3.4645911347961357e-07,
      "loss": 0.8826,
      "step": 226
    },
    {
      "epoch": 0.9194936708860759,
      "grad_norm": 1.0657615661621094,
      "learning_rate": 3.128566593621152e-07,
      "loss": 0.9196,
      "step": 227
    },
    {
      "epoch": 0.9235443037974683,
      "grad_norm": 1.054658055305481,
      "learning_rate": 2.809420606985236e-07,
      "loss": 0.8949,
      "step": 228
    },
    {
      "epoch": 0.9275949367088607,
      "grad_norm": 0.9942110776901245,
      "learning_rate": 2.507208781817638e-07,
      "loss": 0.8986,
      "step": 229
    },
    {
      "epoch": 0.9316455696202531,
      "grad_norm": 1.0271327495574951,
      "learning_rate": 2.2219837744959284e-07,
      "loss": 0.9016,
      "step": 230
    },
    {
      "epoch": 0.9356962025316455,
      "grad_norm": 1.0039608478546143,
      "learning_rate": 1.9537952816713334e-07,
      "loss": 0.8706,
      "step": 231
    },
    {
      "epoch": 0.9397468354430379,
      "grad_norm": 0.9501670598983765,
      "learning_rate": 1.7026900316098217e-07,
      "loss": 0.9061,
      "step": 232
    },
    {
      "epoch": 0.9437974683544303,
      "grad_norm": 1.018248200416565,
      "learning_rate": 1.4687117760502579e-07,
      "loss": 0.8873,
      "step": 233
    },
    {
      "epoch": 0.9478481012658228,
      "grad_norm": 0.9783368110656738,
      "learning_rate": 1.2519012825812804e-07,
      "loss": 0.9005,
      "step": 234
    },
    {
      "epoch": 0.9518987341772152,
      "grad_norm": 0.9772806167602539,
      "learning_rate": 1.0522963275380494e-07,
      "loss": 0.9059,
      "step": 235
    },
    {
      "epoch": 0.9559493670886076,
      "grad_norm": 0.985298216342926,
      "learning_rate": 8.699316894203225e-08,
      "loss": 0.8724,
      "step": 236
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.0296227931976318,
      "learning_rate": 7.048391428326585e-08,
      "loss": 0.8971,
      "step": 237
    },
    {
      "epoch": 0.9640506329113924,
      "grad_norm": 1.0708626508712769,
      "learning_rate": 5.5704745294815624e-08,
      "loss": 0.9268,
      "step": 238
    },
    {
      "epoch": 0.9681012658227848,
      "grad_norm": 0.9980558156967163,
      "learning_rate": 4.2658237049655325e-08,
      "loss": 0.874,
      "step": 239
    },
    {
      "epoch": 0.9721518987341772,
      "grad_norm": 0.9491819739341736,
      "learning_rate": 3.134666272774034e-08,
      "loss": 0.8829,
      "step": 240
    },
    {
      "epoch": 0.9762025316455696,
      "grad_norm": 1.0668548345565796,
      "learning_rate": 2.177199321994672e-08,
      "loss": 0.8689,
      "step": 241
    },
    {
      "epoch": 0.980253164556962,
      "grad_norm": 1.0019922256469727,
      "learning_rate": 1.3935896784663671e-08,
      "loss": 0.8823,
      "step": 242
    },
    {
      "epoch": 0.9843037974683544,
      "grad_norm": 1.0567593574523926,
      "learning_rate": 7.83973875712385e-09,
      "loss": 0.9175,
      "step": 243
    },
    {
      "epoch": 0.9883544303797468,
      "grad_norm": 1.0056061744689941,
      "learning_rate": 3.4845813115114147e-09,
      "loss": 0.8979,
      "step": 244
    },
    {
      "epoch": 0.9924050632911392,
      "grad_norm": 0.9826939702033997,
      "learning_rate": 8.711832758934169e-10,
      "loss": 0.8757,
      "step": 245
    },
    {
      "epoch": 0.9964556962025316,
      "grad_norm": 0.9738645553588867,
      "learning_rate": 0.0,
      "loss": 0.9027,
      "step": 246
    },
    {
      "epoch": 0.9964556962025316,
      "step": 246,
      "total_flos": 2.1359726465573192e+18,
      "train_loss": 0.9420681825982846,
      "train_runtime": 3147.8466,
      "train_samples_per_second": 20.072,
      "train_steps_per_second": 0.078
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 246,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1359726465573192e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}