| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 918, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010893246187363835, | |
| "grad_norm": 433.59912109375, | |
| "learning_rate": 5e-05, | |
| "loss": 54.5601, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002178649237472767, | |
| "grad_norm": 76.5465087890625, | |
| "learning_rate": 4.994553376906319e-05, | |
| "loss": 49.1056, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0032679738562091504, | |
| "grad_norm": 217.77659606933594, | |
| "learning_rate": 4.9891067538126364e-05, | |
| "loss": 44.5711, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.004357298474945534, | |
| "grad_norm": 151.23550415039062, | |
| "learning_rate": 4.983660130718955e-05, | |
| "loss": 42.9959, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0054466230936819175, | |
| "grad_norm": 60.41796875, | |
| "learning_rate": 4.9782135076252726e-05, | |
| "loss": 41.1759, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006535947712418301, | |
| "grad_norm": 70.2886962890625, | |
| "learning_rate": 4.9727668845315904e-05, | |
| "loss": 37.9536, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.007625272331154684, | |
| "grad_norm": 143.0018310546875, | |
| "learning_rate": 4.967320261437909e-05, | |
| "loss": 32.8828, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008714596949891068, | |
| "grad_norm": 149.256103515625, | |
| "learning_rate": 4.9618736383442266e-05, | |
| "loss": 28.8924, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.00980392156862745, | |
| "grad_norm": 157.64625549316406, | |
| "learning_rate": 4.956427015250545e-05, | |
| "loss": 22.5113, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.010893246187363835, | |
| "grad_norm": 211.30874633789062, | |
| "learning_rate": 4.9509803921568634e-05, | |
| "loss": 17.3142, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011982570806100218, | |
| "grad_norm": 233.93695068359375, | |
| "learning_rate": 4.945533769063181e-05, | |
| "loss": 13.8046, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.013071895424836602, | |
| "grad_norm": 85.05406951904297, | |
| "learning_rate": 4.940087145969499e-05, | |
| "loss": 12.0784, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.014161220043572984, | |
| "grad_norm": 58.78607940673828, | |
| "learning_rate": 4.9346405228758174e-05, | |
| "loss": 10.3516, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.015250544662309368, | |
| "grad_norm": 87.37806701660156, | |
| "learning_rate": 4.929193899782135e-05, | |
| "loss": 8.9367, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.016339869281045753, | |
| "grad_norm": 39.947357177734375, | |
| "learning_rate": 4.9237472766884536e-05, | |
| "loss": 7.4598, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.017429193899782137, | |
| "grad_norm": 32.828006744384766, | |
| "learning_rate": 4.918300653594771e-05, | |
| "loss": 7.4928, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.018518518518518517, | |
| "grad_norm": 22.29021453857422, | |
| "learning_rate": 4.91285403050109e-05, | |
| "loss": 6.9498, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0196078431372549, | |
| "grad_norm": 18.04950523376465, | |
| "learning_rate": 4.9074074074074075e-05, | |
| "loss": 6.7784, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.020697167755991286, | |
| "grad_norm": 13.946168899536133, | |
| "learning_rate": 4.901960784313725e-05, | |
| "loss": 6.8414, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.02178649237472767, | |
| "grad_norm": 8.268109321594238, | |
| "learning_rate": 4.896514161220044e-05, | |
| "loss": 6.3524, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02287581699346405, | |
| "grad_norm": 8.610257148742676, | |
| "learning_rate": 4.891067538126362e-05, | |
| "loss": 6.3006, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.023965141612200435, | |
| "grad_norm": 9.997725486755371, | |
| "learning_rate": 4.88562091503268e-05, | |
| "loss": 6.1728, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02505446623093682, | |
| "grad_norm": 8.498310089111328, | |
| "learning_rate": 4.8801742919389983e-05, | |
| "loss": 6.1896, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.026143790849673203, | |
| "grad_norm": 6.155747890472412, | |
| "learning_rate": 4.874727668845316e-05, | |
| "loss": 6.2143, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.027233115468409588, | |
| "grad_norm": 7.225712776184082, | |
| "learning_rate": 4.869281045751634e-05, | |
| "loss": 5.7938, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.02832244008714597, | |
| "grad_norm": 7.321456432342529, | |
| "learning_rate": 4.863834422657952e-05, | |
| "loss": 5.4983, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.029411764705882353, | |
| "grad_norm": 7.92922306060791, | |
| "learning_rate": 4.85838779956427e-05, | |
| "loss": 6.1389, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.030501089324618737, | |
| "grad_norm": 9.113614082336426, | |
| "learning_rate": 4.8529411764705885e-05, | |
| "loss": 5.7843, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.03159041394335512, | |
| "grad_norm": 10.913616180419922, | |
| "learning_rate": 4.847494553376907e-05, | |
| "loss": 5.7918, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.032679738562091505, | |
| "grad_norm": 4.749008655548096, | |
| "learning_rate": 4.842047930283225e-05, | |
| "loss": 5.5902, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03376906318082789, | |
| "grad_norm": 4.2183308601379395, | |
| "learning_rate": 4.8366013071895424e-05, | |
| "loss": 5.8647, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.034858387799564274, | |
| "grad_norm": 4.211799144744873, | |
| "learning_rate": 4.831154684095861e-05, | |
| "loss": 5.8053, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03594771241830065, | |
| "grad_norm": 4.482990741729736, | |
| "learning_rate": 4.8257080610021786e-05, | |
| "loss": 5.7817, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.037037037037037035, | |
| "grad_norm": 4.5478291511535645, | |
| "learning_rate": 4.820261437908497e-05, | |
| "loss": 5.8279, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.03812636165577342, | |
| "grad_norm": 3.495626926422119, | |
| "learning_rate": 4.814814814814815e-05, | |
| "loss": 5.6651, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0392156862745098, | |
| "grad_norm": 3.8772192001342773, | |
| "learning_rate": 4.809368191721133e-05, | |
| "loss": 5.5969, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.04030501089324619, | |
| "grad_norm": 6.1503005027771, | |
| "learning_rate": 4.803921568627452e-05, | |
| "loss": 5.4563, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.04139433551198257, | |
| "grad_norm": 4.434262275695801, | |
| "learning_rate": 4.798474945533769e-05, | |
| "loss": 5.2927, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.042483660130718956, | |
| "grad_norm": 5.411022186279297, | |
| "learning_rate": 4.793028322440087e-05, | |
| "loss": 5.5202, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.04357298474945534, | |
| "grad_norm": 9.371084213256836, | |
| "learning_rate": 4.7875816993464056e-05, | |
| "loss": 5.5951, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.044662309368191724, | |
| "grad_norm": 4.909243583679199, | |
| "learning_rate": 4.7821350762527234e-05, | |
| "loss": 5.3319, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0457516339869281, | |
| "grad_norm": 5.401090145111084, | |
| "learning_rate": 4.776688453159042e-05, | |
| "loss": 5.4089, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.046840958605664486, | |
| "grad_norm": 5.169169902801514, | |
| "learning_rate": 4.77124183006536e-05, | |
| "loss": 5.2287, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04793028322440087, | |
| "grad_norm": 6.920060157775879, | |
| "learning_rate": 4.765795206971678e-05, | |
| "loss": 5.3415, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.049019607843137254, | |
| "grad_norm": 210.6103973388672, | |
| "learning_rate": 4.760348583877996e-05, | |
| "loss": 5.2508, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05010893246187364, | |
| "grad_norm": 9.694268226623535, | |
| "learning_rate": 4.7549019607843135e-05, | |
| "loss": 5.6864, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05119825708061002, | |
| "grad_norm": 3.8872554302215576, | |
| "learning_rate": 4.749455337690632e-05, | |
| "loss": 5.2706, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.05228758169934641, | |
| "grad_norm": 5.058568477630615, | |
| "learning_rate": 4.7440087145969504e-05, | |
| "loss": 5.1324, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05337690631808279, | |
| "grad_norm": 4.450376510620117, | |
| "learning_rate": 4.738562091503268e-05, | |
| "loss": 5.3785, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.054466230936819175, | |
| "grad_norm": 7.152129650115967, | |
| "learning_rate": 4.7331154684095866e-05, | |
| "loss": 5.1042, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": 4.9755024909973145, | |
| "learning_rate": 4.7276688453159044e-05, | |
| "loss": 4.9428, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.05664488017429194, | |
| "grad_norm": 7.026951789855957, | |
| "learning_rate": 4.722222222222222e-05, | |
| "loss": 5.0188, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.05773420479302832, | |
| "grad_norm": 6.52206563949585, | |
| "learning_rate": 4.7167755991285405e-05, | |
| "loss": 5.0259, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.058823529411764705, | |
| "grad_norm": 4.935309886932373, | |
| "learning_rate": 4.711328976034858e-05, | |
| "loss": 5.1773, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.05991285403050109, | |
| "grad_norm": 5.704290390014648, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 5.212, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06100217864923747, | |
| "grad_norm": 7.26671838760376, | |
| "learning_rate": 4.700435729847495e-05, | |
| "loss": 5.1582, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06209150326797386, | |
| "grad_norm": 6.1820902824401855, | |
| "learning_rate": 4.694989106753813e-05, | |
| "loss": 5.1647, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.06318082788671024, | |
| "grad_norm": 4.244707107543945, | |
| "learning_rate": 4.689542483660131e-05, | |
| "loss": 5.0759, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06427015250544663, | |
| "grad_norm": 6.365707874298096, | |
| "learning_rate": 4.684095860566449e-05, | |
| "loss": 5.1621, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.06535947712418301, | |
| "grad_norm": 5.150200843811035, | |
| "learning_rate": 4.678649237472767e-05, | |
| "loss": 5.0414, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0664488017429194, | |
| "grad_norm": 5.530455112457275, | |
| "learning_rate": 4.673202614379085e-05, | |
| "loss": 5.1397, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.06753812636165578, | |
| "grad_norm": 5.366086483001709, | |
| "learning_rate": 4.667755991285404e-05, | |
| "loss": 5.0102, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06862745098039216, | |
| "grad_norm": 4.697946071624756, | |
| "learning_rate": 4.6623093681917215e-05, | |
| "loss": 5.0945, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.06971677559912855, | |
| "grad_norm": 4.403589248657227, | |
| "learning_rate": 4.656862745098039e-05, | |
| "loss": 4.9283, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07080610021786492, | |
| "grad_norm": 6.372044563293457, | |
| "learning_rate": 4.651416122004357e-05, | |
| "loss": 4.5814, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.0718954248366013, | |
| "grad_norm": 6.407917499542236, | |
| "learning_rate": 4.6459694989106755e-05, | |
| "loss": 5.0206, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07298474945533769, | |
| "grad_norm": 6.786896228790283, | |
| "learning_rate": 4.640522875816994e-05, | |
| "loss": 5.2232, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.07407407407407407, | |
| "grad_norm": 6.38107442855835, | |
| "learning_rate": 4.6350762527233116e-05, | |
| "loss": 4.9373, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07516339869281045, | |
| "grad_norm": 7.348804473876953, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 5.1876, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.07625272331154684, | |
| "grad_norm": 5.218241214752197, | |
| "learning_rate": 4.624183006535948e-05, | |
| "loss": 5.3981, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07734204793028322, | |
| "grad_norm": 5.649881362915039, | |
| "learning_rate": 4.6187363834422656e-05, | |
| "loss": 5.1205, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0784313725490196, | |
| "grad_norm": 3.868072032928467, | |
| "learning_rate": 4.613289760348584e-05, | |
| "loss": 5.1545, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07952069716775599, | |
| "grad_norm": 4.630714416503906, | |
| "learning_rate": 4.607843137254902e-05, | |
| "loss": 5.0757, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.08061002178649238, | |
| "grad_norm": 6.28192663192749, | |
| "learning_rate": 4.60239651416122e-05, | |
| "loss": 4.8752, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08169934640522876, | |
| "grad_norm": 4.817191123962402, | |
| "learning_rate": 4.5969498910675387e-05, | |
| "loss": 4.9929, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08278867102396514, | |
| "grad_norm": 4.1321234703063965, | |
| "learning_rate": 4.5915032679738564e-05, | |
| "loss": 4.9673, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08387799564270153, | |
| "grad_norm": 5.103870868682861, | |
| "learning_rate": 4.586056644880174e-05, | |
| "loss": 4.989, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.08496732026143791, | |
| "grad_norm": 7.540984153747559, | |
| "learning_rate": 4.5806100217864926e-05, | |
| "loss": 4.9174, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.0860566448801743, | |
| "grad_norm": 4.3438944816589355, | |
| "learning_rate": 4.5751633986928104e-05, | |
| "loss": 4.9603, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.08714596949891068, | |
| "grad_norm": 5.444218158721924, | |
| "learning_rate": 4.569716775599129e-05, | |
| "loss": 4.9024, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08823529411764706, | |
| "grad_norm": 5.105153560638428, | |
| "learning_rate": 4.564270152505447e-05, | |
| "loss": 4.8206, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.08932461873638345, | |
| "grad_norm": 5.786861896514893, | |
| "learning_rate": 4.558823529411765e-05, | |
| "loss": 4.6149, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09041394335511982, | |
| "grad_norm": 4.919257164001465, | |
| "learning_rate": 4.5533769063180834e-05, | |
| "loss": 4.8996, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.0915032679738562, | |
| "grad_norm": 4.846879005432129, | |
| "learning_rate": 4.547930283224401e-05, | |
| "loss": 5.2135, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09259259259259259, | |
| "grad_norm": 3.3079090118408203, | |
| "learning_rate": 4.542483660130719e-05, | |
| "loss": 5.0587, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09368191721132897, | |
| "grad_norm": 6.1413187980651855, | |
| "learning_rate": 4.5370370370370374e-05, | |
| "loss": 4.6732, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.09477124183006536, | |
| "grad_norm": 5.860175609588623, | |
| "learning_rate": 4.531590413943355e-05, | |
| "loss": 5.1839, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09586056644880174, | |
| "grad_norm": 4.185653209686279, | |
| "learning_rate": 4.5261437908496736e-05, | |
| "loss": 5.0061, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09694989106753812, | |
| "grad_norm": 4.365113258361816, | |
| "learning_rate": 4.520697167755992e-05, | |
| "loss": 4.9, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.09803921568627451, | |
| "grad_norm": 6.2008280754089355, | |
| "learning_rate": 4.51525054466231e-05, | |
| "loss": 5.2426, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09912854030501089, | |
| "grad_norm": 5.1145148277282715, | |
| "learning_rate": 4.5098039215686275e-05, | |
| "loss": 4.9137, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.10021786492374728, | |
| "grad_norm": 4.337425708770752, | |
| "learning_rate": 4.504357298474945e-05, | |
| "loss": 4.8755, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.10130718954248366, | |
| "grad_norm": 2.7409918308258057, | |
| "learning_rate": 4.498910675381264e-05, | |
| "loss": 5.1968, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.10239651416122005, | |
| "grad_norm": 5.112269401550293, | |
| "learning_rate": 4.493464052287582e-05, | |
| "loss": 4.9399, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10348583877995643, | |
| "grad_norm": 4.192493438720703, | |
| "learning_rate": 4.4880174291939e-05, | |
| "loss": 4.8008, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10457516339869281, | |
| "grad_norm": 4.505979537963867, | |
| "learning_rate": 4.482570806100218e-05, | |
| "loss": 4.8807, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.1056644880174292, | |
| "grad_norm": 3.6245105266571045, | |
| "learning_rate": 4.477124183006536e-05, | |
| "loss": 4.8467, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.10675381263616558, | |
| "grad_norm": 5.530333995819092, | |
| "learning_rate": 4.471677559912854e-05, | |
| "loss": 4.8561, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10784313725490197, | |
| "grad_norm": 3.775775909423828, | |
| "learning_rate": 4.466230936819172e-05, | |
| "loss": 4.7241, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.10893246187363835, | |
| "grad_norm": 4.200275421142578, | |
| "learning_rate": 4.460784313725491e-05, | |
| "loss": 4.9351, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11002178649237472, | |
| "grad_norm": 5.163980484008789, | |
| "learning_rate": 4.4553376906318085e-05, | |
| "loss": 5.2453, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 2.854311227798462, | |
| "learning_rate": 4.449891067538127e-05, | |
| "loss": 5.0235, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.11220043572984749, | |
| "grad_norm": 4.667736053466797, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 5.0177, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.11328976034858387, | |
| "grad_norm": 3.858189105987549, | |
| "learning_rate": 4.4389978213507624e-05, | |
| "loss": 5.0757, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.11437908496732026, | |
| "grad_norm": 3.821836471557617, | |
| "learning_rate": 4.433551198257081e-05, | |
| "loss": 4.9058, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.11546840958605664, | |
| "grad_norm": 6.397256851196289, | |
| "learning_rate": 4.4281045751633986e-05, | |
| "loss": 5.2912, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11655773420479303, | |
| "grad_norm": 4.550983905792236, | |
| "learning_rate": 4.422657952069717e-05, | |
| "loss": 4.6086, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 5.067789554595947, | |
| "learning_rate": 4.4172113289760355e-05, | |
| "loss": 4.9223, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1187363834422658, | |
| "grad_norm": 3.8284153938293457, | |
| "learning_rate": 4.411764705882353e-05, | |
| "loss": 4.9873, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.11982570806100218, | |
| "grad_norm": 5.832077503204346, | |
| "learning_rate": 4.406318082788671e-05, | |
| "loss": 5.2499, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12091503267973856, | |
| "grad_norm": 5.154621601104736, | |
| "learning_rate": 4.400871459694989e-05, | |
| "loss": 4.8027, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.12200435729847495, | |
| "grad_norm": 4.583420276641846, | |
| "learning_rate": 4.395424836601307e-05, | |
| "loss": 4.9127, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.12309368191721133, | |
| "grad_norm": 4.431615352630615, | |
| "learning_rate": 4.3899782135076256e-05, | |
| "loss": 4.7283, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.12418300653594772, | |
| "grad_norm": 4.412840843200684, | |
| "learning_rate": 4.3845315904139434e-05, | |
| "loss": 4.7524, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.12527233115468409, | |
| "grad_norm": 4.068633556365967, | |
| "learning_rate": 4.379084967320262e-05, | |
| "loss": 4.8365, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.12636165577342048, | |
| "grad_norm": 4.645825386047363, | |
| "learning_rate": 4.37363834422658e-05, | |
| "loss": 4.7768, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.12745098039215685, | |
| "grad_norm": 7.455847263336182, | |
| "learning_rate": 4.368191721132897e-05, | |
| "loss": 5.1508, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.12854030501089325, | |
| "grad_norm": 5.037924289703369, | |
| "learning_rate": 4.362745098039216e-05, | |
| "loss": 4.8405, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.12962962962962962, | |
| "grad_norm": 5.038092613220215, | |
| "learning_rate": 4.357298474945534e-05, | |
| "loss": 4.8704, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.13071895424836602, | |
| "grad_norm": 5.360757350921631, | |
| "learning_rate": 4.351851851851852e-05, | |
| "loss": 4.7742, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1318082788671024, | |
| "grad_norm": 4.969685077667236, | |
| "learning_rate": 4.3464052287581704e-05, | |
| "loss": 4.7473, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.1328976034858388, | |
| "grad_norm": 4.633984088897705, | |
| "learning_rate": 4.340958605664488e-05, | |
| "loss": 5.0645, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.13398692810457516, | |
| "grad_norm": 4.873569011688232, | |
| "learning_rate": 4.3355119825708066e-05, | |
| "loss": 4.5869, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.13507625272331156, | |
| "grad_norm": 5.611841678619385, | |
| "learning_rate": 4.330065359477124e-05, | |
| "loss": 4.6719, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.13616557734204793, | |
| "grad_norm": 5.0813398361206055, | |
| "learning_rate": 4.324618736383442e-05, | |
| "loss": 4.7453, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.13725490196078433, | |
| "grad_norm": 4.233030796051025, | |
| "learning_rate": 4.3191721132897605e-05, | |
| "loss": 4.8956, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1383442265795207, | |
| "grad_norm": 4.941433906555176, | |
| "learning_rate": 4.313725490196079e-05, | |
| "loss": 4.4315, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.1394335511982571, | |
| "grad_norm": 5.071558475494385, | |
| "learning_rate": 4.308278867102397e-05, | |
| "loss": 5.1301, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.14052287581699346, | |
| "grad_norm": 6.958078384399414, | |
| "learning_rate": 4.302832244008715e-05, | |
| "loss": 4.2597, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.14161220043572983, | |
| "grad_norm": 5.750345230102539, | |
| "learning_rate": 4.297385620915033e-05, | |
| "loss": 4.8546, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14270152505446623, | |
| "grad_norm": 4.584853649139404, | |
| "learning_rate": 4.291938997821351e-05, | |
| "loss": 4.5874, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1437908496732026, | |
| "grad_norm": 4.499705791473389, | |
| "learning_rate": 4.286492374727669e-05, | |
| "loss": 4.7708, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.144880174291939, | |
| "grad_norm": 3.7748095989227295, | |
| "learning_rate": 4.281045751633987e-05, | |
| "loss": 4.8141, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.14596949891067537, | |
| "grad_norm": 3.7674663066864014, | |
| "learning_rate": 4.275599128540305e-05, | |
| "loss": 4.8258, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.14705882352941177, | |
| "grad_norm": 3.7754039764404297, | |
| "learning_rate": 4.270152505446624e-05, | |
| "loss": 4.6507, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 4.452386856079102, | |
| "learning_rate": 4.2647058823529415e-05, | |
| "loss": 4.9271, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.14923747276688454, | |
| "grad_norm": 5.3688883781433105, | |
| "learning_rate": 4.259259259259259e-05, | |
| "loss": 4.7376, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.1503267973856209, | |
| "grad_norm": 5.703012466430664, | |
| "learning_rate": 4.253812636165578e-05, | |
| "loss": 4.6959, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.1514161220043573, | |
| "grad_norm": 3.4454829692840576, | |
| "learning_rate": 4.2483660130718954e-05, | |
| "loss": 4.7447, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.15250544662309368, | |
| "grad_norm": 5.328145980834961, | |
| "learning_rate": 4.242919389978214e-05, | |
| "loss": 4.7368, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15359477124183007, | |
| "grad_norm": 5.908607482910156, | |
| "learning_rate": 4.2374727668845316e-05, | |
| "loss": 4.8308, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.15468409586056645, | |
| "grad_norm": 5.648683547973633, | |
| "learning_rate": 4.23202614379085e-05, | |
| "loss": 5.1701, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.15577342047930284, | |
| "grad_norm": 4.8338093757629395, | |
| "learning_rate": 4.226579520697168e-05, | |
| "loss": 4.5905, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1568627450980392, | |
| "grad_norm": 5.994876861572266, | |
| "learning_rate": 4.2211328976034856e-05, | |
| "loss": 4.9874, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.1579520697167756, | |
| "grad_norm": 4.386605262756348, | |
| "learning_rate": 4.215686274509804e-05, | |
| "loss": 4.668, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.15904139433551198, | |
| "grad_norm": 4.34113883972168, | |
| "learning_rate": 4.2102396514161224e-05, | |
| "loss": 4.5285, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.16013071895424835, | |
| "grad_norm": 5.540426254272461, | |
| "learning_rate": 4.20479302832244e-05, | |
| "loss": 4.718, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.16122004357298475, | |
| "grad_norm": 4.609899520874023, | |
| "learning_rate": 4.1993464052287586e-05, | |
| "loss": 5.1161, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.16230936819172112, | |
| "grad_norm": 4.171559810638428, | |
| "learning_rate": 4.193899782135077e-05, | |
| "loss": 4.6734, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.16339869281045752, | |
| "grad_norm": 5.271131992340088, | |
| "learning_rate": 4.188453159041394e-05, | |
| "loss": 5.0027, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1644880174291939, | |
| "grad_norm": 4.208072185516357, | |
| "learning_rate": 4.1830065359477126e-05, | |
| "loss": 4.8831, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.1655773420479303, | |
| "grad_norm": 4.976131439208984, | |
| "learning_rate": 4.17755991285403e-05, | |
| "loss": 4.9067, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 3.334254503250122, | |
| "learning_rate": 4.172113289760349e-05, | |
| "loss": 4.8799, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.16775599128540306, | |
| "grad_norm": 4.86076021194458, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 4.5356, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.16884531590413943, | |
| "grad_norm": 4.975986957550049, | |
| "learning_rate": 4.161220043572985e-05, | |
| "loss": 4.8953, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.16993464052287582, | |
| "grad_norm": 4.446471214294434, | |
| "learning_rate": 4.1557734204793034e-05, | |
| "loss": 4.7772, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.1710239651416122, | |
| "grad_norm": 4.07933235168457, | |
| "learning_rate": 4.150326797385621e-05, | |
| "loss": 4.4783, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1721132897603486, | |
| "grad_norm": 4.602686405181885, | |
| "learning_rate": 4.144880174291939e-05, | |
| "loss": 4.6011, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.17320261437908496, | |
| "grad_norm": 5.6627936363220215, | |
| "learning_rate": 4.1394335511982573e-05, | |
| "loss": 4.8746, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.17429193899782136, | |
| "grad_norm": 3.6674063205718994, | |
| "learning_rate": 4.133986928104575e-05, | |
| "loss": 4.7012, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17538126361655773, | |
| "grad_norm": 5.442697048187256, | |
| "learning_rate": 4.1285403050108935e-05, | |
| "loss": 4.5692, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.17647058823529413, | |
| "grad_norm": 5.077996253967285, | |
| "learning_rate": 4.123093681917212e-05, | |
| "loss": 4.6404, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.1775599128540305, | |
| "grad_norm": 4.218442916870117, | |
| "learning_rate": 4.11764705882353e-05, | |
| "loss": 4.6261, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.1786492374727669, | |
| "grad_norm": 3.8982386589050293, | |
| "learning_rate": 4.1122004357298475e-05, | |
| "loss": 4.9411, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.17973856209150327, | |
| "grad_norm": 6.836637020111084, | |
| "learning_rate": 4.106753812636166e-05, | |
| "loss": 4.8207, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.18082788671023964, | |
| "grad_norm": 5.561280250549316, | |
| "learning_rate": 4.101307189542484e-05, | |
| "loss": 4.6781, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.18191721132897604, | |
| "grad_norm": 4.1247477531433105, | |
| "learning_rate": 4.095860566448802e-05, | |
| "loss": 4.5206, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.1830065359477124, | |
| "grad_norm": 4.342430591583252, | |
| "learning_rate": 4.0904139433551205e-05, | |
| "loss": 4.7182, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1840958605664488, | |
| "grad_norm": 3.85998797416687, | |
| "learning_rate": 4.084967320261438e-05, | |
| "loss": 4.6325, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 4.236372947692871, | |
| "learning_rate": 4.079520697167756e-05, | |
| "loss": 4.8942, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.18627450980392157, | |
| "grad_norm": 3.885935068130493, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 4.8012, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.18736383442265794, | |
| "grad_norm": 4.750324249267578, | |
| "learning_rate": 4.068627450980392e-05, | |
| "loss": 4.8206, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.18845315904139434, | |
| "grad_norm": 4.165679931640625, | |
| "learning_rate": 4.063180827886711e-05, | |
| "loss": 4.5117, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.1895424836601307, | |
| "grad_norm": 4.995901107788086, | |
| "learning_rate": 4.0577342047930284e-05, | |
| "loss": 4.8905, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.1906318082788671, | |
| "grad_norm": 3.592283010482788, | |
| "learning_rate": 4.052287581699347e-05, | |
| "loss": 4.8644, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.19172113289760348, | |
| "grad_norm": 4.262664318084717, | |
| "learning_rate": 4.0468409586056646e-05, | |
| "loss": 4.735, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.19281045751633988, | |
| "grad_norm": 3.8139562606811523, | |
| "learning_rate": 4.0413943355119824e-05, | |
| "loss": 4.6146, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.19389978213507625, | |
| "grad_norm": 4.420293807983398, | |
| "learning_rate": 4.035947712418301e-05, | |
| "loss": 4.7375, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.19498910675381265, | |
| "grad_norm": 3.913332223892212, | |
| "learning_rate": 4.0305010893246186e-05, | |
| "loss": 4.7493, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.19607843137254902, | |
| "grad_norm": 3.472088575363159, | |
| "learning_rate": 4.025054466230937e-05, | |
| "loss": 4.8497, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19716775599128541, | |
| "grad_norm": 3.8188107013702393, | |
| "learning_rate": 4.0196078431372555e-05, | |
| "loss": 4.4733, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.19825708061002179, | |
| "grad_norm": 5.0970072746276855, | |
| "learning_rate": 4.014161220043573e-05, | |
| "loss": 4.6236, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.19934640522875818, | |
| "grad_norm": 4.806885242462158, | |
| "learning_rate": 4.008714596949891e-05, | |
| "loss": 5.3202, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.20043572984749455, | |
| "grad_norm": 3.138608932495117, | |
| "learning_rate": 4.0032679738562094e-05, | |
| "loss": 4.5813, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.20152505446623092, | |
| "grad_norm": 5.461742877960205, | |
| "learning_rate": 3.997821350762527e-05, | |
| "loss": 4.8286, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.20261437908496732, | |
| "grad_norm": 3.3661530017852783, | |
| "learning_rate": 3.9923747276688456e-05, | |
| "loss": 4.8449, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2037037037037037, | |
| "grad_norm": 4.2431511878967285, | |
| "learning_rate": 3.986928104575164e-05, | |
| "loss": 4.8409, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.2047930283224401, | |
| "grad_norm": 4.562834739685059, | |
| "learning_rate": 3.981481481481482e-05, | |
| "loss": 4.4071, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.20588235294117646, | |
| "grad_norm": 3.9637649059295654, | |
| "learning_rate": 3.9760348583877995e-05, | |
| "loss": 4.6048, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.20697167755991286, | |
| "grad_norm": 5.083850860595703, | |
| "learning_rate": 3.970588235294117e-05, | |
| "loss": 4.5815, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.20806100217864923, | |
| "grad_norm": 4.9229044914245605, | |
| "learning_rate": 3.965141612200436e-05, | |
| "loss": 4.8409, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.20915032679738563, | |
| "grad_norm": 6.389772415161133, | |
| "learning_rate": 3.959694989106754e-05, | |
| "loss": 4.7934, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.210239651416122, | |
| "grad_norm": 4.768709182739258, | |
| "learning_rate": 3.954248366013072e-05, | |
| "loss": 4.75, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.2113289760348584, | |
| "grad_norm": 6.08599853515625, | |
| "learning_rate": 3.9488017429193904e-05, | |
| "loss": 4.771, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.21241830065359477, | |
| "grad_norm": 6.793558120727539, | |
| "learning_rate": 3.943355119825709e-05, | |
| "loss": 4.2603, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.21350762527233116, | |
| "grad_norm": 6.091384410858154, | |
| "learning_rate": 3.9379084967320266e-05, | |
| "loss": 4.6942, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.21459694989106753, | |
| "grad_norm": 5.740220546722412, | |
| "learning_rate": 3.932461873638344e-05, | |
| "loss": 4.9354, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.21568627450980393, | |
| "grad_norm": 6.264912128448486, | |
| "learning_rate": 3.927015250544662e-05, | |
| "loss": 4.5096, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2167755991285403, | |
| "grad_norm": 4.367220401763916, | |
| "learning_rate": 3.9215686274509805e-05, | |
| "loss": 4.5554, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2178649237472767, | |
| "grad_norm": 5.731144905090332, | |
| "learning_rate": 3.916122004357299e-05, | |
| "loss": 4.7079, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21895424836601307, | |
| "grad_norm": 3.7173149585723877, | |
| "learning_rate": 3.910675381263617e-05, | |
| "loss": 4.7306, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.22004357298474944, | |
| "grad_norm": 4.827605247497559, | |
| "learning_rate": 3.905228758169935e-05, | |
| "loss": 4.6261, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.22113289760348584, | |
| "grad_norm": 3.710516929626465, | |
| "learning_rate": 3.899782135076253e-05, | |
| "loss": 4.6336, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 2.7958285808563232, | |
| "learning_rate": 3.8943355119825706e-05, | |
| "loss": 4.798, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2233115468409586, | |
| "grad_norm": 6.49755859375, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 4.5499, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.22440087145969498, | |
| "grad_norm": 4.210638523101807, | |
| "learning_rate": 3.8834422657952075e-05, | |
| "loss": 4.4568, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.22549019607843138, | |
| "grad_norm": 4.805547714233398, | |
| "learning_rate": 3.877995642701525e-05, | |
| "loss": 5.0813, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.22657952069716775, | |
| "grad_norm": 5.599452972412109, | |
| "learning_rate": 3.872549019607844e-05, | |
| "loss": 4.4505, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.22766884531590414, | |
| "grad_norm": 3.9884397983551025, | |
| "learning_rate": 3.8671023965141615e-05, | |
| "loss": 4.634, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.22875816993464052, | |
| "grad_norm": 4.633684158325195, | |
| "learning_rate": 3.861655773420479e-05, | |
| "loss": 4.5797, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2298474945533769, | |
| "grad_norm": 4.7483601570129395, | |
| "learning_rate": 3.8562091503267977e-05, | |
| "loss": 5.0125, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.23093681917211328, | |
| "grad_norm": 6.180882930755615, | |
| "learning_rate": 3.8507625272331154e-05, | |
| "loss": 5.0425, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.23202614379084968, | |
| "grad_norm": 4.736648082733154, | |
| "learning_rate": 3.845315904139434e-05, | |
| "loss": 4.7929, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.23311546840958605, | |
| "grad_norm": 4.327882289886475, | |
| "learning_rate": 3.839869281045752e-05, | |
| "loss": 4.5024, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.23420479302832245, | |
| "grad_norm": 5.636050224304199, | |
| "learning_rate": 3.83442265795207e-05, | |
| "loss": 4.7479, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 6.561770915985107, | |
| "learning_rate": 3.828976034858388e-05, | |
| "loss": 4.6145, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.23638344226579522, | |
| "grad_norm": 4.499276638031006, | |
| "learning_rate": 3.8235294117647055e-05, | |
| "loss": 4.6342, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2374727668845316, | |
| "grad_norm": 5.149621963500977, | |
| "learning_rate": 3.818082788671024e-05, | |
| "loss": 4.995, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.238562091503268, | |
| "grad_norm": 6.08087682723999, | |
| "learning_rate": 3.8126361655773424e-05, | |
| "loss": 4.2428, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.23965141612200436, | |
| "grad_norm": 4.655242443084717, | |
| "learning_rate": 3.80718954248366e-05, | |
| "loss": 4.5472, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.24074074074074073, | |
| "grad_norm": 4.544321060180664, | |
| "learning_rate": 3.8017429193899786e-05, | |
| "loss": 4.7213, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.24183006535947713, | |
| "grad_norm": 3.1311380863189697, | |
| "learning_rate": 3.7962962962962964e-05, | |
| "loss": 4.6336, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.2429193899782135, | |
| "grad_norm": 7.174770832061768, | |
| "learning_rate": 3.790849673202614e-05, | |
| "loss": 5.014, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.2440087145969499, | |
| "grad_norm": 4.7743239402771, | |
| "learning_rate": 3.7854030501089326e-05, | |
| "loss": 4.4768, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.24509803921568626, | |
| "grad_norm": 4.101309299468994, | |
| "learning_rate": 3.779956427015251e-05, | |
| "loss": 4.6725, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.24618736383442266, | |
| "grad_norm": 3.5170490741729736, | |
| "learning_rate": 3.774509803921569e-05, | |
| "loss": 4.6873, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.24727668845315903, | |
| "grad_norm": 4.358458518981934, | |
| "learning_rate": 3.769063180827887e-05, | |
| "loss": 4.6864, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.24836601307189543, | |
| "grad_norm": 6.259149551391602, | |
| "learning_rate": 3.763616557734205e-05, | |
| "loss": 4.7459, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2494553376906318, | |
| "grad_norm": 5.776852130889893, | |
| "learning_rate": 3.758169934640523e-05, | |
| "loss": 4.5755, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.25054466230936817, | |
| "grad_norm": 5.6717753410339355, | |
| "learning_rate": 3.752723311546841e-05, | |
| "loss": 4.4931, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25163398692810457, | |
| "grad_norm": 5.764096260070801, | |
| "learning_rate": 3.747276688453159e-05, | |
| "loss": 4.4024, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.25272331154684097, | |
| "grad_norm": 4.976629734039307, | |
| "learning_rate": 3.741830065359477e-05, | |
| "loss": 4.6055, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.25381263616557737, | |
| "grad_norm": 4.461733818054199, | |
| "learning_rate": 3.736383442265796e-05, | |
| "loss": 4.7002, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.2549019607843137, | |
| "grad_norm": 5.671999931335449, | |
| "learning_rate": 3.7309368191721135e-05, | |
| "loss": 4.4413, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.2559912854030501, | |
| "grad_norm": 4.301014423370361, | |
| "learning_rate": 3.725490196078432e-05, | |
| "loss": 4.452, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2570806100217865, | |
| "grad_norm": 3.8826897144317627, | |
| "learning_rate": 3.72004357298475e-05, | |
| "loss": 4.7436, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2581699346405229, | |
| "grad_norm": 3.7980918884277344, | |
| "learning_rate": 3.7145969498910675e-05, | |
| "loss": 4.6848, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.25925925925925924, | |
| "grad_norm": 3.7202043533325195, | |
| "learning_rate": 3.709150326797386e-05, | |
| "loss": 4.3356, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.26034858387799564, | |
| "grad_norm": 5.092905044555664, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 4.7834, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.26143790849673204, | |
| "grad_norm": 7.2429399490356445, | |
| "learning_rate": 3.698257080610022e-05, | |
| "loss": 4.4182, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2625272331154684, | |
| "grad_norm": 4.804575443267822, | |
| "learning_rate": 3.6928104575163405e-05, | |
| "loss": 4.5469, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2636165577342048, | |
| "grad_norm": 4.136233806610107, | |
| "learning_rate": 3.687363834422658e-05, | |
| "loss": 4.5973, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2647058823529412, | |
| "grad_norm": 6.122718334197998, | |
| "learning_rate": 3.681917211328976e-05, | |
| "loss": 5.0967, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.2657952069716776, | |
| "grad_norm": 3.9134368896484375, | |
| "learning_rate": 3.6764705882352945e-05, | |
| "loss": 4.8936, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2668845315904139, | |
| "grad_norm": 3.1468756198883057, | |
| "learning_rate": 3.671023965141612e-05, | |
| "loss": 4.6001, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2679738562091503, | |
| "grad_norm": 3.986370086669922, | |
| "learning_rate": 3.665577342047931e-05, | |
| "loss": 4.4806, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.2690631808278867, | |
| "grad_norm": 4.314675331115723, | |
| "learning_rate": 3.6601307189542484e-05, | |
| "loss": 4.7468, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.2701525054466231, | |
| "grad_norm": 4.477916240692139, | |
| "learning_rate": 3.654684095860567e-05, | |
| "loss": 4.5576, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.27124183006535946, | |
| "grad_norm": 3.9654428958892822, | |
| "learning_rate": 3.6492374727668846e-05, | |
| "loss": 4.69, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.27233115468409586, | |
| "grad_norm": 3.9319989681243896, | |
| "learning_rate": 3.6437908496732024e-05, | |
| "loss": 4.4654, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27342047930283225, | |
| "grad_norm": 3.9804999828338623, | |
| "learning_rate": 3.638344226579521e-05, | |
| "loss": 4.4819, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.27450980392156865, | |
| "grad_norm": 5.841671466827393, | |
| "learning_rate": 3.632897603485839e-05, | |
| "loss": 5.0806, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.275599128540305, | |
| "grad_norm": 5.6946306228637695, | |
| "learning_rate": 3.627450980392157e-05, | |
| "loss": 4.6161, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.2766884531590414, | |
| "grad_norm": 4.4222612380981445, | |
| "learning_rate": 3.6220043572984754e-05, | |
| "loss": 4.3544, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 4.121781349182129, | |
| "learning_rate": 3.616557734204793e-05, | |
| "loss": 4.2072, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2788671023965142, | |
| "grad_norm": 3.593965530395508, | |
| "learning_rate": 3.611111111111111e-05, | |
| "loss": 4.5807, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.27995642701525053, | |
| "grad_norm": 5.608928680419922, | |
| "learning_rate": 3.6056644880174294e-05, | |
| "loss": 4.416, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.28104575163398693, | |
| "grad_norm": 3.7292981147766113, | |
| "learning_rate": 3.600217864923747e-05, | |
| "loss": 4.7935, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2821350762527233, | |
| "grad_norm": 4.825756072998047, | |
| "learning_rate": 3.5947712418300656e-05, | |
| "loss": 4.9406, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.28322440087145967, | |
| "grad_norm": 5.241641521453857, | |
| "learning_rate": 3.589324618736384e-05, | |
| "loss": 4.7552, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.28431372549019607, | |
| "grad_norm": 3.7987542152404785, | |
| "learning_rate": 3.583877995642702e-05, | |
| "loss": 4.8658, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.28540305010893247, | |
| "grad_norm": 5.955167293548584, | |
| "learning_rate": 3.5784313725490195e-05, | |
| "loss": 4.3561, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.28649237472766886, | |
| "grad_norm": 6.110357761383057, | |
| "learning_rate": 3.572984749455338e-05, | |
| "loss": 4.7785, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.2875816993464052, | |
| "grad_norm": 3.629960775375366, | |
| "learning_rate": 3.567538126361656e-05, | |
| "loss": 4.5009, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.2886710239651416, | |
| "grad_norm": 4.371223449707031, | |
| "learning_rate": 3.562091503267974e-05, | |
| "loss": 4.5249, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.289760348583878, | |
| "grad_norm": 3.3808460235595703, | |
| "learning_rate": 3.556644880174292e-05, | |
| "loss": 4.6955, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.2908496732026144, | |
| "grad_norm": 3.3614234924316406, | |
| "learning_rate": 3.55119825708061e-05, | |
| "loss": 4.73, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.29193899782135074, | |
| "grad_norm": 4.474226951599121, | |
| "learning_rate": 3.545751633986929e-05, | |
| "loss": 4.5672, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.29302832244008714, | |
| "grad_norm": 4.641172885894775, | |
| "learning_rate": 3.540305010893246e-05, | |
| "loss": 4.3472, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 3.9482967853546143, | |
| "learning_rate": 3.534858387799564e-05, | |
| "loss": 4.429, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.29520697167755994, | |
| "grad_norm": 4.477460861206055, | |
| "learning_rate": 3.529411764705883e-05, | |
| "loss": 4.7273, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 5.178402900695801, | |
| "learning_rate": 3.5239651416122005e-05, | |
| "loss": 4.9476, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.2973856209150327, | |
| "grad_norm": 3.6387414932250977, | |
| "learning_rate": 3.518518518518519e-05, | |
| "loss": 4.5508, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.2984749455337691, | |
| "grad_norm": 5.014089107513428, | |
| "learning_rate": 3.513071895424837e-05, | |
| "loss": 4.4896, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.2995642701525055, | |
| "grad_norm": 3.63862681388855, | |
| "learning_rate": 3.507625272331155e-05, | |
| "loss": 4.609, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3006535947712418, | |
| "grad_norm": 6.508639335632324, | |
| "learning_rate": 3.502178649237473e-05, | |
| "loss": 4.5089, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.3017429193899782, | |
| "grad_norm": 4.915494918823242, | |
| "learning_rate": 3.4967320261437906e-05, | |
| "loss": 4.1011, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3028322440087146, | |
| "grad_norm": 4.033348083496094, | |
| "learning_rate": 3.491285403050109e-05, | |
| "loss": 4.8823, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.30392156862745096, | |
| "grad_norm": 4.201326370239258, | |
| "learning_rate": 3.4858387799564275e-05, | |
| "loss": 4.9096, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.30501089324618735, | |
| "grad_norm": 5.313831806182861, | |
| "learning_rate": 3.480392156862745e-05, | |
| "loss": 4.4766, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.30610021786492375, | |
| "grad_norm": 3.8419699668884277, | |
| "learning_rate": 3.474945533769064e-05, | |
| "loss": 4.5904, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.30718954248366015, | |
| "grad_norm": 5.416800498962402, | |
| "learning_rate": 3.4694989106753814e-05, | |
| "loss": 4.705, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.3082788671023965, | |
| "grad_norm": 5.810449600219727, | |
| "learning_rate": 3.464052287581699e-05, | |
| "loss": 4.7655, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.3093681917211329, | |
| "grad_norm": 3.642828941345215, | |
| "learning_rate": 3.4586056644880176e-05, | |
| "loss": 4.4227, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3104575163398693, | |
| "grad_norm": 3.401031255722046, | |
| "learning_rate": 3.4531590413943354e-05, | |
| "loss": 4.6759, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3115468409586057, | |
| "grad_norm": 3.7218399047851562, | |
| "learning_rate": 3.447712418300654e-05, | |
| "loss": 4.793, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.31263616557734203, | |
| "grad_norm": 5.313408851623535, | |
| "learning_rate": 3.442265795206972e-05, | |
| "loss": 4.7205, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.3137254901960784, | |
| "grad_norm": 6.181398868560791, | |
| "learning_rate": 3.43681917211329e-05, | |
| "loss": 5.5932, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3148148148148148, | |
| "grad_norm": 4.589676380157471, | |
| "learning_rate": 3.431372549019608e-05, | |
| "loss": 4.922, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.3159041394335512, | |
| "grad_norm": 7.213254451751709, | |
| "learning_rate": 3.425925925925926e-05, | |
| "loss": 4.7858, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.31699346405228757, | |
| "grad_norm": 4.381298542022705, | |
| "learning_rate": 3.420479302832244e-05, | |
| "loss": 4.6587, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.31808278867102396, | |
| "grad_norm": 4.9724860191345215, | |
| "learning_rate": 3.4150326797385624e-05, | |
| "loss": 4.8125, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.31917211328976036, | |
| "grad_norm": 5.060785293579102, | |
| "learning_rate": 3.40958605664488e-05, | |
| "loss": 4.7155, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.3202614379084967, | |
| "grad_norm": 4.78064489364624, | |
| "learning_rate": 3.4041394335511986e-05, | |
| "loss": 4.5709, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3213507625272331, | |
| "grad_norm": 4.871672630310059, | |
| "learning_rate": 3.3986928104575163e-05, | |
| "loss": 4.3652, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3224400871459695, | |
| "grad_norm": 4.1409010887146, | |
| "learning_rate": 3.393246187363834e-05, | |
| "loss": 4.5363, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3235294117647059, | |
| "grad_norm": 4.058030128479004, | |
| "learning_rate": 3.3877995642701525e-05, | |
| "loss": 4.7055, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.32461873638344224, | |
| "grad_norm": 4.3956804275512695, | |
| "learning_rate": 3.382352941176471e-05, | |
| "loss": 4.7827, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.32570806100217864, | |
| "grad_norm": 6.620364665985107, | |
| "learning_rate": 3.376906318082789e-05, | |
| "loss": 4.5226, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.32679738562091504, | |
| "grad_norm": 4.759446620941162, | |
| "learning_rate": 3.371459694989107e-05, | |
| "loss": 4.5963, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.32788671023965144, | |
| "grad_norm": 5.040675640106201, | |
| "learning_rate": 3.366013071895425e-05, | |
| "loss": 4.8448, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.3289760348583878, | |
| "grad_norm": 4.076169967651367, | |
| "learning_rate": 3.360566448801743e-05, | |
| "loss": 4.8115, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.3300653594771242, | |
| "grad_norm": 6.015557289123535, | |
| "learning_rate": 3.355119825708061e-05, | |
| "loss": 4.9195, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3311546840958606, | |
| "grad_norm": 6.468891143798828, | |
| "learning_rate": 3.349673202614379e-05, | |
| "loss": 4.4666, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.332244008714597, | |
| "grad_norm": 4.223741054534912, | |
| "learning_rate": 3.344226579520697e-05, | |
| "loss": 4.8265, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 3.7622108459472656, | |
| "learning_rate": 3.338779956427016e-05, | |
| "loss": 5.0257, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3344226579520697, | |
| "grad_norm": 6.763260364532471, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 4.7566, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.3355119825708061, | |
| "grad_norm": 3.5491511821746826, | |
| "learning_rate": 3.327886710239652e-05, | |
| "loss": 4.7302, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3366013071895425, | |
| "grad_norm": 3.783433198928833, | |
| "learning_rate": 3.32244008714597e-05, | |
| "loss": 4.5234, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.33769063180827885, | |
| "grad_norm": 3.9712607860565186, | |
| "learning_rate": 3.3169934640522874e-05, | |
| "loss": 4.217, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.33877995642701525, | |
| "grad_norm": 5.989434242248535, | |
| "learning_rate": 3.311546840958606e-05, | |
| "loss": 4.2771, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.33986928104575165, | |
| "grad_norm": 5.530442714691162, | |
| "learning_rate": 3.3061002178649236e-05, | |
| "loss": 4.343, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.340958605664488, | |
| "grad_norm": 4.486259937286377, | |
| "learning_rate": 3.300653594771242e-05, | |
| "loss": 4.3773, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3420479302832244, | |
| "grad_norm": 6.4661407470703125, | |
| "learning_rate": 3.2952069716775605e-05, | |
| "loss": 4.7933, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3431372549019608, | |
| "grad_norm": 4.348084926605225, | |
| "learning_rate": 3.289760348583878e-05, | |
| "loss": 4.2573, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3442265795206972, | |
| "grad_norm": 3.8728368282318115, | |
| "learning_rate": 3.284313725490196e-05, | |
| "loss": 4.4854, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3453159041394335, | |
| "grad_norm": 6.54443359375, | |
| "learning_rate": 3.2788671023965145e-05, | |
| "loss": 4.2914, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.3464052287581699, | |
| "grad_norm": 5.483850479125977, | |
| "learning_rate": 3.273420479302832e-05, | |
| "loss": 4.7332, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3474945533769063, | |
| "grad_norm": 4.324398517608643, | |
| "learning_rate": 3.2679738562091506e-05, | |
| "loss": 4.5035, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.3485838779956427, | |
| "grad_norm": 5.615636348724365, | |
| "learning_rate": 3.262527233115469e-05, | |
| "loss": 4.3841, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.34967320261437906, | |
| "grad_norm": 5.929314136505127, | |
| "learning_rate": 3.257080610021787e-05, | |
| "loss": 4.3496, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.35076252723311546, | |
| "grad_norm": 4.947606086730957, | |
| "learning_rate": 3.2516339869281046e-05, | |
| "loss": 4.4215, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.35185185185185186, | |
| "grad_norm": 4.075250148773193, | |
| "learning_rate": 3.2461873638344223e-05, | |
| "loss": 4.6729, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 5.357743263244629, | |
| "learning_rate": 3.240740740740741e-05, | |
| "loss": 5.3836, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3540305010893246, | |
| "grad_norm": 4.747836589813232, | |
| "learning_rate": 3.235294117647059e-05, | |
| "loss": 4.5918, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.355119825708061, | |
| "grad_norm": 4.516659736633301, | |
| "learning_rate": 3.229847494553377e-05, | |
| "loss": 4.6426, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.3562091503267974, | |
| "grad_norm": 4.06311559677124, | |
| "learning_rate": 3.2244008714596954e-05, | |
| "loss": 4.5073, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.3572984749455338, | |
| "grad_norm": 6.384474754333496, | |
| "learning_rate": 3.218954248366013e-05, | |
| "loss": 4.2853, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.35838779956427014, | |
| "grad_norm": 4.745261192321777, | |
| "learning_rate": 3.213507625272331e-05, | |
| "loss": 4.3903, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.35947712418300654, | |
| "grad_norm": 4.1658806800842285, | |
| "learning_rate": 3.2080610021786494e-05, | |
| "loss": 4.4249, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.36056644880174293, | |
| "grad_norm": 5.156564235687256, | |
| "learning_rate": 3.202614379084967e-05, | |
| "loss": 4.3749, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3616557734204793, | |
| "grad_norm": 4.711488246917725, | |
| "learning_rate": 3.1971677559912855e-05, | |
| "loss": 4.7508, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.3627450980392157, | |
| "grad_norm": 4.537379264831543, | |
| "learning_rate": 3.191721132897604e-05, | |
| "loss": 4.5883, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.3638344226579521, | |
| "grad_norm": 6.111281871795654, | |
| "learning_rate": 3.186274509803922e-05, | |
| "loss": 4.8958, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.36492374727668847, | |
| "grad_norm": 4.081695079803467, | |
| "learning_rate": 3.1808278867102395e-05, | |
| "loss": 4.5584, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3660130718954248, | |
| "grad_norm": 5.662016868591309, | |
| "learning_rate": 3.175381263616558e-05, | |
| "loss": 4.7313, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.3671023965141612, | |
| "grad_norm": 3.731066942214966, | |
| "learning_rate": 3.169934640522876e-05, | |
| "loss": 4.439, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.3681917211328976, | |
| "grad_norm": 5.84310245513916, | |
| "learning_rate": 3.164488017429194e-05, | |
| "loss": 5.1895, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.369281045751634, | |
| "grad_norm": 6.330954551696777, | |
| "learning_rate": 3.1590413943355126e-05, | |
| "loss": 4.4193, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 4.727417469024658, | |
| "learning_rate": 3.15359477124183e-05, | |
| "loss": 4.5239, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.37145969498910675, | |
| "grad_norm": 4.300968170166016, | |
| "learning_rate": 3.148148148148148e-05, | |
| "loss": 4.5961, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.37254901960784315, | |
| "grad_norm": 5.783464431762695, | |
| "learning_rate": 3.142701525054466e-05, | |
| "loss": 4.3041, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.37363834422657954, | |
| "grad_norm": 4.986368656158447, | |
| "learning_rate": 3.137254901960784e-05, | |
| "loss": 4.4746, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.3747276688453159, | |
| "grad_norm": 4.202662944793701, | |
| "learning_rate": 3.131808278867103e-05, | |
| "loss": 4.8018, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.3758169934640523, | |
| "grad_norm": 4.665887832641602, | |
| "learning_rate": 3.1263616557734205e-05, | |
| "loss": 4.8463, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.3769063180827887, | |
| "grad_norm": 4.940118312835693, | |
| "learning_rate": 3.120915032679739e-05, | |
| "loss": 4.8254, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.3779956427015251, | |
| "grad_norm": 4.012518882751465, | |
| "learning_rate": 3.115468409586057e-05, | |
| "loss": 4.7666, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.3790849673202614, | |
| "grad_norm": 4.214693069458008, | |
| "learning_rate": 3.110021786492375e-05, | |
| "loss": 4.6373, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.3801742919389978, | |
| "grad_norm": 4.745850563049316, | |
| "learning_rate": 3.104575163398693e-05, | |
| "loss": 4.4352, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.3812636165577342, | |
| "grad_norm": 3.841230869293213, | |
| "learning_rate": 3.099128540305011e-05, | |
| "loss": 4.4874, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.38235294117647056, | |
| "grad_norm": 4.3495259284973145, | |
| "learning_rate": 3.093681917211329e-05, | |
| "loss": 4.442, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.38344226579520696, | |
| "grad_norm": 4.522481441497803, | |
| "learning_rate": 3.0882352941176475e-05, | |
| "loss": 4.6143, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.38453159041394336, | |
| "grad_norm": 3.9618613719940186, | |
| "learning_rate": 3.082788671023965e-05, | |
| "loss": 4.9634, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.38562091503267976, | |
| "grad_norm": 3.789592981338501, | |
| "learning_rate": 3.0773420479302837e-05, | |
| "loss": 4.4981, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.3867102396514161, | |
| "grad_norm": 4.098993301391602, | |
| "learning_rate": 3.0718954248366014e-05, | |
| "loss": 4.417, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3877995642701525, | |
| "grad_norm": 4.085011959075928, | |
| "learning_rate": 3.066448801742919e-05, | |
| "loss": 4.5058, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3888888888888889, | |
| "grad_norm": 4.117382526397705, | |
| "learning_rate": 3.0610021786492376e-05, | |
| "loss": 4.4669, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.3899782135076253, | |
| "grad_norm": 4.850119590759277, | |
| "learning_rate": 3.055555555555556e-05, | |
| "loss": 4.5485, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.39106753812636164, | |
| "grad_norm": 6.552071571350098, | |
| "learning_rate": 3.0501089324618738e-05, | |
| "loss": 5.4194, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.39215686274509803, | |
| "grad_norm": 5.386460781097412, | |
| "learning_rate": 3.044662309368192e-05, | |
| "loss": 4.6129, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.39324618736383443, | |
| "grad_norm": 4.183785915374756, | |
| "learning_rate": 3.0392156862745097e-05, | |
| "loss": 4.8347, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.39433551198257083, | |
| "grad_norm": 4.157173156738281, | |
| "learning_rate": 3.033769063180828e-05, | |
| "loss": 4.6295, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3954248366013072, | |
| "grad_norm": 4.491374969482422, | |
| "learning_rate": 3.0283224400871462e-05, | |
| "loss": 4.3447, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.39651416122004357, | |
| "grad_norm": 4.0975565910339355, | |
| "learning_rate": 3.022875816993464e-05, | |
| "loss": 4.5171, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.39760348583877997, | |
| "grad_norm": 3.517444372177124, | |
| "learning_rate": 3.0174291938997824e-05, | |
| "loss": 4.3661, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.39869281045751637, | |
| "grad_norm": 9.484197616577148, | |
| "learning_rate": 3.0119825708061005e-05, | |
| "loss": 4.7755, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3997821350762527, | |
| "grad_norm": 3.5145955085754395, | |
| "learning_rate": 3.0065359477124182e-05, | |
| "loss": 4.4043, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.4008714596949891, | |
| "grad_norm": 3.0473830699920654, | |
| "learning_rate": 3.0010893246187367e-05, | |
| "loss": 4.2712, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.4019607843137255, | |
| "grad_norm": 6.201361179351807, | |
| "learning_rate": 2.9956427015250548e-05, | |
| "loss": 4.122, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.40305010893246185, | |
| "grad_norm": 3.607969045639038, | |
| "learning_rate": 2.9901960784313725e-05, | |
| "loss": 4.7129, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.40413943355119825, | |
| "grad_norm": 3.872899055480957, | |
| "learning_rate": 2.984749455337691e-05, | |
| "loss": 4.6699, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.40522875816993464, | |
| "grad_norm": 3.068768262863159, | |
| "learning_rate": 2.9793028322440087e-05, | |
| "loss": 5.003, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.40631808278867104, | |
| "grad_norm": 5.118782043457031, | |
| "learning_rate": 2.9738562091503268e-05, | |
| "loss": 4.6561, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.4074074074074074, | |
| "grad_norm": 5.239559173583984, | |
| "learning_rate": 2.9684095860566452e-05, | |
| "loss": 4.3943, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4084967320261438, | |
| "grad_norm": 3.700847864151001, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 4.7697, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4095860566448802, | |
| "grad_norm": 3.6922318935394287, | |
| "learning_rate": 2.957516339869281e-05, | |
| "loss": 4.4515, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4106753812636166, | |
| "grad_norm": 5.449310302734375, | |
| "learning_rate": 2.9520697167755995e-05, | |
| "loss": 4.483, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4117647058823529, | |
| "grad_norm": 7.003320217132568, | |
| "learning_rate": 2.9466230936819173e-05, | |
| "loss": 4.6701, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.4128540305010893, | |
| "grad_norm": 4.283032417297363, | |
| "learning_rate": 2.9411764705882354e-05, | |
| "loss": 4.7565, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4139433551198257, | |
| "grad_norm": 3.709120750427246, | |
| "learning_rate": 2.935729847494553e-05, | |
| "loss": 4.5018, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4150326797385621, | |
| "grad_norm": 4.113061904907227, | |
| "learning_rate": 2.9302832244008716e-05, | |
| "loss": 4.718, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.41612200435729846, | |
| "grad_norm": 5.726804733276367, | |
| "learning_rate": 2.92483660130719e-05, | |
| "loss": 3.9106, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.41721132897603486, | |
| "grad_norm": 5.4816741943359375, | |
| "learning_rate": 2.9193899782135074e-05, | |
| "loss": 4.7959, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.41830065359477125, | |
| "grad_norm": 5.683758735656738, | |
| "learning_rate": 2.913943355119826e-05, | |
| "loss": 4.4144, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.41938997821350765, | |
| "grad_norm": 4.051987171173096, | |
| "learning_rate": 2.9084967320261443e-05, | |
| "loss": 4.6624, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.420479302832244, | |
| "grad_norm": 3.5016982555389404, | |
| "learning_rate": 2.9030501089324617e-05, | |
| "loss": 4.4873, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.4215686274509804, | |
| "grad_norm": 4.821328163146973, | |
| "learning_rate": 2.89760348583878e-05, | |
| "loss": 4.8168, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.4226579520697168, | |
| "grad_norm": 3.605252504348755, | |
| "learning_rate": 2.8921568627450986e-05, | |
| "loss": 4.3161, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.42374727668845313, | |
| "grad_norm": 4.22033166885376, | |
| "learning_rate": 2.8867102396514163e-05, | |
| "loss": 4.4452, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.42483660130718953, | |
| "grad_norm": 3.2900617122650146, | |
| "learning_rate": 2.8812636165577344e-05, | |
| "loss": 4.5288, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.42592592592592593, | |
| "grad_norm": 6.885782241821289, | |
| "learning_rate": 2.8758169934640522e-05, | |
| "loss": 4.6977, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.42701525054466233, | |
| "grad_norm": 6.14876651763916, | |
| "learning_rate": 2.8703703703703706e-05, | |
| "loss": 4.9728, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.42810457516339867, | |
| "grad_norm": 5.833094120025635, | |
| "learning_rate": 2.8649237472766887e-05, | |
| "loss": 4.2846, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.42919389978213507, | |
| "grad_norm": 4.787719249725342, | |
| "learning_rate": 2.8594771241830065e-05, | |
| "loss": 4.6669, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.43028322440087147, | |
| "grad_norm": 7.0684895515441895, | |
| "learning_rate": 2.854030501089325e-05, | |
| "loss": 5.1943, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.43137254901960786, | |
| "grad_norm": 3.3171699047088623, | |
| "learning_rate": 2.848583877995643e-05, | |
| "loss": 4.3689, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4324618736383442, | |
| "grad_norm": 3.6621744632720947, | |
| "learning_rate": 2.8431372549019608e-05, | |
| "loss": 4.4929, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.4335511982570806, | |
| "grad_norm": 3.942639112472534, | |
| "learning_rate": 2.8376906318082792e-05, | |
| "loss": 4.4459, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.434640522875817, | |
| "grad_norm": 3.8251397609710693, | |
| "learning_rate": 2.832244008714597e-05, | |
| "loss": 4.4222, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.4357298474945534, | |
| "grad_norm": 3.68534779548645, | |
| "learning_rate": 2.826797385620915e-05, | |
| "loss": 4.3779, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.43681917211328974, | |
| "grad_norm": 3.2854976654052734, | |
| "learning_rate": 2.8213507625272335e-05, | |
| "loss": 4.4839, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.43790849673202614, | |
| "grad_norm": 5.259005546569824, | |
| "learning_rate": 2.8159041394335512e-05, | |
| "loss": 4.6245, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.43899782135076254, | |
| "grad_norm": 4.582023620605469, | |
| "learning_rate": 2.8104575163398693e-05, | |
| "loss": 4.641, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4400871459694989, | |
| "grad_norm": 4.25729513168335, | |
| "learning_rate": 2.8050108932461878e-05, | |
| "loss": 4.5049, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.4411764705882353, | |
| "grad_norm": 2.630359172821045, | |
| "learning_rate": 2.7995642701525055e-05, | |
| "loss": 4.7791, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.4422657952069717, | |
| "grad_norm": 5.059445381164551, | |
| "learning_rate": 2.7941176470588236e-05, | |
| "loss": 4.2744, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4433551198257081, | |
| "grad_norm": 4.973911285400391, | |
| "learning_rate": 2.788671023965142e-05, | |
| "loss": 4.6604, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 5.941963195800781, | |
| "learning_rate": 2.7832244008714598e-05, | |
| "loss": 4.2393, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4455337690631808, | |
| "grad_norm": 4.253173828125, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 4.3192, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.4466230936819172, | |
| "grad_norm": 6.193839073181152, | |
| "learning_rate": 2.7723311546840957e-05, | |
| "loss": 4.9371, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4477124183006536, | |
| "grad_norm": 4.262903213500977, | |
| "learning_rate": 2.766884531590414e-05, | |
| "loss": 4.4425, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.44880174291938996, | |
| "grad_norm": 4.752615928649902, | |
| "learning_rate": 2.7614379084967322e-05, | |
| "loss": 4.7196, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.44989106753812635, | |
| "grad_norm": 5.462635517120361, | |
| "learning_rate": 2.75599128540305e-05, | |
| "loss": 4.5792, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.45098039215686275, | |
| "grad_norm": 4.441572666168213, | |
| "learning_rate": 2.7505446623093684e-05, | |
| "loss": 4.9088, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.45206971677559915, | |
| "grad_norm": 5.416077136993408, | |
| "learning_rate": 2.7450980392156865e-05, | |
| "loss": 4.6672, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4531590413943355, | |
| "grad_norm": 6.9087982177734375, | |
| "learning_rate": 2.7396514161220042e-05, | |
| "loss": 4.6494, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.4542483660130719, | |
| "grad_norm": 3.903771162033081, | |
| "learning_rate": 2.7342047930283227e-05, | |
| "loss": 4.602, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.4553376906318083, | |
| "grad_norm": 3.9193570613861084, | |
| "learning_rate": 2.7287581699346404e-05, | |
| "loss": 4.6281, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.4564270152505447, | |
| "grad_norm": 3.2757160663604736, | |
| "learning_rate": 2.7233115468409585e-05, | |
| "loss": 4.7661, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.45751633986928103, | |
| "grad_norm": 4.3017072677612305, | |
| "learning_rate": 2.717864923747277e-05, | |
| "loss": 4.9977, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.45860566448801743, | |
| "grad_norm": 3.991579294204712, | |
| "learning_rate": 2.7124183006535947e-05, | |
| "loss": 4.5315, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4596949891067538, | |
| "grad_norm": 4.237162113189697, | |
| "learning_rate": 2.706971677559913e-05, | |
| "loss": 4.5494, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.46078431372549017, | |
| "grad_norm": 3.7723388671875, | |
| "learning_rate": 2.7015250544662313e-05, | |
| "loss": 4.7606, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.46187363834422657, | |
| "grad_norm": 6.453665733337402, | |
| "learning_rate": 2.696078431372549e-05, | |
| "loss": 4.6261, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.46296296296296297, | |
| "grad_norm": 3.6461901664733887, | |
| "learning_rate": 2.6906318082788674e-05, | |
| "loss": 4.8389, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.46405228758169936, | |
| "grad_norm": 5.787034511566162, | |
| "learning_rate": 2.6851851851851855e-05, | |
| "loss": 4.4344, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4651416122004357, | |
| "grad_norm": 3.759100914001465, | |
| "learning_rate": 2.6797385620915033e-05, | |
| "loss": 4.5446, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.4662309368191721, | |
| "grad_norm": 3.7477407455444336, | |
| "learning_rate": 2.6742919389978217e-05, | |
| "loss": 4.4291, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4673202614379085, | |
| "grad_norm": 5.046353340148926, | |
| "learning_rate": 2.6688453159041395e-05, | |
| "loss": 4.6969, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4684095860566449, | |
| "grad_norm": 6.176473140716553, | |
| "learning_rate": 2.6633986928104576e-05, | |
| "loss": 4.5158, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.46949891067538124, | |
| "grad_norm": 3.801811456680298, | |
| "learning_rate": 2.657952069716776e-05, | |
| "loss": 4.7979, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 6.4397382736206055, | |
| "learning_rate": 2.6525054466230938e-05, | |
| "loss": 5.0053, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.47167755991285404, | |
| "grad_norm": 4.285563945770264, | |
| "learning_rate": 2.647058823529412e-05, | |
| "loss": 4.3998, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.47276688453159044, | |
| "grad_norm": 3.780402898788452, | |
| "learning_rate": 2.6416122004357303e-05, | |
| "loss": 4.7659, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4738562091503268, | |
| "grad_norm": 3.612997055053711, | |
| "learning_rate": 2.636165577342048e-05, | |
| "loss": 4.4869, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4749455337690632, | |
| "grad_norm": 5.552212715148926, | |
| "learning_rate": 2.630718954248366e-05, | |
| "loss": 4.4574, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.4760348583877996, | |
| "grad_norm": 6.293152809143066, | |
| "learning_rate": 2.625272331154684e-05, | |
| "loss": 4.4487, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.477124183006536, | |
| "grad_norm": 8.899681091308594, | |
| "learning_rate": 2.6198257080610024e-05, | |
| "loss": 5.0852, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.4782135076252723, | |
| "grad_norm": 4.204768657684326, | |
| "learning_rate": 2.6143790849673204e-05, | |
| "loss": 4.4156, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.4793028322440087, | |
| "grad_norm": 3.687103748321533, | |
| "learning_rate": 2.6089324618736382e-05, | |
| "loss": 4.6105, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4803921568627451, | |
| "grad_norm": 4.622270584106445, | |
| "learning_rate": 2.6034858387799566e-05, | |
| "loss": 4.5703, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.48148148148148145, | |
| "grad_norm": 3.8959567546844482, | |
| "learning_rate": 2.5980392156862747e-05, | |
| "loss": 4.3845, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.48257080610021785, | |
| "grad_norm": 3.7942402362823486, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 4.1155, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.48366013071895425, | |
| "grad_norm": 6.287596225738525, | |
| "learning_rate": 2.587145969498911e-05, | |
| "loss": 4.0269, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.48474945533769065, | |
| "grad_norm": 3.374274492263794, | |
| "learning_rate": 2.581699346405229e-05, | |
| "loss": 4.3734, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.485838779956427, | |
| "grad_norm": 4.503716945648193, | |
| "learning_rate": 2.5762527233115468e-05, | |
| "loss": 4.5737, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.4869281045751634, | |
| "grad_norm": 8.04896068572998, | |
| "learning_rate": 2.5708061002178652e-05, | |
| "loss": 4.174, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.4880174291938998, | |
| "grad_norm": 5.305229187011719, | |
| "learning_rate": 2.565359477124183e-05, | |
| "loss": 4.7195, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4891067538126362, | |
| "grad_norm": 5.27699613571167, | |
| "learning_rate": 2.559912854030501e-05, | |
| "loss": 4.7941, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.49019607843137253, | |
| "grad_norm": 5.369318962097168, | |
| "learning_rate": 2.5544662309368195e-05, | |
| "loss": 4.6811, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.4912854030501089, | |
| "grad_norm": 5.501255989074707, | |
| "learning_rate": 2.5490196078431373e-05, | |
| "loss": 4.8498, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.4923747276688453, | |
| "grad_norm": 3.6100876331329346, | |
| "learning_rate": 2.5435729847494554e-05, | |
| "loss": 4.2778, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.4934640522875817, | |
| "grad_norm": 6.290594577789307, | |
| "learning_rate": 2.5381263616557738e-05, | |
| "loss": 4.4993, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.49455337690631807, | |
| "grad_norm": 5.607398509979248, | |
| "learning_rate": 2.5326797385620915e-05, | |
| "loss": 4.8809, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.49564270152505446, | |
| "grad_norm": 5.030911445617676, | |
| "learning_rate": 2.5272331154684096e-05, | |
| "loss": 4.2877, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.49673202614379086, | |
| "grad_norm": 4.7398505210876465, | |
| "learning_rate": 2.5217864923747274e-05, | |
| "loss": 4.5916, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.49782135076252726, | |
| "grad_norm": 4.83160400390625, | |
| "learning_rate": 2.516339869281046e-05, | |
| "loss": 4.5043, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.4989106753812636, | |
| "grad_norm": 3.821277379989624, | |
| "learning_rate": 2.5108932461873643e-05, | |
| "loss": 4.3354, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 3.5293712615966797, | |
| "learning_rate": 2.5054466230936817e-05, | |
| "loss": 4.5431, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5010893246187363, | |
| "grad_norm": 3.7809646129608154, | |
| "learning_rate": 2.5e-05, | |
| "loss": 4.3752, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5021786492374728, | |
| "grad_norm": 5.320569038391113, | |
| "learning_rate": 2.4945533769063182e-05, | |
| "loss": 4.3153, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5032679738562091, | |
| "grad_norm": 3.0900938510894775, | |
| "learning_rate": 2.4891067538126363e-05, | |
| "loss": 4.5361, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5043572984749455, | |
| "grad_norm": 4.1562418937683105, | |
| "learning_rate": 2.4836601307189544e-05, | |
| "loss": 4.4103, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5054466230936819, | |
| "grad_norm": 3.888561248779297, | |
| "learning_rate": 2.4782135076252725e-05, | |
| "loss": 4.4744, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5065359477124183, | |
| "grad_norm": 4.95259952545166, | |
| "learning_rate": 2.4727668845315906e-05, | |
| "loss": 4.0709, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5076252723311547, | |
| "grad_norm": 3.6576156616210938, | |
| "learning_rate": 2.4673202614379087e-05, | |
| "loss": 4.6315, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5087145969498911, | |
| "grad_norm": 3.656534194946289, | |
| "learning_rate": 2.4618736383442268e-05, | |
| "loss": 4.5242, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.5098039215686274, | |
| "grad_norm": 7.541477203369141, | |
| "learning_rate": 2.456427015250545e-05, | |
| "loss": 4.0184, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5108932461873639, | |
| "grad_norm": 2.961549997329712, | |
| "learning_rate": 2.4509803921568626e-05, | |
| "loss": 4.7656, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5119825708061002, | |
| "grad_norm": 4.090572357177734, | |
| "learning_rate": 2.445533769063181e-05, | |
| "loss": 4.7871, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5130718954248366, | |
| "grad_norm": 3.390963554382324, | |
| "learning_rate": 2.4400871459694992e-05, | |
| "loss": 4.5202, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.514161220043573, | |
| "grad_norm": 4.17970609664917, | |
| "learning_rate": 2.434640522875817e-05, | |
| "loss": 4.4444, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5152505446623094, | |
| "grad_norm": 3.525709390640259, | |
| "learning_rate": 2.429193899782135e-05, | |
| "loss": 4.3122, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5163398692810458, | |
| "grad_norm": 4.426867485046387, | |
| "learning_rate": 2.4237472766884535e-05, | |
| "loss": 4.933, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5174291938997821, | |
| "grad_norm": 7.010494709014893, | |
| "learning_rate": 2.4183006535947712e-05, | |
| "loss": 4.4065, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5185185185185185, | |
| "grad_norm": 2.9829752445220947, | |
| "learning_rate": 2.4128540305010893e-05, | |
| "loss": 4.5573, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5196078431372549, | |
| "grad_norm": 3.1097142696380615, | |
| "learning_rate": 2.4074074074074074e-05, | |
| "loss": 4.3603, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5206971677559913, | |
| "grad_norm": 4.386830806732178, | |
| "learning_rate": 2.401960784313726e-05, | |
| "loss": 4.4535, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5217864923747276, | |
| "grad_norm": 2.8752951622009277, | |
| "learning_rate": 2.3965141612200436e-05, | |
| "loss": 4.6723, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5228758169934641, | |
| "grad_norm": 4.545241355895996, | |
| "learning_rate": 2.3910675381263617e-05, | |
| "loss": 4.544, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5239651416122004, | |
| "grad_norm": 5.116367340087891, | |
| "learning_rate": 2.38562091503268e-05, | |
| "loss": 4.3885, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.5250544662309368, | |
| "grad_norm": 3.625804901123047, | |
| "learning_rate": 2.380174291938998e-05, | |
| "loss": 4.3907, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5261437908496732, | |
| "grad_norm": 5.175843715667725, | |
| "learning_rate": 2.374727668845316e-05, | |
| "loss": 4.6858, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5272331154684096, | |
| "grad_norm": 4.214750289916992, | |
| "learning_rate": 2.369281045751634e-05, | |
| "loss": 4.538, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.528322440087146, | |
| "grad_norm": 3.885366916656494, | |
| "learning_rate": 2.3638344226579522e-05, | |
| "loss": 4.321, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5294117647058824, | |
| "grad_norm": 4.236208915710449, | |
| "learning_rate": 2.3583877995642703e-05, | |
| "loss": 4.2945, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5305010893246187, | |
| "grad_norm": 5.076857089996338, | |
| "learning_rate": 2.3529411764705884e-05, | |
| "loss": 4.5726, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5315904139433552, | |
| "grad_norm": 5.461459636688232, | |
| "learning_rate": 2.3474945533769065e-05, | |
| "loss": 4.3665, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5326797385620915, | |
| "grad_norm": 3.1988160610198975, | |
| "learning_rate": 2.3420479302832246e-05, | |
| "loss": 4.6294, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5337690631808278, | |
| "grad_norm": 4.889984130859375, | |
| "learning_rate": 2.3366013071895427e-05, | |
| "loss": 4.4692, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5348583877995643, | |
| "grad_norm": 3.5023293495178223, | |
| "learning_rate": 2.3311546840958608e-05, | |
| "loss": 4.4569, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5359477124183006, | |
| "grad_norm": 3.9273595809936523, | |
| "learning_rate": 2.3257080610021785e-05, | |
| "loss": 4.2707, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5370370370370371, | |
| "grad_norm": 6.1099653244018555, | |
| "learning_rate": 2.320261437908497e-05, | |
| "loss": 4.4849, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5381263616557734, | |
| "grad_norm": 3.858022928237915, | |
| "learning_rate": 2.314814814814815e-05, | |
| "loss": 4.4456, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5392156862745098, | |
| "grad_norm": 3.4729316234588623, | |
| "learning_rate": 2.3093681917211328e-05, | |
| "loss": 4.6576, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5403050108932462, | |
| "grad_norm": 3.97788405418396, | |
| "learning_rate": 2.303921568627451e-05, | |
| "loss": 4.3794, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5413943355119826, | |
| "grad_norm": 3.7366576194763184, | |
| "learning_rate": 2.2984749455337693e-05, | |
| "loss": 4.4944, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5424836601307189, | |
| "grad_norm": 4.208420276641846, | |
| "learning_rate": 2.293028322440087e-05, | |
| "loss": 4.2988, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5435729847494554, | |
| "grad_norm": 4.636012077331543, | |
| "learning_rate": 2.2875816993464052e-05, | |
| "loss": 4.5459, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5446623093681917, | |
| "grad_norm": 5.597863674163818, | |
| "learning_rate": 2.2821350762527236e-05, | |
| "loss": 4.3067, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.545751633986928, | |
| "grad_norm": 5.325355529785156, | |
| "learning_rate": 2.2766884531590417e-05, | |
| "loss": 4.2632, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5468409586056645, | |
| "grad_norm": 4.638630390167236, | |
| "learning_rate": 2.2712418300653595e-05, | |
| "loss": 4.2662, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5479302832244008, | |
| "grad_norm": 6.97488260269165, | |
| "learning_rate": 2.2657952069716776e-05, | |
| "loss": 4.3772, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5490196078431373, | |
| "grad_norm": 5.084072589874268, | |
| "learning_rate": 2.260348583877996e-05, | |
| "loss": 4.5242, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5501089324618736, | |
| "grad_norm": 3.98335862159729, | |
| "learning_rate": 2.2549019607843138e-05, | |
| "loss": 4.2033, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.55119825708061, | |
| "grad_norm": 3.2263453006744385, | |
| "learning_rate": 2.249455337690632e-05, | |
| "loss": 4.4282, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5522875816993464, | |
| "grad_norm": 3.9081408977508545, | |
| "learning_rate": 2.24400871459695e-05, | |
| "loss": 4.4474, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5533769063180828, | |
| "grad_norm": 4.037959098815918, | |
| "learning_rate": 2.238562091503268e-05, | |
| "loss": 4.3769, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5544662309368191, | |
| "grad_norm": 5.2942633628845215, | |
| "learning_rate": 2.233115468409586e-05, | |
| "loss": 4.2292, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 5.418858051300049, | |
| "learning_rate": 2.2276688453159042e-05, | |
| "loss": 4.9999, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5566448801742919, | |
| "grad_norm": 5.276211261749268, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 4.3284, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5577342047930284, | |
| "grad_norm": 5.227107048034668, | |
| "learning_rate": 2.2167755991285404e-05, | |
| "loss": 4.3607, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5588235294117647, | |
| "grad_norm": 5.116551876068115, | |
| "learning_rate": 2.2113289760348585e-05, | |
| "loss": 4.4697, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5599128540305011, | |
| "grad_norm": 5.137300491333008, | |
| "learning_rate": 2.2058823529411766e-05, | |
| "loss": 5.2701, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5610021786492375, | |
| "grad_norm": 5.481212139129639, | |
| "learning_rate": 2.2004357298474944e-05, | |
| "loss": 4.3033, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5620915032679739, | |
| "grad_norm": 4.41221809387207, | |
| "learning_rate": 2.1949891067538128e-05, | |
| "loss": 4.4464, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5631808278867102, | |
| "grad_norm": 4.827378273010254, | |
| "learning_rate": 2.189542483660131e-05, | |
| "loss": 4.2444, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5642701525054467, | |
| "grad_norm": 4.465081691741943, | |
| "learning_rate": 2.1840958605664487e-05, | |
| "loss": 4.4303, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.565359477124183, | |
| "grad_norm": 3.3138716220855713, | |
| "learning_rate": 2.178649237472767e-05, | |
| "loss": 4.4717, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5664488017429193, | |
| "grad_norm": 6.431881904602051, | |
| "learning_rate": 2.1732026143790852e-05, | |
| "loss": 4.7846, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5675381263616558, | |
| "grad_norm": 5.057530403137207, | |
| "learning_rate": 2.1677559912854033e-05, | |
| "loss": 4.5025, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5686274509803921, | |
| "grad_norm": 4.284402370452881, | |
| "learning_rate": 2.162309368191721e-05, | |
| "loss": 4.652, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5697167755991286, | |
| "grad_norm": 5.079988479614258, | |
| "learning_rate": 2.1568627450980395e-05, | |
| "loss": 4.7505, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5708061002178649, | |
| "grad_norm": 4.292697429656982, | |
| "learning_rate": 2.1514161220043576e-05, | |
| "loss": 4.5752, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5718954248366013, | |
| "grad_norm": 3.6556923389434814, | |
| "learning_rate": 2.1459694989106753e-05, | |
| "loss": 4.4215, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5729847494553377, | |
| "grad_norm": 4.009829521179199, | |
| "learning_rate": 2.1405228758169934e-05, | |
| "loss": 4.6643, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5740740740740741, | |
| "grad_norm": 3.5723490715026855, | |
| "learning_rate": 2.135076252723312e-05, | |
| "loss": 4.4207, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5751633986928104, | |
| "grad_norm": 3.3709464073181152, | |
| "learning_rate": 2.1296296296296296e-05, | |
| "loss": 4.3042, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5762527233115469, | |
| "grad_norm": 4.013104438781738, | |
| "learning_rate": 2.1241830065359477e-05, | |
| "loss": 4.7259, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5773420479302832, | |
| "grad_norm": 4.2519707679748535, | |
| "learning_rate": 2.1187363834422658e-05, | |
| "loss": 4.3777, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5784313725490197, | |
| "grad_norm": 5.45770788192749, | |
| "learning_rate": 2.113289760348584e-05, | |
| "loss": 4.4433, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.579520697167756, | |
| "grad_norm": 4.494056701660156, | |
| "learning_rate": 2.107843137254902e-05, | |
| "loss": 4.5641, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5806100217864923, | |
| "grad_norm": 3.2478690147399902, | |
| "learning_rate": 2.10239651416122e-05, | |
| "loss": 4.6785, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5816993464052288, | |
| "grad_norm": 3.92219877243042, | |
| "learning_rate": 2.0969498910675385e-05, | |
| "loss": 4.3875, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5827886710239651, | |
| "grad_norm": 3.4134628772735596, | |
| "learning_rate": 2.0915032679738563e-05, | |
| "loss": 4.577, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5838779956427015, | |
| "grad_norm": 6.350912094116211, | |
| "learning_rate": 2.0860566448801744e-05, | |
| "loss": 4.4662, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5849673202614379, | |
| "grad_norm": 3.528259754180908, | |
| "learning_rate": 2.0806100217864925e-05, | |
| "loss": 4.5197, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.5860566448801743, | |
| "grad_norm": 3.8688910007476807, | |
| "learning_rate": 2.0751633986928106e-05, | |
| "loss": 4.574, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5871459694989106, | |
| "grad_norm": 3.5777485370635986, | |
| "learning_rate": 2.0697167755991287e-05, | |
| "loss": 4.3704, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 3.7712960243225098, | |
| "learning_rate": 2.0642701525054468e-05, | |
| "loss": 4.6141, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5893246187363834, | |
| "grad_norm": 6.292933940887451, | |
| "learning_rate": 2.058823529411765e-05, | |
| "loss": 4.1746, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5904139433551199, | |
| "grad_norm": 4.540781021118164, | |
| "learning_rate": 2.053376906318083e-05, | |
| "loss": 4.6127, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5915032679738562, | |
| "grad_norm": 6.176912307739258, | |
| "learning_rate": 2.047930283224401e-05, | |
| "loss": 4.4052, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 5.659270286560059, | |
| "learning_rate": 2.042483660130719e-05, | |
| "loss": 4.5454, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.593681917211329, | |
| "grad_norm": 5.678999423980713, | |
| "learning_rate": 2.037037037037037e-05, | |
| "loss": 4.4154, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5947712418300654, | |
| "grad_norm": 5.631131649017334, | |
| "learning_rate": 2.0315904139433553e-05, | |
| "loss": 4.5521, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5958605664488017, | |
| "grad_norm": 5.528168201446533, | |
| "learning_rate": 2.0261437908496734e-05, | |
| "loss": 4.7157, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.5969498910675382, | |
| "grad_norm": 3.873080253601074, | |
| "learning_rate": 2.0206971677559912e-05, | |
| "loss": 4.2228, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.5980392156862745, | |
| "grad_norm": 3.855302095413208, | |
| "learning_rate": 2.0152505446623093e-05, | |
| "loss": 4.7138, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.599128540305011, | |
| "grad_norm": 3.60886549949646, | |
| "learning_rate": 2.0098039215686277e-05, | |
| "loss": 4.7938, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6002178649237473, | |
| "grad_norm": 4.135520935058594, | |
| "learning_rate": 2.0043572984749455e-05, | |
| "loss": 4.3301, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.6013071895424836, | |
| "grad_norm": 5.3503007888793945, | |
| "learning_rate": 1.9989106753812636e-05, | |
| "loss": 4.2567, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6023965141612201, | |
| "grad_norm": 5.291757583618164, | |
| "learning_rate": 1.993464052287582e-05, | |
| "loss": 4.2228, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.6034858387799564, | |
| "grad_norm": 4.103163719177246, | |
| "learning_rate": 1.9880174291938998e-05, | |
| "loss": 4.524, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6045751633986928, | |
| "grad_norm": 2.737255573272705, | |
| "learning_rate": 1.982570806100218e-05, | |
| "loss": 4.5155, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.6056644880174292, | |
| "grad_norm": 3.6738312244415283, | |
| "learning_rate": 1.977124183006536e-05, | |
| "loss": 4.4009, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6067538126361656, | |
| "grad_norm": 3.517465114593506, | |
| "learning_rate": 1.9716775599128544e-05, | |
| "loss": 4.443, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.6078431372549019, | |
| "grad_norm": 4.664461612701416, | |
| "learning_rate": 1.966230936819172e-05, | |
| "loss": 4.4875, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6089324618736384, | |
| "grad_norm": 4.701452255249023, | |
| "learning_rate": 1.9607843137254903e-05, | |
| "loss": 4.4107, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6100217864923747, | |
| "grad_norm": 4.044220924377441, | |
| "learning_rate": 1.9553376906318083e-05, | |
| "loss": 4.4389, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6111111111111112, | |
| "grad_norm": 7.266241550445557, | |
| "learning_rate": 1.9498910675381264e-05, | |
| "loss": 4.9888, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6122004357298475, | |
| "grad_norm": 4.253453731536865, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 4.4641, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6132897603485838, | |
| "grad_norm": 3.8209705352783203, | |
| "learning_rate": 1.9389978213507626e-05, | |
| "loss": 4.63, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.6143790849673203, | |
| "grad_norm": 5.892187595367432, | |
| "learning_rate": 1.9335511982570807e-05, | |
| "loss": 4.3883, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6154684095860566, | |
| "grad_norm": 3.769759178161621, | |
| "learning_rate": 1.9281045751633988e-05, | |
| "loss": 4.2574, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.616557734204793, | |
| "grad_norm": 4.405909061431885, | |
| "learning_rate": 1.922657952069717e-05, | |
| "loss": 4.4477, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6176470588235294, | |
| "grad_norm": 2.945544481277466, | |
| "learning_rate": 1.917211328976035e-05, | |
| "loss": 4.2905, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.6187363834422658, | |
| "grad_norm": 6.129364967346191, | |
| "learning_rate": 1.9117647058823528e-05, | |
| "loss": 3.9663, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6198257080610022, | |
| "grad_norm": 5.491422176361084, | |
| "learning_rate": 1.9063180827886712e-05, | |
| "loss": 4.0034, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6209150326797386, | |
| "grad_norm": 3.7789063453674316, | |
| "learning_rate": 1.9008714596949893e-05, | |
| "loss": 4.5785, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6220043572984749, | |
| "grad_norm": 3.3043131828308105, | |
| "learning_rate": 1.895424836601307e-05, | |
| "loss": 4.4756, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.6230936819172114, | |
| "grad_norm": 6.433457374572754, | |
| "learning_rate": 1.8899782135076255e-05, | |
| "loss": 4.0592, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6241830065359477, | |
| "grad_norm": 5.039549350738525, | |
| "learning_rate": 1.8845315904139436e-05, | |
| "loss": 4.6562, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.6252723311546841, | |
| "grad_norm": 3.616016387939453, | |
| "learning_rate": 1.8790849673202613e-05, | |
| "loss": 4.4358, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6263616557734205, | |
| "grad_norm": 3.770911455154419, | |
| "learning_rate": 1.8736383442265794e-05, | |
| "loss": 4.6426, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6274509803921569, | |
| "grad_norm": 3.6203596591949463, | |
| "learning_rate": 1.868191721132898e-05, | |
| "loss": 4.6525, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6285403050108932, | |
| "grad_norm": 3.580564498901367, | |
| "learning_rate": 1.862745098039216e-05, | |
| "loss": 4.2944, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.6296296296296297, | |
| "grad_norm": 3.8352503776550293, | |
| "learning_rate": 1.8572984749455337e-05, | |
| "loss": 4.2664, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.630718954248366, | |
| "grad_norm": 5.014768123626709, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 4.6075, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6318082788671024, | |
| "grad_norm": 3.753840923309326, | |
| "learning_rate": 1.8464052287581703e-05, | |
| "loss": 4.5553, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6328976034858388, | |
| "grad_norm": 4.463514804840088, | |
| "learning_rate": 1.840958605664488e-05, | |
| "loss": 4.6116, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6339869281045751, | |
| "grad_norm": 4.3140788078308105, | |
| "learning_rate": 1.835511982570806e-05, | |
| "loss": 4.1399, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6350762527233116, | |
| "grad_norm": 5.793092727661133, | |
| "learning_rate": 1.8300653594771242e-05, | |
| "loss": 4.2842, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.6361655773420479, | |
| "grad_norm": 3.975034236907959, | |
| "learning_rate": 1.8246187363834423e-05, | |
| "loss": 4.3797, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6372549019607843, | |
| "grad_norm": 3.4875426292419434, | |
| "learning_rate": 1.8191721132897604e-05, | |
| "loss": 4.3856, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6383442265795207, | |
| "grad_norm": 3.9347646236419678, | |
| "learning_rate": 1.8137254901960785e-05, | |
| "loss": 4.6164, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6394335511982571, | |
| "grad_norm": 3.559917449951172, | |
| "learning_rate": 1.8082788671023966e-05, | |
| "loss": 4.2322, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6405228758169934, | |
| "grad_norm": 3.8720946311950684, | |
| "learning_rate": 1.8028322440087147e-05, | |
| "loss": 4.3525, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6416122004357299, | |
| "grad_norm": 5.156309604644775, | |
| "learning_rate": 1.7973856209150328e-05, | |
| "loss": 4.2366, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6427015250544662, | |
| "grad_norm": 5.168879985809326, | |
| "learning_rate": 1.791938997821351e-05, | |
| "loss": 4.8512, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6437908496732027, | |
| "grad_norm": 4.186473369598389, | |
| "learning_rate": 1.786492374727669e-05, | |
| "loss": 4.4737, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.644880174291939, | |
| "grad_norm": 4.606125831604004, | |
| "learning_rate": 1.781045751633987e-05, | |
| "loss": 4.2855, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6459694989106753, | |
| "grad_norm": 4.646910190582275, | |
| "learning_rate": 1.775599128540305e-05, | |
| "loss": 4.7942, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6470588235294118, | |
| "grad_norm": 4.100317001342773, | |
| "learning_rate": 1.770152505446623e-05, | |
| "loss": 4.198, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6481481481481481, | |
| "grad_norm": 3.7697038650512695, | |
| "learning_rate": 1.7647058823529414e-05, | |
| "loss": 4.7437, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6492374727668845, | |
| "grad_norm": 4.15360689163208, | |
| "learning_rate": 1.7592592592592595e-05, | |
| "loss": 4.7562, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6503267973856209, | |
| "grad_norm": 3.5473389625549316, | |
| "learning_rate": 1.7538126361655776e-05, | |
| "loss": 4.5991, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6514161220043573, | |
| "grad_norm": 4.4543070793151855, | |
| "learning_rate": 1.7483660130718953e-05, | |
| "loss": 3.9751, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6525054466230937, | |
| "grad_norm": 5.226731300354004, | |
| "learning_rate": 1.7429193899782137e-05, | |
| "loss": 4.3751, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6535947712418301, | |
| "grad_norm": 4.279996871948242, | |
| "learning_rate": 1.737472766884532e-05, | |
| "loss": 4.4227, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6546840958605664, | |
| "grad_norm": 5.284342288970947, | |
| "learning_rate": 1.7320261437908496e-05, | |
| "loss": 4.7611, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6557734204793029, | |
| "grad_norm": 3.662707805633545, | |
| "learning_rate": 1.7265795206971677e-05, | |
| "loss": 4.4621, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.6568627450980392, | |
| "grad_norm": 3.8184633255004883, | |
| "learning_rate": 1.721132897603486e-05, | |
| "loss": 4.3989, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6579520697167756, | |
| "grad_norm": 4.741728782653809, | |
| "learning_rate": 1.715686274509804e-05, | |
| "loss": 4.424, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.659041394335512, | |
| "grad_norm": 4.0623674392700195, | |
| "learning_rate": 1.710239651416122e-05, | |
| "loss": 4.7248, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6601307189542484, | |
| "grad_norm": 3.31766676902771, | |
| "learning_rate": 1.70479302832244e-05, | |
| "loss": 4.3949, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6612200435729847, | |
| "grad_norm": 5.473141193389893, | |
| "learning_rate": 1.6993464052287582e-05, | |
| "loss": 4.6733, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6623093681917211, | |
| "grad_norm": 4.301869869232178, | |
| "learning_rate": 1.6938997821350763e-05, | |
| "loss": 4.6882, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6633986928104575, | |
| "grad_norm": 4.356242656707764, | |
| "learning_rate": 1.6884531590413944e-05, | |
| "loss": 4.437, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.664488017429194, | |
| "grad_norm": 4.012183666229248, | |
| "learning_rate": 1.6830065359477125e-05, | |
| "loss": 4.5798, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6655773420479303, | |
| "grad_norm": 3.4721477031707764, | |
| "learning_rate": 1.6775599128540306e-05, | |
| "loss": 4.2853, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 5.355907440185547, | |
| "learning_rate": 1.6721132897603487e-05, | |
| "loss": 4.9026, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6677559912854031, | |
| "grad_norm": 5.025888919830322, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 4.696, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6688453159041394, | |
| "grad_norm": 4.778173446655273, | |
| "learning_rate": 1.661220043572985e-05, | |
| "loss": 4.3627, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6699346405228758, | |
| "grad_norm": 4.3311662673950195, | |
| "learning_rate": 1.655773420479303e-05, | |
| "loss": 3.9889, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6710239651416122, | |
| "grad_norm": 4.17963981628418, | |
| "learning_rate": 1.650326797385621e-05, | |
| "loss": 4.3898, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6721132897603486, | |
| "grad_norm": 3.798701286315918, | |
| "learning_rate": 1.644880174291939e-05, | |
| "loss": 4.562, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.673202614379085, | |
| "grad_norm": 3.7920596599578857, | |
| "learning_rate": 1.6394335511982572e-05, | |
| "loss": 4.4841, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6742919389978214, | |
| "grad_norm": 6.034610748291016, | |
| "learning_rate": 1.6339869281045753e-05, | |
| "loss": 4.6834, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6753812636165577, | |
| "grad_norm": 4.064595699310303, | |
| "learning_rate": 1.6285403050108934e-05, | |
| "loss": 4.4472, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6764705882352942, | |
| "grad_norm": 4.325406551361084, | |
| "learning_rate": 1.6230936819172112e-05, | |
| "loss": 4.696, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.6775599128540305, | |
| "grad_norm": 5.50532865524292, | |
| "learning_rate": 1.6176470588235296e-05, | |
| "loss": 4.4232, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6786492374727668, | |
| "grad_norm": 5.562056064605713, | |
| "learning_rate": 1.6122004357298477e-05, | |
| "loss": 4.5281, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.6797385620915033, | |
| "grad_norm": 4.248898983001709, | |
| "learning_rate": 1.6067538126361655e-05, | |
| "loss": 4.6647, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6808278867102396, | |
| "grad_norm": 3.5773556232452393, | |
| "learning_rate": 1.6013071895424836e-05, | |
| "loss": 4.337, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.681917211328976, | |
| "grad_norm": 6.574905872344971, | |
| "learning_rate": 1.595860566448802e-05, | |
| "loss": 4.263, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6830065359477124, | |
| "grad_norm": 4.928457736968994, | |
| "learning_rate": 1.5904139433551197e-05, | |
| "loss": 4.273, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6840958605664488, | |
| "grad_norm": 4.450826168060303, | |
| "learning_rate": 1.584967320261438e-05, | |
| "loss": 4.6709, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6851851851851852, | |
| "grad_norm": 3.8180322647094727, | |
| "learning_rate": 1.5795206971677563e-05, | |
| "loss": 4.4993, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.6862745098039216, | |
| "grad_norm": 5.934114456176758, | |
| "learning_rate": 1.574074074074074e-05, | |
| "loss": 4.6746, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6873638344226579, | |
| "grad_norm": 4.26176643371582, | |
| "learning_rate": 1.568627450980392e-05, | |
| "loss": 4.3226, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6884531590413944, | |
| "grad_norm": 5.080132007598877, | |
| "learning_rate": 1.5631808278867102e-05, | |
| "loss": 4.5198, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6895424836601307, | |
| "grad_norm": 4.874204158782959, | |
| "learning_rate": 1.5577342047930287e-05, | |
| "loss": 4.3132, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.690631808278867, | |
| "grad_norm": 4.675774574279785, | |
| "learning_rate": 1.5522875816993464e-05, | |
| "loss": 4.3923, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.6917211328976035, | |
| "grad_norm": 5.875096797943115, | |
| "learning_rate": 1.5468409586056645e-05, | |
| "loss": 4.7479, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6928104575163399, | |
| "grad_norm": 3.361774444580078, | |
| "learning_rate": 1.5413943355119826e-05, | |
| "loss": 4.9412, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6938997821350763, | |
| "grad_norm": 4.26216459274292, | |
| "learning_rate": 1.5359477124183007e-05, | |
| "loss": 4.2507, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.6949891067538126, | |
| "grad_norm": 4.495184898376465, | |
| "learning_rate": 1.5305010893246188e-05, | |
| "loss": 4.4109, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.696078431372549, | |
| "grad_norm": 4.9283928871154785, | |
| "learning_rate": 1.5250544662309369e-05, | |
| "loss": 4.2233, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.6971677559912854, | |
| "grad_norm": 4.214562892913818, | |
| "learning_rate": 1.5196078431372548e-05, | |
| "loss": 4.1414, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6982570806100218, | |
| "grad_norm": 5.533045768737793, | |
| "learning_rate": 1.5141612200435731e-05, | |
| "loss": 4.5057, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.6993464052287581, | |
| "grad_norm": 4.344498157501221, | |
| "learning_rate": 1.5087145969498912e-05, | |
| "loss": 4.3404, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7004357298474946, | |
| "grad_norm": 5.38240909576416, | |
| "learning_rate": 1.5032679738562091e-05, | |
| "loss": 4.4784, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.7015250544662309, | |
| "grad_norm": 5.718114376068115, | |
| "learning_rate": 1.4978213507625274e-05, | |
| "loss": 4.3583, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7026143790849673, | |
| "grad_norm": 4.159877777099609, | |
| "learning_rate": 1.4923747276688455e-05, | |
| "loss": 4.4168, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.7037037037037037, | |
| "grad_norm": 4.517073154449463, | |
| "learning_rate": 1.4869281045751634e-05, | |
| "loss": 4.2588, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7047930283224401, | |
| "grad_norm": 3.817117214202881, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 4.4231, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 4.703458309173584, | |
| "learning_rate": 1.4760348583877998e-05, | |
| "loss": 4.2935, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7069716775599129, | |
| "grad_norm": 3.304274320602417, | |
| "learning_rate": 1.4705882352941177e-05, | |
| "loss": 4.3656, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.7080610021786492, | |
| "grad_norm": 3.274470329284668, | |
| "learning_rate": 1.4651416122004358e-05, | |
| "loss": 4.351, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7091503267973857, | |
| "grad_norm": 3.132416009902954, | |
| "learning_rate": 1.4596949891067537e-05, | |
| "loss": 4.1874, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.710239651416122, | |
| "grad_norm": 3.9012346267700195, | |
| "learning_rate": 1.4542483660130721e-05, | |
| "loss": 4.3016, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7113289760348583, | |
| "grad_norm": 3.704073429107666, | |
| "learning_rate": 1.44880174291939e-05, | |
| "loss": 4.3462, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.7124183006535948, | |
| "grad_norm": 3.827047348022461, | |
| "learning_rate": 1.4433551198257082e-05, | |
| "loss": 4.6647, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7135076252723311, | |
| "grad_norm": 7.284724712371826, | |
| "learning_rate": 1.4379084967320261e-05, | |
| "loss": 4.1331, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7145969498910676, | |
| "grad_norm": 5.4424614906311035, | |
| "learning_rate": 1.4324618736383444e-05, | |
| "loss": 4.8203, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7156862745098039, | |
| "grad_norm": 7.457292556762695, | |
| "learning_rate": 1.4270152505446625e-05, | |
| "loss": 4.8083, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.7167755991285403, | |
| "grad_norm": 4.318899631500244, | |
| "learning_rate": 1.4215686274509804e-05, | |
| "loss": 4.3175, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7178649237472767, | |
| "grad_norm": 5.035153865814209, | |
| "learning_rate": 1.4161220043572985e-05, | |
| "loss": 4.3168, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.7189542483660131, | |
| "grad_norm": 4.649257183074951, | |
| "learning_rate": 1.4106753812636167e-05, | |
| "loss": 4.3793, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7200435729847494, | |
| "grad_norm": 5.266485691070557, | |
| "learning_rate": 1.4052287581699347e-05, | |
| "loss": 4.4818, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.7211328976034859, | |
| "grad_norm": 3.864244222640991, | |
| "learning_rate": 1.3997821350762528e-05, | |
| "loss": 4.5254, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7222222222222222, | |
| "grad_norm": 5.268979549407959, | |
| "learning_rate": 1.394335511982571e-05, | |
| "loss": 4.5824, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7233115468409586, | |
| "grad_norm": 4.501189708709717, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 4.5174, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.724400871459695, | |
| "grad_norm": 4.021040439605713, | |
| "learning_rate": 1.383442265795207e-05, | |
| "loss": 4.5277, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7254901960784313, | |
| "grad_norm": 5.010334014892578, | |
| "learning_rate": 1.377995642701525e-05, | |
| "loss": 4.9117, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7265795206971678, | |
| "grad_norm": 3.285604476928711, | |
| "learning_rate": 1.3725490196078432e-05, | |
| "loss": 4.3526, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.7276688453159041, | |
| "grad_norm": 5.264760494232178, | |
| "learning_rate": 1.3671023965141613e-05, | |
| "loss": 4.8033, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7287581699346405, | |
| "grad_norm": 4.862313270568848, | |
| "learning_rate": 1.3616557734204793e-05, | |
| "loss": 4.2244, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.7298474945533769, | |
| "grad_norm": 4.057625770568848, | |
| "learning_rate": 1.3562091503267974e-05, | |
| "loss": 4.5175, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7309368191721133, | |
| "grad_norm": 6.695583343505859, | |
| "learning_rate": 1.3507625272331156e-05, | |
| "loss": 4.3994, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.7320261437908496, | |
| "grad_norm": 4.969459533691406, | |
| "learning_rate": 1.3453159041394337e-05, | |
| "loss": 4.6669, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.7331154684095861, | |
| "grad_norm": 4.054382801055908, | |
| "learning_rate": 1.3398692810457516e-05, | |
| "loss": 4.0327, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.7342047930283224, | |
| "grad_norm": 4.597656726837158, | |
| "learning_rate": 1.3344226579520697e-05, | |
| "loss": 4.2524, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7352941176470589, | |
| "grad_norm": 3.264561891555786, | |
| "learning_rate": 1.328976034858388e-05, | |
| "loss": 4.2613, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7363834422657952, | |
| "grad_norm": 2.55009126663208, | |
| "learning_rate": 1.323529411764706e-05, | |
| "loss": 4.7776, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7374727668845316, | |
| "grad_norm": 3.315603733062744, | |
| "learning_rate": 1.318082788671024e-05, | |
| "loss": 4.4884, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.738562091503268, | |
| "grad_norm": 6.264802932739258, | |
| "learning_rate": 1.312636165577342e-05, | |
| "loss": 4.6315, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7396514161220044, | |
| "grad_norm": 3.6444966793060303, | |
| "learning_rate": 1.3071895424836602e-05, | |
| "loss": 4.6662, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 3.580052375793457, | |
| "learning_rate": 1.3017429193899783e-05, | |
| "loss": 4.7089, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7418300653594772, | |
| "grad_norm": 2.530729055404663, | |
| "learning_rate": 1.2962962962962962e-05, | |
| "loss": 4.3946, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7429193899782135, | |
| "grad_norm": 6.866367340087891, | |
| "learning_rate": 1.2908496732026145e-05, | |
| "loss": 4.4075, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7440087145969498, | |
| "grad_norm": 4.892932415008545, | |
| "learning_rate": 1.2854030501089326e-05, | |
| "loss": 4.3774, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7450980392156863, | |
| "grad_norm": 4.313869476318359, | |
| "learning_rate": 1.2799564270152505e-05, | |
| "loss": 4.4591, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7461873638344226, | |
| "grad_norm": 5.024018287658691, | |
| "learning_rate": 1.2745098039215686e-05, | |
| "loss": 4.8658, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7472766884531591, | |
| "grad_norm": 4.71513557434082, | |
| "learning_rate": 1.2690631808278869e-05, | |
| "loss": 4.4149, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7483660130718954, | |
| "grad_norm": 5.822652339935303, | |
| "learning_rate": 1.2636165577342048e-05, | |
| "loss": 3.977, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7494553376906318, | |
| "grad_norm": 5.159669399261475, | |
| "learning_rate": 1.258169934640523e-05, | |
| "loss": 4.2576, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7505446623093682, | |
| "grad_norm": 5.247452259063721, | |
| "learning_rate": 1.2527233115468408e-05, | |
| "loss": 4.0757, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7516339869281046, | |
| "grad_norm": 5.6101393699646, | |
| "learning_rate": 1.2472766884531591e-05, | |
| "loss": 4.3722, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7527233115468409, | |
| "grad_norm": 3.71991229057312, | |
| "learning_rate": 1.2418300653594772e-05, | |
| "loss": 4.3952, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7538126361655774, | |
| "grad_norm": 4.731160640716553, | |
| "learning_rate": 1.2363834422657953e-05, | |
| "loss": 4.8297, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7549019607843137, | |
| "grad_norm": 5.578763961791992, | |
| "learning_rate": 1.2309368191721134e-05, | |
| "loss": 4.1336, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7559912854030502, | |
| "grad_norm": 6.221158504486084, | |
| "learning_rate": 1.2254901960784313e-05, | |
| "loss": 4.1257, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7570806100217865, | |
| "grad_norm": 3.230177879333496, | |
| "learning_rate": 1.2200435729847496e-05, | |
| "loss": 4.4561, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7581699346405228, | |
| "grad_norm": 4.53138542175293, | |
| "learning_rate": 1.2145969498910675e-05, | |
| "loss": 4.4213, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7592592592592593, | |
| "grad_norm": 4.148241996765137, | |
| "learning_rate": 1.2091503267973856e-05, | |
| "loss": 4.1366, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.7603485838779956, | |
| "grad_norm": 3.46504545211792, | |
| "learning_rate": 1.2037037037037037e-05, | |
| "loss": 4.1183, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.761437908496732, | |
| "grad_norm": 3.9699110984802246, | |
| "learning_rate": 1.1982570806100218e-05, | |
| "loss": 4.5147, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.7625272331154684, | |
| "grad_norm": 3.1759867668151855, | |
| "learning_rate": 1.19281045751634e-05, | |
| "loss": 4.4518, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7636165577342048, | |
| "grad_norm": 4.052336692810059, | |
| "learning_rate": 1.187363834422658e-05, | |
| "loss": 4.3572, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.7647058823529411, | |
| "grad_norm": 5.124566555023193, | |
| "learning_rate": 1.1819172113289761e-05, | |
| "loss": 4.7821, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7657952069716776, | |
| "grad_norm": 3.5251591205596924, | |
| "learning_rate": 1.1764705882352942e-05, | |
| "loss": 4.3128, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.7668845315904139, | |
| "grad_norm": 7.104674816131592, | |
| "learning_rate": 1.1710239651416123e-05, | |
| "loss": 4.7515, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.7679738562091504, | |
| "grad_norm": 4.211679458618164, | |
| "learning_rate": 1.1655773420479304e-05, | |
| "loss": 4.5811, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7690631808278867, | |
| "grad_norm": 3.5442311763763428, | |
| "learning_rate": 1.1601307189542485e-05, | |
| "loss": 4.3406, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.7701525054466231, | |
| "grad_norm": 5.620753765106201, | |
| "learning_rate": 1.1546840958605664e-05, | |
| "loss": 4.1672, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7712418300653595, | |
| "grad_norm": 3.878863573074341, | |
| "learning_rate": 1.1492374727668847e-05, | |
| "loss": 4.3266, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7723311546840959, | |
| "grad_norm": 4.736238479614258, | |
| "learning_rate": 1.1437908496732026e-05, | |
| "loss": 4.8245, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.7734204793028322, | |
| "grad_norm": 5.532759189605713, | |
| "learning_rate": 1.1383442265795209e-05, | |
| "loss": 4.1712, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7745098039215687, | |
| "grad_norm": 5.934828758239746, | |
| "learning_rate": 1.1328976034858388e-05, | |
| "loss": 3.9445, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.775599128540305, | |
| "grad_norm": 5.191685676574707, | |
| "learning_rate": 1.1274509803921569e-05, | |
| "loss": 4.2591, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7766884531590414, | |
| "grad_norm": 3.8068275451660156, | |
| "learning_rate": 1.122004357298475e-05, | |
| "loss": 4.2859, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7777777777777778, | |
| "grad_norm": 4.339662551879883, | |
| "learning_rate": 1.116557734204793e-05, | |
| "loss": 4.4524, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7788671023965141, | |
| "grad_norm": 4.808858394622803, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 3.8684, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7799564270152506, | |
| "grad_norm": 3.490757465362549, | |
| "learning_rate": 1.1056644880174293e-05, | |
| "loss": 4.405, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7810457516339869, | |
| "grad_norm": 6.166412353515625, | |
| "learning_rate": 1.1002178649237472e-05, | |
| "loss": 4.5558, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7821350762527233, | |
| "grad_norm": 4.813010215759277, | |
| "learning_rate": 1.0947712418300655e-05, | |
| "loss": 4.4605, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7832244008714597, | |
| "grad_norm": 5.095247745513916, | |
| "learning_rate": 1.0893246187363835e-05, | |
| "loss": 4.4758, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.7843137254901961, | |
| "grad_norm": 4.473886013031006, | |
| "learning_rate": 1.0838779956427016e-05, | |
| "loss": 4.3553, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7854030501089324, | |
| "grad_norm": 5.142338275909424, | |
| "learning_rate": 1.0784313725490197e-05, | |
| "loss": 4.0964, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.7864923747276689, | |
| "grad_norm": 3.708817481994629, | |
| "learning_rate": 1.0729847494553377e-05, | |
| "loss": 4.4057, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.7875816993464052, | |
| "grad_norm": 4.8980607986450195, | |
| "learning_rate": 1.067538126361656e-05, | |
| "loss": 4.0771, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.7886710239651417, | |
| "grad_norm": 4.077849388122559, | |
| "learning_rate": 1.0620915032679739e-05, | |
| "loss": 4.114, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.789760348583878, | |
| "grad_norm": 5.82405424118042, | |
| "learning_rate": 1.056644880174292e-05, | |
| "loss": 4.2512, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7908496732026143, | |
| "grad_norm": 4.173725605010986, | |
| "learning_rate": 1.05119825708061e-05, | |
| "loss": 4.4487, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.7919389978213508, | |
| "grad_norm": 6.852526664733887, | |
| "learning_rate": 1.0457516339869281e-05, | |
| "loss": 4.5179, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.7930283224400871, | |
| "grad_norm": 6.219126224517822, | |
| "learning_rate": 1.0403050108932462e-05, | |
| "loss": 4.3682, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7941176470588235, | |
| "grad_norm": 5.304159641265869, | |
| "learning_rate": 1.0348583877995643e-05, | |
| "loss": 4.5552, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.7952069716775599, | |
| "grad_norm": 4.6095499992370605, | |
| "learning_rate": 1.0294117647058824e-05, | |
| "loss": 4.6384, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7962962962962963, | |
| "grad_norm": 4.740571022033691, | |
| "learning_rate": 1.0239651416122005e-05, | |
| "loss": 4.2068, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.7973856209150327, | |
| "grad_norm": 4.864438533782959, | |
| "learning_rate": 1.0185185185185185e-05, | |
| "loss": 4.1426, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7984749455337691, | |
| "grad_norm": 3.6585988998413086, | |
| "learning_rate": 1.0130718954248367e-05, | |
| "loss": 4.3638, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.7995642701525054, | |
| "grad_norm": 6.159580230712891, | |
| "learning_rate": 1.0076252723311546e-05, | |
| "loss": 4.1251, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8006535947712419, | |
| "grad_norm": 5.787691593170166, | |
| "learning_rate": 1.0021786492374727e-05, | |
| "loss": 4.414, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.8017429193899782, | |
| "grad_norm": 5.72088098526001, | |
| "learning_rate": 9.96732026143791e-06, | |
| "loss": 4.4709, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.8028322440087146, | |
| "grad_norm": 3.6602089405059814, | |
| "learning_rate": 9.91285403050109e-06, | |
| "loss": 4.7765, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.803921568627451, | |
| "grad_norm": 3.4336533546447754, | |
| "learning_rate": 9.858387799564272e-06, | |
| "loss": 4.6457, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.8050108932461874, | |
| "grad_norm": 4.4453959465026855, | |
| "learning_rate": 9.803921568627451e-06, | |
| "loss": 4.3316, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.8061002178649237, | |
| "grad_norm": 7.507024765014648, | |
| "learning_rate": 9.749455337690632e-06, | |
| "loss": 4.2834, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8071895424836601, | |
| "grad_norm": 3.5269410610198975, | |
| "learning_rate": 9.694989106753813e-06, | |
| "loss": 4.6223, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.8082788671023965, | |
| "grad_norm": 4.977156639099121, | |
| "learning_rate": 9.640522875816994e-06, | |
| "loss": 4.7315, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.809368191721133, | |
| "grad_norm": 5.052213191986084, | |
| "learning_rate": 9.586056644880175e-06, | |
| "loss": 4.6754, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.8104575163398693, | |
| "grad_norm": 4.510020732879639, | |
| "learning_rate": 9.531590413943356e-06, | |
| "loss": 3.9525, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8115468409586056, | |
| "grad_norm": 5.518550395965576, | |
| "learning_rate": 9.477124183006535e-06, | |
| "loss": 4.8572, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.8126361655773421, | |
| "grad_norm": 4.472283840179443, | |
| "learning_rate": 9.422657952069718e-06, | |
| "loss": 4.5, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8137254901960784, | |
| "grad_norm": 5.2868170738220215, | |
| "learning_rate": 9.368191721132897e-06, | |
| "loss": 4.3304, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.8148148148148148, | |
| "grad_norm": 4.674109935760498, | |
| "learning_rate": 9.31372549019608e-06, | |
| "loss": 4.5449, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8159041394335512, | |
| "grad_norm": 3.528538703918457, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 4.5034, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.8169934640522876, | |
| "grad_norm": 5.852397918701172, | |
| "learning_rate": 9.20479302832244e-06, | |
| "loss": 4.3882, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.818082788671024, | |
| "grad_norm": 3.6547701358795166, | |
| "learning_rate": 9.150326797385621e-06, | |
| "loss": 4.4889, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.8191721132897604, | |
| "grad_norm": 4.575140953063965, | |
| "learning_rate": 9.095860566448802e-06, | |
| "loss": 4.3097, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.8202614379084967, | |
| "grad_norm": 4.217095851898193, | |
| "learning_rate": 9.041394335511983e-06, | |
| "loss": 4.2226, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.8213507625272332, | |
| "grad_norm": 4.0072832107543945, | |
| "learning_rate": 8.986928104575164e-06, | |
| "loss": 4.25, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8224400871459695, | |
| "grad_norm": 3.6737847328186035, | |
| "learning_rate": 8.932461873638345e-06, | |
| "loss": 4.4347, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 4.096911907196045, | |
| "learning_rate": 8.877995642701526e-06, | |
| "loss": 4.8721, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8246187363834423, | |
| "grad_norm": 3.2007627487182617, | |
| "learning_rate": 8.823529411764707e-06, | |
| "loss": 4.2796, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.8257080610021786, | |
| "grad_norm": 4.172240257263184, | |
| "learning_rate": 8.769063180827888e-06, | |
| "loss": 4.2638, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.826797385620915, | |
| "grad_norm": 4.084839820861816, | |
| "learning_rate": 8.714596949891069e-06, | |
| "loss": 4.2923, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.8278867102396514, | |
| "grad_norm": 7.394184589385986, | |
| "learning_rate": 8.660130718954248e-06, | |
| "loss": 4.8434, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8289760348583878, | |
| "grad_norm": 4.5885796546936035, | |
| "learning_rate": 8.60566448801743e-06, | |
| "loss": 4.791, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.8300653594771242, | |
| "grad_norm": 3.2286288738250732, | |
| "learning_rate": 8.55119825708061e-06, | |
| "loss": 4.3347, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8311546840958606, | |
| "grad_norm": 5.890330791473389, | |
| "learning_rate": 8.496732026143791e-06, | |
| "loss": 4.4, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.8322440087145969, | |
| "grad_norm": 4.351938247680664, | |
| "learning_rate": 8.442265795206972e-06, | |
| "loss": 4.6613, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 3.542966604232788, | |
| "learning_rate": 8.387799564270153e-06, | |
| "loss": 4.4487, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8344226579520697, | |
| "grad_norm": 5.875298500061035, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 4.7932, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.835511982570806, | |
| "grad_norm": 3.6736464500427246, | |
| "learning_rate": 8.278867102396515e-06, | |
| "loss": 4.3527, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.8366013071895425, | |
| "grad_norm": 5.177203178405762, | |
| "learning_rate": 8.224400871459696e-06, | |
| "loss": 4.3445, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8376906318082789, | |
| "grad_norm": 3.6291041374206543, | |
| "learning_rate": 8.169934640522877e-06, | |
| "loss": 4.4271, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.8387799564270153, | |
| "grad_norm": 4.049022197723389, | |
| "learning_rate": 8.115468409586056e-06, | |
| "loss": 4.2609, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8398692810457516, | |
| "grad_norm": 4.5292510986328125, | |
| "learning_rate": 8.061002178649239e-06, | |
| "loss": 4.295, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.840958605664488, | |
| "grad_norm": 4.239173412322998, | |
| "learning_rate": 8.006535947712418e-06, | |
| "loss": 4.742, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8420479302832244, | |
| "grad_norm": 4.290888786315918, | |
| "learning_rate": 7.952069716775599e-06, | |
| "loss": 4.4646, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.8431372549019608, | |
| "grad_norm": 4.301486492156982, | |
| "learning_rate": 7.897603485838781e-06, | |
| "loss": 4.277, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8442265795206971, | |
| "grad_norm": 4.258893966674805, | |
| "learning_rate": 7.84313725490196e-06, | |
| "loss": 4.4112, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8453159041394336, | |
| "grad_norm": 4.910170078277588, | |
| "learning_rate": 7.788671023965143e-06, | |
| "loss": 4.6635, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8464052287581699, | |
| "grad_norm": 3.051833391189575, | |
| "learning_rate": 7.734204793028323e-06, | |
| "loss": 4.3734, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8474945533769063, | |
| "grad_norm": 3.251164674758911, | |
| "learning_rate": 7.679738562091504e-06, | |
| "loss": 4.3444, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8485838779956427, | |
| "grad_norm": 5.197653293609619, | |
| "learning_rate": 7.6252723311546845e-06, | |
| "loss": 4.3505, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.8496732026143791, | |
| "grad_norm": 4.635519981384277, | |
| "learning_rate": 7.5708061002178655e-06, | |
| "loss": 4.1494, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8507625272331155, | |
| "grad_norm": 4.019258499145508, | |
| "learning_rate": 7.5163398692810456e-06, | |
| "loss": 4.5715, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.8518518518518519, | |
| "grad_norm": 5.865679740905762, | |
| "learning_rate": 7.461873638344227e-06, | |
| "loss": 4.0529, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8529411764705882, | |
| "grad_norm": 4.1584320068359375, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 4.1815, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8540305010893247, | |
| "grad_norm": 5.819606781005859, | |
| "learning_rate": 7.3529411764705884e-06, | |
| "loss": 4.2299, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.855119825708061, | |
| "grad_norm": 3.334429979324341, | |
| "learning_rate": 7.2984749455337685e-06, | |
| "loss": 4.5384, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.8562091503267973, | |
| "grad_norm": 3.642559289932251, | |
| "learning_rate": 7.24400871459695e-06, | |
| "loss": 4.4612, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8572984749455338, | |
| "grad_norm": 4.0215582847595215, | |
| "learning_rate": 7.1895424836601305e-06, | |
| "loss": 4.1559, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8583877995642701, | |
| "grad_norm": 5.275108814239502, | |
| "learning_rate": 7.135076252723312e-06, | |
| "loss": 4.2849, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8594771241830066, | |
| "grad_norm": 3.2995479106903076, | |
| "learning_rate": 7.080610021786492e-06, | |
| "loss": 4.3136, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.8605664488017429, | |
| "grad_norm": 3.037461280822754, | |
| "learning_rate": 7.026143790849673e-06, | |
| "loss": 4.4863, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8616557734204793, | |
| "grad_norm": 4.794382095336914, | |
| "learning_rate": 6.971677559912855e-06, | |
| "loss": 4.6701, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.8627450980392157, | |
| "grad_norm": 4.511960029602051, | |
| "learning_rate": 6.917211328976035e-06, | |
| "loss": 4.4359, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8638344226579521, | |
| "grad_norm": 3.589517831802368, | |
| "learning_rate": 6.862745098039216e-06, | |
| "loss": 4.1445, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.8649237472766884, | |
| "grad_norm": 3.635469913482666, | |
| "learning_rate": 6.808278867102396e-06, | |
| "loss": 4.4353, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8660130718954249, | |
| "grad_norm": 6.3288116455078125, | |
| "learning_rate": 6.753812636165578e-06, | |
| "loss": 4.1747, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8671023965141612, | |
| "grad_norm": 5.579259395599365, | |
| "learning_rate": 6.699346405228758e-06, | |
| "loss": 3.9111, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.8681917211328976, | |
| "grad_norm": 2.8106861114501953, | |
| "learning_rate": 6.64488017429194e-06, | |
| "loss": 4.5088, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.869281045751634, | |
| "grad_norm": 4.313303470611572, | |
| "learning_rate": 6.59041394335512e-06, | |
| "loss": 4.1038, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8703703703703703, | |
| "grad_norm": 6.260618209838867, | |
| "learning_rate": 6.535947712418301e-06, | |
| "loss": 3.9127, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.8714596949891068, | |
| "grad_norm": 4.464169979095459, | |
| "learning_rate": 6.481481481481481e-06, | |
| "loss": 4.8833, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8725490196078431, | |
| "grad_norm": 6.596883773803711, | |
| "learning_rate": 6.427015250544663e-06, | |
| "loss": 4.2059, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.8736383442265795, | |
| "grad_norm": 4.000524044036865, | |
| "learning_rate": 6.372549019607843e-06, | |
| "loss": 4.375, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.8747276688453159, | |
| "grad_norm": 3.5694878101348877, | |
| "learning_rate": 6.318082788671024e-06, | |
| "loss": 4.4419, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.8758169934640523, | |
| "grad_norm": 5.649882793426514, | |
| "learning_rate": 6.263616557734204e-06, | |
| "loss": 4.6012, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8769063180827886, | |
| "grad_norm": 3.345165252685547, | |
| "learning_rate": 6.209150326797386e-06, | |
| "loss": 4.6001, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.8779956427015251, | |
| "grad_norm": 4.58209228515625, | |
| "learning_rate": 6.154684095860567e-06, | |
| "loss": 4.3318, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.8790849673202614, | |
| "grad_norm": 7.333117485046387, | |
| "learning_rate": 6.100217864923748e-06, | |
| "loss": 4.5413, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.8801742919389978, | |
| "grad_norm": 3.392754554748535, | |
| "learning_rate": 6.045751633986928e-06, | |
| "loss": 4.6486, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8812636165577342, | |
| "grad_norm": 3.35073184967041, | |
| "learning_rate": 5.991285403050109e-06, | |
| "loss": 4.1218, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 4.778507232666016, | |
| "learning_rate": 5.93681917211329e-06, | |
| "loss": 4.6813, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.883442265795207, | |
| "grad_norm": 3.8023056983947754, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 4.2966, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.8845315904139434, | |
| "grad_norm": 4.701990127563477, | |
| "learning_rate": 5.827886710239652e-06, | |
| "loss": 4.1789, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.8856209150326797, | |
| "grad_norm": 4.493887901306152, | |
| "learning_rate": 5.773420479302832e-06, | |
| "loss": 4.7467, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.8867102396514162, | |
| "grad_norm": 6.4288740158081055, | |
| "learning_rate": 5.718954248366013e-06, | |
| "loss": 4.1651, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.8877995642701525, | |
| "grad_norm": 4.479551315307617, | |
| "learning_rate": 5.664488017429194e-06, | |
| "loss": 4.0074, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 4.732947826385498, | |
| "learning_rate": 5.610021786492375e-06, | |
| "loss": 3.957, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.8899782135076253, | |
| "grad_norm": 4.029106140136719, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 4.3253, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.8910675381263616, | |
| "grad_norm": 5.939345359802246, | |
| "learning_rate": 5.501089324618736e-06, | |
| "loss": 4.987, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.8921568627450981, | |
| "grad_norm": 3.8225717544555664, | |
| "learning_rate": 5.446623093681918e-06, | |
| "loss": 4.5568, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.8932461873638344, | |
| "grad_norm": 4.563981533050537, | |
| "learning_rate": 5.392156862745099e-06, | |
| "loss": 4.5085, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8943355119825708, | |
| "grad_norm": 4.561498641967773, | |
| "learning_rate": 5.33769063180828e-06, | |
| "loss": 4.5559, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.8954248366013072, | |
| "grad_norm": 4.6697773933410645, | |
| "learning_rate": 5.28322440087146e-06, | |
| "loss": 4.6021, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.8965141612200436, | |
| "grad_norm": 4.074893474578857, | |
| "learning_rate": 5.228758169934641e-06, | |
| "loss": 4.3814, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.8976034858387799, | |
| "grad_norm": 4.4995341300964355, | |
| "learning_rate": 5.174291938997822e-06, | |
| "loss": 4.1494, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.8986928104575164, | |
| "grad_norm": 4.323368072509766, | |
| "learning_rate": 5.119825708061003e-06, | |
| "loss": 4.3344, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.8997821350762527, | |
| "grad_norm": 3.9739816188812256, | |
| "learning_rate": 5.065359477124184e-06, | |
| "loss": 4.2542, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.900871459694989, | |
| "grad_norm": 4.160286903381348, | |
| "learning_rate": 5.010893246187364e-06, | |
| "loss": 3.9936, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.9019607843137255, | |
| "grad_norm": 4.167511940002441, | |
| "learning_rate": 4.956427015250545e-06, | |
| "loss": 4.3592, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9030501089324618, | |
| "grad_norm": 4.035800933837891, | |
| "learning_rate": 4.901960784313726e-06, | |
| "loss": 4.1755, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.9041394335511983, | |
| "grad_norm": 5.353736877441406, | |
| "learning_rate": 4.847494553376907e-06, | |
| "loss": 4.3737, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.9052287581699346, | |
| "grad_norm": 3.228898048400879, | |
| "learning_rate": 4.7930283224400875e-06, | |
| "loss": 4.1672, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.906318082788671, | |
| "grad_norm": 4.113286972045898, | |
| "learning_rate": 4.738562091503268e-06, | |
| "loss": 4.2016, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.9074074074074074, | |
| "grad_norm": 4.611151218414307, | |
| "learning_rate": 4.684095860566449e-06, | |
| "loss": 4.1389, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.9084967320261438, | |
| "grad_norm": 4.544444561004639, | |
| "learning_rate": 4.6296296296296296e-06, | |
| "loss": 4.0643, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9095860566448801, | |
| "grad_norm": 6.940279960632324, | |
| "learning_rate": 4.5751633986928105e-06, | |
| "loss": 4.6954, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.9106753812636166, | |
| "grad_norm": 4.801361560821533, | |
| "learning_rate": 4.5206971677559915e-06, | |
| "loss": 4.1483, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.9117647058823529, | |
| "grad_norm": 7.0968828201293945, | |
| "learning_rate": 4.4662309368191724e-06, | |
| "loss": 4.3853, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.9128540305010894, | |
| "grad_norm": 4.090330123901367, | |
| "learning_rate": 4.411764705882353e-06, | |
| "loss": 4.1138, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9139433551198257, | |
| "grad_norm": 4.479462146759033, | |
| "learning_rate": 4.357298474945534e-06, | |
| "loss": 4.2546, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.9150326797385621, | |
| "grad_norm": 5.135635852813721, | |
| "learning_rate": 4.302832244008715e-06, | |
| "loss": 4.4206, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9161220043572985, | |
| "grad_norm": 5.2578253746032715, | |
| "learning_rate": 4.2483660130718954e-06, | |
| "loss": 4.899, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.9172113289760349, | |
| "grad_norm": 3.8340771198272705, | |
| "learning_rate": 4.193899782135076e-06, | |
| "loss": 4.1731, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9183006535947712, | |
| "grad_norm": 4.608198642730713, | |
| "learning_rate": 4.139433551198257e-06, | |
| "loss": 4.7056, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.9193899782135077, | |
| "grad_norm": 3.428565502166748, | |
| "learning_rate": 4.084967320261438e-06, | |
| "loss": 4.4572, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.920479302832244, | |
| "grad_norm": 4.144840717315674, | |
| "learning_rate": 4.030501089324619e-06, | |
| "loss": 4.5927, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.9215686274509803, | |
| "grad_norm": 4.844527721405029, | |
| "learning_rate": 3.976034858387799e-06, | |
| "loss": 4.4512, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9226579520697168, | |
| "grad_norm": 3.5561633110046387, | |
| "learning_rate": 3.92156862745098e-06, | |
| "loss": 4.2362, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.9237472766884531, | |
| "grad_norm": 4.015787124633789, | |
| "learning_rate": 3.867102396514161e-06, | |
| "loss": 4.2329, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9248366013071896, | |
| "grad_norm": 4.480269908905029, | |
| "learning_rate": 3.8126361655773422e-06, | |
| "loss": 4.325, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 4.577909469604492, | |
| "learning_rate": 3.7581699346405228e-06, | |
| "loss": 4.2491, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9270152505446623, | |
| "grad_norm": 4.601688861846924, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 4.4607, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.9281045751633987, | |
| "grad_norm": 5.8293914794921875, | |
| "learning_rate": 3.6492374727668843e-06, | |
| "loss": 4.5189, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9291938997821351, | |
| "grad_norm": 3.680563449859619, | |
| "learning_rate": 3.5947712418300652e-06, | |
| "loss": 4.3622, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.9302832244008714, | |
| "grad_norm": 4.435286998748779, | |
| "learning_rate": 3.540305010893246e-06, | |
| "loss": 4.2399, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9313725490196079, | |
| "grad_norm": 3.388056993484497, | |
| "learning_rate": 3.4858387799564276e-06, | |
| "loss": 4.366, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9324618736383442, | |
| "grad_norm": 5.682557106018066, | |
| "learning_rate": 3.431372549019608e-06, | |
| "loss": 4.7221, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9335511982570807, | |
| "grad_norm": 5.899126052856445, | |
| "learning_rate": 3.376906318082789e-06, | |
| "loss": 4.1535, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.934640522875817, | |
| "grad_norm": 3.609330892562866, | |
| "learning_rate": 3.32244008714597e-06, | |
| "loss": 4.0796, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9357298474945533, | |
| "grad_norm": 3.1793229579925537, | |
| "learning_rate": 3.2679738562091506e-06, | |
| "loss": 4.3941, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.9368191721132898, | |
| "grad_norm": 3.5466599464416504, | |
| "learning_rate": 3.2135076252723315e-06, | |
| "loss": 4.3566, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9379084967320261, | |
| "grad_norm": 3.926398515701294, | |
| "learning_rate": 3.159041394335512e-06, | |
| "loss": 4.5488, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.9389978213507625, | |
| "grad_norm": 4.758654594421387, | |
| "learning_rate": 3.104575163398693e-06, | |
| "loss": 4.1554, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9400871459694989, | |
| "grad_norm": 3.9074339866638184, | |
| "learning_rate": 3.050108932461874e-06, | |
| "loss": 4.4455, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 3.795421600341797, | |
| "learning_rate": 2.9956427015250545e-06, | |
| "loss": 4.2981, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9422657952069716, | |
| "grad_norm": 4.088068008422852, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": 4.0884, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9433551198257081, | |
| "grad_norm": 6.322877883911133, | |
| "learning_rate": 2.886710239651416e-06, | |
| "loss": 4.5004, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9444444444444444, | |
| "grad_norm": 4.713603496551514, | |
| "learning_rate": 2.832244008714597e-06, | |
| "loss": 4.4102, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.9455337690631809, | |
| "grad_norm": 5.158910274505615, | |
| "learning_rate": 2.777777777777778e-06, | |
| "loss": 4.4982, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9466230936819172, | |
| "grad_norm": 5.767287254333496, | |
| "learning_rate": 2.723311546840959e-06, | |
| "loss": 4.0989, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.9477124183006536, | |
| "grad_norm": 4.222414970397949, | |
| "learning_rate": 2.66884531590414e-06, | |
| "loss": 4.4585, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.94880174291939, | |
| "grad_norm": 5.214550018310547, | |
| "learning_rate": 2.6143790849673204e-06, | |
| "loss": 4.5338, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.9498910675381264, | |
| "grad_norm": 4.079739093780518, | |
| "learning_rate": 2.5599128540305013e-06, | |
| "loss": 4.3592, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9509803921568627, | |
| "grad_norm": 4.767613887786865, | |
| "learning_rate": 2.505446623093682e-06, | |
| "loss": 4.6577, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.9520697167755992, | |
| "grad_norm": 5.1977434158325195, | |
| "learning_rate": 2.450980392156863e-06, | |
| "loss": 4.1755, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9531590413943355, | |
| "grad_norm": 4.130743980407715, | |
| "learning_rate": 2.3965141612200438e-06, | |
| "loss": 4.704, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.954248366013072, | |
| "grad_norm": 3.34452223777771, | |
| "learning_rate": 2.3420479302832243e-06, | |
| "loss": 4.6829, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.9553376906318083, | |
| "grad_norm": 6.322269439697266, | |
| "learning_rate": 2.2875816993464053e-06, | |
| "loss": 4.5667, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.9564270152505446, | |
| "grad_norm": 5.552467346191406, | |
| "learning_rate": 2.2331154684095862e-06, | |
| "loss": 4.4046, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9575163398692811, | |
| "grad_norm": 6.079358100891113, | |
| "learning_rate": 2.178649237472767e-06, | |
| "loss": 4.7431, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.9586056644880174, | |
| "grad_norm": 6.379845142364502, | |
| "learning_rate": 2.1241830065359477e-06, | |
| "loss": 4.2249, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9596949891067538, | |
| "grad_norm": 4.33439826965332, | |
| "learning_rate": 2.0697167755991287e-06, | |
| "loss": 4.3671, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.9607843137254902, | |
| "grad_norm": 6.321019649505615, | |
| "learning_rate": 2.0152505446623096e-06, | |
| "loss": 3.8003, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.9618736383442266, | |
| "grad_norm": 5.352238178253174, | |
| "learning_rate": 1.96078431372549e-06, | |
| "loss": 4.3568, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.9629629629629629, | |
| "grad_norm": 4.871586322784424, | |
| "learning_rate": 1.9063180827886711e-06, | |
| "loss": 4.8798, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.9640522875816994, | |
| "grad_norm": 4.800078868865967, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 4.27, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9651416122004357, | |
| "grad_norm": 3.8531675338745117, | |
| "learning_rate": 1.7973856209150326e-06, | |
| "loss": 4.364, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.9662309368191722, | |
| "grad_norm": 4.1356587409973145, | |
| "learning_rate": 1.7429193899782138e-06, | |
| "loss": 4.6178, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.9673202614379085, | |
| "grad_norm": 3.3925697803497314, | |
| "learning_rate": 1.6884531590413945e-06, | |
| "loss": 4.3998, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9684095860566448, | |
| "grad_norm": 5.225710868835449, | |
| "learning_rate": 1.6339869281045753e-06, | |
| "loss": 4.2083, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.9694989106753813, | |
| "grad_norm": 3.4096145629882812, | |
| "learning_rate": 1.579520697167756e-06, | |
| "loss": 4.5308, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9705882352941176, | |
| "grad_norm": 3.5370752811431885, | |
| "learning_rate": 1.525054466230937e-06, | |
| "loss": 4.2715, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.971677559912854, | |
| "grad_norm": 4.493494510650635, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": 4.1, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.9727668845315904, | |
| "grad_norm": 3.8683810234069824, | |
| "learning_rate": 1.4161220043572985e-06, | |
| "loss": 4.3108, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.9738562091503268, | |
| "grad_norm": 4.579468727111816, | |
| "learning_rate": 1.3616557734204794e-06, | |
| "loss": 4.1637, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.9749455337690632, | |
| "grad_norm": 4.743986129760742, | |
| "learning_rate": 1.3071895424836602e-06, | |
| "loss": 4.2258, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9760348583877996, | |
| "grad_norm": 4.752856731414795, | |
| "learning_rate": 1.252723311546841e-06, | |
| "loss": 5.0414, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.9771241830065359, | |
| "grad_norm": 4.322828769683838, | |
| "learning_rate": 1.1982570806100219e-06, | |
| "loss": 4.3155, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.9782135076252724, | |
| "grad_norm": 3.3457512855529785, | |
| "learning_rate": 1.1437908496732026e-06, | |
| "loss": 4.247, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.9793028322440087, | |
| "grad_norm": 3.9859514236450195, | |
| "learning_rate": 1.0893246187363836e-06, | |
| "loss": 4.4445, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.9803921568627451, | |
| "grad_norm": 3.8540663719177246, | |
| "learning_rate": 1.0348583877995643e-06, | |
| "loss": 4.3506, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9814814814814815, | |
| "grad_norm": 3.383423328399658, | |
| "learning_rate": 9.80392156862745e-07, | |
| "loss": 4.541, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.9825708061002179, | |
| "grad_norm": 4.26013708114624, | |
| "learning_rate": 9.259259259259259e-07, | |
| "loss": 4.2978, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.9836601307189542, | |
| "grad_norm": 4.551793098449707, | |
| "learning_rate": 8.714596949891069e-07, | |
| "loss": 4.2529, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.9847494553376906, | |
| "grad_norm": 4.896347999572754, | |
| "learning_rate": 8.169934640522876e-07, | |
| "loss": 4.1393, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.985838779956427, | |
| "grad_norm": 5.963104248046875, | |
| "learning_rate": 7.625272331154685e-07, | |
| "loss": 4.6752, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9869281045751634, | |
| "grad_norm": 4.119946002960205, | |
| "learning_rate": 7.080610021786492e-07, | |
| "loss": 4.234, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.9880174291938998, | |
| "grad_norm": 4.1172637939453125, | |
| "learning_rate": 6.535947712418301e-07, | |
| "loss": 4.3597, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.9891067538126361, | |
| "grad_norm": 3.4015111923217773, | |
| "learning_rate": 5.991285403050109e-07, | |
| "loss": 4.2615, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.9901960784313726, | |
| "grad_norm": 3.943028688430786, | |
| "learning_rate": 5.446623093681918e-07, | |
| "loss": 4.7452, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.9912854030501089, | |
| "grad_norm": 3.693601608276367, | |
| "learning_rate": 4.901960784313725e-07, | |
| "loss": 4.4842, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9923747276688453, | |
| "grad_norm": 5.16162633895874, | |
| "learning_rate": 4.3572984749455345e-07, | |
| "loss": 4.3073, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.9934640522875817, | |
| "grad_norm": 3.4217946529388428, | |
| "learning_rate": 3.8126361655773425e-07, | |
| "loss": 4.4098, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.9945533769063181, | |
| "grad_norm": 5.515738487243652, | |
| "learning_rate": 3.2679738562091505e-07, | |
| "loss": 4.2126, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.9956427015250545, | |
| "grad_norm": 3.7742040157318115, | |
| "learning_rate": 2.723311546840959e-07, | |
| "loss": 4.2901, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.9967320261437909, | |
| "grad_norm": 4.602190017700195, | |
| "learning_rate": 2.1786492374727672e-07, | |
| "loss": 4.4582, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.9978213507625272, | |
| "grad_norm": 4.3014750480651855, | |
| "learning_rate": 1.6339869281045752e-07, | |
| "loss": 4.3998, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.9989106753812637, | |
| "grad_norm": 4.754629611968994, | |
| "learning_rate": 1.0893246187363836e-07, | |
| "loss": 4.7298, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 7.283786773681641, | |
| "learning_rate": 5.446623093681918e-08, | |
| "loss": 4.6555, | |
| "step": 918 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 918, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1396751373828096.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |