code_mix_v2 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 500,
"global_step": 916,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.043668122270742356,
"grad_norm": 2.421576976776123,
"learning_rate": 1.0869565217391306e-06,
"loss": 0.5167,
"step": 10
},
{
"epoch": 0.08733624454148471,
"grad_norm": 2.160121440887451,
"learning_rate": 2.173913043478261e-06,
"loss": 0.5039,
"step": 20
},
{
"epoch": 0.13100436681222707,
"grad_norm": 0.915518045425415,
"learning_rate": 3.2608695652173914e-06,
"loss": 0.4632,
"step": 30
},
{
"epoch": 0.17467248908296942,
"grad_norm": 0.7904559969902039,
"learning_rate": 4.347826086956522e-06,
"loss": 0.3982,
"step": 40
},
{
"epoch": 0.2183406113537118,
"grad_norm": 0.6008340716362,
"learning_rate": 5.4347826086956525e-06,
"loss": 0.4161,
"step": 50
},
{
"epoch": 0.26200873362445415,
"grad_norm": 0.6038509011268616,
"learning_rate": 6.521739130434783e-06,
"loss": 0.438,
"step": 60
},
{
"epoch": 0.3056768558951965,
"grad_norm": 0.609183132648468,
"learning_rate": 7.608695652173914e-06,
"loss": 0.3848,
"step": 70
},
{
"epoch": 0.34934497816593885,
"grad_norm": 0.65712571144104,
"learning_rate": 8.695652173913044e-06,
"loss": 0.3993,
"step": 80
},
{
"epoch": 0.3930131004366812,
"grad_norm": 0.6194190979003906,
"learning_rate": 9.782608695652175e-06,
"loss": 0.3769,
"step": 90
},
{
"epoch": 0.4366812227074236,
"grad_norm": 0.4761613607406616,
"learning_rate": 9.99947842870608e-06,
"loss": 0.3808,
"step": 100
},
{
"epoch": 0.48034934497816595,
"grad_norm": 0.4921339750289917,
"learning_rate": 9.997359731816998e-06,
"loss": 0.4205,
"step": 110
},
{
"epoch": 0.5240174672489083,
"grad_norm": 0.48024195432662964,
"learning_rate": 9.99361200124597e-06,
"loss": 0.38,
"step": 120
},
{
"epoch": 0.5676855895196506,
"grad_norm": 0.6233803629875183,
"learning_rate": 9.988236458673974e-06,
"loss": 0.3953,
"step": 130
},
{
"epoch": 0.611353711790393,
"grad_norm": 0.5606607794761658,
"learning_rate": 9.981234856414306e-06,
"loss": 0.3865,
"step": 140
},
{
"epoch": 0.6550218340611353,
"grad_norm": 0.49620741605758667,
"learning_rate": 9.972609476841368e-06,
"loss": 0.3899,
"step": 150
},
{
"epoch": 0.6986899563318777,
"grad_norm": 0.5842658281326294,
"learning_rate": 9.962363131646649e-06,
"loss": 0.3792,
"step": 160
},
{
"epoch": 0.74235807860262,
"grad_norm": 0.5468127727508545,
"learning_rate": 9.950499160922184e-06,
"loss": 0.4015,
"step": 170
},
{
"epoch": 0.7860262008733624,
"grad_norm": 0.5464998483657837,
"learning_rate": 9.937021432071754e-06,
"loss": 0.3533,
"step": 180
},
{
"epoch": 0.8296943231441049,
"grad_norm": 0.5048817992210388,
"learning_rate": 9.921934338550187e-06,
"loss": 0.3961,
"step": 190
},
{
"epoch": 0.8733624454148472,
"grad_norm": 0.47697556018829346,
"learning_rate": 9.905242798431196e-06,
"loss": 0.3438,
"step": 200
},
{
"epoch": 0.9170305676855895,
"grad_norm": 0.5746617913246155,
"learning_rate": 9.886952252804177e-06,
"loss": 0.4006,
"step": 210
},
{
"epoch": 0.9606986899563319,
"grad_norm": 0.5081667304039001,
"learning_rate": 9.867068664000538e-06,
"loss": 0.3679,
"step": 220
},
{
"epoch": 1.0043668122270741,
"grad_norm": 0.4806345999240875,
"learning_rate": 9.845598513650104e-06,
"loss": 0.4113,
"step": 230
},
{
"epoch": 1.0480349344978166,
"grad_norm": 0.4791143238544464,
"learning_rate": 9.822548800568238e-06,
"loss": 0.341,
"step": 240
},
{
"epoch": 1.091703056768559,
"grad_norm": 0.5520183444023132,
"learning_rate": 9.797927038474383e-06,
"loss": 0.298,
"step": 250
},
{
"epoch": 1.1353711790393013,
"grad_norm": 0.486562579870224,
"learning_rate": 9.771741253542742e-06,
"loss": 0.2989,
"step": 260
},
{
"epoch": 1.1790393013100438,
"grad_norm": 0.5037546753883362,
"learning_rate": 9.743999981785914e-06,
"loss": 0.3058,
"step": 270
},
{
"epoch": 1.222707423580786,
"grad_norm": 0.5140413045883179,
"learning_rate": 9.714712266272339e-06,
"loss": 0.3164,
"step": 280
},
{
"epoch": 1.2663755458515285,
"grad_norm": 0.4978218972682953,
"learning_rate": 9.683887654178446e-06,
"loss": 0.296,
"step": 290
},
{
"epoch": 1.3100436681222707,
"grad_norm": 0.5410030484199524,
"learning_rate": 9.651536193676476e-06,
"loss": 0.2938,
"step": 300
},
{
"epoch": 1.3537117903930131,
"grad_norm": 0.5140953063964844,
"learning_rate": 9.617668430658991e-06,
"loss": 0.3249,
"step": 310
},
{
"epoch": 1.3973799126637554,
"grad_norm": 0.4528365433216095,
"learning_rate": 9.582295405301131e-06,
"loss": 0.3356,
"step": 320
},
{
"epoch": 1.4410480349344978,
"grad_norm": 0.49946603178977966,
"learning_rate": 9.545428648461756e-06,
"loss": 0.3037,
"step": 330
},
{
"epoch": 1.48471615720524,
"grad_norm": 0.48589998483657837,
"learning_rate": 9.50708017792463e-06,
"loss": 0.3116,
"step": 340
},
{
"epoch": 1.5283842794759825,
"grad_norm": 0.46080437302589417,
"learning_rate": 9.46726249448087e-06,
"loss": 0.296,
"step": 350
},
{
"epoch": 1.572052401746725,
"grad_norm": 0.4566941559314728,
"learning_rate": 9.425988577853959e-06,
"loss": 0.3079,
"step": 360
},
{
"epoch": 1.6157205240174672,
"grad_norm": 0.5628035068511963,
"learning_rate": 9.383271882468631e-06,
"loss": 0.2906,
"step": 370
},
{
"epoch": 1.6593886462882095,
"grad_norm": 0.4313275218009949,
"learning_rate": 9.339126333065008e-06,
"loss": 0.2879,
"step": 380
},
{
"epoch": 1.703056768558952,
"grad_norm": 0.4829094409942627,
"learning_rate": 9.293566320159432e-06,
"loss": 0.3609,
"step": 390
},
{
"epoch": 1.7467248908296944,
"grad_norm": 0.7929471135139465,
"learning_rate": 9.24660669535346e-06,
"loss": 0.3263,
"step": 400
},
{
"epoch": 1.7903930131004366,
"grad_norm": 0.4220748841762543,
"learning_rate": 9.198262766492554e-06,
"loss": 0.3092,
"step": 410
},
{
"epoch": 1.8340611353711789,
"grad_norm": 0.4901680648326874,
"learning_rate": 9.14855029267605e-06,
"loss": 0.3152,
"step": 420
},
{
"epoch": 1.8777292576419216,
"grad_norm": 0.47252029180526733,
"learning_rate": 9.097485479120027e-06,
"loss": 0.3223,
"step": 430
},
{
"epoch": 1.9213973799126638,
"grad_norm": 0.46700412034988403,
"learning_rate": 9.045084971874738e-06,
"loss": 0.3144,
"step": 440
},
{
"epoch": 1.965065502183406,
"grad_norm": 0.46521633863449097,
"learning_rate": 8.99136585239836e-06,
"loss": 0.3179,
"step": 450
},
{
"epoch": 2.0087336244541483,
"grad_norm": 0.48223376274108887,
"learning_rate": 8.9363456319888e-06,
"loss": 0.3021,
"step": 460
},
{
"epoch": 2.052401746724891,
"grad_norm": 0.446074903011322,
"learning_rate": 8.880042246075366e-06,
"loss": 0.2441,
"step": 470
},
{
"epoch": 2.096069868995633,
"grad_norm": 0.45803341269493103,
"learning_rate": 8.82247404837222e-06,
"loss": 0.2733,
"step": 480
},
{
"epoch": 2.1397379912663754,
"grad_norm": 0.429457426071167,
"learning_rate": 8.763659804895442e-06,
"loss": 0.2563,
"step": 490
},
{
"epoch": 2.183406113537118,
"grad_norm": 0.6878861784934998,
"learning_rate": 8.703618687845697e-06,
"loss": 0.2458,
"step": 500
},
{
"epoch": 2.2270742358078603,
"grad_norm": 0.4481293261051178,
"learning_rate": 8.64237026935852e-06,
"loss": 0.2261,
"step": 510
},
{
"epoch": 2.2707423580786026,
"grad_norm": 0.45138078927993774,
"learning_rate": 8.579934515124202e-06,
"loss": 0.2408,
"step": 520
},
{
"epoch": 2.314410480349345,
"grad_norm": 0.5058510303497314,
"learning_rate": 8.5163317778794e-06,
"loss": 0.2386,
"step": 530
},
{
"epoch": 2.3580786026200875,
"grad_norm": 0.5651599168777466,
"learning_rate": 8.45158279077258e-06,
"loss": 0.2035,
"step": 540
},
{
"epoch": 2.4017467248908297,
"grad_norm": 0.4735155999660492,
"learning_rate": 8.385708660605431e-06,
"loss": 0.2106,
"step": 550
},
{
"epoch": 2.445414847161572,
"grad_norm": 0.44301047921180725,
"learning_rate": 8.318730860952523e-06,
"loss": 0.2164,
"step": 560
},
{
"epoch": 2.489082969432314,
"grad_norm": 0.38600876927375793,
"learning_rate": 8.250671225161345e-06,
"loss": 0.2275,
"step": 570
},
{
"epoch": 2.532751091703057,
"grad_norm": 0.49234113097190857,
"learning_rate": 8.181551939235115e-06,
"loss": 0.2254,
"step": 580
},
{
"epoch": 2.576419213973799,
"grad_norm": 0.4783915877342224,
"learning_rate": 8.111395534600604e-06,
"loss": 0.2253,
"step": 590
},
{
"epoch": 2.6200873362445414,
"grad_norm": 0.4308622479438782,
"learning_rate": 8.040224880763368e-06,
"loss": 0.2202,
"step": 600
},
{
"epoch": 2.6637554585152836,
"grad_norm": 0.4942546784877777,
"learning_rate": 7.968063177852775e-06,
"loss": 0.2512,
"step": 610
},
{
"epoch": 2.7074235807860263,
"grad_norm": 0.4427158832550049,
"learning_rate": 7.894933949059245e-06,
"loss": 0.237,
"step": 620
},
{
"epoch": 2.7510917030567685,
"grad_norm": 0.46294692158699036,
"learning_rate": 7.820861032966199e-06,
"loss": 0.226,
"step": 630
},
{
"epoch": 2.7947598253275108,
"grad_norm": 0.42187586426734924,
"learning_rate": 7.745868575779176e-06,
"loss": 0.2362,
"step": 640
},
{
"epoch": 2.8384279475982535,
"grad_norm": 0.4270602762699127,
"learning_rate": 7.669981023454682e-06,
"loss": 0.2159,
"step": 650
},
{
"epoch": 2.8820960698689957,
"grad_norm": 0.49507179856300354,
"learning_rate": 7.593223113731323e-06,
"loss": 0.2566,
"step": 660
},
{
"epoch": 2.925764192139738,
"grad_norm": 0.4554119408130646,
"learning_rate": 7.515619868065833e-06,
"loss": 0.2648,
"step": 670
},
{
"epoch": 2.96943231441048,
"grad_norm": 0.42243942618370056,
"learning_rate": 7.437196583476597e-06,
"loss": 0.2426,
"step": 680
},
{
"epoch": 3.013100436681223,
"grad_norm": 0.4137606620788574,
"learning_rate": 7.357978824297362e-06,
"loss": 0.225,
"step": 690
},
{
"epoch": 3.056768558951965,
"grad_norm": 0.433912992477417,
"learning_rate": 7.2779924138438065e-06,
"loss": 0.1688,
"step": 700
},
{
"epoch": 3.1004366812227073,
"grad_norm": 0.4669990539550781,
"learning_rate": 7.197263425995682e-06,
"loss": 0.1763,
"step": 710
},
{
"epoch": 3.14410480349345,
"grad_norm": 0.4027640223503113,
"learning_rate": 7.115818176697285e-06,
"loss": 0.1805,
"step": 720
},
{
"epoch": 3.1877729257641922,
"grad_norm": 0.577460765838623,
"learning_rate": 7.033683215379002e-06,
"loss": 0.1709,
"step": 730
},
{
"epoch": 3.2314410480349345,
"grad_norm": 0.43528082966804504,
"learning_rate": 6.950885316302773e-06,
"loss": 0.1558,
"step": 740
},
{
"epoch": 3.2751091703056767,
"grad_norm": 0.5665518641471863,
"learning_rate": 6.867451469834237e-06,
"loss": 0.1935,
"step": 750
},
{
"epoch": 3.3187772925764194,
"grad_norm": 0.3636087477207184,
"learning_rate": 6.7834088736444435e-06,
"loss": 0.1779,
"step": 760
},
{
"epoch": 3.3624454148471616,
"grad_norm": 0.4824029505252838,
"learning_rate": 6.698784923843993e-06,
"loss": 0.1748,
"step": 770
},
{
"epoch": 3.406113537117904,
"grad_norm": 0.44956591725349426,
"learning_rate": 6.613607206052476e-06,
"loss": 0.1637,
"step": 780
},
{
"epoch": 3.449781659388646,
"grad_norm": 0.4280209243297577,
"learning_rate": 6.527903486406147e-06,
"loss": 0.1618,
"step": 790
},
{
"epoch": 3.493449781659389,
"grad_norm": 0.5125846862792969,
"learning_rate": 6.441701702506755e-06,
"loss": 0.2097,
"step": 800
},
{
"epoch": 3.537117903930131,
"grad_norm": 0.4643654227256775,
"learning_rate": 6.355029954314468e-06,
"loss": 0.1765,
"step": 810
},
{
"epoch": 3.5807860262008733,
"grad_norm": 0.3958646357059479,
"learning_rate": 6.267916494987883e-06,
"loss": 0.1716,
"step": 820
},
{
"epoch": 3.6244541484716155,
"grad_norm": 0.3993144929409027,
"learning_rate": 6.180389721674101e-06,
"loss": 0.1763,
"step": 830
},
{
"epoch": 3.668122270742358,
"grad_norm": 0.4378385841846466,
"learning_rate": 6.092478166251839e-06,
"loss": 0.1677,
"step": 840
},
{
"epoch": 3.7117903930131004,
"grad_norm": 0.42186248302459717,
"learning_rate": 6.00421048603066e-06,
"loss": 0.1874,
"step": 850
},
{
"epoch": 3.7554585152838427,
"grad_norm": 0.4455322027206421,
"learning_rate": 5.915615454409281e-06,
"loss": 0.1641,
"step": 860
},
{
"epoch": 3.7991266375545854,
"grad_norm": 0.5567952990531921,
"learning_rate": 5.8267219514960625e-06,
"loss": 0.1714,
"step": 870
},
{
"epoch": 3.8427947598253276,
"grad_norm": 0.463058739900589,
"learning_rate": 5.737558954694698e-06,
"loss": 0.1799,
"step": 880
},
{
"epoch": 3.88646288209607,
"grad_norm": 0.4127854108810425,
"learning_rate": 5.648155529258195e-06,
"loss": 0.1721,
"step": 890
},
{
"epoch": 3.930131004366812,
"grad_norm": 0.5975726246833801,
"learning_rate": 5.558540818814213e-06,
"loss": 0.1792,
"step": 900
},
{
"epoch": 3.9737991266375547,
"grad_norm": 0.44827261567115784,
"learning_rate": 5.468744035864867e-06,
"loss": 0.1675,
"step": 910
}
],
"logging_steps": 10,
"max_steps": 1832,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 143371959500800.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
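
For reference, the loss and learning-rate values recorded in log_history above can be inspected with a short script; the learning rate rises to roughly 1e-05 over the first ~90 steps and then decays, which is consistent with a warmup-plus-decay schedule, though the exact scheduler settings are not stored in this file. The sketch below is a minimal example, assuming the file has been downloaded locally as trainer_state.json and that matplotlib is installed; both the path and the plotting library are assumptions, not part of the checkpoint.

import json
import matplotlib.pyplot as plt

# Load the exported trainer state (local path is an assumption).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only training log entries that carry a loss value.
logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logs]
losses = [e["loss"] for e in logs]
lrs = [e["learning_rate"] for e in logs]

# Plot training loss and learning rate against global step.
fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(steps, losses)
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_xlabel("step")
ax_lr.set_ylabel("learning rate")
plt.tight_layout()
plt.show()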