{ "best_metric": 0.21225065, "best_model_checkpoint": "/workspace/output/molmo-7b-d/v1-20250103-233013/checkpoint-414", "epoch": 3.0, "eval_steps": 200, "global_step": 414, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.93268561, "epoch": 0.007272727272727273, "grad_norm": 4.360905168604235, "learning_rate": 0.0, "loss": 0.15919656, "memory(GiB)": 131.1, "step": 1, "train_speed(iter/s)": 0.051814 }, { "acc": 0.93592656, "epoch": 0.03636363636363636, "grad_norm": 5.722024176509264, "learning_rate": 4.829949384917788e-06, "loss": 0.16946605, "memory(GiB)": 131.7, "step": 5, "train_speed(iter/s)": 0.164947 }, { "acc": 0.92745094, "epoch": 0.07272727272727272, "grad_norm": 5.449760109713864, "learning_rate": 6.910095361682884e-06, "loss": 0.19423571, "memory(GiB)": 131.7, "step": 10, "train_speed(iter/s)": 0.228063 }, { "acc": 0.9191308, "epoch": 0.10909090909090909, "grad_norm": 5.591882854054257, "learning_rate": 8.126902754116446e-06, "loss": 0.2132081, "memory(GiB)": 131.7, "step": 15, "train_speed(iter/s)": 0.262462 }, { "acc": 0.91665707, "epoch": 0.14545454545454545, "grad_norm": 8.643333147328232, "learning_rate": 8.990241338447979e-06, "loss": 0.2489913, "memory(GiB)": 132.85, "step": 20, "train_speed(iter/s)": 0.283686 }, { "acc": 0.92767944, "epoch": 0.18181818181818182, "grad_norm": 5.521300358752013, "learning_rate": 9.659898769835576e-06, "loss": 0.20275159, "memory(GiB)": 100.16, "step": 25, "train_speed(iter/s)": 0.297772 }, { "acc": 0.91237392, "epoch": 0.21818181818181817, "grad_norm": 4.691287833576054, "learning_rate": 9.999635040777627e-06, "loss": 0.24152677, "memory(GiB)": 100.16, "step": 30, "train_speed(iter/s)": 0.306552 }, { "acc": 0.89811802, "epoch": 0.2545454545454545, "grad_norm": 2.3267326177072176, "learning_rate": 9.995529861428146e-06, "loss": 0.2682821, "memory(GiB)": 100.16, "step": 35, "train_speed(iter/s)": 0.312152 }, { "acc": 0.91184635, "epoch": 0.2909090909090909, "grad_norm": 3.241573273096398, "learning_rate": 9.986867061882612e-06, "loss": 0.23578806, "memory(GiB)": 100.16, "step": 40, "train_speed(iter/s)": 0.314978 }, { "acc": 0.9012291, "epoch": 0.32727272727272727, "grad_norm": 2.3552152207973713, "learning_rate": 9.973654546348053e-06, "loss": 0.25761139, "memory(GiB)": 100.16, "step": 45, "train_speed(iter/s)": 0.319208 }, { "acc": 0.9016325, "epoch": 0.36363636363636365, "grad_norm": 3.1153181076119703, "learning_rate": 9.955904370333514e-06, "loss": 0.24715631, "memory(GiB)": 100.16, "step": 50, "train_speed(iter/s)": 0.32296 }, { "acc": 0.89749699, "epoch": 0.4, "grad_norm": 2.4498466601081943, "learning_rate": 9.933632729650212e-06, "loss": 0.25689688, "memory(GiB)": 100.16, "step": 55, "train_speed(iter/s)": 0.325846 }, { "acc": 0.88724833, "epoch": 0.43636363636363634, "grad_norm": 4.364723865759911, "learning_rate": 9.906859945633999e-06, "loss": 0.28743353, "memory(GiB)": 100.16, "step": 60, "train_speed(iter/s)": 0.328247 }, { "acc": 0.90578156, "epoch": 0.4727272727272727, "grad_norm": 3.243778418144708, "learning_rate": 9.875610446603524e-06, "loss": 0.26308877, "memory(GiB)": 100.16, "step": 65, "train_speed(iter/s)": 0.330485 }, { "acc": 0.89676228, "epoch": 0.509090909090909, "grad_norm": 3.4165598224968274, "learning_rate": 9.83991274557109e-06, "loss": 0.26372042, "memory(GiB)": 127.96, "step": 70, "train_speed(iter/s)": 0.332413 }, { "acc": 0.9054903, "epoch": 0.5454545454545454, "grad_norm": 3.814636181453338, "learning_rate": 9.7997994142265e-06, "loss": 0.25466361, "memory(GiB)": 127.96, "step": 75, "train_speed(iter/s)": 0.334379 }, { "acc": 0.90086946, "epoch": 0.5818181818181818, "grad_norm": 3.9972259822599243, "learning_rate": 9.755307053217622e-06, "loss": 0.27588401, "memory(GiB)": 127.96, "step": 80, "train_speed(iter/s)": 0.336004 }, { "acc": 0.89949837, "epoch": 0.6181818181818182, "grad_norm": 5.998240972031008, "learning_rate": 9.706476258754834e-06, "loss": 0.25472341, "memory(GiB)": 127.96, "step": 85, "train_speed(iter/s)": 0.337291 }, { "acc": 0.88558121, "epoch": 0.6545454545454545, "grad_norm": 2.7186082929792574, "learning_rate": 9.653351585569786e-06, "loss": 0.28254557, "memory(GiB)": 127.96, "step": 90, "train_speed(iter/s)": 0.337576 }, { "acc": 0.90562687, "epoch": 0.6909090909090909, "grad_norm": 1.6880555029124777, "learning_rate": 9.595981506262264e-06, "loss": 0.25460241, "memory(GiB)": 127.96, "step": 95, "train_speed(iter/s)": 0.338319 }, { "acc": 0.90238457, "epoch": 0.7272727272727273, "grad_norm": 1.824873702466673, "learning_rate": 9.534418367072303e-06, "loss": 0.25135682, "memory(GiB)": 127.96, "step": 100, "train_speed(iter/s)": 0.33935 }, { "acc": 0.90719824, "epoch": 0.7636363636363637, "grad_norm": 3.0523518026276926, "learning_rate": 9.468718340117846e-06, "loss": 0.23181794, "memory(GiB)": 127.96, "step": 105, "train_speed(iter/s)": 0.340475 }, { "acc": 0.89296656, "epoch": 0.8, "grad_norm": 3.6744833597367514, "learning_rate": 9.398941372141562e-06, "loss": 0.27924564, "memory(GiB)": 127.96, "step": 110, "train_speed(iter/s)": 0.341456 }, { "acc": 0.89754677, "epoch": 0.8363636363636363, "grad_norm": 3.250222318126925, "learning_rate": 9.325151129813582e-06, "loss": 0.26513102, "memory(GiB)": 127.96, "step": 115, "train_speed(iter/s)": 0.342153 }, { "acc": 0.88903837, "epoch": 0.8727272727272727, "grad_norm": 2.376728799007849, "learning_rate": 9.247414941640045e-06, "loss": 0.30169072, "memory(GiB)": 133.76, "step": 120, "train_speed(iter/s)": 0.342998 }, { "acc": 0.89329395, "epoch": 0.9090909090909091, "grad_norm": 4.889478322316845, "learning_rate": 9.165803736530492e-06, "loss": 0.28302565, "memory(GiB)": 100.58, "step": 125, "train_speed(iter/s)": 0.343779 }, { "acc": 0.89977417, "epoch": 0.9454545454545454, "grad_norm": 2.0057917841024633, "learning_rate": 9.080391979080116e-06, "loss": 0.2668047, "memory(GiB)": 100.58, "step": 130, "train_speed(iter/s)": 0.344351 }, { "acc": 0.90148487, "epoch": 0.9818181818181818, "grad_norm": 2.470715179920895, "learning_rate": 8.991257601625973e-06, "loss": 0.25751991, "memory(GiB)": 100.58, "step": 135, "train_speed(iter/s)": 0.345171 }, { "epoch": 1.0, "eval_acc": 0.9078246620237608, "eval_loss": 0.2361508309841156, "eval_runtime": 10.278, "eval_samples_per_second": 11.286, "eval_steps_per_second": 1.459, "step": 138 }, { "acc": 0.8134038, "epoch": 1.0145454545454546, "grad_norm": 1.9385369249323439, "learning_rate": 8.917324354080927e-06, "loss": 0.254459, "memory(GiB)": 100.58, "step": 140, "train_speed(iter/s)": 0.309598 }, { "acc": 0.90728855, "epoch": 1.050909090909091, "grad_norm": 76.54794008048425, "learning_rate": 8.82169644486897e-06, "loss": 0.23623853, "memory(GiB)": 100.58, "step": 145, "train_speed(iter/s)": 0.311044 }, { "acc": 0.91997566, "epoch": 1.0872727272727274, "grad_norm": 1.727673298537959, "learning_rate": 8.722581957483633e-06, "loss": 0.21817675, "memory(GiB)": 100.58, "step": 150, "train_speed(iter/s)": 0.31275 }, { "acc": 0.91184559, "epoch": 1.1236363636363635, "grad_norm": 2.4370845690665974, "learning_rate": 8.620071327057833e-06, "loss": 0.22411692, "memory(GiB)": 100.58, "step": 155, "train_speed(iter/s)": 0.314364 }, { "acc": 0.91105995, "epoch": 1.16, "grad_norm": 4.474578962221848, "learning_rate": 8.514258087470745e-06, "loss": 0.22455444, "memory(GiB)": 100.58, "step": 160, "train_speed(iter/s)": 0.315941 }, { "acc": 0.92596989, "epoch": 1.1963636363636363, "grad_norm": 2.27714865436083, "learning_rate": 8.405238786004592e-06, "loss": 0.19618599, "memory(GiB)": 100.58, "step": 165, "train_speed(iter/s)": 0.317423 }, { "acc": 0.91807003, "epoch": 1.2327272727272727, "grad_norm": 3.476526282944283, "learning_rate": 8.293112895251915e-06, "loss": 0.21812358, "memory(GiB)": 100.58, "step": 170, "train_speed(iter/s)": 0.318837 }, { "acc": 0.91757879, "epoch": 1.269090909090909, "grad_norm": 2.812345046742586, "learning_rate": 8.177982722353686e-06, "loss": 0.20932765, "memory(GiB)": 100.58, "step": 175, "train_speed(iter/s)": 0.319897 }, { "acc": 0.9130724, "epoch": 1.3054545454545454, "grad_norm": 1.909403498812979, "learning_rate": 8.059953315651102e-06, "loss": 0.22100675, "memory(GiB)": 100.58, "step": 180, "train_speed(iter/s)": 0.320821 }, { "acc": 0.91083689, "epoch": 1.3418181818181818, "grad_norm": 3.7534483781265853, "learning_rate": 7.93913236883622e-06, "loss": 0.22075479, "memory(GiB)": 100.58, "step": 185, "train_speed(iter/s)": 0.321724 }, { "acc": 0.90749474, "epoch": 1.3781818181818182, "grad_norm": 3.0657460772043805, "learning_rate": 7.815630122688893e-06, "loss": 0.22630196, "memory(GiB)": 100.58, "step": 190, "train_speed(iter/s)": 0.3226 }, { "acc": 0.92584915, "epoch": 1.4145454545454546, "grad_norm": 5.821099128946982, "learning_rate": 7.689559264489661e-06, "loss": 0.21087196, "memory(GiB)": 100.58, "step": 195, "train_speed(iter/s)": 0.32333 }, { "acc": 0.90973835, "epoch": 1.450909090909091, "grad_norm": 1.830285233435649, "learning_rate": 7.5610348252003814e-06, "loss": 0.24081864, "memory(GiB)": 100.58, "step": 200, "train_speed(iter/s)": 0.323755 }, { "acc": 0.91908627, "epoch": 1.4872727272727273, "grad_norm": 3.46434543645635, "learning_rate": 7.43017407450641e-06, "loss": 0.21430855, "memory(GiB)": 100.58, "step": 205, "train_speed(iter/s)": 0.324304 }, { "acc": 0.90855217, "epoch": 1.5236363636363637, "grad_norm": 1.6445934060533671, "learning_rate": 7.2970964138161006e-06, "loss": 0.2204694, "memory(GiB)": 100.58, "step": 210, "train_speed(iter/s)": 0.325137 }, { "acc": 0.9202652, "epoch": 1.56, "grad_norm": 2.685739587728944, "learning_rate": 7.161923267315262e-06, "loss": 0.20784543, "memory(GiB)": 100.58, "step": 215, "train_speed(iter/s)": 0.325877 }, { "acc": 0.92430801, "epoch": 1.5963636363636362, "grad_norm": 3.4665236755524202, "learning_rate": 7.0247779711759566e-06, "loss": 0.2091445, "memory(GiB)": 100.58, "step": 220, "train_speed(iter/s)": 0.326598 }, { "acc": 0.91858587, "epoch": 1.6327272727272728, "grad_norm": 3.0400419237318674, "learning_rate": 6.885785661020759e-06, "loss": 0.22234173, "memory(GiB)": 100.58, "step": 225, "train_speed(iter/s)": 0.32754 }, { "acc": 0.91896229, "epoch": 1.669090909090909, "grad_norm": 2.50023791606214, "learning_rate": 6.7450731577451255e-06, "loss": 0.20558548, "memory(GiB)": 100.58, "step": 230, "train_speed(iter/s)": 0.328407 }, { "acc": 0.92307997, "epoch": 1.7054545454545456, "grad_norm": 2.789509587118081, "learning_rate": 6.602768851802077e-06, "loss": 0.21382501, "memory(GiB)": 100.58, "step": 235, "train_speed(iter/s)": 0.329247 }, { "acc": 0.91400127, "epoch": 1.7418181818181817, "grad_norm": 2.3889266426439173, "learning_rate": 6.45900258605477e-06, "loss": 0.21889751, "memory(GiB)": 100.58, "step": 240, "train_speed(iter/s)": 0.330086 }, { "acc": 0.90683708, "epoch": 1.7781818181818183, "grad_norm": 3.3107240552086465, "learning_rate": 6.313905537303837e-06, "loss": 0.21690502, "memory(GiB)": 100.58, "step": 245, "train_speed(iter/s)": 0.330898 }, { "acc": 0.91603336, "epoch": 1.8145454545454545, "grad_norm": 2.8852486239120547, "learning_rate": 6.167610096597601e-06, "loss": 0.2154119, "memory(GiB)": 100.58, "step": 250, "train_speed(iter/s)": 0.331673 }, { "acc": 0.91818409, "epoch": 1.850909090909091, "grad_norm": 2.0440810660323585, "learning_rate": 6.020249748434384e-06, "loss": 0.21951377, "memory(GiB)": 100.58, "step": 255, "train_speed(iter/s)": 0.332356 }, { "acc": 0.90970173, "epoch": 1.8872727272727272, "grad_norm": 3.8117037313040574, "learning_rate": 5.871958948967106e-06, "loss": 0.23594971, "memory(GiB)": 100.58, "step": 260, "train_speed(iter/s)": 0.33293 }, { "acc": 0.92123165, "epoch": 1.9236363636363636, "grad_norm": 3.4855685769436375, "learning_rate": 5.722873003321322e-06, "loss": 0.21117101, "memory(GiB)": 100.58, "step": 265, "train_speed(iter/s)": 0.333662 }, { "acc": 0.91777382, "epoch": 1.96, "grad_norm": 2.497000906964384, "learning_rate": 5.573127942138622e-06, "loss": 0.21624155, "memory(GiB)": 100.58, "step": 270, "train_speed(iter/s)": 0.334225 }, { "acc": 0.9166666, "epoch": 1.9963636363636363, "grad_norm": 4.782654736901845, "learning_rate": 5.422860397458064e-06, "loss": 0.21392875, "memory(GiB)": 100.58, "step": 275, "train_speed(iter/s)": 0.334671 }, { "epoch": 2.0, "eval_acc": 0.9098730028676771, "eval_loss": 0.2191523164510727, "eval_runtime": 10.1618, "eval_samples_per_second": 11.415, "eval_steps_per_second": 1.476, "step": 276 }, { "acc": 0.84443541, "epoch": 2.0290909090909093, "grad_norm": 3.015403395241152, "learning_rate": 5.27220747804885e-06, "loss": 0.17099829, "memory(GiB)": 100.58, "step": 280, "train_speed(iter/s)": 0.317633 }, { "acc": 0.93253222, "epoch": 2.0654545454545454, "grad_norm": 2.167435558475328, "learning_rate": 5.121306644308045e-06, "loss": 0.18818057, "memory(GiB)": 100.58, "step": 285, "train_speed(iter/s)": 0.3185 }, { "acc": 0.94647446, "epoch": 2.101818181818182, "grad_norm": 2.1487311628542898, "learning_rate": 4.9702955828374385e-06, "loss": 0.15134431, "memory(GiB)": 100.58, "step": 290, "train_speed(iter/s)": 0.319277 }, { "acc": 0.93036728, "epoch": 2.138181818181818, "grad_norm": 4.174051904681519, "learning_rate": 4.8193120808140185e-06, "loss": 0.16832316, "memory(GiB)": 100.58, "step": 295, "train_speed(iter/s)": 0.320077 }, { "acc": 0.93621769, "epoch": 2.174545454545455, "grad_norm": 2.3866390406657896, "learning_rate": 4.668493900268684e-06, "loss": 0.16947901, "memory(GiB)": 100.58, "step": 300, "train_speed(iter/s)": 0.320854 }, { "acc": 0.93184824, "epoch": 2.210909090909091, "grad_norm": 2.7745369730901595, "learning_rate": 4.517978652387882e-06, "loss": 0.16975009, "memory(GiB)": 100.58, "step": 305, "train_speed(iter/s)": 0.321626 }, { "acc": 0.93711929, "epoch": 2.247272727272727, "grad_norm": 4.606104787695004, "learning_rate": 4.367903671952906e-06, "loss": 0.16885712, "memory(GiB)": 100.58, "step": 310, "train_speed(iter/s)": 0.322203 }, { "acc": 0.93099174, "epoch": 2.2836363636363637, "grad_norm": 8.944877147631175, "learning_rate": 4.218405892031366e-06, "loss": 0.17090337, "memory(GiB)": 100.58, "step": 315, "train_speed(iter/s)": 0.322833 }, { "acc": 0.93137035, "epoch": 2.32, "grad_norm": 4.336121777570645, "learning_rate": 4.069621719035229e-06, "loss": 0.1658249, "memory(GiB)": 100.58, "step": 320, "train_speed(iter/s)": 0.323508 }, { "acc": 0.9393259, "epoch": 2.3563636363636364, "grad_norm": 6.921537975970479, "learning_rate": 3.921686908259354e-06, "loss": 0.15576041, "memory(GiB)": 100.58, "step": 325, "train_speed(iter/s)": 0.324182 }, { "acc": 0.93962708, "epoch": 2.3927272727272726, "grad_norm": 3.5886891547630877, "learning_rate": 3.7747364400141726e-06, "loss": 0.16867373, "memory(GiB)": 100.58, "step": 330, "train_speed(iter/s)": 0.324849 }, { "acc": 0.93609505, "epoch": 2.429090909090909, "grad_norm": 2.686999433312404, "learning_rate": 3.6289043964654526e-06, "loss": 0.15810946, "memory(GiB)": 100.58, "step": 335, "train_speed(iter/s)": 0.325493 }, { "acc": 0.92649899, "epoch": 2.4654545454545453, "grad_norm": 2.591872854237207, "learning_rate": 3.484323839293575e-06, "loss": 0.17918372, "memory(GiB)": 100.58, "step": 340, "train_speed(iter/s)": 0.326123 }, { "acc": 0.93626881, "epoch": 2.501818181818182, "grad_norm": 2.5738296672570233, "learning_rate": 3.341126688283922e-06, "loss": 0.16855428, "memory(GiB)": 100.58, "step": 345, "train_speed(iter/s)": 0.326743 }, { "acc": 0.93825417, "epoch": 2.538181818181818, "grad_norm": 2.7529925608546466, "learning_rate": 3.19944360095919e-06, "loss": 0.16165339, "memory(GiB)": 100.58, "step": 350, "train_speed(iter/s)": 0.327363 }, { "acc": 0.94702225, "epoch": 2.5745454545454547, "grad_norm": 2.9545927202945315, "learning_rate": 3.059403853363393e-06, "loss": 0.14523516, "memory(GiB)": 100.58, "step": 355, "train_speed(iter/s)": 0.327926 }, { "acc": 0.94346981, "epoch": 2.610909090909091, "grad_norm": 4.047109124196383, "learning_rate": 2.9211352221063987e-06, "loss": 0.14715908, "memory(GiB)": 100.58, "step": 360, "train_speed(iter/s)": 0.328285 }, { "acc": 0.94318542, "epoch": 2.6472727272727274, "grad_norm": 2.3923230638690143, "learning_rate": 2.7847638677765936e-06, "loss": 0.1494684, "memory(GiB)": 100.58, "step": 365, "train_speed(iter/s)": 0.328722 }, { "acc": 0.95623245, "epoch": 2.6836363636363636, "grad_norm": 2.457260493406828, "learning_rate": 2.650414219828032e-06, "loss": 0.11759402, "memory(GiB)": 100.58, "step": 370, "train_speed(iter/s)": 0.329264 }, { "acc": 0.94435921, "epoch": 2.7199999999999998, "grad_norm": 1.5322367904545142, "learning_rate": 2.5182088630471517e-06, "loss": 0.13577256, "memory(GiB)": 100.58, "step": 375, "train_speed(iter/s)": 0.329788 }, { "acc": 0.94585953, "epoch": 2.7563636363636363, "grad_norm": 2.8650025435958666, "learning_rate": 2.388268425702614e-06, "loss": 0.14076474, "memory(GiB)": 100.58, "step": 380, "train_speed(iter/s)": 0.330302 }, { "acc": 0.9413455, "epoch": 2.792727272727273, "grad_norm": 4.510750432829035, "learning_rate": 2.2607114694803263e-06, "loss": 0.1642381, "memory(GiB)": 100.58, "step": 385, "train_speed(iter/s)": 0.330731 }, { "acc": 0.93006382, "epoch": 2.829090909090909, "grad_norm": 2.908591189518448, "learning_rate": 2.1356543813040863e-06, "loss": 0.17094066, "memory(GiB)": 100.58, "step": 390, "train_speed(iter/s)": 0.331119 }, { "acc": 0.94227448, "epoch": 2.8654545454545453, "grad_norm": 2.331626905910975, "learning_rate": 2.0132112671405244e-06, "loss": 0.14904225, "memory(GiB)": 100.58, "step": 395, "train_speed(iter/s)": 0.331532 }, { "acc": 0.93090382, "epoch": 2.901818181818182, "grad_norm": 4.223665768837086, "learning_rate": 1.8934938478853108e-06, "loss": 0.17768097, "memory(GiB)": 100.58, "step": 400, "train_speed(iter/s)": 0.331963 }, { "acc": 0.93722563, "epoch": 2.9381818181818184, "grad_norm": 2.7247775486261734, "learning_rate": 1.7766113574255145e-06, "loss": 0.15059752, "memory(GiB)": 100.58, "step": 405, "train_speed(iter/s)": 0.332266 }, { "acc": 0.94374504, "epoch": 2.9745454545454546, "grad_norm": 2.9951618135706055, "learning_rate": 1.6626704429712411e-06, "loss": 0.14953468, "memory(GiB)": 100.58, "step": 410, "train_speed(iter/s)": 0.332599 }, { "epoch": 3.0, "eval_acc": 0.9192953707496927, "eval_loss": 0.21225064992904663, "eval_runtime": 9.5239, "eval_samples_per_second": 12.18, "eval_steps_per_second": 1.575, "step": 414 } ], "logging_steps": 5, "max_steps": 548, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.931788793840435e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }