diff --git a/args.json b/args.json index a11c2e6fef7bab463eed3f94e6ab41e497644c13..bb0dcd2d9a1eda6f1c1fdb9ef0cff2a69a76b533 100644 --- a/args.json +++ b/args.json @@ -25,8 +25,7 @@ "use_chat_template": true, "template_backend": "swift", "dataset": [ - "/mnt/petrelfs/liuhongbo/movie_bench/shotdeck_data/sft/img_train_v5_without_reasoning.json", - "/mnt/petrelfs/liuhongbo/movie_bench/process_video/vid_bench/train_v3_rl_s2_remove_duplicate_refine_2sft.json" + "/mnt/petrelfs/liuhongbo/shotdeck/ShotQA/sft_merged.json" ], "val_dataset": [], "split_dataset_ratio": 0.0, @@ -85,22 +84,22 @@ "custom_register_path": [], "ignore_args_error": false, "use_swift_lora": false, - "output_dir": "/mnt/petrelfs/liuhongbo/ms-swift/output/main/sft_7B_without_reasoning_wvideo/v1-20250623-155054", + "output_dir": "/mnt/petrelfs/liuhongbo/ms-swift/output/sft_7B_v1.1/v3-20250901-131037", "overwrite_output_dir": false, "do_train": false, "do_eval": false, "do_predict": false, "eval_strategy": "steps", "prediction_loss_only": false, - "per_device_train_batch_size": 1, - "per_device_eval_batch_size": 1, + "per_device_train_batch_size": 16, + "per_device_eval_batch_size": 16, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, - "gradient_accumulation_steps": 16, + "gradient_accumulation_steps": 2, "eval_accumulation_steps": null, "eval_delay": 0, "torch_empty_cache_steps": null, - "learning_rate": 5e-06, + "learning_rate": 1e-06, "weight_decay": 0.1, "adam_beta1": 0.9, "adam_beta2": 0.95, @@ -115,13 +114,13 @@ "log_level": "passive", "log_level_replica": "warning", "log_on_each_node": true, - "logging_dir": "/mnt/petrelfs/liuhongbo/ms-swift/output/main/sft_7B_without_reasoning_wvideo/v1-20250623-155054/runs", + "logging_dir": "/mnt/petrelfs/liuhongbo/ms-swift/output/sft_7B_v1.1/v3-20250901-131037/runs", "logging_strategy": "steps", "logging_first_step": true, "logging_steps": 5, "logging_nan_inf_filter": true, "save_strategy": "steps", - "save_steps": 100.0, + "save_steps": 1000.0, "save_total_limit": 1, "save_safetensors": true, "save_on_each_node": false, @@ -145,7 +144,7 @@ "tpu_metrics_debug": false, "debug": null, "dataloader_drop_last": false, - "eval_steps": 100.0, + "eval_steps": 1000.0, "dataloader_num_workers": 4, "dataloader_prefetch_factor": null, "past_index": -1, @@ -233,7 +232,6 @@ "include_for_metrics": [], "eval_do_concat_batches": true, "fp16_backend": "auto", - "evaluation_strategy": "steps", "push_to_hub_model_id": null, "push_to_hub_organization": null, "push_to_hub_token": null, @@ -246,8 +244,6 @@ "torch_compile": false, "torch_compile_backend": null, "torch_compile_mode": null, - "dispatch_batches": null, - "split_batches": null, "include_tokens_per_second": false, "include_num_input_tokens_seen": false, "neftune_noise_alpha": null, @@ -361,12 +357,13 @@ "metric": null, "zero_hpz_partition_size": null, "rank": 0, - "global_world_size": 4, - "local_world_size": 4, + "global_world_size": 8, + "local_world_size": 8, "model_suffix": "Qwen2.5-VL-7B-Instruct", "model_info": "ModelInfo(model_type='qwen2_5_vl', model_dir='/mnt/petrelfs/liuhongbo/Qwen2.5-VL-7B-Instruct', torch_dtype=torch.bfloat16, max_model_len=128000, quant_method=None, quant_bits=None, rope_scaling={'type': 'default', 'mrope_section': [16, 24, 24], 'rope_type': 'default'}, config=None, task_type='causal_lm', num_labels=None)", - "model_meta": "ModelMeta(model_type='qwen2_5_vl', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen2.5-VL-3B-Instruct', hf_model_id='Qwen/Qwen2.5-VL-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-7B-Instruct', hf_model_id='Qwen/Qwen2.5-VL-7B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-32B-Instruct', hf_model_id='Qwen/Qwen2.5-VL-32B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-72B-Instruct', hf_model_id='Qwen/Qwen2.5-VL-72B-Instruct', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='Qwen/Qwen2.5-VL-3B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-VL-3B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-7B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-VL-7B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-32B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-VL-32B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-72B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-VL-72B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen2_5_vl', get_function=, model_arch='qwen2_vl', architectures=['Qwen2_5_VLForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.49', 'qwen_vl_utils>=0.0.6', 'decord'], tags=[])", + "model_meta": "ModelMeta(model_type='qwen2_5_vl', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen2.5-VL-3B-Instruct', hf_model_id='Qwen/Qwen2.5-VL-3B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-7B-Instruct', hf_model_id='Qwen/Qwen2.5-VL-7B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-32B-Instruct', hf_model_id='Qwen/Qwen2.5-VL-32B-Instruct', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-72B-Instruct', hf_model_id='Qwen/Qwen2.5-VL-72B-Instruct', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[]), ModelGroup(models=[Model(ms_model_id='Qwen/Qwen2.5-VL-3B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-VL-3B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-7B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-VL-7B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-32B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-VL-32B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen2.5-VL-72B-Instruct-AWQ', hf_model_id='Qwen/Qwen2.5-VL-72B-Instruct-AWQ', model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen2_5_vl', get_function=, model_arch='qwen2_vl', architectures=['Qwen2_5_VLForConditionalGeneration'], additional_saved_files=[], torch_dtype=None, is_multimodal=True, is_reward=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.49', 'qwen_vl_utils>=0.0.6', 'decord'], tags=[])", "model_dir": "/mnt/petrelfs/liuhongbo/Qwen2.5-VL-7B-Instruct", "hub": "", - "training_args": "Seq2SeqTrainingArguments(output_dir='/mnt/petrelfs/liuhongbo/ms-swift/output/main/sft_7B_without_reasoning_wvideo/v1-20250623-155054', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=1, per_device_eval_batch_size=1, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=16, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=5e-06, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/mnt/petrelfs/liuhongbo/ms-swift/output/main/sft_7B_without_reasoning_wvideo/v1-20250623-155054/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=100, save_total_limit=1, save_safetensors=True, save_on_each_node=False, save_only_model=True, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=100, dataloader_num_workers=4, dataloader_prefetch_factor=10, past_index=-1, run_name='/mnt/petrelfs/liuhongbo/ms-swift/output/main/sft_7B_without_reasoning_wvideo/v1-20250623-155054', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, tp_size=0, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': False, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'zero_quantized_weights': False, 'zero_quantized_gradients': False, 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['wandb'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', evaluation_strategy=, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, dispatch_batches=None, split_batches=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, check_model=True, acc_strategy='token', train_dataloader_shuffle=True, metric_warmup_step=0, fsdp_num=1, acc_steps=1, eval_use_evalscope=False, eval_datasets=[], eval_limit=None, eval_datasets_args=None, eval_generation_config=None, train_type='full', optimizer=None, local_repo_path=None, galore_config=None)" + "evaluation_strategy": "steps", + "training_args": "Seq2SeqTrainingArguments(output_dir='/mnt/petrelfs/liuhongbo/ms-swift/output/sft_7B_v1.1/v3-20250901-131037', overwrite_output_dir=False, do_train=False, do_eval=True, do_predict=False, eval_strategy=, prediction_loss_only=False, per_device_train_batch_size=16, per_device_eval_batch_size=16, per_gpu_train_batch_size=None, per_gpu_eval_batch_size=None, gradient_accumulation_steps=2, eval_accumulation_steps=None, eval_delay=0, torch_empty_cache_steps=None, learning_rate=1e-06, weight_decay=0.1, adam_beta1=0.9, adam_beta2=0.95, adam_epsilon=1e-08, max_grad_norm=1.0, num_train_epochs=1.0, max_steps=-1, lr_scheduler_type=, lr_scheduler_kwargs=None, warmup_ratio=0.05, warmup_steps=0, log_level='passive', log_level_replica='warning', log_on_each_node=True, logging_dir='/mnt/petrelfs/liuhongbo/ms-swift/output/sft_7B_v1.1/v3-20250901-131037/runs', logging_strategy=, logging_first_step=True, logging_steps=5, logging_nan_inf_filter=True, save_strategy=, save_steps=1000, save_total_limit=1, save_safetensors=True, save_on_each_node=False, save_only_model=True, restore_callback_states_from_checkpoint=False, no_cuda=False, use_cpu=False, use_mps_device=False, seed=42, data_seed=42, jit_mode_eval=False, use_ipex=False, bf16=True, fp16=False, fp16_opt_level='O1', half_precision_backend='auto', bf16_full_eval=False, fp16_full_eval=False, tf32=None, local_rank=0, ddp_backend=None, tpu_num_cores=None, tpu_metrics_debug=False, debug=[], dataloader_drop_last=False, eval_steps=1000, dataloader_num_workers=4, dataloader_prefetch_factor=10, past_index=-1, run_name='/mnt/petrelfs/liuhongbo/ms-swift/output/sft_7B_v1.1/v3-20250901-131037', disable_tqdm=False, remove_unused_columns=False, label_names=None, load_best_model_at_end=False, metric_for_best_model='loss', greater_is_better=False, ignore_data_skip=False, fsdp=[], fsdp_min_num_params=0, fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, tp_size=0, fsdp_transformer_layer_cls_to_wrap=None, accelerator_config=AcceleratorConfig(split_batches=False, dispatch_batches=False, even_batches=True, use_seedable_sampler=True, non_blocking=False, gradient_accumulation_kwargs=None, use_configured_state=False), deepspeed={'fp16': {'enabled': 'auto', 'loss_scale': 0, 'loss_scale_window': 1000, 'initial_scale_power': 16, 'hysteresis': 2, 'min_loss_scale': 1}, 'bf16': {'enabled': 'auto'}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'none', 'pin_memory': True}, 'offload_param': {'device': 'none', 'pin_memory': True}, 'overlap_comm': False, 'contiguous_gradients': True, 'sub_group_size': 1000000000.0, 'reduce_bucket_size': 'auto', 'zero_quantized_weights': False, 'zero_quantized_gradients': False, 'stage3_prefetch_bucket_size': 'auto', 'stage3_param_persistence_threshold': 'auto', 'stage3_max_live_parameters': 1000000000.0, 'stage3_max_reuse_distance': 1000000000.0, 'stage3_gather_16bit_weights_on_model_save': True}, 'gradient_accumulation_steps': 'auto', 'gradient_clipping': 'auto', 'steps_per_print': 2000, 'train_batch_size': 'auto', 'train_micro_batch_size_per_gpu': 'auto', 'wall_clock_breakdown': False}, label_smoothing_factor=0.0, optim=, optim_args=None, adafactor=False, group_by_length=False, length_column_name='length', report_to=['wandb'], ddp_find_unused_parameters=None, ddp_bucket_cap_mb=None, ddp_broadcast_buffers=None, dataloader_pin_memory=True, dataloader_persistent_workers=False, skip_memory_metrics=True, use_legacy_prediction_loop=False, push_to_hub=False, resume_from_checkpoint=None, hub_model_id=None, hub_strategy=, hub_token=None, hub_private_repo=None, hub_always_push=False, gradient_checkpointing=True, gradient_checkpointing_kwargs=None, include_inputs_for_metrics=False, include_for_metrics=[], eval_do_concat_batches=True, fp16_backend='auto', push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=None, mp_parameters='', auto_find_batch_size=False, full_determinism=False, torchdynamo=None, ray_scope='last', ddp_timeout=1800, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, include_tokens_per_second=None, include_num_input_tokens_seen=None, neftune_noise_alpha=None, optim_target_modules=None, batch_eval_metrics=False, eval_on_start=False, use_liger_kernel=False, eval_use_gather_object=False, average_tokens_across_devices=None, sortish_sampler=False, predict_with_generate=False, generation_max_length=None, generation_num_beams=None, generation_config=None, check_model=True, acc_strategy='token', train_dataloader_shuffle=True, metric_warmup_step=0, fsdp_num=1, acc_steps=1, eval_use_evalscope=False, eval_datasets=[], eval_limit=None, eval_datasets_args=None, eval_generation_config=None, train_type='full', optimizer=None, local_repo_path=None, galore_config=None)" } \ No newline at end of file diff --git a/config.json b/config.json index 5722916b44326d18457b9ee526843d6df730e928..116926f80ec56bb3fb3d0050213abaa6dbd4166c 100644 --- a/config.json +++ b/config.json @@ -31,7 +31,7 @@ "sliding_window": 32768, "tie_word_embeddings": false, "torch_dtype": "bfloat16", - "transformers_version": "4.50.3", + "transformers_version": "4.51.3", "use_cache": false, "use_sliding_window": false, "video_token_id": 151656, diff --git a/generation_config.json b/generation_config.json index 948fb3d7e6dff9e9cc1e406d2ecd6a3bd14e8eeb..54782b57c02403e0a78f4ec674857e159b4fde80 100644 --- a/generation_config.json +++ b/generation_config.json @@ -10,5 +10,5 @@ "temperature": 0.1, "top_k": 1, "top_p": 0.001, - "transformers_version": "4.50.3" + "transformers_version": "4.51.3" } diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors index fd6a9b1bde36dabddd78f1ff4ba1199b66bb136d..3d73366d673b09ac3cf3015d23427bdf7dc72e84 100644 --- a/model-00001-of-00004.safetensors +++ b/model-00001-of-00004.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e092af04e4b1cf0427712eb6c65a748026ac404d8c3a1fdc64f9d362fce7ecb5 +oid sha256:0740ea992528b84d89247ad9a3544d688d74596cd6a7cf6cd1c587016b93c458 size 4968243304 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors index 3d115daede6e83f8a02fc7843eeddfa8b2699cd9..dfb6d67ec26761cf6408a03260de192e0e6432d7 100644 --- a/model-00002-of-00004.safetensors +++ b/model-00002-of-00004.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c427fc3ed4ebf47d1ad246b11321e255f7f4685cd15ddc3efae4f0d5d90c7ac6 +oid sha256:d38dc6a2731aeb341bb8ae055692881a48ef94e0b2ab6d29d8d4608e9e2f620d size 4991495816 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors index 0c752ead96fd53619a85c8b67c265424c4dd20a3..8a7d3d89ce7bf5882041469f8dccd158aba32937 100644 --- a/model-00003-of-00004.safetensors +++ b/model-00003-of-00004.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3211bb0a7b56a4a330b102d210e31fe084a5f8d721a030e8940c7dfaf4a1e2c4 +oid sha256:4dd6038d4e15af8b097d2cc7259fa5af2e8310546d268df3dd64f285e9612fef size 4932751040 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors index cde8f1110ddfc42450ba906f8b29aae47ed13992..690fb08a0d7bfb4857f4e3694d14a9afbc5a2801 100644 --- a/model-00004-of-00004.safetensors +++ b/model-00004-of-00004.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd744074338b4c2b7b9d6ad3628d03e21bc507498b86c842a1cfb0e02c0836af +oid sha256:e9bcb75184abd54cdd8f38315cb7ad67daee695167ce0c68b5a72ed3e70f64af size 1691924384 diff --git a/trainer_state.json b/trainer_state.json index 81e8603fc4334e8f96597048623a338e251ba8bd..f9c04d883c8732fd68d3a7e157c65bf31b55e9ce 100644 --- a/trainer_state.json +++ b/trainer_state.json @@ -2,1709 +2,105649 @@ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9995543009953944, - "eval_steps": 100, - "global_step": 841, + "epoch": 9.998758997269794, + "eval_steps": 500, + "global_step": 5030, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0011885306789481504, - "grad_norm": 548.1797235189983, - "learning_rate": 1.1627906976744187e-07, - "loss": 8.022117614746094, - "memory(GiB)": 46.35, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 51.5, + "completions/mean_length": 9.343750238418579, + "completions/min_length": 2.0, + "epoch": 0.0019856043683296105, + "grad_norm": 8.833409236542238, + "kl": 0.0, + "learning_rate": 1.9607843137254902e-08, + "loss": -0.1307593137025833, + "memory(GiB)": 90.14, + "reward": 0.16666666977107525, + "reward_std": 0.2535768188536167, + "rewards/CineAccuracyORM/mean": 0.09375000093132257, + "rewards/CineAccuracyORM/std": 0.19780732691287994, + "rewards/Format/mean": 0.0729166679084301, + "rewards/Format/std": 0.17735834047198296, "step": 1, - "token_acc": 0.16666666666666666, - "train_speed(iter/s)": 0.035854 + "train_speed(iter/s)": 0.005831 }, { - "epoch": 0.005942653394740751, - "grad_norm": 517.1879841831781, - "learning_rate": 5.813953488372094e-07, - "loss": 7.886555194854736, - "memory(GiB)": 50.11, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 29.375, + "completions/mean_length": 7.989583492279053, + "completions/min_length": 2.0, + "epoch": 0.003971208736659221, + "grad_norm": 8.360215664061299, + "kl": 0.0, + "learning_rate": 3.9215686274509804e-08, + "loss": -0.1537517011165619, + "memory(GiB)": 93.19, + "reward": 0.2083333395421505, + "reward_std": 0.2977868393063545, + "rewards/CineAccuracyORM/mean": 0.11458333767950535, + "rewards/CineAccuracyORM/std": 0.17456800863146782, + "rewards/Format/mean": 0.09375000186264515, + "rewards/Format/std": 0.16673530638217926, + "step": 2, + "train_speed(iter/s)": 0.009915 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 33.5, + "completions/mean_length": 4.6250001192092896, + "completions/min_length": 2.0, + "epoch": 0.005956813104988831, + "grad_norm": 7.288747169581666, + "kl": 2.47955322265625e-05, + "learning_rate": 5.88235294117647e-08, + "loss": -0.09165790677070618, + "memory(GiB)": 93.19, + "reward": 0.0833333358168602, + "reward_std": 0.20412414148449898, + "rewards/CineAccuracyORM/mean": 0.0416666679084301, + "rewards/CineAccuracyORM/std": 0.14433756470680237, + "rewards/Format/mean": 0.0416666679084301, + "rewards/Format/std": 0.14433756470680237, + "step": 3, + "train_speed(iter/s)": 0.013195 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 11.625, + "completions/mean_length": 3.4375, + "completions/min_length": 2.0, + "epoch": 0.007942417473318442, + "grad_norm": 2.279249541632776, + "kl": -0.00013136863708496094, + "learning_rate": 7.843137254901961e-08, + "loss": -0.03810252994298935, + "memory(GiB)": 93.19, + "reward": 0.0416666679084301, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.02083333395421505, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 0.02083333395421505, + "rewards/Format/std": 0.04865618050098419, + "step": 4, + "train_speed(iter/s)": 0.015446 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 65.375, + "completions/mean_length": 17.635416984558105, + "completions/min_length": 2.0, + "epoch": 0.009928021841648052, + "grad_norm": 7.483078403985399, + "kl": 0.0003783702850341797, + "learning_rate": 9.80392156862745e-08, + "loss": -0.18313568830490112, + "memory(GiB)": 94.21, + "reward": 0.354166679084301, + "reward_std": 0.2472660318017006, + "rewards/CineAccuracyORM/mean": 0.1666666679084301, + "rewards/CineAccuracyORM/std": 0.17461250349879265, + "rewards/Format/mean": 0.18750000558793545, + "rewards/Format/std": 0.2311013862490654, "step": 5, - "token_acc": 0.14788732394366197, - "train_speed(iter/s)": 0.064567 + "train_speed(iter/s)": 0.017389 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 60.375, + "completions/mean_length": 14.072916865348816, + "completions/min_length": 2.0, + "epoch": 0.011913626209977662, + "grad_norm": 8.836452160215568, + "kl": 0.00021839141845703125, + "learning_rate": 1.176470588235294e-07, + "loss": -0.13913874328136444, + "memory(GiB)": 94.21, + "reward": 0.23958334140479565, + "reward_std": 0.2653941735625267, + "rewards/CineAccuracyORM/mean": 0.10416667070239782, + "rewards/CineAccuracyORM/std": 0.19776283204555511, + "rewards/Format/mean": 0.13541667070239782, + "rewards/Format/std": 0.2542962096631527, + "step": 6, + "train_speed(iter/s)": 0.018869 }, { - "epoch": 0.011885306789481503, - "grad_norm": 295.78882511572357, - "learning_rate": 1.1627906976744188e-06, - "loss": 6.243068695068359, - "memory(GiB)": 50.11, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 25.75, + "completions/mean_length": 6.96875, + "completions/min_length": 2.0, + "epoch": 0.013899230578307272, + "grad_norm": 2.5665338568614255, + "kl": 0.00019860267639160156, + "learning_rate": 1.3725490196078432e-07, + "loss": -0.08070332556962967, + "memory(GiB)": 94.21, + "reward": 0.1354166716337204, + "reward_std": 0.12991482764482498, + "rewards/CineAccuracyORM/mean": 0.0729166679084301, + "rewards/CineAccuracyORM/std": 0.11807912588119507, + "rewards/Format/mean": 0.0625, + "rewards/Format/std": 0.11306675523519516, + "step": 7, + "train_speed(iter/s)": 0.020182 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 77.25, + "completions/mean_length": 13.302083730697632, + "completions/min_length": 2.0, + "epoch": 0.015884834946636884, + "grad_norm": 12.586083776732455, + "kl": 0.00010991096496582031, + "learning_rate": 1.5686274509803921e-07, + "loss": -0.18610158562660217, + "memory(GiB)": 94.21, + "reward": 0.28125000558793545, + "reward_std": 0.3487507253885269, + "rewards/CineAccuracyORM/mean": 0.16666666883975267, + "rewards/CineAccuracyORM/std": 0.315886452794075, + "rewards/Format/mean": 0.11458333488553762, + "rewards/Format/std": 0.26997610926628113, + "step": 8, + "train_speed(iter/s)": 0.020938 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 94.375, + "completions/mean_length": 17.937500715255737, + "completions/min_length": 2.0, + "epoch": 0.017870439314966492, + "grad_norm": 8.726468045240763, + "kl": 0.00038111209869384766, + "learning_rate": 1.764705882352941e-07, + "loss": -0.19536086916923523, + "memory(GiB)": 94.21, + "reward": 0.28125001303851604, + "reward_std": 0.2841503508388996, + "rewards/CineAccuracyORM/mean": 0.11458333674818277, + "rewards/CineAccuracyORM/std": 0.2056400291621685, + "rewards/Format/mean": 0.1666666679084301, + "rewards/Format/std": 0.23389171808958054, + "step": 9, + "train_speed(iter/s)": 0.021565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 46.25, + "completions/mean_length": 11.604167103767395, + "completions/min_length": 2.0, + "epoch": 0.019856043683296104, + "grad_norm": 3.708710818056766, + "kl": -1.0102987289428711e-05, + "learning_rate": 1.96078431372549e-07, + "loss": -0.08594602346420288, + "memory(GiB)": 94.21, + "reward": 0.11458333767950535, + "reward_std": 0.14109627529978752, + "rewards/CineAccuracyORM/mean": 0.031250000931322575, + "rewards/CineAccuracyORM/std": 0.08474057167768478, + "rewards/Format/mean": 0.08333333674818277, + "rewards/Format/std": 0.14910665154457092, "step": 10, - "token_acc": 0.2388888888888889, - "train_speed(iter/s)": 0.071958 + "train_speed(iter/s)": 0.022199 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 23.75, + "completions/mean_length": 5.770833492279053, + "completions/min_length": 2.0, + "epoch": 0.021841648051625712, + "grad_norm": 6.596021037229982, + "kl": 0.0005369186401367188, + "learning_rate": 2.156862745098039e-07, + "loss": -0.11402159929275513, + "memory(GiB)": 94.21, + "reward": 0.12500000186264515, + "reward_std": 0.18404607847332954, + "rewards/CineAccuracyORM/mean": 0.06250000093132257, + "rewards/CineAccuracyORM/std": 0.14127394929528236, + "rewards/Format/mean": 0.06250000093132257, + "rewards/Format/std": 0.14127394929528236, + "step": 11, + "train_speed(iter/s)": 0.022713 }, { - "epoch": 0.017827960184222256, - "grad_norm": 12.276341768278998, - "learning_rate": 1.7441860465116282e-06, - "loss": 1.707086181640625, - "memory(GiB)": 50.11, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 49.0, + "completions/mean_length": 10.583333611488342, + "completions/min_length": 2.0, + "epoch": 0.023827252419955324, + "grad_norm": 9.710736479035985, + "kl": 0.0021132230758666992, + "learning_rate": 2.352941176470588e-07, + "loss": -0.14895620942115784, + "memory(GiB)": 94.21, + "reward": 0.250000006519258, + "reward_std": 0.26867386512458324, + "rewards/CineAccuracyORM/mean": 0.11458333767950535, + "rewards/CineAccuracyORM/std": 0.1980806104838848, + "rewards/Format/mean": 0.13541667256504297, + "rewards/Format/std": 0.23698533326387405, + "step": 12, + "train_speed(iter/s)": 0.023357 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 74.625, + "completions/mean_length": 18.5208338201046, + "completions/min_length": 2.0, + "epoch": 0.025812856788284936, + "grad_norm": 14.74385141030088, + "kl": 0.005173683166503906, + "learning_rate": 2.549019607843137e-07, + "loss": -0.22722145915031433, + "memory(GiB)": 94.21, + "reward": 0.5000000186264515, + "reward_std": 0.5557370781898499, + "rewards/CineAccuracyORM/mean": 0.26041667722165585, + "rewards/CineAccuracyORM/std": 0.4162924289703369, + "rewards/Format/mean": 0.23958333767950535, + "rewards/Format/std": 0.41219309717416763, + "step": 13, + "train_speed(iter/s)": 0.02377 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 58.625, + "completions/mean_length": 17.67708432674408, + "completions/min_length": 2.0, + "epoch": 0.027798461156614544, + "grad_norm": 6.691494016340618, + "kl": 0.00567626953125, + "learning_rate": 2.7450980392156863e-07, + "loss": -0.12714232504367828, + "memory(GiB)": 94.21, + "reward": 0.2916666716337204, + "reward_std": 0.2648143917322159, + "rewards/CineAccuracyORM/mean": 0.14583334140479565, + "rewards/CineAccuracyORM/std": 0.17738834023475647, + "rewards/Format/mean": 0.14583333861082792, + "rewards/Format/std": 0.1657295897603035, + "step": 14, + "train_speed(iter/s)": 0.024162 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 87.5, + "completions/mean_length": 12.56250062584877, + "completions/min_length": 2.0, + "epoch": 0.029784065524944156, + "grad_norm": 11.826028163769157, + "kl": 0.002541065216064453, + "learning_rate": 2.941176470588235e-07, + "loss": -0.18597036600112915, + "memory(GiB)": 94.21, + "reward": 0.21875000838190317, + "reward_std": 0.2941893860697746, + "rewards/CineAccuracyORM/mean": 0.11458333488553762, + "rewards/CineAccuracyORM/std": 0.24570107460021973, + "rewards/Format/mean": 0.10416666977107525, + "rewards/Format/std": 0.25453949347138405, "step": 15, - "token_acc": 0.5085714285714286, - "train_speed(iter/s)": 0.074777 + "train_speed(iter/s)": 0.024488 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 93.25, + "completions/mean_length": 26.000000953674316, + "completions/min_length": 2.0, + "epoch": 0.03176966989327377, + "grad_norm": 19.22682565973066, + "kl": 0.0291595458984375, + "learning_rate": 3.1372549019607843e-07, + "loss": -0.28655847907066345, + "memory(GiB)": 94.21, + "reward": 0.6979166772216558, + "reward_std": 0.5280137322843075, + "rewards/CineAccuracyORM/mean": 0.33333333395421505, + "rewards/CineAccuracyORM/std": 0.38107289373874664, + "rewards/Format/mean": 0.3645833386108279, + "rewards/Format/std": 0.4106335826218128, + "step": 16, + "train_speed(iter/s)": 0.024635 }, { - "epoch": 0.023770613578963005, - "grad_norm": 14.766060462541946, - "learning_rate": 2.3255813953488376e-06, - "loss": 0.6782948493957519, - "memory(GiB)": 50.11, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.375, + "completions/mean_length": 35.55208444595337, + "completions/min_length": 2.0, + "epoch": 0.03375527426160337, + "grad_norm": 16.161087880095785, + "kl": 0.03133392333984375, + "learning_rate": 3.333333333333333e-07, + "loss": -0.2801506519317627, + "memory(GiB)": 94.21, + "reward": 0.8333333693444729, + "reward_std": 0.6809760481119156, + "rewards/CineAccuracyORM/mean": 0.39583334047347307, + "rewards/CineAccuracyORM/std": 0.4389779530465603, + "rewards/Format/mean": 0.4375000111758709, + "rewards/Format/std": 0.4703202247619629, + "step": 17, + "train_speed(iter/s)": 0.024738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 43.281251430511475, + "completions/min_length": 2.0, + "epoch": 0.035740878629932984, + "grad_norm": 16.578350883090533, + "kl": 0.067230224609375, + "learning_rate": 3.529411764705882e-07, + "loss": -0.32550498843193054, + "memory(GiB)": 94.21, + "reward": 0.9479166865348816, + "reward_std": 0.6519781649112701, + "rewards/CineAccuracyORM/mean": 0.4479166744276881, + "rewards/CineAccuracyORM/std": 0.45145706087350845, + "rewards/Format/mean": 0.5000000074505806, + "rewards/Format/std": 0.4876159466803074, + "step": 18, + "train_speed(iter/s)": 0.02466 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 56.229168176651, + "completions/min_length": 4.625, + "epoch": 0.037726482998262596, + "grad_norm": 8.071834804214191, + "kl": 0.07891845703125, + "learning_rate": 3.7254901960784315e-07, + "loss": -0.1669750064611435, + "memory(GiB)": 94.21, + "reward": 1.5312500223517418, + "reward_std": 0.2910274975001812, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.19398127868771553, + "rewards/Format/mean": 0.7708333507180214, + "rewards/Format/std": 0.3783007487654686, + "step": 19, + "train_speed(iter/s)": 0.024471 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 46.822917461395264, + "completions/min_length": 5.625, + "epoch": 0.03971208736659221, + "grad_norm": 9.923832627018575, + "kl": 0.0755157470703125, + "learning_rate": 3.92156862745098e-07, + "loss": -0.21674101054668427, + "memory(GiB)": 94.21, + "reward": 1.2395833656191826, + "reward_std": 0.4177897088229656, + "rewards/CineAccuracyORM/mean": 0.604166679084301, + "rewards/CineAccuracyORM/std": 0.34121063724160194, + "rewards/Format/mean": 0.6354166828095913, + "rewards/Format/std": 0.4222923330962658, "step": 20, - "token_acc": 0.7597765363128491, - "train_speed(iter/s)": 0.076407 + "train_speed(iter/s)": 0.024436 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.75, + "completions/mean_length": 35.95833429694176, + "completions/min_length": 2.0, + "epoch": 0.04169769173492182, + "grad_norm": 24.745996197714177, + "kl": 0.077484130859375, + "learning_rate": 4.117647058823529e-07, + "loss": -0.30436521768569946, + "memory(GiB)": 94.21, + "reward": 0.9895833488553762, + "reward_std": 0.6039165481925011, + "rewards/CineAccuracyORM/mean": 0.4791666781529784, + "rewards/CineAccuracyORM/std": 0.4370591938495636, + "rewards/Format/mean": 0.5104166818782687, + "rewards/Format/std": 0.45182302221655846, + "step": 21, + "train_speed(iter/s)": 0.02441 }, { - "epoch": 0.02971326697370376, - "grad_norm": 14.972149762033396, - "learning_rate": 2.9069767441860468e-06, - "loss": 0.5007028579711914, - "memory(GiB)": 50.11, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 63.91666841506958, + "completions/min_length": 24.125, + "epoch": 0.043683296103251425, + "grad_norm": 1.0997888211451068, + "kl": 0.174560546875, + "learning_rate": 4.313725490196078e-07, + "loss": -0.015414511784911156, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 22, + "train_speed(iter/s)": 0.024545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.25, + "completions/mean_length": 66.44791889190674, + "completions/min_length": 25.25, + "epoch": 0.045668900471581036, + "grad_norm": 0.009539260932387994, + "kl": 0.18096923828125, + "learning_rate": 4.5098039215686274e-07, + "loss": 0.00018086486670654267, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 23, + "train_speed(iter/s)": 0.024559 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 72.69791889190674, + "completions/min_length": 28.25, + "epoch": 0.04765450483991065, + "grad_norm": 0.007386712773020203, + "kl": 0.14459228515625, + "learning_rate": 4.705882352941176e-07, + "loss": 0.0001446278765797615, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 24, + "train_speed(iter/s)": 0.024555 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.875, + "completions/mean_length": 61.95833492279053, + "completions/min_length": 24.625, + "epoch": 0.04964010920824026, + "grad_norm": 0.00875220203675299, + "kl": 0.1748046875, + "learning_rate": 4.901960784313725e-07, + "loss": 0.00017489976016804576, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, "step": 25, - "token_acc": 0.7833333333333333, - "train_speed(iter/s)": 0.077351 + "train_speed(iter/s)": 0.024602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 66.00000143051147, + "completions/min_length": 25.25, + "epoch": 0.05162571357656987, + "grad_norm": 1.9358129459128381, + "kl": 0.17364501953125, + "learning_rate": 5.098039215686274e-07, + "loss": 0.0022797263227403164, + "memory(GiB)": 94.21, + "reward": 1.8125000149011612, + "reward_std": 0.05103103816509247, + "rewards/CineAccuracyORM/mean": 0.8125000074505806, + "rewards/CineAccuracyORM/std": 0.23100870847702026, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 26, + "train_speed(iter/s)": 0.024617 }, { - "epoch": 0.03565592036844451, - "grad_norm": 14.305194685298874, - "learning_rate": 3.4883720930232564e-06, - "loss": 0.47702178955078123, - "memory(GiB)": 50.11, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.5, + "completions/mean_length": 71.32291841506958, + "completions/min_length": 18.5, + "epoch": 0.05361131794489948, + "grad_norm": 0.00795613264759887, + "kl": 0.1551513671875, + "learning_rate": 5.294117647058823e-07, + "loss": 0.00015511378296650946, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 27, + "train_speed(iter/s)": 0.024504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.0, + "completions/mean_length": 67.26041889190674, + "completions/min_length": 31.625, + "epoch": 0.05559692231322909, + "grad_norm": 0.008135823299937521, + "kl": 0.18121337890625, + "learning_rate": 5.490196078431373e-07, + "loss": 0.00018121811444871128, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 28, + "train_speed(iter/s)": 0.024483 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 71.23958492279053, + "completions/min_length": 29.0, + "epoch": 0.0575825266815587, + "grad_norm": 0.0072313349721089765, + "kl": 0.13995361328125, + "learning_rate": 5.686274509803921e-07, + "loss": 0.00014012031897436827, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 29, + "train_speed(iter/s)": 0.024428 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.625, + "completions/mean_length": 69.34375238418579, + "completions/min_length": 33.5, + "epoch": 0.05956813104988831, + "grad_norm": 0.007334074235255649, + "kl": 0.14501953125, + "learning_rate": 5.88235294117647e-07, + "loss": 0.0001449078117730096, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, "step": 30, - "token_acc": 0.8111111111111111, - "train_speed(iter/s)": 0.078111 + "train_speed(iter/s)": 0.024441 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 63.281251430511475, + "completions/min_length": 26.625, + "epoch": 0.06155373541821792, + "grad_norm": 0.009302543997764104, + "kl": 0.2073974609375, + "learning_rate": 6.078431372549019e-07, + "loss": 0.00020755574223585427, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 31, + "train_speed(iter/s)": 0.0244 }, { - "epoch": 0.041598573763185265, - "grad_norm": 6.111868397462935, - "learning_rate": 4.0697674418604655e-06, - "loss": 0.42873945236206057, - "memory(GiB)": 50.11, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 67.78125238418579, + "completions/min_length": 25.875, + "epoch": 0.06353933978654754, + "grad_norm": 0.007934062675082865, + "kl": 0.19287109375, + "learning_rate": 6.274509803921569e-07, + "loss": 0.00019275880185887218, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 32, + "train_speed(iter/s)": 0.024458 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.625, + "completions/mean_length": 79.22917079925537, + "completions/min_length": 27.625, + "epoch": 0.06552494415487714, + "grad_norm": 0.006838073989216734, + "kl": 0.1513671875, + "learning_rate": 6.470588235294117e-07, + "loss": 0.00015125676873140037, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 33, + "train_speed(iter/s)": 0.024334 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 77.739586353302, + "completions/min_length": 39.5, + "epoch": 0.06751054852320675, + "grad_norm": 0.006296733856276834, + "kl": 0.13775634765625, + "learning_rate": 6.666666666666666e-07, + "loss": 0.00013771496014669538, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 34, + "train_speed(iter/s)": 0.024298 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 83.29166793823242, + "completions/min_length": 40.75, + "epoch": 0.06949615289153636, + "grad_norm": 0.005644199777163406, + "kl": 0.1414794921875, + "learning_rate": 6.862745098039216e-07, + "loss": 0.0001416189334122464, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, "step": 35, - "token_acc": 0.8225806451612904, - "train_speed(iter/s)": 0.078557 + "train_speed(iter/s)": 0.024321 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 73.78125095367432, + "completions/min_length": 32.75, + "epoch": 0.07148175725986597, + "grad_norm": 1.3475220550057878, + "kl": 0.17987060546875, + "learning_rate": 7.058823529411765e-07, + "loss": -0.002007975010201335, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 36, + "train_speed(iter/s)": 0.024252 }, { - "epoch": 0.04754122715792601, - "grad_norm": 8.991713613954735, - "learning_rate": 4.651162790697675e-06, - "loss": 0.403080415725708, - "memory(GiB)": 50.11, + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 68.67708587646484, + "completions/min_length": 27.625, + "epoch": 0.07346736162819559, + "grad_norm": 1.378099520868228, + "kl": 0.19500732421875, + "learning_rate": 7.254901960784313e-07, + "loss": 0.01300447341054678, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 37, + "train_speed(iter/s)": 0.024175 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 75.84375143051147, + "completions/min_length": 36.25, + "epoch": 0.07545296599652519, + "grad_norm": 0.010631492564245152, + "kl": 0.15240478515625, + "learning_rate": 7.450980392156863e-07, + "loss": 0.00015249915304593742, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 38, + "train_speed(iter/s)": 0.024119 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 69.36458539962769, + "completions/min_length": 32.875, + "epoch": 0.0774385703648548, + "grad_norm": 0.007479684874167, + "kl": 0.17919921875, + "learning_rate": 7.647058823529411e-07, + "loss": 0.0001796395517885685, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 39, + "train_speed(iter/s)": 0.024105 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.625, + "completions/mean_length": 70.46875286102295, + "completions/min_length": 32.125, + "epoch": 0.07942417473318442, + "grad_norm": 0.007549139015510863, + "kl": 0.1712646484375, + "learning_rate": 7.84313725490196e-07, + "loss": 0.00017136195674538612, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, "step": 40, - "token_acc": 0.8461538461538461, - "train_speed(iter/s)": 0.078949 + "train_speed(iter/s)": 0.023999 }, { - "epoch": 0.053483880552666764, - "grad_norm": 4.917565148203119, - "learning_rate": 4.999922507133964e-06, - "loss": 0.37815165519714355, - "memory(GiB)": 50.11, - "step": 45, - "token_acc": 0.8789473684210526, - "train_speed(iter/s)": 0.07932 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.875, + "completions/mean_length": 69.66666889190674, + "completions/min_length": 29.5, + "epoch": 0.08140977910151402, + "grad_norm": 0.007635037188927877, + "kl": 0.17462158203125, + "learning_rate": 8.03921568627451e-07, + "loss": 0.00017471713363192976, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 41, + "train_speed(iter/s)": 0.024045 }, { - "epoch": 0.05942653394740752, - "grad_norm": 11.989491241774688, - "learning_rate": 4.999050767562379e-06, - "loss": 0.3381024360656738, - "memory(GiB)": 50.11, - "step": 50, - "token_acc": 0.8735632183908046, - "train_speed(iter/s)": 0.079547 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 76.26041889190674, + "completions/min_length": 37.75, + "epoch": 0.08339538346984364, + "grad_norm": 0.006637911382664406, + "kl": 0.1431884765625, + "learning_rate": 8.235294117647058e-07, + "loss": 0.00014315629960037768, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 42, + "train_speed(iter/s)": 0.023979 }, { - "epoch": 0.06536918734214826, - "grad_norm": 6.777839360520193, - "learning_rate": 4.99721076122146e-06, - "loss": 0.3407719135284424, - "memory(GiB)": 50.11, - "step": 55, - "token_acc": 0.8579234972677595, - "train_speed(iter/s)": 0.079765 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 76.08333587646484, + "completions/min_length": 33.125, + "epoch": 0.08538098783817324, + "grad_norm": 0.007545569232213225, + "kl": 0.16400146484375, + "learning_rate": 8.431372549019608e-07, + "loss": 0.00016389989468734711, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 43, + "train_speed(iter/s)": 0.023853 }, { - "epoch": 0.07131184073688902, - "grad_norm": 6.139296584815925, - "learning_rate": 4.994403201028695e-06, - "loss": 0.33808121681213377, - "memory(GiB)": 50.11, - "step": 60, - "token_acc": 0.8743455497382199, - "train_speed(iter/s)": 0.079935 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.25, + "completions/mean_length": 70.80208587646484, + "completions/min_length": 29.75, + "epoch": 0.08736659220650285, + "grad_norm": 0.007971807973221248, + "kl": 0.187744140625, + "learning_rate": 8.627450980392156e-07, + "loss": 0.00018761036335490644, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 44, + "train_speed(iter/s)": 0.023811 }, { - "epoch": 0.07725449413162977, - "grad_norm": 5.890787284633843, - "learning_rate": 4.990629174784009e-06, - "loss": 0.3293055295944214, - "memory(GiB)": 50.11, - "step": 65, - "token_acc": 0.898936170212766, - "train_speed(iter/s)": 0.080067 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 77.07291984558105, + "completions/min_length": 39.25, + "epoch": 0.08935219657483247, + "grad_norm": 0.0071067330941543775, + "kl": 0.14849853515625, + "learning_rate": 8.823529411764705e-07, + "loss": 0.00014854694018140435, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 45, + "train_speed(iter/s)": 0.023768 }, { - "epoch": 0.08319714752637053, - "grad_norm": 10.931767446162842, - "learning_rate": 4.9858901447482924e-06, - "loss": 0.3187825679779053, - "memory(GiB)": 50.11, - "step": 70, - "token_acc": 0.8478260869565217, - "train_speed(iter/s)": 0.080169 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 73.270836353302, + "completions/min_length": 32.375, + "epoch": 0.09133780094316207, + "grad_norm": 0.007376222395453255, + "kl": 0.1529541015625, + "learning_rate": 9.019607843137255e-07, + "loss": 0.00015288867871277034, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 46, + "train_speed(iter/s)": 0.023741 }, { - "epoch": 0.08913980092111128, - "grad_norm": 10.824836091859028, - "learning_rate": 4.980187947076841e-06, - "loss": 0.34889607429504393, - "memory(GiB)": 50.11, - "step": 75, - "token_acc": 0.8495145631067961, - "train_speed(iter/s)": 0.08027 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.75, + "completions/mean_length": 61.11458492279053, + "completions/min_length": 32.75, + "epoch": 0.09332340531149169, + "grad_norm": 0.008943850465861202, + "kl": 0.18115234375, + "learning_rate": 9.215686274509803e-07, + "loss": 0.00018107870710082352, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 47, + "train_speed(iter/s)": 0.023778 }, { - "epoch": 0.09508245431585202, - "grad_norm": 6.620646988752802, - "learning_rate": 4.973524791107931e-06, - "loss": 0.3007768154144287, - "memory(GiB)": 50.11, - "step": 80, - "token_acc": 0.8418079096045198, - "train_speed(iter/s)": 0.080366 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.125, + "completions/mean_length": 79.77083587646484, + "completions/min_length": 33.375, + "epoch": 0.0953090096798213, + "grad_norm": 0.007256615320610051, + "kl": 0.1475830078125, + "learning_rate": 9.411764705882352e-07, + "loss": 0.00014765582454856485, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 48, + "train_speed(iter/s)": 0.023708 }, { - "epoch": 0.10102510771059278, - "grad_norm": 9.519794919805669, - "learning_rate": 4.965903258506806e-06, - "loss": 0.34135324954986573, - "memory(GiB)": 50.11, - "step": 85, - "token_acc": 0.8839779005524862, - "train_speed(iter/s)": 0.080443 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 81.05208587646484, + "completions/min_length": 39.375, + "epoch": 0.0972946140481509, + "grad_norm": 0.007071796756037824, + "kl": 0.129150390625, + "learning_rate": 9.607843137254902e-07, + "loss": 0.00012925347255077213, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 49, + "train_speed(iter/s)": 0.023616 }, { - "epoch": 0.10696776110533353, - "grad_norm": 8.719690644097742, - "learning_rate": 4.957326302265395e-06, - "loss": 0.31029839515686036, - "memory(GiB)": 50.11, - "step": 90, - "token_acc": 0.8924731182795699, - "train_speed(iter/s)": 0.080511 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 71.208336353302, + "completions/min_length": 31.375, + "epoch": 0.09928021841648052, + "grad_norm": 0.008868009201440628, + "kl": 0.16436767578125, + "learning_rate": 9.80392156862745e-07, + "loss": 0.0001643826690269634, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 50, + "train_speed(iter/s)": 0.023591 }, { - "epoch": 0.11291041450007429, - "grad_norm": 7.358059720333543, - "learning_rate": 4.947797245558168e-06, - "loss": 0.3377894639968872, - "memory(GiB)": 50.11, - "step": 95, - "token_acc": 0.8789473684210526, - "train_speed(iter/s)": 0.080597 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.625, + "completions/mean_length": 62.500001430511475, + "completions/min_length": 29.625, + "epoch": 0.10126582278481013, + "grad_norm": 2.8657160529905243, + "kl": 0.1888427734375, + "learning_rate": 1e-06, + "loss": -0.0009202882647514343, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 51, + "train_speed(iter/s)": 0.023603 }, { - "epoch": 0.11885306789481503, - "grad_norm": 4.938431225494223, - "learning_rate": 4.937319780454559e-06, - "loss": 0.31557579040527345, - "memory(GiB)": 50.11, - "step": 100, - "token_acc": 0.8663101604278075, - "train_speed(iter/s)": 0.080572 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 70.00000286102295, + "completions/min_length": 32.75, + "epoch": 0.10325142715313974, + "grad_norm": 0.008463049210465283, + "kl": 0.15093994140625, + "learning_rate": 9.9999990046966e-07, + "loss": 0.00015097255527507514, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 52, + "train_speed(iter/s)": 0.023579 }, { - "epoch": 0.12479572128955578, - "grad_norm": 7.518639654198759, - "learning_rate": 4.9258979664884595e-06, - "loss": 0.3270684242248535, - "memory(GiB)": 50.11, - "step": 105, - "token_acc": 0.8390804597701149, - "train_speed(iter/s)": 0.077371 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 74.71875190734863, + "completions/min_length": 37.875, + "epoch": 0.10523703152146935, + "grad_norm": 0.007915017657529928, + "kl": 0.13067626953125, + "learning_rate": 9.9999960187868e-07, + "loss": 0.00013078741903882474, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 53, + "train_speed(iter/s)": 0.023519 }, { - "epoch": 0.13073837468429653, - "grad_norm": 5.275542865372311, - "learning_rate": 4.9135362290853365e-06, - "loss": 0.3212160348892212, - "memory(GiB)": 50.11, - "step": 110, - "token_acc": 0.8793103448275862, - "train_speed(iter/s)": 0.077562 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 73.16666793823242, + "completions/min_length": 33.75, + "epoch": 0.10722263588979895, + "grad_norm": 0.008342352123523848, + "kl": 0.142333984375, + "learning_rate": 9.999991042271788e-07, + "loss": 0.00014238506264518946, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 54, + "train_speed(iter/s)": 0.023532 }, { - "epoch": 0.1366810280790373, - "grad_norm": 5.256523302626162, - "learning_rate": 4.900239357847582e-06, - "loss": 0.32684850692749023, - "memory(GiB)": 50.11, - "step": 115, - "token_acc": 0.8659217877094972, - "train_speed(iter/s)": 0.077745 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.5, + "completions/mean_length": 70.78125143051147, + "completions/min_length": 36.125, + "epoch": 0.10920824025812857, + "grad_norm": 0.00871038345150832, + "kl": 0.17596435546875, + "learning_rate": 9.999984075153545e-07, + "loss": 0.00017616058175917715, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 55, + "train_speed(iter/s)": 0.023589 }, { - "epoch": 0.14262368147377805, - "grad_norm": 6.9956864362318925, - "learning_rate": 4.886012504698769e-06, - "loss": 0.3396021842956543, - "memory(GiB)": 50.11, - "step": 120, - "token_acc": 0.8314606741573034, - "train_speed(iter/s)": 0.077907 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 81.28125190734863, + "completions/min_length": 40.25, + "epoch": 0.11119384462645818, + "grad_norm": 1.6665108646221753, + "kl": 0.11895751953125, + "learning_rate": 9.999975117434842e-07, + "loss": -0.0060371882282197475, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 56, + "train_speed(iter/s)": 0.023557 }, { - "epoch": 0.1485663348685188, - "grad_norm": 8.220091525088554, - "learning_rate": 4.870861181887514e-06, - "loss": 0.2878549575805664, - "memory(GiB)": 50.11, - "step": 125, - "token_acc": 0.867816091954023, - "train_speed(iter/s)": 0.078077 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 70.81250190734863, + "completions/min_length": 32.25, + "epoch": 0.11317944899478778, + "grad_norm": 1.7144506327915527, + "kl": 0.17333984375, + "learning_rate": 9.99996416911925e-07, + "loss": 0.0006940675666555762, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 57, + "train_speed(iter/s)": 0.023512 }, { - "epoch": 0.15450898826325954, - "grad_norm": 9.001408898309402, - "learning_rate": 4.854791259851735e-06, - "loss": 0.28747098445892333, - "memory(GiB)": 50.11, - "step": 130, - "token_acc": 0.9010416666666666, - "train_speed(iter/s)": 0.078215 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.125, + "completions/mean_length": 85.10416984558105, + "completions/min_length": 38.25, + "epoch": 0.1151650533631174, + "grad_norm": 0.00724976652524824, + "kl": 0.1123046875, + "learning_rate": 9.999951230211125e-07, + "loss": 0.00011247491784160957, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 58, + "train_speed(iter/s)": 0.023468 }, { - "epoch": 0.1604516416580003, - "grad_norm": 9.115366763265595, - "learning_rate": 4.8378089649441355e-06, - "loss": 0.37898375988006594, - "memory(GiB)": 50.11, - "step": 135, - "token_acc": 0.8135593220338984, - "train_speed(iter/s)": 0.078346 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 64.27083539962769, + "completions/min_length": 30.625, + "epoch": 0.117150657731447, + "grad_norm": 0.009724558694402126, + "kl": 0.1688232421875, + "learning_rate": 9.99993630071562e-07, + "loss": 0.00016898219473659992, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 59, + "train_speed(iter/s)": 0.023438 }, { - "epoch": 0.16639429505274106, - "grad_norm": 6.196127668973169, - "learning_rate": 4.819920877019767e-06, - "loss": 0.3501492977142334, - "memory(GiB)": 50.11, - "step": 140, - "token_acc": 0.824468085106383, - "train_speed(iter/s)": 0.078454 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.375, + "completions/mean_length": 71.0729193687439, + "completions/min_length": 31.0, + "epoch": 0.11913626209977662, + "grad_norm": 0.008708744885255611, + "kl": 0.13507080078125, + "learning_rate": 9.999919380638675e-07, + "loss": 0.00013499600754585117, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 60, + "train_speed(iter/s)": 0.023439 }, { - "epoch": 0.1723369484474818, - "grad_norm": 6.007338381318857, - "learning_rate": 4.8011339268866505e-06, - "loss": 0.322367000579834, - "memory(GiB)": 50.11, - "step": 145, - "token_acc": 0.8932584269662921, - "train_speed(iter/s)": 0.07857 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.75, + "completions/mean_length": 85.77083396911621, + "completions/min_length": 37.25, + "epoch": 0.12112186646810623, + "grad_norm": 0.007643034017040172, + "kl": 0.0946044921875, + "learning_rate": 9.99990046998703e-07, + "loss": 9.463238529860973e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 61, + "train_speed(iter/s)": 0.0234 }, { - "epoch": 0.17827960184222255, - "grad_norm": 4.125624729653197, - "learning_rate": 4.781455393620407e-06, - "loss": 0.3071049690246582, - "memory(GiB)": 50.11, - "step": 150, - "token_acc": 0.8461538461538461, - "train_speed(iter/s)": 0.078684 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 82.5729193687439, + "completions/min_length": 35.25, + "epoch": 0.12310747083643583, + "grad_norm": 0.008057492803880658, + "kl": 0.1072998046875, + "learning_rate": 9.999879568768213e-07, + "loss": 0.00010734701936598867, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 62, + "train_speed(iter/s)": 0.023381 }, { - "epoch": 0.1842222552369633, - "grad_norm": 6.3546672576711085, - "learning_rate": 4.760892901743944e-06, - "loss": 0.28942744731903075, - "memory(GiB)": 50.11, - "step": 155, - "token_acc": 0.868020304568528, - "train_speed(iter/s)": 0.078783 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 72.43750190734863, + "completions/min_length": 33.375, + "epoch": 0.12509307520476545, + "grad_norm": 0.7000140733772771, + "kl": 0.134521484375, + "learning_rate": 9.999856676990543e-07, + "loss": 0.014437368139624596, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 63, + "train_speed(iter/s)": 0.023404 }, { - "epoch": 0.19016490863170404, - "grad_norm": 6.332293006494747, - "learning_rate": 4.739454418273314e-06, - "loss": 0.2908078908920288, - "memory(GiB)": 50.11, - "step": 160, - "token_acc": 0.8160919540229885, - "train_speed(iter/s)": 0.07888 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 83.71875190734863, + "completions/min_length": 35.875, + "epoch": 0.12707867957309507, + "grad_norm": 0.008002377526796563, + "kl": 0.10614013671875, + "learning_rate": 9.99983179466314e-07, + "loss": 0.00010616659710649401, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 64, + "train_speed(iter/s)": 0.023413 }, { - "epoch": 0.1961075620264448, - "grad_norm": 7.334991420256453, - "learning_rate": 4.717148249630859e-06, - "loss": 0.30244882106781007, - "memory(GiB)": 50.11, - "step": 165, - "token_acc": 0.9, - "train_speed(iter/s)": 0.078938 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 74.02083539962769, + "completions/min_length": 39.125, + "epoch": 0.12906428394142466, + "grad_norm": 0.008723755406961395, + "kl": 0.115203857421875, + "learning_rate": 9.9998049217959e-07, + "loss": 0.00011538701073732227, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 65, + "train_speed(iter/s)": 0.023392 }, { - "epoch": 0.20205021542118556, - "grad_norm": 5.66996373194292, - "learning_rate": 4.693983038426857e-06, - "loss": 0.26120476722717284, - "memory(GiB)": 50.11, - "step": 170, - "token_acc": 0.9022988505747126, - "train_speed(iter/s)": 0.079018 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 74.53125143051147, + "completions/min_length": 33.875, + "epoch": 0.13104988830975428, + "grad_norm": 0.00918228653621971, + "kl": 0.11761474609375, + "learning_rate": 9.99977605839953e-07, + "loss": 0.00011754984734579921, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 66, + "train_speed(iter/s)": 0.023386 }, { - "epoch": 0.20799286881592632, - "grad_norm": 5.485146565073312, - "learning_rate": 4.669967760110908e-06, - "loss": 0.3136150360107422, - "memory(GiB)": 50.11, - "step": 175, - "token_acc": 0.875, - "train_speed(iter/s)": 0.079091 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 67.22916793823242, + "completions/min_length": 34.875, + "epoch": 0.1330354926780839, + "grad_norm": 0.0097638059421632, + "kl": 0.12744140625, + "learning_rate": 9.999745204485517e-07, + "loss": 0.0001275779795832932, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 67, + "train_speed(iter/s)": 0.023388 }, { - "epoch": 0.21393552221066706, - "grad_norm": 4.790229015417923, - "learning_rate": 4.645111719494363e-06, - "loss": 0.31197218894958495, - "memory(GiB)": 50.11, - "step": 180, - "token_acc": 0.8707865168539326, - "train_speed(iter/s)": 0.079157 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 72.06250286102295, + "completions/min_length": 33.75, + "epoch": 0.1350210970464135, + "grad_norm": 0.009237344961798894, + "kl": 0.092041015625, + "learning_rate": 9.99971236006615e-07, + "loss": 9.19430167414248e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 68, + "train_speed(iter/s)": 0.023388 }, { - "epoch": 0.21987817560540782, - "grad_norm": 3.9788813029239924, - "learning_rate": 4.6194245471451395e-06, - "loss": 0.29698636531829836, - "memory(GiB)": 50.11, - "step": 185, - "token_acc": 0.8829787234042553, - "train_speed(iter/s)": 0.079217 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.625, + "completions/mean_length": 76.62500286102295, + "completions/min_length": 35.375, + "epoch": 0.1370067014147431, + "grad_norm": 0.00843833223047032, + "kl": 0.077301025390625, + "learning_rate": 9.999677525154495e-07, + "loss": 7.73863575886935e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 69, + "train_speed(iter/s)": 0.023398 }, { - "epoch": 0.22582082900014858, - "grad_norm": 5.38043307053944, - "learning_rate": 4.592916195656322e-06, - "loss": 0.3022609233856201, - "memory(GiB)": 50.11, - "step": 190, - "token_acc": 0.8870967741935484, - "train_speed(iter/s)": 0.079283 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 78.12500190734863, + "completions/min_length": 39.25, + "epoch": 0.13899230578307273, + "grad_norm": 0.014993373918113943, + "kl": 0.08154296875, + "learning_rate": 9.99964069976443e-07, + "loss": 8.15772800706327e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 70, + "train_speed(iter/s)": 0.023378 }, { - "epoch": 0.2317634823948893, - "grad_norm": 5.706060180746966, - "learning_rate": 4.565596935789987e-06, - "loss": 0.28669142723083496, - "memory(GiB)": 50.11, - "step": 195, - "token_acc": 0.8837209302325582, - "train_speed(iter/s)": 0.079339 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 77.33333587646484, + "completions/min_length": 34.5, + "epoch": 0.14097791015140235, + "grad_norm": 0.008776763393947107, + "kl": 0.0693359375, + "learning_rate": 9.999601883910613e-07, + "loss": 6.930225936230272e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 71, + "train_speed(iter/s)": 0.023341 }, { - "epoch": 0.23770613578963007, - "grad_norm": 4.399286357383267, - "learning_rate": 4.537477352497766e-06, - "loss": 0.27329254150390625, - "memory(GiB)": 50.11, - "step": 200, - "token_acc": 0.9301075268817204, - "train_speed(iter/s)": 0.079333 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.375, + "completions/mean_length": 66.85416984558105, + "completions/min_length": 30.375, + "epoch": 0.14296351451973194, + "grad_norm": 0.01027322713928672, + "kl": 0.087493896484375, + "learning_rate": 9.999561077608495e-07, + "loss": 8.75980913406238e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 72, + "train_speed(iter/s)": 0.023366 }, { - "epoch": 0.24364878918437083, - "grad_norm": 2.7396674582431135, - "learning_rate": 4.508568340819654e-06, - "loss": 0.30466504096984864, - "memory(GiB)": 50.11, - "step": 205, - "token_acc": 0.875, - "train_speed(iter/s)": 0.077513 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.125, + "completions/mean_length": 85.00000286102295, + "completions/min_length": 35.25, + "epoch": 0.14494911888806156, + "grad_norm": 0.0077692777675985635, + "kl": 0.0548095703125, + "learning_rate": 9.999518280874326e-07, + "loss": 5.491119736689143e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 73, + "train_speed(iter/s)": 0.023346 }, { - "epoch": 0.24959144257911156, - "grad_norm": 6.688114488293933, - "learning_rate": 4.478881101662694e-06, - "loss": 0.2621138095855713, - "memory(GiB)": 50.11, - "step": 210, - "token_acc": 0.8810810810810811, - "train_speed(iter/s)": 0.077595 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 75.75000286102295, + "completions/min_length": 35.5, + "epoch": 0.14693472325639118, + "grad_norm": 0.9484565619998777, + "kl": 0.1073455810546875, + "learning_rate": 9.999473493725142e-07, + "loss": 0.01214763056486845, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 74, + "train_speed(iter/s)": 0.023349 }, { - "epoch": 0.25553409597385235, - "grad_norm": 5.255888806481665, - "learning_rate": 4.44842713746114e-06, - "loss": 0.28530049324035645, - "memory(GiB)": 50.11, - "step": 215, - "token_acc": 0.9065934065934066, - "train_speed(iter/s)": 0.07769 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 69.5729193687439, + "completions/min_length": 25.125, + "epoch": 0.14892032762472077, + "grad_norm": 0.009753262896379912, + "kl": 0.0610809326171875, + "learning_rate": 9.999426716178771e-07, + "loss": 6.102630868554115e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 75, + "train_speed(iter/s)": 0.023325 }, { - "epoch": 0.26147674936859305, - "grad_norm": 3.91726058342407, - "learning_rate": 4.417218247719794e-06, - "loss": 0.31370389461517334, - "memory(GiB)": 50.11, - "step": 220, - "token_acc": 0.9130434782608695, - "train_speed(iter/s)": 0.077784 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 71.53125190734863, + "completions/min_length": 31.5, + "epoch": 0.15090593199305039, + "grad_norm": 2.0107202959631496, + "kl": 0.058013916015625, + "learning_rate": 9.999377948253843e-07, + "loss": 0.0037022705655544996, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 76, + "train_speed(iter/s)": 0.023327 }, { - "epoch": 0.2674194027633338, - "grad_norm": 5.422963885256581, - "learning_rate": 4.385266524442241e-06, - "loss": 0.2774806499481201, - "memory(GiB)": 50.11, - "step": 225, - "token_acc": 0.8681318681318682, - "train_speed(iter/s)": 0.077873 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 75.51041889190674, + "completions/min_length": 34.625, + "epoch": 0.15289153636138, + "grad_norm": 0.008656955877994034, + "kl": 0.04874420166015625, + "learning_rate": 9.999327189969767e-07, + "loss": 4.8715279262978584e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 77, + "train_speed(iter/s)": 0.023346 }, { - "epoch": 0.2733620561580746, - "grad_norm": 6.722879151062824, - "learning_rate": 4.352584347445761e-06, - "loss": 0.2985124111175537, - "memory(GiB)": 50.11, - "step": 230, - "token_acc": 0.8736263736263736, - "train_speed(iter/s)": 0.077946 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 74.12500333786011, + "completions/min_length": 34.875, + "epoch": 0.1548771407297096, + "grad_norm": 0.007265926721153918, + "kl": 0.044189453125, + "learning_rate": 9.999274441346755e-07, + "loss": 4.4221189455129206e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 78, + "train_speed(iter/s)": 0.023349 }, { - "epoch": 0.27930470955281533, - "grad_norm": 6.4224195510449285, - "learning_rate": 4.319184379564716e-06, - "loss": 0.28224008083343505, - "memory(GiB)": 50.11, - "step": 235, - "token_acc": 0.8839779005524862, - "train_speed(iter/s)": 0.077999 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.375, + "completions/mean_length": 65.87500190734863, + "completions/min_length": 29.875, + "epoch": 0.1568627450980392, + "grad_norm": 0.010017192688670995, + "kl": 0.044769287109375, + "learning_rate": 9.999219702405802e-07, + "loss": 4.470112617127597e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 79, + "train_speed(iter/s)": 0.023366 }, { - "epoch": 0.2852473629475561, - "grad_norm": 10.728017343540358, - "learning_rate": 4.285079561744292e-06, - "loss": 0.315199875831604, - "memory(GiB)": 50.11, - "step": 240, - "token_acc": 0.86, - "train_speed(iter/s)": 0.07806 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.625, + "completions/mean_length": 67.26041793823242, + "completions/min_length": 30.375, + "epoch": 0.15884834946636883, + "grad_norm": 0.006441999280046141, + "kl": 0.039794921875, + "learning_rate": 9.999162973168709e-07, + "loss": 3.9837083022575825e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 80, + "train_speed(iter/s)": 0.023393 }, { - "epoch": 0.29119001634229685, - "grad_norm": 3.5104593820561916, - "learning_rate": 4.250283108026474e-06, - "loss": 0.28578615188598633, - "memory(GiB)": 50.11, - "step": 245, - "token_acc": 0.8715083798882681, - "train_speed(iter/s)": 0.078121 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 73.85416841506958, + "completions/min_length": 28.375, + "epoch": 0.16083395383469842, + "grad_norm": 0.00781500278880752, + "kl": 0.042877197265625, + "learning_rate": 9.999104253658055e-07, + "loss": 4.2875220970017835e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 81, + "train_speed(iter/s)": 0.023389 }, { - "epoch": 0.2971326697370376, - "grad_norm": 12.24592817826425, - "learning_rate": 4.2148085004302205e-06, - "loss": 0.3042537927627563, - "memory(GiB)": 50.11, - "step": 250, - "token_acc": 0.8556149732620321, - "train_speed(iter/s)": 0.078192 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.75, + "completions/mean_length": 82.1041693687439, + "completions/min_length": 33.0, + "epoch": 0.16281955820302804, + "grad_norm": 0.005676629435187418, + "kl": 0.05523681640625, + "learning_rate": 9.99904354389722e-07, + "loss": 5.515553857549094e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 82, + "train_speed(iter/s)": 0.023334 }, { - "epoch": 0.3030753231317783, - "grad_norm": 5.395962132144766, - "learning_rate": 4.178669483727803e-06, - "loss": 0.32353897094726564, - "memory(GiB)": 50.11, - "step": 255, - "token_acc": 0.9037433155080213, - "train_speed(iter/s)": 0.078259 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.125, + "completions/mean_length": 73.83333444595337, + "completions/min_length": 37.25, + "epoch": 0.16480516257135766, + "grad_norm": 0.004333843743146698, + "kl": 0.0418243408203125, + "learning_rate": 9.998980843910374e-07, + "loss": 4.1765895730350167e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 83, + "train_speed(iter/s)": 0.023327 }, { - "epoch": 0.3090179765265191, - "grad_norm": 10.569254917118544, - "learning_rate": 4.141880060119336e-06, - "loss": 0.2870334148406982, - "memory(GiB)": 50.11, - "step": 260, - "token_acc": 0.8461538461538461, - "train_speed(iter/s)": 0.078334 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 81.47916889190674, + "completions/min_length": 34.5, + "epoch": 0.16679076693968728, + "grad_norm": 0.005410702417640253, + "kl": 0.027149200439453125, + "learning_rate": 9.998916153722476e-07, + "loss": 2.7147478249389678e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 84, + "train_speed(iter/s)": 0.023309 }, { - "epoch": 0.31496062992125984, - "grad_norm": 8.563532881846635, - "learning_rate": 4.104454483807579e-06, - "loss": 0.2994786262512207, - "memory(GiB)": 50.11, - "step": 265, - "token_acc": 0.9144385026737968, - "train_speed(iter/s)": 0.078387 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.25, + "completions/mean_length": 73.30208396911621, + "completions/min_length": 33.75, + "epoch": 0.16877637130801687, + "grad_norm": 0.002806379142786781, + "kl": 0.0284881591796875, + "learning_rate": 9.998849473359283e-07, + "loss": 2.8479480533860624e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 85, + "train_speed(iter/s)": 0.023309 }, { - "epoch": 0.3209032833160006, - "grad_norm": 6.750118253503608, - "learning_rate": 4.066407255475086e-06, - "loss": 0.2697244644165039, - "memory(GiB)": 50.11, - "step": 270, - "token_acc": 0.8956043956043956, - "train_speed(iter/s)": 0.078439 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 79.60416889190674, + "completions/min_length": 38.625, + "epoch": 0.1707619756763465, + "grad_norm": 0.002732902384908916, + "kl": 0.016204833984375, + "learning_rate": 9.998780802847344e-07, + "loss": 1.6214426068472676e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 86, + "train_speed(iter/s)": 0.023312 }, { - "epoch": 0.32684593671074136, - "grad_norm": 3.934344456524906, - "learning_rate": 4.027753116665859e-06, - "loss": 0.2338794946670532, - "memory(GiB)": 50.11, - "step": 275, - "token_acc": 0.9405405405405406, - "train_speed(iter/s)": 0.078498 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 77.81250238418579, + "completions/min_length": 38.0, + "epoch": 0.1727475800446761, + "grad_norm": 0.016243090387746868, + "kl": 0.033344268798828125, + "learning_rate": 9.998710142213994e-07, + "loss": 3.334351640660316e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 87, + "train_speed(iter/s)": 0.023321 }, { - "epoch": 0.3327885901054821, - "grad_norm": 5.970848924895414, - "learning_rate": 3.988507044073687e-06, - "loss": 0.28338768482208254, - "memory(GiB)": 50.11, - "step": 280, - "token_acc": 0.8804347826086957, - "train_speed(iter/s)": 0.078559 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.125, + "completions/mean_length": 66.70833492279053, + "completions/min_length": 31.0, + "epoch": 0.1747331844130057, + "grad_norm": 1.9790585771315492, + "kl": 0.0699310302734375, + "learning_rate": 9.998637491487367e-07, + "loss": 0.003212481737136841, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.17046868056058884, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 88, + "train_speed(iter/s)": 0.023363 }, { - "epoch": 0.3387312435002228, - "grad_norm": 3.9146270439627364, - "learning_rate": 3.948684243739366e-06, - "loss": 0.2838298797607422, - "memory(GiB)": 50.11, - "step": 285, - "token_acc": 0.8797814207650273, - "train_speed(iter/s)": 0.078615 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 73.56250238418579, + "completions/min_length": 30.125, + "epoch": 0.17671878878133532, + "grad_norm": 0.016190785316558057, + "kl": 0.0405426025390625, + "learning_rate": 9.998562850696387e-07, + "loss": 4.052785880048759e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 89, + "train_speed(iter/s)": 0.023362 }, { - "epoch": 0.3446738968949636, - "grad_norm": 9.327297524134098, - "learning_rate": 3.908300145159055e-06, - "loss": 0.2870032787322998, - "memory(GiB)": 50.11, - "step": 290, - "token_acc": 0.8707865168539326, - "train_speed(iter/s)": 0.078663 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 74.72916984558105, + "completions/min_length": 34.125, + "epoch": 0.17870439314966494, + "grad_norm": 0.08120243183969493, + "kl": 0.11138916015625, + "learning_rate": 9.998486219870767e-07, + "loss": 0.0001112430909415707, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 90, + "train_speed(iter/s)": 0.023352 }, { - "epoch": 0.35061655028970434, - "grad_norm": 6.335507218560851, - "learning_rate": 3.8673703953060685e-06, - "loss": 0.278945779800415, - "memory(GiB)": 50.11, - "step": 295, - "token_acc": 0.8994413407821229, - "train_speed(iter/s)": 0.078706 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 72.13541793823242, + "completions/min_length": 30.75, + "epoch": 0.18068999751799453, + "grad_norm": 1.848356040125397, + "kl": 0.031524658203125, + "learning_rate": 9.998407599041019e-07, + "loss": 0.009597557596862316, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 91, + "train_speed(iter/s)": 0.023357 }, { - "epoch": 0.3565592036844451, - "grad_norm": 3.6244644464198563, - "learning_rate": 3.8259108525683854e-06, - "loss": 0.2880126953125, - "memory(GiB)": 50.11, - "step": 300, - "token_acc": 0.8770949720670391, - "train_speed(iter/s)": 0.07875 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 76.03125095367432, + "completions/min_length": 32.125, + "epoch": 0.18267560188632415, + "grad_norm": 0.0054572906759311635, + "kl": 0.0155792236328125, + "learning_rate": 9.998326988238443e-07, + "loss": 1.5576528312521987e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 92, + "train_speed(iter/s)": 0.023342 }, { - "epoch": 0.36250185707918586, - "grad_norm": 9.73665571895264, - "learning_rate": 3.7839375806042672e-06, - "loss": 0.3021416187286377, - "memory(GiB)": 50.11, - "step": 305, - "token_acc": 0.8829787234042553, - "train_speed(iter/s)": 0.077565 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 74.31250238418579, + "completions/min_length": 37.125, + "epoch": 0.18466120625465376, + "grad_norm": 0.007428558860000597, + "kl": 0.03792572021484375, + "learning_rate": 9.998244387495128e-07, + "loss": 3.800111880991608e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 93, + "train_speed(iter/s)": 0.023366 }, { - "epoch": 0.3684445104739266, - "grad_norm": 4.111562074991708, - "learning_rate": 3.741466842118327e-06, - "loss": 0.24168176651000978, - "memory(GiB)": 50.11, - "step": 310, - "token_acc": 0.9135135135135135, - "train_speed(iter/s)": 0.077626 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.75, + "completions/mean_length": 79.26041889190674, + "completions/min_length": 35.125, + "epoch": 0.18664681062298338, + "grad_norm": 0.009430591551939916, + "kl": 0.0423126220703125, + "learning_rate": 9.998159796843967e-07, + "loss": 4.230852209730074e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 94, + "train_speed(iter/s)": 0.023352 }, { - "epoch": 0.3743871638686674, - "grad_norm": 5.628251208959277, - "learning_rate": 3.698515092560481e-06, - "loss": 0.28665938377380373, - "memory(GiB)": 50.11, - "step": 315, - "token_acc": 0.8978494623655914, - "train_speed(iter/s)": 0.07769 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.75, + "completions/mean_length": 84.44791984558105, + "completions/min_length": 32.25, + "epoch": 0.18863241499131297, + "grad_norm": 0.0031344675307674466, + "kl": 0.02513885498046875, + "learning_rate": 9.99807321631863e-07, + "loss": 2.5113808078458533e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 95, + "train_speed(iter/s)": 0.023319 }, { - "epoch": 0.3803298172634081, - "grad_norm": 3.2364463841026265, - "learning_rate": 3.655098973750223e-06, - "loss": 0.2696410894393921, - "memory(GiB)": 50.11, - "step": 320, - "token_acc": 0.9034090909090909, - "train_speed(iter/s)": 0.077754 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.0, + "completions/mean_length": 69.864586353302, + "completions/min_length": 32.25, + "epoch": 0.1906180193596426, + "grad_norm": 0.9794287990894858, + "kl": 0.022918701171875, + "learning_rate": 9.99798464595359e-07, + "loss": 0.007392986677587032, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 96, + "train_speed(iter/s)": 0.023334 }, { - "epoch": 0.38627247065814885, - "grad_norm": 6.612958655073718, - "learning_rate": 3.61123530742869e-06, - "loss": 0.26622281074523924, - "memory(GiB)": 50.11, - "step": 325, - "token_acc": 0.9080459770114943, - "train_speed(iter/s)": 0.077814 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 81.91666746139526, + "completions/min_length": 38.0, + "epoch": 0.1926036237279722, + "grad_norm": 0.003960916632761671, + "kl": 0.04085540771484375, + "learning_rate": 9.997894085784107e-07, + "loss": 4.087508932570927e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 97, + "train_speed(iter/s)": 0.023347 }, { - "epoch": 0.3922151240528896, - "grad_norm": 3.3735858832830448, - "learning_rate": 3.5669410887410095e-06, - "loss": 0.28887372016906737, - "memory(GiB)": 50.11, - "step": 330, - "token_acc": 0.9055555555555556, - "train_speed(iter/s)": 0.077876 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 75.95833587646484, + "completions/min_length": 36.0, + "epoch": 0.1945892280963018, + "grad_norm": 1.3083302896777618, + "kl": 0.018798828125, + "learning_rate": 9.997801535846237e-07, + "loss": 0.005059582181274891, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 98, + "train_speed(iter/s)": 0.023299 }, { - "epoch": 0.39815777744763037, - "grad_norm": 2.3741384793834066, - "learning_rate": 3.5222334796514724e-06, - "loss": 0.27968392372131345, - "memory(GiB)": 50.11, - "step": 335, - "token_acc": 0.88268156424581, - "train_speed(iter/s)": 0.077927 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.0, + "completions/mean_length": 84.53125476837158, + "completions/min_length": 42.25, + "epoch": 0.19657483246463142, + "grad_norm": 0.0030215346529137454, + "kl": 0.0220489501953125, + "learning_rate": 9.997706996176825e-07, + "loss": 2.202572431997396e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 99, + "train_speed(iter/s)": 0.02326 }, { - "epoch": 0.40410043084237113, - "grad_norm": 4.628193650632556, - "learning_rate": 3.477129802294057e-06, - "loss": 0.3233078956604004, - "memory(GiB)": 50.11, - "step": 340, - "token_acc": 0.8111111111111111, - "train_speed(iter/s)": 0.077985 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.25, + "completions/mean_length": 86.72916793823242, + "completions/min_length": 35.75, + "epoch": 0.19856043683296104, + "grad_norm": 0.008493777978797476, + "kl": 0.039031982421875, + "learning_rate": 9.997610466813509e-07, + "loss": 3.904445475200191e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 100, + "train_speed(iter/s)": 0.023264 }, { - "epoch": 0.4100430842371119, - "grad_norm": 5.86802322326311, - "learning_rate": 3.431647532260908e-06, - "loss": 0.2708756923675537, - "memory(GiB)": 50.11, - "step": 345, - "token_acc": 0.9037433155080213, - "train_speed(iter/s)": 0.078049 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 203.125, + "completions/mean_length": 86.77083587646484, + "completions/min_length": 36.75, + "epoch": 0.20054604120129063, + "grad_norm": 0.0033261305896777147, + "kl": 0.02559661865234375, + "learning_rate": 9.997511947794718e-07, + "loss": 2.5546782126184553e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 101, + "train_speed(iter/s)": 0.02324 }, { - "epoch": 0.41598573763185265, - "grad_norm": 3.7680564123531686, - "learning_rate": 3.385804291831347e-06, - "loss": 0.28063795566558836, - "memory(GiB)": 50.11, - "step": 350, - "token_acc": 0.8846153846153846, - "train_speed(iter/s)": 0.078093 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 80.07291793823242, + "completions/min_length": 33.75, + "epoch": 0.20253164556962025, + "grad_norm": 0.0050221345405609625, + "kl": 0.031707763671875, + "learning_rate": 9.99741143915968e-07, + "loss": 3.17314152198378e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 102, + "train_speed(iter/s)": 0.023258 }, { - "epoch": 0.42192839102659335, - "grad_norm": 3.5932738811910006, - "learning_rate": 3.3396178431440572e-06, - "loss": 0.25298926830291746, - "memory(GiB)": 50.11, - "step": 355, - "token_acc": 0.9010989010989011, - "train_speed(iter/s)": 0.078147 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.0, + "completions/mean_length": 69.17708539962769, + "completions/min_length": 31.25, + "epoch": 0.20451724993794987, + "grad_norm": 0.005426878797230674, + "kl": 0.018100738525390625, + "learning_rate": 9.997308940948404e-07, + "loss": 1.811725815059617e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 103, + "train_speed(iter/s)": 0.023243 }, { - "epoch": 0.4278710444213341, - "grad_norm": 8.287812705069305, - "learning_rate": 3.2931060813150685e-06, - "loss": 0.27823238372802733, - "memory(GiB)": 50.11, - "step": 360, - "token_acc": 0.8804347826086957, - "train_speed(iter/s)": 0.0782 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 72.34375286102295, + "completions/min_length": 26.0, + "epoch": 0.2065028543062795, + "grad_norm": 0.002885475700919475, + "kl": 0.02909088134765625, + "learning_rate": 9.997204453201696e-07, + "loss": 2.9084389097988605e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 104, + "train_speed(iter/s)": 0.023234 }, { - "epoch": 0.4338136978160749, - "grad_norm": 5.939778148959477, - "learning_rate": 3.246287027504237e-06, - "loss": 0.23206405639648436, - "memory(GiB)": 50.11, - "step": 365, - "token_acc": 0.90625, - "train_speed(iter/s)": 0.07825 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 77.37500238418579, + "completions/min_length": 35.0, + "epoch": 0.20848845867460908, + "grad_norm": 0.784014990022842, + "kl": 0.8624954223632812, + "learning_rate": 9.997097975961162e-07, + "loss": 0.0008623730391263962, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 105, + "train_speed(iter/s)": 0.023243 }, { - "epoch": 0.43975635121081563, - "grad_norm": 3.9285461219783153, - "learning_rate": 3.1991788219328657e-06, - "loss": 0.24766221046447753, - "memory(GiB)": 50.11, - "step": 370, - "token_acc": 0.9069767441860465, - "train_speed(iter/s)": 0.078298 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 73.20833539962769, + "completions/min_length": 34.75, + "epoch": 0.2104740630429387, + "grad_norm": 0.004153006792887623, + "kl": 0.040924072265625, + "learning_rate": 9.996989509269185e-07, + "loss": 4.089593858225271e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 106, + "train_speed(iter/s)": 0.023249 }, { - "epoch": 0.4456990046055564, - "grad_norm": 3.484393370769258, - "learning_rate": 3.151799716855215e-06, - "loss": 0.2592806339263916, - "memory(GiB)": 50.11, - "step": 375, - "token_acc": 0.8716577540106952, - "train_speed(iter/s)": 0.078342 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 82.07291889190674, + "completions/min_length": 35.125, + "epoch": 0.21245966741126832, + "grad_norm": 1.2569571450902168, + "kl": 0.0576629638671875, + "learning_rate": 9.996879053168951e-07, + "loss": -0.012867176905274391, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 107, + "train_speed(iter/s)": 0.023245 }, { - "epoch": 0.45164165800029715, - "grad_norm": 3.7696986787730387, - "learning_rate": 3.1041680694865937e-06, - "loss": 0.24771764278411865, - "memory(GiB)": 50.11, - "step": 380, - "token_acc": 0.8651685393258427, - "train_speed(iter/s)": 0.078379 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.375, + "completions/mean_length": 83.13541984558105, + "completions/min_length": 35.75, + "epoch": 0.2144452717795979, + "grad_norm": 0.0021027888710954037, + "kl": 0.0270843505859375, + "learning_rate": 9.996766607704436e-07, + "loss": 2.7072226657764986e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 108, + "train_speed(iter/s)": 0.02325 }, { - "epoch": 0.45758431139503786, - "grad_norm": 4.392389040330689, - "learning_rate": 3.056302334890786e-06, - "loss": 0.2508379936218262, - "memory(GiB)": 50.11, - "step": 385, - "token_acc": 0.9011627906976745, - "train_speed(iter/s)": 0.078418 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.0, + "completions/mean_length": 77.18750238418579, + "completions/min_length": 31.25, + "epoch": 0.21643087614792753, + "grad_norm": 0.7112216195025038, + "kl": 0.05426025390625, + "learning_rate": 9.996652172920405e-07, + "loss": -4.5799341023666784e-05, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 109, + "train_speed(iter/s)": 0.023234 }, { - "epoch": 0.4635269647897786, - "grad_norm": 5.433260978821099, - "learning_rate": 3.0082210588295673e-06, - "loss": 0.25749917030334474, - "memory(GiB)": 50.11, - "step": 390, - "token_acc": 0.9, - "train_speed(iter/s)": 0.078455 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 202.625, + "completions/mean_length": 86.58333683013916, + "completions/min_length": 33.0, + "epoch": 0.21841648051625714, + "grad_norm": 0.0018519723391530849, + "kl": 0.02147674560546875, + "learning_rate": 9.99653574886242e-07, + "loss": 2.149765714420937e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 110, + "train_speed(iter/s)": 0.023224 }, { - "epoch": 0.4694696181845194, - "grad_norm": 9.046890762181722, - "learning_rate": 2.9599428705770773e-06, - "loss": 0.24419078826904297, - "memory(GiB)": 50.11, - "step": 395, - "token_acc": 0.918918918918919, - "train_speed(iter/s)": 0.078496 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 83.78125286102295, + "completions/min_length": 33.75, + "epoch": 0.22040208488458674, + "grad_norm": 0.004536140829723313, + "kl": 0.02535247802734375, + "learning_rate": 9.99641733557683e-07, + "loss": 2.539155502745416e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 111, + "train_speed(iter/s)": 0.023221 }, { - "epoch": 0.47541227157926014, - "grad_norm": 5.91365665819666, - "learning_rate": 2.911486475701835e-06, - "loss": 0.2059952735900879, - "memory(GiB)": 50.11, - "step": 400, - "token_acc": 0.8932584269662921, - "train_speed(iter/s)": 0.078535 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.625, + "completions/mean_length": 89.65625286102295, + "completions/min_length": 37.25, + "epoch": 0.22238768925291635, + "grad_norm": 0.0018853550139549076, + "kl": 0.02152252197265625, + "learning_rate": 9.996296933110775e-07, + "loss": 2.150795262423344e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 112, + "train_speed(iter/s)": 0.023179 }, { - "epoch": 0.4813549249740009, - "grad_norm": 5.755994095586612, - "learning_rate": 2.8628706488191994e-06, - "loss": 0.24112114906311036, - "memory(GiB)": 50.11, - "step": 405, - "token_acc": 0.9028571428571428, - "train_speed(iter/s)": 0.077697 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 76.06250095367432, + "completions/min_length": 36.125, + "epoch": 0.22437329362124597, + "grad_norm": 0.004650605904984792, + "kl": 0.02893829345703125, + "learning_rate": 9.996174541512194e-07, + "loss": 2.893549026339315e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 113, + "train_speed(iter/s)": 0.023167 }, { - "epoch": 0.48729757836874166, - "grad_norm": 5.029325178235288, - "learning_rate": 2.814114226317066e-06, - "loss": 0.2753992795944214, - "memory(GiB)": 50.11, - "step": 410, - "token_acc": 0.8693181818181818, - "train_speed(iter/s)": 0.077745 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.5, + "completions/mean_length": 74.51041889190674, + "completions/min_length": 34.25, + "epoch": 0.22635889798957556, + "grad_norm": 0.004502458143847963, + "kl": 0.03432464599609375, + "learning_rate": 9.996050160829812e-07, + "loss": 3.434516111155972e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 114, + "train_speed(iter/s)": 0.023162 }, { - "epoch": 0.4932402317634824, - "grad_norm": 4.672471465609193, - "learning_rate": 2.7652360990576457e-06, - "loss": 0.25846428871154786, - "memory(GiB)": 50.11, - "step": 415, - "token_acc": 0.9080459770114943, - "train_speed(iter/s)": 0.07779 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.125, + "completions/mean_length": 85.05208587646484, + "completions/min_length": 38.25, + "epoch": 0.22834450235790518, + "grad_norm": 0.003187846579359576, + "kl": 0.05316925048828125, + "learning_rate": 9.995923791113149e-07, + "loss": 5.3155214118305594e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 115, + "train_speed(iter/s)": 0.023159 }, { - "epoch": 0.4991828851582231, - "grad_norm": 3.7971655368107315, - "learning_rate": 2.7162552050581172e-06, - "loss": 0.2129420280456543, - "memory(GiB)": 50.11, - "step": 420, - "token_acc": 0.9447513812154696, - "train_speed(iter/s)": 0.077839 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.25, + "completions/mean_length": 78.47916984558105, + "completions/min_length": 32.25, + "epoch": 0.2303301067262348, + "grad_norm": 0.005922228518706139, + "kl": 0.03337860107421875, + "learning_rate": 9.995795432412512e-07, + "loss": 3.339227623655461e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 116, + "train_speed(iter/s)": 0.023122 }, { - "epoch": 0.5051255385529639, - "grad_norm": 4.868089805000437, - "learning_rate": 2.6671905221530286e-06, - "loss": 0.2723516941070557, - "memory(GiB)": 50.11, - "step": 425, - "token_acc": 0.8895027624309392, - "train_speed(iter/s)": 0.077893 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 74.11458587646484, + "completions/min_length": 32.875, + "epoch": 0.23231571109456442, + "grad_norm": 0.006251164047718008, + "kl": 0.02973175048828125, + "learning_rate": 9.995665084779008e-07, + "loss": 2.9748269298579544e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 117, + "train_speed(iter/s)": 0.023124 }, { - "epoch": 0.5110681919477047, - "grad_norm": 5.948903426193995, - "learning_rate": 2.6180610606412587e-06, - "loss": 0.28262810707092284, - "memory(GiB)": 50.11, - "step": 430, - "token_acc": 0.8497109826589595, - "train_speed(iter/s)": 0.077943 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 85.07291793823242, + "completions/min_length": 35.875, + "epoch": 0.234301315462894, + "grad_norm": 1.3624861529703818, + "kl": 0.030242919921875, + "learning_rate": 9.995532748264528e-07, + "loss": -0.002606650348752737, + "memory(GiB)": 94.21, + "reward": 1.802083358168602, + "reward_std": 0.0765465535223484, + "rewards/CineAccuracyORM/mean": 0.8020833432674408, + "rewards/CineAccuracyORM/std": 0.26709309965372086, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 118, + "train_speed(iter/s)": 0.023096 }, { - "epoch": 0.5170108453424453, - "grad_norm": 5.296623912717331, - "learning_rate": 2.5688858559204056e-06, - "loss": 0.24843716621398926, - "memory(GiB)": 50.11, - "step": 435, - "token_acc": 0.9096045197740112, - "train_speed(iter/s)": 0.077992 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 78.11458539962769, + "completions/min_length": 29.0, + "epoch": 0.23628691983122363, + "grad_norm": 0.8400324532484467, + "kl": 0.0632781982421875, + "learning_rate": 9.995398422921758e-07, + "loss": 0.006895631551742554, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 119, + "train_speed(iter/s)": 0.023069 }, { - "epoch": 0.5229534987371861, - "grad_norm": 2.8799847075968446, - "learning_rate": 2.519683961111447e-06, - "loss": 0.2587742805480957, - "memory(GiB)": 50.11, - "step": 440, - "token_acc": 0.9010989010989011, - "train_speed(iter/s)": 0.078034 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 81.72916793823242, + "completions/min_length": 36.875, + "epoch": 0.23827252419955325, + "grad_norm": 0.004239886164230574, + "kl": 0.02639007568359375, + "learning_rate": 9.995262108804176e-07, + "loss": 2.6379761038697325e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 120, + "train_speed(iter/s)": 0.023076 }, { - "epoch": 0.5288961521319269, - "grad_norm": 4.4925409006318615, - "learning_rate": 2.470474439676539e-06, - "loss": 0.24898104667663573, - "memory(GiB)": 50.11, - "step": 445, - "token_acc": 0.8641304347826086, - "train_speed(iter/s)": 0.078071 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 199.625, + "completions/mean_length": 84.35416889190674, + "completions/min_length": 31.75, + "epoch": 0.24025812856788284, + "grad_norm": 0.001949576903914849, + "kl": 0.016693115234375, + "learning_rate": 9.995123805966055e-07, + "loss": 1.6695452359272167e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 121, + "train_speed(iter/s)": 0.023062 }, { - "epoch": 0.5348388055266676, - "grad_norm": 3.120530995149119, - "learning_rate": 2.4212763580328026e-06, - "loss": 0.27778019905090334, - "memory(GiB)": 50.11, - "step": 450, - "token_acc": 0.9180327868852459, - "train_speed(iter/s)": 0.078105 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.25, + "completions/mean_length": 69.76041793823242, + "completions/min_length": 39.125, + "epoch": 0.24224373293621246, + "grad_norm": 0.006273383546754521, + "kl": 0.02678680419921875, + "learning_rate": 9.99498351446245e-07, + "loss": 2.679082354006823e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 122, + "train_speed(iter/s)": 0.023075 }, { - "epoch": 0.5407814589214084, - "grad_norm": 8.148649805708308, - "learning_rate": 2.3721087781649677e-06, - "loss": 0.26514854431152346, - "memory(GiB)": 50.11, - "step": 455, - "token_acc": 0.8941176470588236, - "train_speed(iter/s)": 0.078143 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.875, + "completions/mean_length": 83.07291841506958, + "completions/min_length": 31.5, + "epoch": 0.24422933730454208, + "grad_norm": 0.002229431612515994, + "kl": 0.03147125244140625, + "learning_rate": 9.99484123434922e-07, + "loss": 3.1477738957619295e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 123, + "train_speed(iter/s)": 0.023072 }, { - "epoch": 0.5467241123161491, - "grad_norm": 12.288778203336234, - "learning_rate": 2.322990750239733e-06, - "loss": 0.2430635690689087, - "memory(GiB)": 50.11, - "step": 460, - "token_acc": 0.9080459770114943, - "train_speed(iter/s)": 0.078177 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 83.75000190734863, + "completions/min_length": 28.125, + "epoch": 0.24621494167287167, + "grad_norm": 0.0023781479884869595, + "kl": 0.023468017578125, + "learning_rate": 9.994696965683008e-07, + "loss": 2.3481863536289893e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 124, + "train_speed(iter/s)": 0.023043 }, { - "epoch": 0.5526667657108899, - "grad_norm": 4.975433909870037, - "learning_rate": 2.2739413052247112e-06, - "loss": 0.23356072902679442, - "memory(GiB)": 50.11, - "step": 465, - "token_acc": 0.9028571428571428, - "train_speed(iter/s)": 0.078208 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 82.833336353302, + "completions/min_length": 36.875, + "epoch": 0.2482005460412013, + "grad_norm": 0.0033789079677525925, + "kl": 0.0220489501953125, + "learning_rate": 9.994550708521249e-07, + "loss": 2.2042582713766024e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 125, + "train_speed(iter/s)": 0.023048 }, { - "epoch": 0.5586094191056307, - "grad_norm": 4.123700733275386, - "learning_rate": 2.224979447514802e-06, - "loss": 0.2492293357849121, - "memory(GiB)": 50.11, - "step": 470, - "token_acc": 0.8983050847457628, - "train_speed(iter/s)": 0.078242 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.625, + "completions/mean_length": 81.13541889190674, + "completions/min_length": 34.875, + "epoch": 0.2501861504095309, + "grad_norm": 0.01273019257020008, + "kl": 0.03211212158203125, + "learning_rate": 9.994402462922168e-07, + "loss": 3.207269764970988e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 126, + "train_speed(iter/s)": 0.023021 }, { - "epoch": 0.5645520725003714, - "grad_norm": 8.285141746199841, - "learning_rate": 2.1761241475688697e-06, - "loss": 0.23113720417022704, - "memory(GiB)": 50.11, - "step": 475, - "token_acc": 0.9135135135135135, - "train_speed(iter/s)": 0.078271 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 79.23958683013916, + "completions/min_length": 30.5, + "epoch": 0.2521717547778605, + "grad_norm": 0.003132205556229125, + "kl": 0.03112030029296875, + "learning_rate": 9.994252228944792e-07, + "loss": 3.1085153750609607e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 127, + "train_speed(iter/s)": 0.023006 }, { - "epoch": 0.5704947258951122, - "grad_norm": 3.60546896428184, - "learning_rate": 2.1273943345595637e-06, - "loss": 0.247955584526062, - "memory(GiB)": 50.11, - "step": 480, - "token_acc": 0.872093023255814, - "train_speed(iter/s)": 0.078303 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 74.01041841506958, + "completions/min_length": 26.875, + "epoch": 0.25415735914619014, + "grad_norm": 0.0052449185528267266, + "kl": 0.043609619140625, + "learning_rate": 9.994100006648929e-07, + "loss": 4.3588974222075194e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 128, + "train_speed(iter/s)": 0.022997 }, { - "epoch": 0.576437379289853, - "grad_norm": 7.008401141324143, - "learning_rate": 2.078808889039145e-06, - "loss": 0.2634110927581787, - "memory(GiB)": 50.11, - "step": 485, - "token_acc": 0.898936170212766, - "train_speed(iter/s)": 0.07833 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 79.82291889190674, + "completions/min_length": 38.5, + "epoch": 0.2561429635145197, + "grad_norm": 1.4595321664608227, + "kl": 0.0400543212890625, + "learning_rate": 9.993945796095182e-07, + "loss": -0.009771152399480343, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 129, + "train_speed(iter/s)": 0.023005 }, { - "epoch": 0.5823800326845937, - "grad_norm": 3.131382846950792, - "learning_rate": 2.030386635624135e-06, - "loss": 0.23566818237304688, - "memory(GiB)": 50.11, - "step": 490, - "token_acc": 0.9047619047619048, - "train_speed(iter/s)": 0.078358 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 81.37500286102295, + "completions/min_length": 34.625, + "epoch": 0.2581285678828493, + "grad_norm": 0.0028472569708675247, + "kl": 0.0347442626953125, + "learning_rate": 9.993789597344946e-07, + "loss": 3.474685217952356e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 130, + "train_speed(iter/s)": 0.023014 }, { - "epoch": 0.5883226860793345, - "grad_norm": 4.264889192430325, - "learning_rate": 1.9821463357016517e-06, - "loss": 0.23806171417236327, - "memory(GiB)": 50.11, - "step": 495, - "token_acc": 0.896551724137931, - "train_speed(iter/s)": 0.078392 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.625, + "completions/mean_length": 87.67708587646484, + "completions/min_length": 34.5, + "epoch": 0.26011417225117894, + "grad_norm": 1.1348476239911196, + "kl": 0.0557098388671875, + "learning_rate": 9.993631410460404e-07, + "loss": 0.013155095279216766, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 131, + "train_speed(iter/s)": 0.022986 }, { - "epoch": 0.5942653394740752, - "grad_norm": 3.2931924737171885, - "learning_rate": 1.934106680160237e-06, - "loss": 0.21435232162475587, - "memory(GiB)": 50.11, - "step": 500, - "token_acc": 0.8956043956043956, - "train_speed(iter/s)": 0.078424 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.75, + "completions/mean_length": 80.54167032241821, + "completions/min_length": 34.625, + "epoch": 0.26209977661950856, + "grad_norm": 0.002179782815721632, + "kl": 0.04027557373046875, + "learning_rate": 9.993471235504537e-07, + "loss": 4.025837915833108e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 132, + "train_speed(iter/s)": 0.022971 }, { - "epoch": 0.6002079928688159, - "grad_norm": 8.47554455420946, - "learning_rate": 1.8862862821480023e-06, - "loss": 0.23268427848815917, - "memory(GiB)": 50.11, - "step": 505, - "token_acc": 0.8950276243093923, - "train_speed(iter/s)": 0.077754 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 72.16666841506958, + "completions/min_length": 32.25, + "epoch": 0.2640853809878382, + "grad_norm": 0.005984471816592713, + "kl": 0.0272216796875, + "learning_rate": 9.993309072541115e-07, + "loss": 2.720650809351355e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 133, + "train_speed(iter/s)": 0.022957 }, { - "epoch": 0.6061506462635566, - "grad_norm": 4.327104476328467, - "learning_rate": 1.8387036698608893e-06, - "loss": 0.2465203285217285, - "memory(GiB)": 50.11, - "step": 510, - "token_acc": 0.8944444444444445, - "train_speed(iter/s)": 0.077791 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.625, + "completions/mean_length": 73.77083587646484, + "completions/min_length": 26.0, + "epoch": 0.2660709853561678, + "grad_norm": 0.007462330711181815, + "kl": 0.0359954833984375, + "learning_rate": 9.993144921634693e-07, + "loss": 3.602053766371682e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 134, + "train_speed(iter/s)": 0.022952 }, { - "epoch": 0.6120932996582974, - "grad_norm": 4.784491787188988, - "learning_rate": 1.7913772793638517e-06, - "loss": 0.22468171119689942, - "memory(GiB)": 50.11, - "step": 515, - "token_acc": 0.8900523560209425, - "train_speed(iter/s)": 0.077832 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 76.86458444595337, + "completions/min_length": 32.5, + "epoch": 0.2680565897244974, + "grad_norm": 0.010850988612707155, + "kl": 0.03931427001953125, + "learning_rate": 9.992978782850628e-07, + "loss": 3.928934529540129e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 135, + "train_speed(iter/s)": 0.022964 }, { - "epoch": 0.6180359530530382, - "grad_norm": 7.681035665569222, - "learning_rate": 1.7443254474477328e-06, - "loss": 0.25678319931030275, - "memory(GiB)": 50.11, - "step": 520, - "token_acc": 0.9044943820224719, - "train_speed(iter/s)": 0.077861 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.125, + "completions/mean_length": 79.54167079925537, + "completions/min_length": 32.125, + "epoch": 0.270042194092827, + "grad_norm": 0.003653760470591168, + "kl": 0.022106170654296875, + "learning_rate": 9.992810656255062e-07, + "loss": 2.2124680981505662e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 136, + "train_speed(iter/s)": 0.022952 }, { - "epoch": 0.6239786064477789, - "grad_norm": 4.916037468988723, - "learning_rate": 1.697566404524606e-06, - "loss": 0.23521194458007813, - "memory(GiB)": 50.11, - "step": 525, - "token_acc": 0.9209039548022598, - "train_speed(iter/s)": 0.077894 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 77.42708683013916, + "completions/min_length": 42.5, + "epoch": 0.2720277984611566, + "grad_norm": 0.005826066885775745, + "kl": 0.029163360595703125, + "learning_rate": 9.992640541914931e-07, + "loss": 2.9201857614680193e-05, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 137, + "train_speed(iter/s)": 0.022963 }, { - "epoch": 0.6299212598425197, - "grad_norm": 4.319245744348972, - "learning_rate": 1.6511182675643273e-06, - "loss": 0.2768810272216797, - "memory(GiB)": 50.11, - "step": 530, - "token_acc": 0.9010989010989011, - "train_speed(iter/s)": 0.077931 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 78.97916889190674, + "completions/min_length": 35.25, + "epoch": 0.2740134028294862, + "grad_norm": 2.440091205265988, + "kl": 0.02922821044921875, + "learning_rate": 9.992468439897958e-07, + "loss": -0.001077904598787427, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 138, + "train_speed(iter/s)": 0.022968 }, { - "epoch": 0.6358639132372604, - "grad_norm": 4.03213626434002, - "learning_rate": 1.6049990330750508e-06, - "loss": 0.24452097415924073, - "memory(GiB)": 50.11, - "step": 535, - "token_acc": 0.8924731182795699, - "train_speed(iter/s)": 0.077966 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.75, + "completions/mean_length": 93.23958587646484, + "completions/min_length": 43.375, + "epoch": 0.27599900719781584, + "grad_norm": 0.0029068940782511665, + "kl": 0.01699066162109375, + "learning_rate": 9.992294350272665e-07, + "loss": 1.6966134353424422e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 139, + "train_speed(iter/s)": 0.02297 }, { - "epoch": 0.6418065666320012, - "grad_norm": 4.953556314587903, - "learning_rate": 1.5592265701304116e-06, - "loss": 0.23774099349975586, - "memory(GiB)": 50.11, - "step": 540, - "token_acc": 0.8789473684210526, - "train_speed(iter/s)": 0.077993 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 81.77083587646484, + "completions/min_length": 34.0, + "epoch": 0.27798461156614546, + "grad_norm": 1.5684642870902674, + "kl": 0.02487945556640625, + "learning_rate": 9.992118273108356e-07, + "loss": 0.009041143581271172, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 140, + "train_speed(iter/s)": 0.022948 }, { - "epoch": 0.647749220026742, - "grad_norm": 3.899829991625053, - "learning_rate": 1.5138186134460847e-06, - "loss": 0.22564327716827393, - "memory(GiB)": 50.11, - "step": 545, - "token_acc": 0.925531914893617, - "train_speed(iter/s)": 0.07802 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.5, + "completions/mean_length": 81.79166841506958, + "completions/min_length": 35.125, + "epoch": 0.2799702159344751, + "grad_norm": 0.002408231301548354, + "kl": 0.021148681640625, + "learning_rate": 9.991940208475134e-07, + "loss": 2.1152703993720934e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 141, + "train_speed(iter/s)": 0.022936 }, { - "epoch": 0.6536918734214827, - "grad_norm": 3.0465339812240715, - "learning_rate": 1.4687927565084023e-06, - "loss": 0.22194738388061525, - "memory(GiB)": 50.11, - "step": 550, - "token_acc": 0.9021739130434783, - "train_speed(iter/s)": 0.078047 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 74.14583587646484, + "completions/min_length": 34.875, + "epoch": 0.2819558203028047, + "grad_norm": 0.001945442237554167, + "kl": 0.02030181884765625, + "learning_rate": 9.991760156443892e-07, + "loss": 2.029309507634025e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 142, + "train_speed(iter/s)": 0.022942 }, { - "epoch": 0.6596345268162235, - "grad_norm": 6.803712947943455, - "learning_rate": 1.4241664447576876e-06, - "loss": 0.23829455375671388, - "memory(GiB)": 50.11, - "step": 555, - "token_acc": 0.8791208791208791, - "train_speed(iter/s)": 0.078089 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 84.09375190734863, + "completions/min_length": 36.75, + "epoch": 0.28394142467113426, + "grad_norm": 0.0021499238761391714, + "kl": 0.0267333984375, + "learning_rate": 9.991578117086306e-07, + "loss": 2.6741268811747432e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 143, + "train_speed(iter/s)": 0.022935 }, { - "epoch": 0.6655771802109642, - "grad_norm": 5.769102045042221, - "learning_rate": 1.379956968828956e-06, - "loss": 0.24107139110565184, - "memory(GiB)": 50.11, - "step": 560, - "token_acc": 0.898876404494382, - "train_speed(iter/s)": 0.078129 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 74.41666793823242, + "completions/min_length": 33.625, + "epoch": 0.2859270290394639, + "grad_norm": 0.005360105407463123, + "kl": 0.03845977783203125, + "learning_rate": 9.991394090474855e-07, + "loss": 3.8434896850958467e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 144, + "train_speed(iter/s)": 0.022924 }, { - "epoch": 0.671519833605705, - "grad_norm": 3.871858784721118, - "learning_rate": 1.3361814578525922e-06, - "loss": 0.21992707252502441, - "memory(GiB)": 50.11, - "step": 565, - "token_acc": 0.8913043478260869, - "train_speed(iter/s)": 0.078168 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 80.79166889190674, + "completions/min_length": 37.0, + "epoch": 0.2879126334077935, + "grad_norm": 0.00730347960850382, + "kl": 0.039794921875, + "learning_rate": 9.991208076682805e-07, + "loss": 3.9786933484720066e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 145, + "train_speed(iter/s)": 0.022921 }, { - "epoch": 0.6774624870004456, - "grad_norm": 3.7175877166428197, - "learning_rate": 1.2928568728175985e-06, - "loss": 0.22011113166809082, - "memory(GiB)": 50.11, - "step": 570, - "token_acc": 0.907103825136612, - "train_speed(iter/s)": 0.078212 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 79.53125286102295, + "completions/min_length": 36.125, + "epoch": 0.2898982377761231, + "grad_norm": 0.6769544801856519, + "kl": 0.03891754150390625, + "learning_rate": 9.991020075784209e-07, + "loss": 0.00011350711429258808, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 146, + "train_speed(iter/s)": 0.022918 }, { - "epoch": 0.6834051403951864, - "grad_norm": 2.6462218742223254, - "learning_rate": 1.2500000000000007e-06, - "loss": 0.26825509071350095, - "memory(GiB)": 50.11, - "step": 575, - "token_acc": 0.8522727272727273, - "train_speed(iter/s)": 0.078245 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.875, + "completions/mean_length": 84.75000238418579, + "completions/min_length": 37.75, + "epoch": 0.29188384214445273, + "grad_norm": 0.003463262564598902, + "kl": 0.0317230224609375, + "learning_rate": 9.990830087853915e-07, + "loss": 3.175391975673847e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 147, + "train_speed(iter/s)": 0.0229 }, { - "epoch": 0.6893477937899272, - "grad_norm": 5.726663283210285, - "learning_rate": 1.2076274444589361e-06, - "loss": 0.2344191551208496, - "memory(GiB)": 50.11, - "step": 580, - "token_acc": 0.8913043478260869, - "train_speed(iter/s)": 0.078285 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.75, + "completions/mean_length": 66.58333492279053, + "completions/min_length": 31.5, + "epoch": 0.29386944651278235, + "grad_norm": 0.006143614130758875, + "kl": 0.0285797119140625, + "learning_rate": 9.99063811296756e-07, + "loss": 2.8581631340784952e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 148, + "train_speed(iter/s)": 0.022904 }, { - "epoch": 0.6952904471846679, - "grad_norm": 3.3648860171072914, - "learning_rate": 1.1657556236029665e-06, - "loss": 0.22296814918518065, - "memory(GiB)": 50.11, - "step": 585, - "token_acc": 0.9162011173184358, - "train_speed(iter/s)": 0.078321 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 86.20833587646484, + "completions/min_length": 39.625, + "epoch": 0.2958550508811119, + "grad_norm": 0.0028823911907623215, + "kl": 0.0189056396484375, + "learning_rate": 9.990444151201577e-07, + "loss": 1.8928169083665125e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 149, + "train_speed(iter/s)": 0.022882 }, { - "epoch": 0.7012331005794087, - "grad_norm": 3.3716796433764746, - "learning_rate": 1.1244007608290835e-06, - "loss": 0.2307145118713379, - "memory(GiB)": 50.11, - "step": 590, - "token_acc": 0.8944444444444445, - "train_speed(iter/s)": 0.078355 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 71.09375381469727, + "completions/min_length": 32.125, + "epoch": 0.29784065524944153, + "grad_norm": 0.006682389422446879, + "kl": 0.0361175537109375, + "learning_rate": 9.990248202633183e-07, + "loss": 3.612569344113581e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 150, + "train_speed(iter/s)": 0.022889 }, { - "epoch": 0.7071757539741494, - "grad_norm": 5.163940745117661, - "learning_rate": 1.083578879236895e-06, - "loss": 0.18101364374160767, - "memory(GiB)": 50.11, - "step": 595, - "token_acc": 0.907608695652174, - "train_speed(iter/s)": 0.078393 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 76.31250095367432, + "completions/min_length": 35.75, + "epoch": 0.29982625961777115, + "grad_norm": 0.003806402044783724, + "kl": 0.0270233154296875, + "learning_rate": 9.990050267340389e-07, + "loss": 2.702613710425794e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 151, + "train_speed(iter/s)": 0.022902 }, { - "epoch": 0.7131184073688902, - "grad_norm": 5.0920246897520975, - "learning_rate": 1.043305795420413e-06, - "loss": 0.2275557041168213, - "memory(GiB)": 50.11, - "step": 600, - "token_acc": 0.907608695652174, - "train_speed(iter/s)": 0.078425 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 69.41666889190674, + "completions/min_length": 31.0, + "epoch": 0.30181186398610077, + "grad_norm": 0.7748031523798731, + "kl": 0.0301666259765625, + "learning_rate": 9.989850345402e-07, + "loss": -0.006249020807445049, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 152, + "train_speed(iter/s)": 0.022891 }, { - "epoch": 0.719061060763631, - "grad_norm": 5.49044908007764, - "learning_rate": 1.003597113339855e-06, - "loss": 0.19089471101760863, - "memory(GiB)": 50.11, - "step": 605, - "token_acc": 0.9273743016759777, - "train_speed(iter/s)": 0.077897 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 81.18750143051147, + "completions/min_length": 38.875, + "epoch": 0.3037974683544304, + "grad_norm": 0.0020422359628149258, + "kl": 0.0211029052734375, + "learning_rate": 9.989648436897607e-07, + "loss": 2.112751462846063e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 153, + "train_speed(iter/s)": 0.022881 }, { - "epoch": 0.7250037141583717, - "grad_norm": 4.608743000415324, - "learning_rate": 9.644682182758305e-07, - "loss": 0.2407398223876953, - "memory(GiB)": 50.11, - "step": 610, - "token_acc": 0.8837209302325582, - "train_speed(iter/s)": 0.077939 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 81.15625238418579, + "completions/min_length": 34.875, + "epoch": 0.30578307272276, + "grad_norm": 1.1158877039086597, + "kl": 0.024139404296875, + "learning_rate": 9.989444541907596e-07, + "loss": 0.0025634984485805035, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 154, + "train_speed(iter/s)": 0.022875 }, { - "epoch": 0.7309463675531125, - "grad_norm": 5.486991473096443, - "learning_rate": 9.259342708682515e-07, - "loss": 0.21678531169891357, - "memory(GiB)": 50.11, - "step": 615, - "token_acc": 0.9444444444444444, - "train_speed(iter/s)": 0.077974 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.125, + "completions/mean_length": 79.55208683013916, + "completions/min_length": 31.625, + "epoch": 0.3077686770910896, + "grad_norm": 0.027379737913495964, + "kl": 0.04638671875, + "learning_rate": 9.98923866051314e-07, + "loss": 4.642191925086081e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 155, + "train_speed(iter/s)": 0.022866 }, { - "epoch": 0.7368890209478532, - "grad_norm": 6.981253763857014, - "learning_rate": 8.880102012422873e-07, - "loss": 0.2674489736557007, - "memory(GiB)": 50.11, - "step": 620, - "token_acc": 0.9186046511627907, - "train_speed(iter/s)": 0.078013 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 78.86458587646484, + "completions/min_length": 39.25, + "epoch": 0.3097542814594192, + "grad_norm": 0.004745359305371207, + "kl": 0.024932861328125, + "learning_rate": 9.989030792796205e-07, + "loss": 2.4921868316596374e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 156, + "train_speed(iter/s)": 0.022869 }, { - "epoch": 0.742831674342594, - "grad_norm": 4.912562813187418, - "learning_rate": 8.507107032236323e-07, - "loss": 0.21210527420043945, - "memory(GiB)": 50.11, - "step": 625, - "token_acc": 0.9230769230769231, - "train_speed(iter/s)": 0.078051 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 78.40625238418579, + "completions/min_length": 33.5, + "epoch": 0.3117398858277488, + "grad_norm": 0.004681803725018852, + "kl": 0.026885986328125, + "learning_rate": 9.98882093883955e-07, + "loss": 2.6866193366004154e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 157, + "train_speed(iter/s)": 0.022864 }, { - "epoch": 0.7487743277373348, - "grad_norm": 2.4710747552066668, - "learning_rate": 8.140502286453231e-07, - "loss": 0.24000158309936523, - "memory(GiB)": 50.11, - "step": 630, - "token_acc": 0.9226519337016574, - "train_speed(iter/s)": 0.078084 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.125, + "completions/mean_length": 84.48958683013916, + "completions/min_length": 33.375, + "epoch": 0.3137254901960784, + "grad_norm": 0.016892752767291387, + "kl": 0.033935546875, + "learning_rate": 9.988609098726718e-07, + "loss": 3.390968049643561e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 158, + "train_speed(iter/s)": 0.022857 }, { - "epoch": 0.7547169811320755, - "grad_norm": 5.740745699538898, - "learning_rate": 7.780429817483229e-07, - "loss": 0.23018264770507812, - "memory(GiB)": 50.11, - "step": 635, - "token_acc": 0.9152542372881356, - "train_speed(iter/s)": 0.078117 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 78.75000190734863, + "completions/min_length": 34.25, + "epoch": 0.31571109456440805, + "grad_norm": 0.007675206194843289, + "kl": 0.0335235595703125, + "learning_rate": 9.988395272542052e-07, + "loss": 3.3529042411828414e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 159, + "train_speed(iter/s)": 0.022844 }, { - "epoch": 0.7606596345268162, - "grad_norm": 3.1530564071349634, - "learning_rate": 7.427029136780333e-07, - "loss": 0.19342280626296998, - "memory(GiB)": 50.11, - "step": 640, - "token_acc": 0.9308510638297872, - "train_speed(iter/s)": 0.07815 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.375, + "completions/mean_length": 73.94791889190674, + "completions/min_length": 31.625, + "epoch": 0.31769669893273766, + "grad_norm": 0.0026379205790726377, + "kl": 0.02301025390625, + "learning_rate": 9.988179460370678e-07, + "loss": 2.3003169189905748e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 160, + "train_speed(iter/s)": 0.022817 }, { - "epoch": 0.7666022879215569, - "grad_norm": 5.113637773400957, - "learning_rate": 7.080437170788723e-07, - "loss": 0.22134556770324706, - "memory(GiB)": 50.11, - "step": 645, - "token_acc": 0.925531914893617, - "train_speed(iter/s)": 0.078183 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 82.56250286102295, + "completions/min_length": 36.625, + "epoch": 0.3196823033010673, + "grad_norm": 0.019981606658156535, + "kl": 0.05838775634765625, + "learning_rate": 9.987961662298514e-07, + "loss": 5.835080082761124e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 161, + "train_speed(iter/s)": 0.02282 }, { - "epoch": 0.7725449413162977, - "grad_norm": 6.481778521945802, - "learning_rate": 6.740788207890017e-07, - "loss": 0.21804354190826417, - "memory(GiB)": 50.11, - "step": 650, - "token_acc": 0.9050279329608939, - "train_speed(iter/s)": 0.078218 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.5, + "completions/mean_length": 69.94791984558105, + "completions/min_length": 33.625, + "epoch": 0.32166790766939685, + "grad_norm": 1.1121464524504103, + "kl": 0.0399017333984375, + "learning_rate": 9.987741878412273e-07, + "loss": 3.9871782064437866e-05, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.833333333954215, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 162, + "train_speed(iter/s)": 0.022825 }, { - "epoch": 0.7784875947110385, - "grad_norm": 8.619444299529343, - "learning_rate": 6.40821384637276e-07, - "loss": 0.24983735084533693, - "memory(GiB)": 50.11, - "step": 655, - "token_acc": 0.8736842105263158, - "train_speed(iter/s)": 0.078254 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.625, + "completions/mean_length": 82.65625286102295, + "completions/min_length": 31.75, + "epoch": 0.32365351203772647, + "grad_norm": 0.9818224490673001, + "kl": 0.0430145263671875, + "learning_rate": 9.987520108799455e-07, + "loss": -0.0016035562148317695, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 163, + "train_speed(iter/s)": 0.022819 }, { - "epoch": 0.7844302481057792, - "grad_norm": 5.377134356716376, - "learning_rate": 6.082842943444173e-07, - "loss": 0.21551246643066407, - "memory(GiB)": 50.11, - "step": 660, - "token_acc": 0.9148936170212766, - "train_speed(iter/s)": 0.078283 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 81.39583492279053, + "completions/min_length": 35.5, + "epoch": 0.3256391164060561, + "grad_norm": 0.005152185096823894, + "kl": 0.02822113037109375, + "learning_rate": 9.98729635354835e-07, + "loss": 2.8228670998942107e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 164, + "train_speed(iter/s)": 0.022809 }, { - "epoch": 0.79037290150052, - "grad_norm": 5.763600489660546, - "learning_rate": 5.764801565303918e-07, - "loss": 0.20307836532592774, - "memory(GiB)": 50.11, - "step": 665, - "token_acc": 0.8932584269662921, - "train_speed(iter/s)": 0.078309 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 75.14583492279053, + "completions/min_length": 37.125, + "epoch": 0.3276247207743857, + "grad_norm": 0.007955405284278825, + "kl": 0.0336761474609375, + "learning_rate": 9.987070612748041e-07, + "loss": 3.369571641087532e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 165, + "train_speed(iter/s)": 0.02281 }, { - "epoch": 0.7963155548952607, - "grad_norm": 4.388976996043623, - "learning_rate": 5.454212938299256e-07, - "loss": 0.23792681694030762, - "memory(GiB)": 50.11, - "step": 670, - "token_acc": 0.9044943820224719, - "train_speed(iter/s)": 0.07834 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.5, + "completions/mean_length": 84.60416793823242, + "completions/min_length": 37.5, + "epoch": 0.3296103251427153, + "grad_norm": 0.0037385246624281293, + "kl": 0.03680419921875, + "learning_rate": 9.986842886488398e-07, + "loss": 3.675417974591255e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 166, + "train_speed(iter/s)": 0.022795 }, { - "epoch": 0.8022582082900015, - "grad_norm": 5.711136883849875, - "learning_rate": 5.151197401180552e-07, - "loss": 0.2023566722869873, - "memory(GiB)": 50.11, - "step": 675, - "token_acc": 0.8814432989690721, - "train_speed(iter/s)": 0.078375 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 84.79166841506958, + "completions/min_length": 42.625, + "epoch": 0.33159592951104494, + "grad_norm": 1.0329286841533998, + "kl": 0.0334014892578125, + "learning_rate": 9.986613174860087e-07, + "loss": 0.005444470327347517, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 167, + "train_speed(iter/s)": 0.022797 }, { - "epoch": 0.8082008616847423, - "grad_norm": 5.454661743233463, - "learning_rate": 4.855872358475546e-07, - "loss": 0.2369149684906006, - "memory(GiB)": 50.11, - "step": 680, - "token_acc": 0.945054945054945, - "train_speed(iter/s)": 0.078406 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 85.83333587646484, + "completions/min_length": 36.25, + "epoch": 0.33358153387937456, + "grad_norm": 0.0030034448511075465, + "kl": 0.0290985107421875, + "learning_rate": 9.98638147795456e-07, + "loss": 2.9090309908497147e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 168, + "train_speed(iter/s)": 0.022794 }, { - "epoch": 0.814143515079483, - "grad_norm": 4.447210422496218, - "learning_rate": 4.5683522350005505e-07, - "loss": 0.21783523559570311, - "memory(GiB)": 50.11, - "step": 685, - "token_acc": 0.9197860962566845, - "train_speed(iter/s)": 0.078436 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 192.875, + "completions/mean_length": 89.98958587646484, + "completions/min_length": 36.25, + "epoch": 0.3355671382477041, + "grad_norm": 0.6460215211895096, + "kl": 0.0342559814453125, + "learning_rate": 9.98614779586406e-07, + "loss": -0.01349202822893858, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 169, + "train_speed(iter/s)": 0.022787 }, { - "epoch": 0.8200861684742238, - "grad_norm": 6.434342650205402, - "learning_rate": 4.288748431526082e-07, - "loss": 0.23307533264160157, - "memory(GiB)": 50.11, - "step": 690, - "token_acc": 0.8961748633879781, - "train_speed(iter/s)": 0.078471 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 79.25000190734863, + "completions/min_length": 36.25, + "epoch": 0.33755274261603374, + "grad_norm": 0.014864367188056237, + "kl": 0.048095703125, + "learning_rate": 9.98591212868162e-07, + "loss": 4.805076605407521e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 170, + "train_speed(iter/s)": 0.022773 }, { - "epoch": 0.8260288218689645, - "grad_norm": 4.153284895963464, - "learning_rate": 4.017169281614225e-07, - "loss": 0.22777295112609863, - "memory(GiB)": 50.11, - "step": 695, - "token_acc": 0.9244186046511628, - "train_speed(iter/s)": 0.078498 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.0, + "completions/mean_length": 81.78125143051147, + "completions/min_length": 37.25, + "epoch": 0.33953834698436336, + "grad_norm": 0.9752023643543171, + "kl": 0.041717529296875, + "learning_rate": 9.985674476501063e-07, + "loss": 0.009409889578819275, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 171, + "train_speed(iter/s)": 0.022784 }, { - "epoch": 0.8319714752637053, - "grad_norm": 5.4901230399454555, - "learning_rate": 3.753720009644371e-07, - "loss": 0.21194028854370117, - "memory(GiB)": 50.11, - "step": 700, - "token_acc": 0.9285714285714286, - "train_speed(iter/s)": 0.078524 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 85.98958778381348, + "completions/min_length": 38.875, + "epoch": 0.341523951352693, + "grad_norm": 0.0023246536830248844, + "kl": 0.0350799560546875, + "learning_rate": 9.985434839417009e-07, + "loss": 3.503591869957745e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 172, + "train_speed(iter/s)": 0.022756 }, { - "epoch": 0.837914128658446, - "grad_norm": 5.520932610215849, - "learning_rate": 3.498502690043651e-07, - "loss": 0.19888880252838134, - "memory(GiB)": 50.11, - "step": 705, - "token_acc": 0.9222222222222223, - "train_speed(iter/s)": 0.078056 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.625, + "completions/mean_length": 90.75000286102295, + "completions/min_length": 43.125, + "epoch": 0.3435095557210226, + "grad_norm": 1.1776113696584138, + "kl": 0.029327392578125, + "learning_rate": 9.985193217524856e-07, + "loss": -0.0044493707828223705, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 173, + "train_speed(iter/s)": 0.022754 }, { - "epoch": 0.8438567820531867, - "grad_norm": 5.181898234052906, - "learning_rate": 3.2516162077377956e-07, - "loss": 0.20560116767883302, - "memory(GiB)": 50.11, - "step": 710, - "token_acc": 0.9310344827586207, - "train_speed(iter/s)": 0.078083 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.5, + "completions/mean_length": 90.52083492279053, + "completions/min_length": 43.125, + "epoch": 0.3454951600893522, + "grad_norm": 0.0060009006219862385, + "kl": 0.0308990478515625, + "learning_rate": 9.984949610920804e-07, + "loss": 3.088471567025408e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 174, + "train_speed(iter/s)": 0.022732 }, { - "epoch": 0.8497994354479275, - "grad_norm": 10.349451389076972, - "learning_rate": 3.0131562198377763e-07, - "loss": 0.23746159076690673, - "memory(GiB)": 50.11, - "step": 715, - "token_acc": 0.9162303664921466, - "train_speed(iter/s)": 0.078112 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 74.0104193687439, + "completions/min_length": 32.875, + "epoch": 0.34748076445768183, + "grad_norm": 1.7715474696283, + "kl": 0.045623779296875, + "learning_rate": 9.984704019701834e-07, + "loss": 0.005407353863120079, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 175, + "train_speed(iter/s)": 0.022734 }, { - "epoch": 0.8557420888426682, - "grad_norm": 5.618481765930095, - "learning_rate": 2.7832151185771096e-07, - "loss": 0.19666438102722167, - "memory(GiB)": 50.11, - "step": 720, - "token_acc": 0.91005291005291, - "train_speed(iter/s)": 0.078144 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 85.82291889190674, + "completions/min_length": 33.375, + "epoch": 0.3494663688260114, + "grad_norm": 1.217663861653863, + "kl": 0.0449981689453125, + "learning_rate": 9.984456443965726e-07, + "loss": 0.009984655305743217, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 176, + "train_speed(iter/s)": 0.022732 }, { - "epoch": 0.861684742237409, - "grad_norm": 5.312926998681132, - "learning_rate": 2.5618819955141453e-07, - "loss": 0.23160531520843505, - "memory(GiB)": 50.11, - "step": 725, - "token_acc": 0.9388888888888889, - "train_speed(iter/s)": 0.078174 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.25, + "completions/mean_length": 75.98958587646484, + "completions/min_length": 40.0, + "epoch": 0.351451973194341, + "grad_norm": 1.7135717160363662, + "kl": 0.0357666015625, + "learning_rate": 9.98420688381104e-07, + "loss": 0.007558799814432859, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 177, + "train_speed(iter/s)": 0.022733 }, { - "epoch": 0.8676273956321497, - "grad_norm": 6.483313259346484, - "learning_rate": 2.3492426070131746e-07, - "loss": 0.22388756275177002, - "memory(GiB)": 50.11, - "step": 730, - "token_acc": 0.9441340782122905, - "train_speed(iter/s)": 0.078204 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 86.53125476837158, + "completions/min_length": 38.125, + "epoch": 0.35343757756267064, + "grad_norm": 1.062893936555404, + "kl": 0.0452880859375, + "learning_rate": 9.983955339337133e-07, + "loss": 0.006683503743261099, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 178, + "train_speed(iter/s)": 0.022722 }, { - "epoch": 0.8735700490268905, - "grad_norm": 7.509137819610738, - "learning_rate": 2.1453793410178169e-07, - "loss": 0.2553365468978882, - "memory(GiB)": 50.11, - "step": 735, - "token_acc": 0.9347826086956522, - "train_speed(iter/s)": 0.078234 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 73.00000190734863, + "completions/min_length": 30.875, + "epoch": 0.35542318193100025, + "grad_norm": 1.3567944264860505, + "kl": 0.049652099609375, + "learning_rate": 9.98370181064415e-07, + "loss": -0.0048940302804112434, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 179, + "train_speed(iter/s)": 0.022732 }, { - "epoch": 0.8795127024216313, - "grad_norm": 3.277852306532476, - "learning_rate": 1.950371185129485e-07, - "loss": 0.2170994758605957, - "memory(GiB)": 50.11, - "step": 740, - "token_acc": 0.9153439153439153, - "train_speed(iter/s)": 0.078263 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 71.33333539962769, + "completions/min_length": 32.75, + "epoch": 0.3574087862993299, + "grad_norm": 0.005652753110890744, + "kl": 0.0494537353515625, + "learning_rate": 9.983446297833029e-07, + "loss": 4.945131513522938e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 180, + "train_speed(iter/s)": 0.022732 }, { - "epoch": 0.885455355816372, - "grad_norm": 5.884609932336502, - "learning_rate": 1.764293696003358e-07, - "loss": 0.1915382981300354, - "memory(GiB)": 50.11, - "step": 745, - "token_acc": 0.9508196721311475, - "train_speed(iter/s)": 0.078289 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 77.45833587646484, + "completions/min_length": 35.25, + "epoch": 0.3593943906676595, + "grad_norm": 0.007234938322409181, + "kl": 0.0520782470703125, + "learning_rate": 9.98318880100549e-07, + "loss": 5.203445834922604e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 181, + "train_speed(iter/s)": 0.022742 }, { - "epoch": 0.8913980092111128, - "grad_norm": 4.022239658633687, - "learning_rate": 1.587218970073634e-07, - "loss": 0.22730159759521484, - "memory(GiB)": 50.11, - "step": 750, - "token_acc": 0.893048128342246, - "train_speed(iter/s)": 0.078317 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 75.55208587646484, + "completions/min_length": 34.0, + "epoch": 0.36137999503598905, + "grad_norm": 1.2782972997384654, + "kl": 0.049957275390625, + "learning_rate": 9.982929320264052e-07, + "loss": 0.007232224568724632, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 182, + "train_speed(iter/s)": 0.022739 }, { - "epoch": 0.8973406626058535, - "grad_norm": 5.092959105324688, - "learning_rate": 1.4192156156195153e-07, - "loss": 0.24943215847015382, - "memory(GiB)": 50.11, - "step": 755, - "token_acc": 0.90625, - "train_speed(iter/s)": 0.078342 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 81.03125286102295, + "completions/min_length": 35.5, + "epoch": 0.3633655994043187, + "grad_norm": 0.007560394321297047, + "kl": 0.0479888916015625, + "learning_rate": 9.982667855712021e-07, + "loss": 4.798533336725086e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 183, + "train_speed(iter/s)": 0.02272 }, { - "epoch": 0.9032833160005943, - "grad_norm": 5.108519787962356, - "learning_rate": 1.2603487261826726e-07, - "loss": 0.20730853080749512, - "memory(GiB)": 50.11, - "step": 760, - "token_acc": 0.9209039548022598, - "train_speed(iter/s)": 0.07837 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.5, + "completions/mean_length": 73.23958587646484, + "completions/min_length": 39.75, + "epoch": 0.3653512037726483, + "grad_norm": 0.0057649652997279685, + "kl": 0.0381317138671875, + "learning_rate": 9.982404407453487e-07, + "loss": 3.8141461118357256e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 184, + "train_speed(iter/s)": 0.022727 }, { - "epoch": 0.9092259693953351, - "grad_norm": 6.6659075199664635, - "learning_rate": 1.1106798553464804e-07, - "loss": 0.2543288230895996, - "memory(GiB)": 50.11, - "step": 765, - "token_acc": 0.9047619047619048, - "train_speed(iter/s)": 0.078395 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 77.84375286102295, + "completions/min_length": 31.75, + "epoch": 0.3673368081409779, + "grad_norm": 1.6356979932321425, + "kl": 0.04962158203125, + "learning_rate": 9.982138975593337e-07, + "loss": 0.0018555410206317902, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 185, + "train_speed(iter/s)": 0.022733 }, { - "epoch": 0.9151686227900757, - "grad_norm": 3.9596865112538637, - "learning_rate": 9.702669928868674e-08, - "loss": 0.20190849304199218, - "memory(GiB)": 50.11, - "step": 770, - "token_acc": 0.9020618556701031, - "train_speed(iter/s)": 0.07842 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.625, + "completions/mean_length": 81.2291693687439, + "completions/min_length": 32.875, + "epoch": 0.36932241250930753, + "grad_norm": 0.006116373265448865, + "kl": 0.0457763671875, + "learning_rate": 9.981871560237246e-07, + "loss": 4.576150968205184e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 186, + "train_speed(iter/s)": 0.022745 }, { - "epoch": 0.9211112761848165, - "grad_norm": 4.119428617367879, - "learning_rate": 8.391645423039357e-08, - "loss": 0.2413175582885742, - "memory(GiB)": 50.11, - "step": 775, - "token_acc": 0.9411764705882353, - "train_speed(iter/s)": 0.078442 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 81.37500190734863, + "completions/min_length": 36.375, + "epoch": 0.37130801687763715, + "grad_norm": 0.007859130592004748, + "kl": 0.0453643798828125, + "learning_rate": 9.981602161491675e-07, + "loss": 4.534694744506851e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 187, + "train_speed(iter/s)": 0.022744 }, { - "epoch": 0.9270539295795572, - "grad_norm": 3.657107164063937, - "learning_rate": 7.174232997431391e-08, - "loss": 0.2084414005279541, - "memory(GiB)": 50.11, - "step": 780, - "token_acc": 0.9438202247191011, - "train_speed(iter/s)": 0.078465 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.75, + "completions/mean_length": 82.83333683013916, + "completions/min_length": 38.125, + "epoch": 0.37329362124596677, + "grad_norm": 0.0029779687640615524, + "kl": 0.04638671875, + "learning_rate": 9.98133077946388e-07, + "loss": 4.642216663341969e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 188, + "train_speed(iter/s)": 0.022714 }, { - "epoch": 0.932996582974298, - "grad_norm": 4.011455719615575, - "learning_rate": 6.050904343141095e-08, - "loss": 0.20162324905395507, - "memory(GiB)": 50.11, - "step": 785, - "token_acc": 0.9293478260869565, - "train_speed(iter/s)": 0.078489 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 79.69791984558105, + "completions/min_length": 37.125, + "epoch": 0.37527922561429633, + "grad_norm": 0.0054615328069089015, + "kl": 0.03363037109375, + "learning_rate": 9.981057414261901e-07, + "loss": 3.364525764482096e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 189, + "train_speed(iter/s)": 0.02271 }, { - "epoch": 0.9389392363690388, - "grad_norm": 6.727744613539731, - "learning_rate": 5.022094698148072e-08, - "loss": 0.24507064819335939, - "memory(GiB)": 50.11, - "step": 790, - "token_acc": 0.9358288770053476, - "train_speed(iter/s)": 0.078512 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 74.46875190734863, + "completions/min_length": 34.375, + "epoch": 0.37726482998262595, + "grad_norm": 0.007080508958201603, + "kl": 0.042327880859375, + "learning_rate": 9.980782065994575e-07, + "loss": 4.234170773997903e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 190, + "train_speed(iter/s)": 0.022713 }, { - "epoch": 0.9448818897637795, - "grad_norm": 3.2723118161208093, - "learning_rate": 4.088202678680597e-08, - "loss": 0.23910284042358398, - "memory(GiB)": 50.11, - "step": 795, - "token_acc": 0.861878453038674, - "train_speed(iter/s)": 0.078537 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 74.18750286102295, + "completions/min_length": 32.0, + "epoch": 0.37925043435095557, + "grad_norm": 0.007638784127471412, + "kl": 0.0382843017578125, + "learning_rate": 9.980504734771521e-07, + "loss": 3.823750012088567e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 191, + "train_speed(iter/s)": 0.022717 }, { - "epoch": 0.9508245431585203, - "grad_norm": 2.6190372074419326, - "learning_rate": 3.249590124770191e-08, - "loss": 0.19168223142623902, - "memory(GiB)": 50.11, - "step": 800, - "token_acc": 0.9213483146067416, - "train_speed(iter/s)": 0.078553 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 74.79167032241821, + "completions/min_length": 34.625, + "epoch": 0.3812360387192852, + "grad_norm": 0.004682504778406129, + "kl": 0.042877197265625, + "learning_rate": 9.98022542070315e-07, + "loss": 4.284096939954907e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 192, + "train_speed(iter/s)": 0.02271 }, { - "epoch": 0.956767196553261, - "grad_norm": 4.68738255184625, - "learning_rate": 2.506581960055432e-08, - "loss": 0.21967520713806152, - "memory(GiB)": 50.11, - "step": 805, - "token_acc": 0.9047619047619048, - "train_speed(iter/s)": 0.078158 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 75.27083587646484, + "completions/min_length": 36.375, + "epoch": 0.3832216430876148, + "grad_norm": 0.006744413200935781, + "kl": 0.034637451171875, + "learning_rate": 9.979944123900666e-07, + "loss": 3.460952575551346e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 193, + "train_speed(iter/s)": 0.02272 }, { - "epoch": 0.9627098499480018, - "grad_norm": 5.503922565128282, - "learning_rate": 1.8594660658889095e-08, - "loss": 0.22110648155212403, - "memory(GiB)": 50.11, - "step": 810, - "token_acc": 0.9021739130434783, - "train_speed(iter/s)": 0.078184 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 71.61458444595337, + "completions/min_length": 33.375, + "epoch": 0.3852072474559444, + "grad_norm": 0.005485243666423708, + "kl": 0.0358428955078125, + "learning_rate": 9.979660844476055e-07, + "loss": 3.581827331800014e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 194, + "train_speed(iter/s)": 0.022707 }, { - "epoch": 0.9686525033427426, - "grad_norm": 4.402575819342294, - "learning_rate": 1.3084931697966152e-08, - "loss": 0.21621761322021485, - "memory(GiB)": 50.11, - "step": 815, - "token_acc": 0.9090909090909091, - "train_speed(iter/s)": 0.07821 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 75.27083587646484, + "completions/min_length": 32.125, + "epoch": 0.387192851824274, + "grad_norm": 0.0026650032870345276, + "kl": 0.0361480712890625, + "learning_rate": 9.9793755825421e-07, + "loss": 3.617025868152268e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 195, + "train_speed(iter/s)": 0.022704 }, { - "epoch": 0.9745951567374833, - "grad_norm": 4.840434765416083, - "learning_rate": 8.538767483325384e-09, - "loss": 0.2232905387878418, - "memory(GiB)": 50.11, - "step": 820, - "token_acc": 0.8882978723404256, - "train_speed(iter/s)": 0.078233 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.875, + "completions/mean_length": 71.14583683013916, + "completions/min_length": 32.25, + "epoch": 0.3891784561926036, + "grad_norm": 0.011339254737858831, + "kl": 0.04736328125, + "learning_rate": 9.979088338212367e-07, + "loss": 4.738846837426536e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 196, + "train_speed(iter/s)": 0.022715 }, { - "epoch": 0.9805378101322241, - "grad_norm": 4.635090700558647, - "learning_rate": 4.9579294436635784e-09, - "loss": 0.2567557096481323, - "memory(GiB)": 50.11, - "step": 825, - "token_acc": 0.8911917098445595, - "train_speed(iter/s)": 0.078256 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 74.39583492279053, + "completions/min_length": 34.625, + "epoch": 0.3911640605609332, + "grad_norm": 0.00682870203745647, + "kl": 0.0364227294921875, + "learning_rate": 9.978799111601215e-07, + "loss": 3.645420292741619e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 197, + "train_speed(iter/s)": 0.022709 }, { - "epoch": 0.9864804635269648, - "grad_norm": 5.44883977227373, - "learning_rate": 2.3438049883625635e-09, - "loss": 0.1815792441368103, - "memory(GiB)": 50.11, - "step": 830, - "token_acc": 0.93048128342246, - "train_speed(iter/s)": 0.07828 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 86.50000190734863, + "completions/min_length": 38.125, + "epoch": 0.39314966492926284, + "grad_norm": 0.004287982782498444, + "kl": 0.0297088623046875, + "learning_rate": 9.978507902823794e-07, + "loss": 2.9695647754124366e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 198, + "train_speed(iter/s)": 0.022721 }, { - "epoch": 0.9924231169217056, - "grad_norm": 6.50498346714347, - "learning_rate": 6.974069699314246e-10, - "loss": 0.20119824409484863, - "memory(GiB)": 50.11, - "step": 835, - "token_acc": 0.9209039548022598, - "train_speed(iter/s)": 0.078303 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 80.72916841506958, + "completions/min_length": 30.5, + "epoch": 0.39513526929759246, + "grad_norm": 0.004199276731034225, + "kl": 0.0319061279296875, + "learning_rate": 9.978214711996038e-07, + "loss": 3.188504706486128e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 199, + "train_speed(iter/s)": 0.022719 }, { - "epoch": 0.9983657703164462, - "grad_norm": 3.2168471908087506, - "learning_rate": 1.9373291574031893e-11, - "loss": 0.1927746295928955, - "memory(GiB)": 50.11, - "step": 840, - "token_acc": 0.8863636363636364, - "train_speed(iter/s)": 0.078328 + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 74.95833587646484, + "completions/min_length": 28.125, + "epoch": 0.3971208736659221, + "grad_norm": 0.006726660768530172, + "kl": 0.0353240966796875, + "learning_rate": 9.977919539234674e-07, + "loss": 3.5278513678349555e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 200, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 71.03125381469727, + "completions/min_length": 28.625, + "epoch": 0.3991064780342517, + "grad_norm": 0.002983145134616605, + "kl": 0.043304443359375, + "learning_rate": 9.977622384657214e-07, + "loss": 4.3290659959893674e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 201, + "train_speed(iter/s)": 0.022729 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.625, + "completions/mean_length": 85.18750238418579, + "completions/min_length": 37.125, + "epoch": 0.40109208240258126, + "grad_norm": 0.035229834694054274, + "kl": 0.078033447265625, + "learning_rate": 9.977323248381964e-07, + "loss": 7.797306898282841e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 202, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 76.95833492279053, + "completions/min_length": 28.25, + "epoch": 0.4030776867709109, + "grad_norm": 1.4431260101444856, + "kl": 0.0474700927734375, + "learning_rate": 9.977022130528014e-07, + "loss": 0.010393962264060974, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 203, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 73.32291889190674, + "completions/min_length": 29.375, + "epoch": 0.4050632911392405, + "grad_norm": 0.024717577152067193, + "kl": 0.03564453125, + "learning_rate": 9.97671903121525e-07, + "loss": 3.5631743230624124e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 204, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.875, + "completions/mean_length": 87.88541889190674, + "completions/min_length": 33.5, + "epoch": 0.4070488955075701, + "grad_norm": 0.002780148259801971, + "kl": 0.03948974609375, + "learning_rate": 9.976413950564337e-07, + "loss": 3.9515100070275366e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 205, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 77.92708539962769, + "completions/min_length": 33.75, + "epoch": 0.40903449987589974, + "grad_norm": 0.0022159754933545627, + "kl": 0.02740478515625, + "learning_rate": 9.976106888696735e-07, + "loss": 2.7362289984012023e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 206, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.25, + "completions/mean_length": 80.77083587646484, + "completions/min_length": 31.25, + "epoch": 0.41102010424422936, + "grad_norm": 0.004332621026768188, + "kl": 0.0364837646484375, + "learning_rate": 9.975797845734696e-07, + "loss": 3.6460402043303475e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 207, + "train_speed(iter/s)": 0.022681 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 185.0, + "completions/mean_length": 83.84375190734863, + "completions/min_length": 34.0, + "epoch": 0.413005708612559, + "grad_norm": 0.7001292843181908, + "kl": 0.0475311279296875, + "learning_rate": 9.975486821801255e-07, + "loss": 0.00872232485562563, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 208, + "train_speed(iter/s)": 0.022671 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.375, + "completions/mean_length": 70.13541984558105, + "completions/min_length": 30.875, + "epoch": 0.41499131298088854, + "grad_norm": 0.01058725693311942, + "kl": 0.0386199951171875, + "learning_rate": 9.975173817020235e-07, + "loss": 3.862637822749093e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 209, + "train_speed(iter/s)": 0.022673 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.25, + "completions/mean_length": 82.63541793823242, + "completions/min_length": 34.25, + "epoch": 0.41697691734921816, + "grad_norm": 0.03540330059763418, + "kl": 0.080810546875, + "learning_rate": 9.974858831516252e-07, + "loss": 8.076893573161215e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 210, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 77.93750238418579, + "completions/min_length": 30.625, + "epoch": 0.4189625217175478, + "grad_norm": 0.004490513100645009, + "kl": 0.0385284423828125, + "learning_rate": 9.974541865414707e-07, + "loss": 3.852363079204224e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 211, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 80.7291693687439, + "completions/min_length": 29.5, + "epoch": 0.4209481260858774, + "grad_norm": 0.006926164779839543, + "kl": 0.0422821044921875, + "learning_rate": 9.97422291884179e-07, + "loss": 4.230643389746547e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 212, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.25, + "completions/mean_length": 78.01041984558105, + "completions/min_length": 29.5, + "epoch": 0.422933730454207, + "grad_norm": 0.9432920181158546, + "kl": 0.045013427734375, + "learning_rate": 9.973901991924485e-07, + "loss": -0.006227703299373388, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 213, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.25, + "completions/mean_length": 83.18750286102295, + "completions/min_length": 35.5, + "epoch": 0.42491933482253663, + "grad_norm": 1.245023826855593, + "kl": 0.05291748046875, + "learning_rate": 9.973579084790555e-07, + "loss": -0.0023613572120666504, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 214, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.75, + "completions/mean_length": 79.98958444595337, + "completions/min_length": 35.125, + "epoch": 0.4269049391908662, + "grad_norm": 0.0041365932117952385, + "kl": 0.0484619140625, + "learning_rate": 9.973254197568559e-07, + "loss": 4.843662463827059e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 215, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 77.25000238418579, + "completions/min_length": 35.0, + "epoch": 0.4288905435591958, + "grad_norm": 0.0075376148887607965, + "kl": 0.050933837890625, + "learning_rate": 9.97292733038784e-07, + "loss": 5.091197817819193e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 216, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.0, + "completions/mean_length": 90.13541984558105, + "completions/min_length": 40.25, + "epoch": 0.43087614792752543, + "grad_norm": 1.3811214492343569, + "kl": 0.0423736572265625, + "learning_rate": 9.97259848337853e-07, + "loss": 0.009429289028048515, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 217, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.375, + "completions/mean_length": 89.16666984558105, + "completions/min_length": 35.625, + "epoch": 0.43286175229585505, + "grad_norm": 0.007093413604702633, + "kl": 0.0416107177734375, + "learning_rate": 9.972267656671555e-07, + "loss": 4.167377483099699e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 218, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.375, + "completions/mean_length": 82.25000286102295, + "completions/min_length": 37.875, + "epoch": 0.43484735666418467, + "grad_norm": 0.0032133978419041143, + "kl": 0.0321197509765625, + "learning_rate": 9.97193485039862e-07, + "loss": 3.212739829905331e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 219, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.375, + "completions/mean_length": 82.51041984558105, + "completions/min_length": 32.75, + "epoch": 0.4368329610325143, + "grad_norm": 0.006870201965163573, + "kl": 0.0323028564453125, + "learning_rate": 9.97160006469222e-07, + "loss": 3.233300958527252e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 220, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 76.56250143051147, + "completions/min_length": 33.25, + "epoch": 0.4388185654008439, + "grad_norm": 0.008288409891884372, + "kl": 0.0484466552734375, + "learning_rate": 9.971263299685647e-07, + "loss": 4.844851719099097e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 221, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.5, + "completions/mean_length": 87.30208778381348, + "completions/min_length": 28.875, + "epoch": 0.44080416976917347, + "grad_norm": 1.5929481681531068, + "kl": 0.044677734375, + "learning_rate": 9.97092455551297e-07, + "loss": -0.0032086584251374006, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 222, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 73.53125286102295, + "completions/min_length": 32.125, + "epoch": 0.4427897741375031, + "grad_norm": 1.4608313242259412, + "kl": 0.071014404296875, + "learning_rate": 9.970583832309049e-07, + "loss": 0.00710804108530283, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 223, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.5, + "completions/mean_length": 96.67708730697632, + "completions/min_length": 47.5, + "epoch": 0.4447753785058327, + "grad_norm": 1.1323641519408885, + "kl": 0.2937774658203125, + "learning_rate": 9.970241130209535e-07, + "loss": 0.006472825538367033, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 224, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 84.79166889190674, + "completions/min_length": 37.5, + "epoch": 0.4467609828741623, + "grad_norm": 0.025415104142644453, + "kl": 0.069915771484375, + "learning_rate": 9.969896449350867e-07, + "loss": 6.981095066294074e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 225, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.75, + "completions/mean_length": 86.21875381469727, + "completions/min_length": 36.125, + "epoch": 0.44874658724249195, + "grad_norm": 0.022870315593115254, + "kl": 0.0628509521484375, + "learning_rate": 9.969549789870268e-07, + "loss": 6.29330679657869e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 226, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.5, + "completions/mean_length": 97.69791889190674, + "completions/min_length": 44.125, + "epoch": 0.45073219161082156, + "grad_norm": 0.0033120817024783213, + "kl": 0.03057861328125, + "learning_rate": 9.96920115190575e-07, + "loss": 3.062791802221909e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 227, + "train_speed(iter/s)": 0.022621 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 80.2291693687439, + "completions/min_length": 37.125, + "epoch": 0.4527177959791511, + "grad_norm": 0.003413224744477318, + "kl": 0.03436279296875, + "learning_rate": 9.968850535596112e-07, + "loss": 3.437111445236951e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 228, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 207.125, + "completions/mean_length": 107.64583683013916, + "completions/min_length": 43.75, + "epoch": 0.45470340034748075, + "grad_norm": 0.01834847724387913, + "kl": 0.0690460205078125, + "learning_rate": 9.968497941080947e-07, + "loss": 6.910585943842307e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 229, + "train_speed(iter/s)": 0.022582 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 85.92708587646484, + "completions/min_length": 38.875, + "epoch": 0.45668900471581036, + "grad_norm": 0.005574792809061802, + "kl": 0.0443878173828125, + "learning_rate": 9.968143368500624e-07, + "loss": 4.4396467274054885e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 230, + "train_speed(iter/s)": 0.022577 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.25, + "completions/mean_length": 92.47916984558105, + "completions/min_length": 39.125, + "epoch": 0.45867460908414, + "grad_norm": 1.6943395973826418, + "kl": 0.04364013671875, + "learning_rate": 9.96778681799631e-07, + "loss": -0.0025493118446320295, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 231, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 204.875, + "completions/mean_length": 103.64583587646484, + "completions/min_length": 39.125, + "epoch": 0.4606602134524696, + "grad_norm": 0.004216408435550681, + "kl": 0.03826904296875, + "learning_rate": 9.967428289709954e-07, + "loss": 3.823783481493592e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 232, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.5, + "completions/mean_length": 92.43750381469727, + "completions/min_length": 35.625, + "epoch": 0.4626458178207992, + "grad_norm": 0.006381344909782896, + "kl": 0.049468994140625, + "learning_rate": 9.967067783784295e-07, + "loss": 4.9477264838060364e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 233, + "train_speed(iter/s)": 0.022538 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 79.51041984558105, + "completions/min_length": 32.5, + "epoch": 0.46463142218912884, + "grad_norm": 0.007741405754680641, + "kl": 0.0480499267578125, + "learning_rate": 9.966705300362856e-07, + "loss": 4.800094393431209e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 234, + "train_speed(iter/s)": 0.022536 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.75, + "completions/mean_length": 90.66666889190674, + "completions/min_length": 42.875, + "epoch": 0.4666170265574584, + "grad_norm": 0.007435477817668643, + "kl": 0.051727294921875, + "learning_rate": 9.966340839589952e-07, + "loss": 5.173621320864186e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 235, + "train_speed(iter/s)": 0.022535 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 194.875, + "completions/mean_length": 98.91666984558105, + "completions/min_length": 38.625, + "epoch": 0.468602630925788, + "grad_norm": 0.0067140910993177885, + "kl": 0.043060302734375, + "learning_rate": 9.965974401610681e-07, + "loss": 4.3081170588266104e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 236, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 83.93750238418579, + "completions/min_length": 35.25, + "epoch": 0.47058823529411764, + "grad_norm": 0.9093190382569424, + "kl": 0.0510711669921875, + "learning_rate": 9.96560598657093e-07, + "loss": 5.110601705382578e-05, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 237, + "train_speed(iter/s)": 0.022534 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.625, + "completions/mean_length": 85.70833587646484, + "completions/min_length": 32.25, + "epoch": 0.47257383966244726, + "grad_norm": 0.005275237660228004, + "kl": 0.046417236328125, + "learning_rate": 9.96523559461737e-07, + "loss": 4.6435285184998065e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 238, + "train_speed(iter/s)": 0.022535 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 90.32291889190674, + "completions/min_length": 38.75, + "epoch": 0.4745594440307769, + "grad_norm": 0.00663160482510452, + "kl": 0.0475921630859375, + "learning_rate": 9.96486322589747e-07, + "loss": 4.759243893204257e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 239, + "train_speed(iter/s)": 0.022531 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 77.40625286102295, + "completions/min_length": 35.875, + "epoch": 0.4765450483991065, + "grad_norm": 0.004519614019553688, + "kl": 0.043975830078125, + "learning_rate": 9.964488880559467e-07, + "loss": 4.393017297843471e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 240, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.625, + "completions/mean_length": 87.23958587646484, + "completions/min_length": 38.625, + "epoch": 0.4785306527674361, + "grad_norm": 0.006456902365410465, + "kl": 0.038970947265625, + "learning_rate": 9.964112558752404e-07, + "loss": 3.894519613822922e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 241, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.375, + "completions/mean_length": 86.46875143051147, + "completions/min_length": 34.5, + "epoch": 0.4805162571357657, + "grad_norm": 0.009139479028185519, + "kl": 0.0570831298828125, + "learning_rate": 9.963734260626102e-07, + "loss": 5.712979327654466e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 242, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 192.375, + "completions/mean_length": 94.75000286102295, + "completions/min_length": 39.875, + "epoch": 0.4825018615040953, + "grad_norm": 0.00588933968219357, + "kl": 0.040985107421875, + "learning_rate": 9.963353986331167e-07, + "loss": 4.097309647477232e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 243, + "train_speed(iter/s)": 0.022487 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.375, + "completions/mean_length": 99.65625190734863, + "completions/min_length": 47.5, + "epoch": 0.4844874658724249, + "grad_norm": 1.7037963995996528, + "kl": 0.0664520263671875, + "learning_rate": 9.962971736018994e-07, + "loss": 0.005754552781581879, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 244, + "train_speed(iter/s)": 0.022478 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.75, + "completions/mean_length": 87.07292032241821, + "completions/min_length": 40.25, + "epoch": 0.48647307024075453, + "grad_norm": 0.004045860851494451, + "kl": 0.0439453125, + "learning_rate": 9.962587509841769e-07, + "loss": 4.3991487473249435e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 245, + "train_speed(iter/s)": 0.022484 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.25, + "completions/mean_length": 87.14583587646484, + "completions/min_length": 36.125, + "epoch": 0.48845867460908415, + "grad_norm": 1.0926547240421847, + "kl": 0.0585784912109375, + "learning_rate": 9.962201307952454e-07, + "loss": 0.013563526794314384, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 246, + "train_speed(iter/s)": 0.02248 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 83.97917032241821, + "completions/min_length": 30.75, + "epoch": 0.4904442789774138, + "grad_norm": 0.007633299726407986, + "kl": 0.0464019775390625, + "learning_rate": 9.961813130504812e-07, + "loss": 4.640250699594617e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 247, + "train_speed(iter/s)": 0.022458 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 84.64583587646484, + "completions/min_length": 37.5, + "epoch": 0.49242988334574334, + "grad_norm": 0.004855671388923836, + "kl": 0.031890869140625, + "learning_rate": 9.961422977653378e-07, + "loss": 3.1894323910819367e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 248, + "train_speed(iter/s)": 0.02246 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.625, + "completions/mean_length": 83.68750190734863, + "completions/min_length": 36.0, + "epoch": 0.49441548771407295, + "grad_norm": 0.006811685669870012, + "kl": 0.0357666015625, + "learning_rate": 9.961030849553484e-07, + "loss": 3.576920062187128e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 249, + "train_speed(iter/s)": 0.022453 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.5, + "completions/mean_length": 93.46875190734863, + "completions/min_length": 32.625, + "epoch": 0.4964010920824026, + "grad_norm": 0.0036080834204791417, + "kl": 0.033050537109375, + "learning_rate": 9.960636746361243e-07, + "loss": 3.304368146928027e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 250, + "train_speed(iter/s)": 0.022458 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 84.34375333786011, + "completions/min_length": 33.0, + "epoch": 0.4983866964507322, + "grad_norm": 1.1380731620526459, + "kl": 0.0817108154296875, + "learning_rate": 9.96024066823356e-07, + "loss": -0.004473407752811909, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 251, + "train_speed(iter/s)": 0.022464 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 78.51041889190674, + "completions/min_length": 32.375, + "epoch": 0.5003723008190618, + "grad_norm": 1.5622229972707942, + "kl": 0.064605712890625, + "learning_rate": 9.959842615328115e-07, + "loss": -0.008338342420756817, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.8333333432674408, + "rewards/CineAccuracyORM/std": 0.19401127845048904, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 252, + "train_speed(iter/s)": 0.022464 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.875, + "completions/mean_length": 93.94791841506958, + "completions/min_length": 37.0, + "epoch": 0.5023579051873914, + "grad_norm": 0.006438376083683289, + "kl": 0.040191650390625, + "learning_rate": 9.959442587803385e-07, + "loss": 4.020285268779844e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 253, + "train_speed(iter/s)": 0.022464 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 79.15625095367432, + "completions/min_length": 36.875, + "epoch": 0.504343509555721, + "grad_norm": 0.0051954476787273145, + "kl": 0.037506103515625, + "learning_rate": 9.959040585818633e-07, + "loss": 3.753899363800883e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 254, + "train_speed(iter/s)": 0.022464 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 187.0, + "completions/mean_length": 100.38541889190674, + "completions/min_length": 39.375, + "epoch": 0.5063291139240507, + "grad_norm": 1.545961572006339, + "kl": 0.0454254150390625, + "learning_rate": 9.958636609533898e-07, + "loss": 0.00044527961290441453, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 255, + "train_speed(iter/s)": 0.022445 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.125, + "completions/mean_length": 82.21875286102295, + "completions/min_length": 40.5, + "epoch": 0.5083147182923803, + "grad_norm": 0.03625501034741339, + "kl": 0.0703582763671875, + "learning_rate": 9.958230659110015e-07, + "loss": 7.038464536890388e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 256, + "train_speed(iter/s)": 0.022447 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 66.52083444595337, + "completions/min_length": 28.875, + "epoch": 0.5103003226607099, + "grad_norm": 0.0043879151172943115, + "kl": 0.0428009033203125, + "learning_rate": 9.957822734708601e-07, + "loss": 4.278571213944815e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 257, + "train_speed(iter/s)": 0.022464 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 80.46875190734863, + "completions/min_length": 34.75, + "epoch": 0.5122859270290394, + "grad_norm": 0.020347219478938732, + "kl": 0.064788818359375, + "learning_rate": 9.95741283649206e-07, + "loss": 6.475487316492945e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 258, + "train_speed(iter/s)": 0.022458 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 90.50000238418579, + "completions/min_length": 30.5, + "epoch": 0.514271531397369, + "grad_norm": 0.011466843767705974, + "kl": 0.0531005859375, + "learning_rate": 9.957000964623582e-07, + "loss": 5.304034857545048e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 259, + "train_speed(iter/s)": 0.022444 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.25, + "completions/mean_length": 81.04166889190674, + "completions/min_length": 33.125, + "epoch": 0.5162571357656986, + "grad_norm": 0.00401580149659702, + "kl": 0.0311279296875, + "learning_rate": 9.95658711926714e-07, + "loss": 3.109029057668522e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 260, + "train_speed(iter/s)": 0.022447 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.5, + "completions/mean_length": 87.50000095367432, + "completions/min_length": 39.25, + "epoch": 0.5182427401340283, + "grad_norm": 0.7951947229030756, + "kl": 0.0403289794921875, + "learning_rate": 9.956171300587497e-07, + "loss": 0.0014353692531585693, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 261, + "train_speed(iter/s)": 0.022442 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.125, + "completions/mean_length": 89.78125095367432, + "completions/min_length": 28.75, + "epoch": 0.5202283445023579, + "grad_norm": 0.08799506980790449, + "kl": 0.0831756591796875, + "learning_rate": 9.955753508750195e-07, + "loss": 8.309617987833917e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 262, + "train_speed(iter/s)": 0.022438 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 78.44791889190674, + "completions/min_length": 29.75, + "epoch": 0.5222139488706875, + "grad_norm": 0.9698159168886337, + "kl": 0.047393798828125, + "learning_rate": 9.955333743921572e-07, + "loss": 0.0029926998540759087, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.895833333954215, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 263, + "train_speed(iter/s)": 0.022436 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.375, + "completions/mean_length": 87.67708492279053, + "completions/min_length": 41.125, + "epoch": 0.5241995532390171, + "grad_norm": 0.005996355455010234, + "kl": 0.0398712158203125, + "learning_rate": 9.954912006268741e-07, + "loss": 3.985785224358551e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 264, + "train_speed(iter/s)": 0.022425 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 222.625, + "completions/mean_length": 96.97916793823242, + "completions/min_length": 37.375, + "epoch": 0.5261851576073467, + "grad_norm": 0.00471993857290991, + "kl": 0.04254150390625, + "learning_rate": 9.954488295959603e-07, + "loss": 4.256993270246312e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 265, + "train_speed(iter/s)": 0.02241 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.75, + "completions/mean_length": 97.50000190734863, + "completions/min_length": 45.125, + "epoch": 0.5281707619756764, + "grad_norm": 0.5242150185143424, + "kl": 0.040802001953125, + "learning_rate": 9.954062613162853e-07, + "loss": 0.0003016740083694458, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 266, + "train_speed(iter/s)": 0.0224 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 210.375, + "completions/mean_length": 85.45833683013916, + "completions/min_length": 32.25, + "epoch": 0.530156366344006, + "grad_norm": 1.2094964227030165, + "kl": 0.0511932373046875, + "learning_rate": 9.95363495804796e-07, + "loss": -0.0017611955991014838, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 267, + "train_speed(iter/s)": 0.022394 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.625, + "completions/mean_length": 88.79166889190674, + "completions/min_length": 28.125, + "epoch": 0.5321419707123356, + "grad_norm": 0.005372675718279741, + "kl": 0.0513763427734375, + "learning_rate": 9.953205330785181e-07, + "loss": 5.144669194123708e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 268, + "train_speed(iter/s)": 0.022395 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 198.5, + "completions/mean_length": 100.26041889190674, + "completions/min_length": 42.75, + "epoch": 0.5341275750806652, + "grad_norm": 1.217340586434367, + "kl": 0.046112060546875, + "learning_rate": 9.952773731545562e-07, + "loss": -0.006409394554793835, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.05103103816509247, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 269, + "train_speed(iter/s)": 0.022389 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 196.5, + "completions/mean_length": 92.29166984558105, + "completions/min_length": 37.875, + "epoch": 0.5361131794489948, + "grad_norm": 0.0069470145760686, + "kl": 0.046142578125, + "learning_rate": 9.95234016050093e-07, + "loss": 4.617391823558137e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 270, + "train_speed(iter/s)": 0.022381 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.625, + "completions/mean_length": 85.10416889190674, + "completions/min_length": 33.75, + "epoch": 0.5380987838173243, + "grad_norm": 0.005386574412981814, + "kl": 0.039306640625, + "learning_rate": 9.951904617823906e-07, + "loss": 3.9301499782595783e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 271, + "train_speed(iter/s)": 0.022376 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 79.56250238418579, + "completions/min_length": 34.75, + "epoch": 0.540084388185654, + "grad_norm": 1.3403204334209802, + "kl": 0.061492919921875, + "learning_rate": 9.951467103687878e-07, + "loss": 0.00987269263714552, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 272, + "train_speed(iter/s)": 0.022379 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.0, + "completions/mean_length": 91.26041984558105, + "completions/min_length": 40.125, + "epoch": 0.5420699925539836, + "grad_norm": 0.00409245928499806, + "kl": 0.03070068359375, + "learning_rate": 9.95102761826704e-07, + "loss": 3.071514947805554e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 273, + "train_speed(iter/s)": 0.022384 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.875, + "completions/mean_length": 86.89583539962769, + "completions/min_length": 35.125, + "epoch": 0.5440555969223132, + "grad_norm": 0.0071617563473429634, + "kl": 0.045074462890625, + "learning_rate": 9.950586161736352e-07, + "loss": 4.5137589040677994e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 274, + "train_speed(iter/s)": 0.022379 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 78.19791984558105, + "completions/min_length": 33.875, + "epoch": 0.5460412012906428, + "grad_norm": 0.007087138473852444, + "kl": 0.0514373779296875, + "learning_rate": 9.950142734271572e-07, + "loss": 5.141062865732238e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 275, + "train_speed(iter/s)": 0.022388 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.625, + "completions/mean_length": 87.02083587646484, + "completions/min_length": 33.5, + "epoch": 0.5480268056589724, + "grad_norm": 0.005770063883750235, + "kl": 0.04537200927734375, + "learning_rate": 9.949697336049236e-07, + "loss": 4.5340821088757366e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 276, + "train_speed(iter/s)": 0.022391 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.0, + "completions/mean_length": 83.56250143051147, + "completions/min_length": 38.875, + "epoch": 0.5500124100273021, + "grad_norm": 0.005353004081434314, + "kl": 0.03570556640625, + "learning_rate": 9.949249967246668e-07, + "loss": 3.572989226086065e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 277, + "train_speed(iter/s)": 0.022384 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.5, + "completions/mean_length": 89.14583587646484, + "completions/min_length": 34.25, + "epoch": 0.5519980143956317, + "grad_norm": 0.004317811649377504, + "kl": 0.038909912109375, + "learning_rate": 9.948800628041975e-07, + "loss": 3.8890226278454065e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 278, + "train_speed(iter/s)": 0.022378 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 87.21875333786011, + "completions/min_length": 44.75, + "epoch": 0.5539836187639613, + "grad_norm": 1.1137985471826737, + "kl": 0.03668212890625, + "learning_rate": 9.948349318614047e-07, + "loss": -0.008970300666987896, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 279, + "train_speed(iter/s)": 0.022367 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 82.96875286102295, + "completions/min_length": 34.125, + "epoch": 0.5559692231322909, + "grad_norm": 0.007099269441260585, + "kl": 0.0446624755859375, + "learning_rate": 9.947896039142563e-07, + "loss": 4.4696083932649344e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 280, + "train_speed(iter/s)": 0.022354 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 76.63541793823242, + "completions/min_length": 30.875, + "epoch": 0.5579548275006205, + "grad_norm": 0.007768146728698171, + "kl": 0.0522308349609375, + "learning_rate": 9.947440789807979e-07, + "loss": 5.225494896876626e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 281, + "train_speed(iter/s)": 0.022359 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 73.66667032241821, + "completions/min_length": 29.0, + "epoch": 0.5599404318689502, + "grad_norm": 0.008280904620608561, + "kl": 0.044586181640625, + "learning_rate": 9.946983570791542e-07, + "loss": 4.455495945876464e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 282, + "train_speed(iter/s)": 0.022368 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 200.625, + "completions/mean_length": 89.302086353302, + "completions/min_length": 34.625, + "epoch": 0.5619260362372798, + "grad_norm": 0.022693547898120697, + "kl": 0.0592498779296875, + "learning_rate": 9.946524382275281e-07, + "loss": 5.9227146266493946e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 283, + "train_speed(iter/s)": 0.022361 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 70.1354193687439, + "completions/min_length": 26.75, + "epoch": 0.5639116406056094, + "grad_norm": 0.03910581010816483, + "kl": 0.0723419189453125, + "learning_rate": 9.94606322444201e-07, + "loss": 7.234037184389308e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 284, + "train_speed(iter/s)": 0.022362 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.0, + "completions/mean_length": 73.25000190734863, + "completions/min_length": 34.75, + "epoch": 0.5658972449739389, + "grad_norm": 0.0072473280602554, + "kl": 0.043609619140625, + "learning_rate": 9.94560009747532e-07, + "loss": 4.363508924143389e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 285, + "train_speed(iter/s)": 0.022355 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 78.62500286102295, + "completions/min_length": 36.75, + "epoch": 0.5678828493422685, + "grad_norm": 0.00513229061344777, + "kl": 0.052734375, + "learning_rate": 9.9451350015596e-07, + "loss": 5.2742478146683425e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 286, + "train_speed(iter/s)": 0.02234 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 72.06250190734863, + "completions/min_length": 34.75, + "epoch": 0.5698684537105981, + "grad_norm": 0.007204794620578479, + "kl": 0.051910400390625, + "learning_rate": 9.944667936880007e-07, + "loss": 5.192386743146926e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 287, + "train_speed(iter/s)": 0.022347 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.625, + "completions/mean_length": 78.52083539962769, + "completions/min_length": 30.125, + "epoch": 0.5718540580789278, + "grad_norm": 0.0080653686717861, + "kl": 0.056793212890625, + "learning_rate": 9.944198903622492e-07, + "loss": 5.678350498783402e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 288, + "train_speed(iter/s)": 0.022338 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 82.47917079925537, + "completions/min_length": 34.5, + "epoch": 0.5738396624472574, + "grad_norm": 0.004289253503086958, + "kl": 0.0355377197265625, + "learning_rate": 9.943727901973792e-07, + "loss": 3.553136775735766e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 289, + "train_speed(iter/s)": 0.022345 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.020833333333333332, + "completions/max_length": 263.375, + "completions/mean_length": 102.79166889190674, + "completions/min_length": 31.25, + "epoch": 0.575825266815587, + "grad_norm": 0.5694186165643396, + "kl": 0.051727294921875, + "learning_rate": 9.943254932121415e-07, + "loss": 5.182623863220215e-05, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 0.9791666716337204, + "rewards/Format/std": 0.04865618050098419, + "step": 290, + "train_speed(iter/s)": 0.022321 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.625, + "completions/mean_length": 82.02083683013916, + "completions/min_length": 34.75, + "epoch": 0.5778108711839166, + "grad_norm": 0.004835814356733436, + "kl": 0.05499267578125, + "learning_rate": 9.942779994253665e-07, + "loss": 5.494131983141415e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 291, + "train_speed(iter/s)": 0.022317 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.5, + "completions/mean_length": 80.95833444595337, + "completions/min_length": 32.75, + "epoch": 0.5797964755522462, + "grad_norm": 0.00600677045636444, + "kl": 0.06103515625, + "learning_rate": 9.942303088559624e-07, + "loss": 6.105084321461618e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 292, + "train_speed(iter/s)": 0.02231 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 82.58333587646484, + "completions/min_length": 39.75, + "epoch": 0.5817820799205758, + "grad_norm": 0.005354442662333966, + "kl": 0.038970947265625, + "learning_rate": 9.941824215229158e-07, + "loss": 3.8933507312322035e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 293, + "train_speed(iter/s)": 0.022321 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 77.895836353302, + "completions/min_length": 35.75, + "epoch": 0.5837676842889055, + "grad_norm": 0.0034861829787340166, + "kl": 0.051513671875, + "learning_rate": 9.941343374452917e-07, + "loss": 5.145318209542893e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 294, + "train_speed(iter/s)": 0.022321 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 70.67708587646484, + "completions/min_length": 31.5, + "epoch": 0.5857532886572351, + "grad_norm": 0.0075820542365631315, + "kl": 0.0500640869140625, + "learning_rate": 9.940860566422333e-07, + "loss": 5.0081373046850786e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 295, + "train_speed(iter/s)": 0.022325 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.25, + "completions/mean_length": 84.75000190734863, + "completions/min_length": 41.5, + "epoch": 0.5877388930255647, + "grad_norm": 0.006990368984765174, + "kl": 0.05078125, + "learning_rate": 9.940375791329626e-07, + "loss": 5.07462173118256e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 296, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.5, + "completions/mean_length": 80.16666889190674, + "completions/min_length": 34.375, + "epoch": 0.5897244973938943, + "grad_norm": 0.007612508013274423, + "kl": 0.0609130859375, + "learning_rate": 9.93988904936779e-07, + "loss": 6.082579056965187e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 297, + "train_speed(iter/s)": 0.022308 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 70.20833492279053, + "completions/min_length": 33.25, + "epoch": 0.5917101017622238, + "grad_norm": 3.3256630180378113, + "kl": 0.053009033203125, + "learning_rate": 9.939400340730611e-07, + "loss": 5.300156772136688e-05, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.39076167345046997, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 298, + "train_speed(iter/s)": 0.022309 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 80.70833587646484, + "completions/min_length": 33.25, + "epoch": 0.5936957061305534, + "grad_norm": 0.5084239726350965, + "kl": 0.063568115234375, + "learning_rate": 9.938909665612654e-07, + "loss": 0.00017539411783218384, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 299, + "train_speed(iter/s)": 0.022309 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.75, + "completions/mean_length": 67.59375190734863, + "completions/min_length": 30.375, + "epoch": 0.5956813104988831, + "grad_norm": 0.007156312969301177, + "kl": 0.048553466796875, + "learning_rate": 9.938417024209264e-07, + "loss": 4.8531859647482634e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 300, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 70.8854193687439, + "completions/min_length": 33.125, + "epoch": 0.5976669148672127, + "grad_norm": 1.6348234739854737, + "kl": 0.059356689453125, + "learning_rate": 9.937922416716576e-07, + "loss": -0.0008529163897037506, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 301, + "train_speed(iter/s)": 0.022308 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.375, + "completions/mean_length": 66.22916746139526, + "completions/min_length": 32.875, + "epoch": 0.5996525192355423, + "grad_norm": 0.004159999935517174, + "kl": 0.071044921875, + "learning_rate": 9.937425843331503e-07, + "loss": 7.094735337886959e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 302, + "train_speed(iter/s)": 0.022309 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 67.69791793823242, + "completions/min_length": 31.125, + "epoch": 0.6016381236038719, + "grad_norm": 0.007738811762971821, + "kl": 0.0450286865234375, + "learning_rate": 9.93692730425174e-07, + "loss": 4.5024469727650285e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 303, + "train_speed(iter/s)": 0.022315 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 87.93750286102295, + "completions/min_length": 35.0, + "epoch": 0.6036237279722015, + "grad_norm": 0.007015388847999951, + "kl": 0.047637939453125, + "learning_rate": 9.936426799675768e-07, + "loss": 4.764476034324616e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 304, + "train_speed(iter/s)": 0.022304 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.875, + "completions/mean_length": 80.97916841506958, + "completions/min_length": 31.75, + "epoch": 0.6056093323405312, + "grad_norm": 0.82371722631021, + "kl": 0.044219970703125, + "learning_rate": 9.935924329802845e-07, + "loss": 0.006322646047919989, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 305, + "train_speed(iter/s)": 0.022298 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.5, + "completions/mean_length": 73.82291889190674, + "completions/min_length": 35.625, + "epoch": 0.6075949367088608, + "grad_norm": 0.005484129378873674, + "kl": 0.047882080078125, + "learning_rate": 9.93541989483302e-07, + "loss": 4.78505899081938e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 306, + "train_speed(iter/s)": 0.022302 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 78.56250286102295, + "completions/min_length": 34.5, + "epoch": 0.6095805410771904, + "grad_norm": 0.0074837079932473325, + "kl": 0.06488037109375, + "learning_rate": 9.934913494967115e-07, + "loss": 6.480350566562265e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 307, + "train_speed(iter/s)": 0.022308 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 73.63541841506958, + "completions/min_length": 36.25, + "epoch": 0.61156614544552, + "grad_norm": 0.005640734271409294, + "kl": 0.0375518798828125, + "learning_rate": 9.93440513040674e-07, + "loss": 3.755502984859049e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 308, + "train_speed(iter/s)": 0.022309 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 68.36458539962769, + "completions/min_length": 32.375, + "epoch": 0.6135517498138496, + "grad_norm": 0.008726351361175394, + "kl": 0.0644073486328125, + "learning_rate": 9.933894801354288e-07, + "loss": 6.441806181101128e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 309, + "train_speed(iter/s)": 0.022304 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 79.23958587646484, + "completions/min_length": 33.75, + "epoch": 0.6155373541821793, + "grad_norm": 0.005139653467304836, + "kl": 0.0477142333984375, + "learning_rate": 9.933382508012929e-07, + "loss": 4.774502303916961e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 310, + "train_speed(iter/s)": 0.022311 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 72.31250238418579, + "completions/min_length": 34.5, + "epoch": 0.6175229585505088, + "grad_norm": 0.007771748144733654, + "kl": 0.065460205078125, + "learning_rate": 9.932868250586617e-07, + "loss": 6.555522850248963e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 311, + "train_speed(iter/s)": 0.022318 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.0, + "completions/mean_length": 83.28125238418579, + "completions/min_length": 30.375, + "epoch": 0.6195085629188384, + "grad_norm": 1.1986690163625677, + "kl": 0.0679168701171875, + "learning_rate": 9.932352029280094e-07, + "loss": -0.00581248989328742, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.05103103630244732, + "rewards/CineAccuracyORM/mean": 0.8958333432674408, + "rewards/CineAccuracyORM/std": 0.12873215973377228, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 312, + "train_speed(iter/s)": 0.022311 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 76.63541984558105, + "completions/min_length": 32.875, + "epoch": 0.621494167287168, + "grad_norm": 1.219891757000396, + "kl": 0.16864013671875, + "learning_rate": 9.931833844298874e-07, + "loss": 0.009329535998404026, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 313, + "train_speed(iter/s)": 0.022307 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 78.72916984558105, + "completions/min_length": 35.75, + "epoch": 0.6234797716554976, + "grad_norm": 0.0049773928757574975, + "kl": 0.0591888427734375, + "learning_rate": 9.931313695849258e-07, + "loss": 5.92159922234714e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 314, + "train_speed(iter/s)": 0.022312 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 73.833336353302, + "completions/min_length": 31.0, + "epoch": 0.6254653760238272, + "grad_norm": 0.021152166395529724, + "kl": 0.0635833740234375, + "learning_rate": 9.930791584138333e-07, + "loss": 6.348952592816204e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 315, + "train_speed(iter/s)": 0.022306 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.5, + "completions/mean_length": 73.62500143051147, + "completions/min_length": 33.75, + "epoch": 0.6274509803921569, + "grad_norm": 0.004783430890467514, + "kl": 0.05029296875, + "learning_rate": 9.930267509373956e-07, + "loss": 5.024982237955555e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 316, + "train_speed(iter/s)": 0.022302 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 72.82291841506958, + "completions/min_length": 33.125, + "epoch": 0.6294365847604865, + "grad_norm": 0.993476054674328, + "kl": 0.04193115234375, + "learning_rate": 9.929741471764776e-07, + "loss": -0.0001899873313959688, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 317, + "train_speed(iter/s)": 0.022298 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.625, + "completions/mean_length": 81.79166984558105, + "completions/min_length": 40.0, + "epoch": 0.6314221891288161, + "grad_norm": 1.4042089329030751, + "kl": 0.035736083984375, + "learning_rate": 9.92921347152022e-07, + "loss": -0.014559095725417137, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 318, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 71.739586353302, + "completions/min_length": 30.75, + "epoch": 0.6334077934971457, + "grad_norm": 1.5579331441674198, + "kl": 0.051544189453125, + "learning_rate": 9.928683508850495e-07, + "loss": -0.015120545402169228, + "memory(GiB)": 94.21, + "reward": 1.7604166865348816, + "reward_std": 0.0900652389973402, + "rewards/CineAccuracyORM/mean": 0.7604166753590107, + "rewards/CineAccuracyORM/std": 0.2768445573747158, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 319, + "train_speed(iter/s)": 0.022298 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.5, + "completions/mean_length": 74.13541793823242, + "completions/min_length": 30.125, + "epoch": 0.6353933978654753, + "grad_norm": 0.004824523037702253, + "kl": 0.0405731201171875, + "learning_rate": 9.928151583966592e-07, + "loss": 4.057247861055657e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 320, + "train_speed(iter/s)": 0.022294 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 76.145836353302, + "completions/min_length": 38.125, + "epoch": 0.637379002233805, + "grad_norm": 0.02241491387971438, + "kl": 0.0762939453125, + "learning_rate": 9.927617697080278e-07, + "loss": 7.626811566296965e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 321, + "train_speed(iter/s)": 0.022292 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 68.46875190734863, + "completions/min_length": 31.875, + "epoch": 0.6393646066021346, + "grad_norm": 0.0034482191002354365, + "kl": 0.04315185546875, + "learning_rate": 9.92708184840411e-07, + "loss": 4.309406358515844e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 322, + "train_speed(iter/s)": 0.022292 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.125, + "completions/mean_length": 79.66666984558105, + "completions/min_length": 33.75, + "epoch": 0.6413502109704642, + "grad_norm": 0.014205677935915088, + "kl": 0.04925537109375, + "learning_rate": 9.926544038151414e-07, + "loss": 4.916825128020719e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 323, + "train_speed(iter/s)": 0.02229 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.5, + "completions/mean_length": 68.30208587646484, + "completions/min_length": 26.75, + "epoch": 0.6433358153387937, + "grad_norm": 5.454400755145376, + "kl": 1.1224365234375, + "learning_rate": 9.926004266536313e-07, + "loss": -0.0042072865180671215, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 324, + "train_speed(iter/s)": 0.022285 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 77.56250190734863, + "completions/min_length": 34.875, + "epoch": 0.6453214197071233, + "grad_norm": 0.03913550833558443, + "kl": 0.0589141845703125, + "learning_rate": 9.925462533773693e-07, + "loss": 5.893352135899477e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 325, + "train_speed(iter/s)": 0.022278 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 79.98958539962769, + "completions/min_length": 36.5, + "epoch": 0.6473070240754529, + "grad_norm": 0.1877933333268866, + "kl": 0.1809844970703125, + "learning_rate": 9.924918840079234e-07, + "loss": 0.00018087081843987107, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 326, + "train_speed(iter/s)": 0.022279 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 72.81250238418579, + "completions/min_length": 35.75, + "epoch": 0.6492926284437825, + "grad_norm": 0.01665023136607427, + "kl": 0.060211181640625, + "learning_rate": 9.92437318566939e-07, + "loss": 6.011496589053422e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 327, + "train_speed(iter/s)": 0.02228 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 87.16666889190674, + "completions/min_length": 45.0, + "epoch": 0.6512782328121122, + "grad_norm": 0.003712898810028288, + "kl": 0.038299560546875, + "learning_rate": 9.9238255707614e-07, + "loss": 3.829874185612425e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 328, + "train_speed(iter/s)": 0.022279 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 74.73958587646484, + "completions/min_length": 33.375, + "epoch": 0.6532638371804418, + "grad_norm": 0.004942025964931426, + "kl": 0.050689697265625, + "learning_rate": 9.923275995573278e-07, + "loss": 5.067026359029114e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 329, + "train_speed(iter/s)": 0.022272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.75, + "completions/mean_length": 66.21875238418579, + "completions/min_length": 26.75, + "epoch": 0.6552494415487714, + "grad_norm": 0.7874083390702336, + "kl": 0.055450439453125, + "learning_rate": 9.922724460323825e-07, + "loss": -0.014580942690372467, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 330, + "train_speed(iter/s)": 0.022272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 76.00000238418579, + "completions/min_length": 34.5, + "epoch": 0.657235045917101, + "grad_norm": 0.004149881115869045, + "kl": 0.051239013671875, + "learning_rate": 9.922170965232618e-07, + "loss": 5.1151342631783336e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 331, + "train_speed(iter/s)": 0.022261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.5, + "completions/mean_length": 62.47916841506958, + "completions/min_length": 29.625, + "epoch": 0.6592206502854306, + "grad_norm": 0.018607020206996103, + "kl": 0.076171875, + "learning_rate": 9.921615510520014e-07, + "loss": 7.611622277181596e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 332, + "train_speed(iter/s)": 0.022275 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.125, + "completions/mean_length": 87.60416841506958, + "completions/min_length": 37.375, + "epoch": 0.6612062546537603, + "grad_norm": 1.4880830188793217, + "kl": 0.0885009765625, + "learning_rate": 9.921058096407152e-07, + "loss": 0.0032539120875298977, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 333, + "train_speed(iter/s)": 0.022277 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 71.89583539962769, + "completions/min_length": 34.125, + "epoch": 0.6631918590220899, + "grad_norm": 0.027870046553761317, + "kl": 0.08740234375, + "learning_rate": 9.920498723115949e-07, + "loss": 8.746829553274438e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 334, + "train_speed(iter/s)": 0.022268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.5, + "completions/mean_length": 71.81250190734863, + "completions/min_length": 33.0, + "epoch": 0.6651774633904195, + "grad_norm": 1.5966502848162438, + "kl": 0.072967529296875, + "learning_rate": 9.919937390869107e-07, + "loss": 0.00811631977558136, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 335, + "train_speed(iter/s)": 0.022276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 75.73958539962769, + "completions/min_length": 34.5, + "epoch": 0.6671630677587491, + "grad_norm": 1.840870931410954, + "kl": 0.0665283203125, + "learning_rate": 9.919374099890101e-07, + "loss": 0.0009361244738101959, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 336, + "train_speed(iter/s)": 0.022278 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.75, + "completions/mean_length": 77.89583492279053, + "completions/min_length": 37.5, + "epoch": 0.6691486721270786, + "grad_norm": 0.9857391057574876, + "kl": 0.087371826171875, + "learning_rate": 9.918808850403192e-07, + "loss": -0.000843668996822089, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 337, + "train_speed(iter/s)": 0.022267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 82.63541889190674, + "completions/min_length": 35.25, + "epoch": 0.6711342764954082, + "grad_norm": 0.03017565115051977, + "kl": 0.075103759765625, + "learning_rate": 9.918241642633414e-07, + "loss": 7.500908395741135e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 338, + "train_speed(iter/s)": 0.02227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.375, + "completions/mean_length": 68.82291889190674, + "completions/min_length": 31.625, + "epoch": 0.6731198808637379, + "grad_norm": 0.006981340970946005, + "kl": 0.0548095703125, + "learning_rate": 9.917672476806588e-07, + "loss": 5.483850691234693e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 339, + "train_speed(iter/s)": 0.022271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 84.85416841506958, + "completions/min_length": 38.75, + "epoch": 0.6751054852320675, + "grad_norm": 0.010032278040829628, + "kl": 0.054901123046875, + "learning_rate": 9.91710135314931e-07, + "loss": 5.494210563483648e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 340, + "train_speed(iter/s)": 0.022263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.75, + "completions/mean_length": 68.82291841506958, + "completions/min_length": 32.125, + "epoch": 0.6770910896003971, + "grad_norm": 0.009341870336307356, + "kl": 0.057037353515625, + "learning_rate": 9.916528271888956e-07, + "loss": 5.7096789532806724e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 341, + "train_speed(iter/s)": 0.022271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.5, + "completions/mean_length": 66.62500143051147, + "completions/min_length": 33.125, + "epoch": 0.6790766939687267, + "grad_norm": 2.2099864740654147, + "kl": 0.1019744873046875, + "learning_rate": 9.915953233253683e-07, + "loss": 0.0027551865205168724, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 342, + "train_speed(iter/s)": 0.022277 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 80.71875286102295, + "completions/min_length": 37.0, + "epoch": 0.6810622983370563, + "grad_norm": 0.005790785987682834, + "kl": 0.05206298828125, + "learning_rate": 9.915376237472425e-07, + "loss": 5.207936555962078e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 343, + "train_speed(iter/s)": 0.022281 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 69.65625190734863, + "completions/min_length": 29.375, + "epoch": 0.683047902705386, + "grad_norm": 0.5256727787266359, + "kl": 0.076141357421875, + "learning_rate": 9.914797284774895e-07, + "loss": -0.007159882690757513, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 344, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 75.06250190734863, + "completions/min_length": 32.75, + "epoch": 0.6850335070737156, + "grad_norm": 0.00690008198512845, + "kl": 0.05670166015625, + "learning_rate": 9.914216375391593e-07, + "loss": 5.6735367252258584e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 345, + "train_speed(iter/s)": 0.022271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.25, + "completions/mean_length": 75.21875286102295, + "completions/min_length": 37.125, + "epoch": 0.6870191114420452, + "grad_norm": 0.007116456286213994, + "kl": 0.06036376953125, + "learning_rate": 9.913633509553784e-07, + "loss": 6.0376849432941526e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 346, + "train_speed(iter/s)": 0.022275 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 78.59375143051147, + "completions/min_length": 41.875, + "epoch": 0.6890047158103748, + "grad_norm": 0.007416111518728378, + "kl": 0.058441162109375, + "learning_rate": 9.91304868749352e-07, + "loss": 5.842831888003275e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 347, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.5, + "completions/mean_length": 80.333336353302, + "completions/min_length": 31.125, + "epoch": 0.6909903201787044, + "grad_norm": 1.0017270820050066, + "kl": 0.0704345703125, + "learning_rate": 9.912461909443636e-07, + "loss": 0.01074531301856041, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 348, + "train_speed(iter/s)": 0.022281 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.625, + "completions/mean_length": 67.13541841506958, + "completions/min_length": 34.25, + "epoch": 0.692975924547034, + "grad_norm": 1.05958784983628, + "kl": 0.0814208984375, + "learning_rate": 9.911873175637739e-07, + "loss": 0.007089934777468443, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 349, + "train_speed(iter/s)": 0.022289 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 65.64583444595337, + "completions/min_length": 35.0, + "epoch": 0.6949615289153637, + "grad_norm": 0.009623425537380588, + "kl": 0.067596435546875, + "learning_rate": 9.911282486310212e-07, + "loss": 6.757803203072399e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 350, + "train_speed(iter/s)": 0.022295 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.25, + "completions/mean_length": 83.1041693687439, + "completions/min_length": 41.625, + "epoch": 0.6969471332836932, + "grad_norm": 0.008448838470143364, + "kl": 0.076141357421875, + "learning_rate": 9.910689841696229e-07, + "loss": 7.613049092469737e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 351, + "train_speed(iter/s)": 0.022291 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 75.73958539962769, + "completions/min_length": 35.5, + "epoch": 0.6989327376520228, + "grad_norm": 0.008973097999604834, + "kl": 0.073699951171875, + "learning_rate": 9.910095242031727e-07, + "loss": 7.361932512139902e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 352, + "train_speed(iter/s)": 0.022287 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.75, + "completions/mean_length": 55.42708444595337, + "completions/min_length": 29.5, + "epoch": 0.7009183420203524, + "grad_norm": 0.011896671231470929, + "kl": 0.09442138671875, + "learning_rate": 9.909498687553433e-07, + "loss": 9.437694097869098e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 353, + "train_speed(iter/s)": 0.022301 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.625, + "completions/mean_length": 77.80208587646484, + "completions/min_length": 34.625, + "epoch": 0.702903946388682, + "grad_norm": 0.009088107575926282, + "kl": 0.09478759765625, + "learning_rate": 9.908900178498847e-07, + "loss": 9.483918984187767e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 354, + "train_speed(iter/s)": 0.022292 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.625, + "completions/mean_length": 63.406251430511475, + "completions/min_length": 32.0, + "epoch": 0.7048895507570117, + "grad_norm": 0.010886413379501973, + "kl": 0.097900390625, + "learning_rate": 9.908299715106248e-07, + "loss": 9.774637874215841e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 355, + "train_speed(iter/s)": 0.022296 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 77.03125190734863, + "completions/min_length": 38.5, + "epoch": 0.7068751551253413, + "grad_norm": 0.009118278406867757, + "kl": 0.094329833984375, + "learning_rate": 9.907697297614694e-07, + "loss": 9.433356899535283e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 356, + "train_speed(iter/s)": 0.022298 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 63.65625238418579, + "completions/min_length": 31.625, + "epoch": 0.7088607594936709, + "grad_norm": 0.010805662813000999, + "kl": 0.1063232421875, + "learning_rate": 9.90709292626402e-07, + "loss": 0.00010625859431456774, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 357, + "train_speed(iter/s)": 0.022306 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 71.85416841506958, + "completions/min_length": 32.375, + "epoch": 0.7108463638620005, + "grad_norm": 0.8144932183814274, + "kl": 0.1082763671875, + "learning_rate": 9.906486601294836e-07, + "loss": -0.010265604592859745, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 358, + "train_speed(iter/s)": 0.022307 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.75, + "completions/mean_length": 61.42708492279053, + "completions/min_length": 29.875, + "epoch": 0.7128319682303301, + "grad_norm": 1.239045959422636, + "kl": 0.11566162109375, + "learning_rate": 9.90587832294854e-07, + "loss": -0.006729485467076302, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 359, + "train_speed(iter/s)": 0.022308 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 77.1354193687439, + "completions/min_length": 31.0, + "epoch": 0.7148175725986597, + "grad_norm": 1.9365133335728133, + "kl": 0.10089111328125, + "learning_rate": 9.905268091467294e-07, + "loss": 0.00010097896301886067, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 360, + "train_speed(iter/s)": 0.022309 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.5, + "completions/mean_length": 70.05208492279053, + "completions/min_length": 36.375, + "epoch": 0.7168031769669894, + "grad_norm": 0.00962282633834019, + "kl": 0.10614013671875, + "learning_rate": 9.90465590709405e-07, + "loss": 0.00010618605301715434, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 361, + "train_speed(iter/s)": 0.022316 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.75, + "completions/mean_length": 61.03125190734863, + "completions/min_length": 29.75, + "epoch": 0.718788781335319, + "grad_norm": 0.014007692663057595, + "kl": 0.111572265625, + "learning_rate": 9.904041770072524e-07, + "loss": 0.00011155110405525193, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 362, + "train_speed(iter/s)": 0.022322 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.5, + "completions/mean_length": 67.13541841506958, + "completions/min_length": 32.875, + "epoch": 0.7207743857036486, + "grad_norm": 1.2162865170250325, + "kl": 0.09619140625, + "learning_rate": 9.903425680647223e-07, + "loss": 0.0010505297686904669, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 363, + "train_speed(iter/s)": 0.022324 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.75, + "completions/mean_length": 60.98958492279053, + "completions/min_length": 31.125, + "epoch": 0.7227599900719781, + "grad_norm": 0.011429530437017058, + "kl": 0.10235595703125, + "learning_rate": 9.902807639063425e-07, + "loss": 0.0001023800577968359, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 364, + "train_speed(iter/s)": 0.022339 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.5, + "completions/mean_length": 61.833335876464844, + "completions/min_length": 30.25, + "epoch": 0.7247455944403077, + "grad_norm": 0.016087478102308123, + "kl": 0.10528564453125, + "learning_rate": 9.902187645567183e-07, + "loss": 0.00010526964615564793, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 365, + "train_speed(iter/s)": 0.022353 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 75.30208539962769, + "completions/min_length": 34.875, + "epoch": 0.7267311988086373, + "grad_norm": 0.009143578785985302, + "kl": 0.0863037109375, + "learning_rate": 9.901565700405331e-07, + "loss": 8.640994201414287e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 366, + "train_speed(iter/s)": 0.02236 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.375, + "completions/mean_length": 71.03125143051147, + "completions/min_length": 35.5, + "epoch": 0.728716803176967, + "grad_norm": 0.009792348440264387, + "kl": 0.103759765625, + "learning_rate": 9.90094180382548e-07, + "loss": 0.00010376061254646629, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 367, + "train_speed(iter/s)": 0.022364 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.375, + "completions/mean_length": 75.04166889190674, + "completions/min_length": 34.0, + "epoch": 0.7307024075452966, + "grad_norm": 0.008107420608290445, + "kl": 0.068450927734375, + "learning_rate": 9.900315956076015e-07, + "loss": 6.850845966255292e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 368, + "train_speed(iter/s)": 0.022366 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 70.91666841506958, + "completions/min_length": 31.875, + "epoch": 0.7326880119136262, + "grad_norm": 0.009108907130136363, + "kl": 0.075958251953125, + "learning_rate": 9.8996881574061e-07, + "loss": 7.596638170070946e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 369, + "train_speed(iter/s)": 0.022376 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.875, + "completions/mean_length": 68.79166984558105, + "completions/min_length": 32.75, + "epoch": 0.7346736162819558, + "grad_norm": 1.6168107617006753, + "kl": 0.139007568359375, + "learning_rate": 9.89905840806567e-07, + "loss": 0.0032623931765556335, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6354166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 370, + "train_speed(iter/s)": 0.022386 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 76.3854193687439, + "completions/min_length": 29.75, + "epoch": 0.7366592206502854, + "grad_norm": 0.7460126406043012, + "kl": 0.08642578125, + "learning_rate": 9.898426708305453e-07, + "loss": -0.0033700112253427505, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 371, + "train_speed(iter/s)": 0.022386 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.625, + "completions/mean_length": 74.28125095367432, + "completions/min_length": 34.0, + "epoch": 0.7386448250186151, + "grad_norm": 0.9955465777870846, + "kl": 0.1749267578125, + "learning_rate": 9.897793058376932e-07, + "loss": 0.0034031940158456564, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 372, + "train_speed(iter/s)": 0.022388 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 76.22917032241821, + "completions/min_length": 34.75, + "epoch": 0.7406304293869447, + "grad_norm": 0.008875594900954038, + "kl": 0.06402587890625, + "learning_rate": 9.89715745853238e-07, + "loss": 6.403854786185548e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 373, + "train_speed(iter/s)": 0.022381 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 72.46875143051147, + "completions/min_length": 33.875, + "epoch": 0.7426160337552743, + "grad_norm": 0.009116732402555323, + "kl": 0.072784423828125, + "learning_rate": 9.896519909024841e-07, + "loss": 7.276926044141874e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 374, + "train_speed(iter/s)": 0.022378 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.125, + "completions/mean_length": 65.22916841506958, + "completions/min_length": 33.125, + "epoch": 0.7446016381236039, + "grad_norm": 0.009145128063679963, + "kl": 0.07708740234375, + "learning_rate": 9.895880410108142e-07, + "loss": 7.714629464317113e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 375, + "train_speed(iter/s)": 0.022391 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 75.62500190734863, + "completions/min_length": 36.875, + "epoch": 0.7465872424919335, + "grad_norm": 0.007210039722318257, + "kl": 0.06585693359375, + "learning_rate": 9.895238962036878e-07, + "loss": 6.576798477908596e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 376, + "train_speed(iter/s)": 0.022394 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 73.77083587646484, + "completions/min_length": 35.625, + "epoch": 0.748572846860263, + "grad_norm": 0.010693999667093904, + "kl": 0.079681396484375, + "learning_rate": 9.894595565066422e-07, + "loss": 7.966612611198798e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 377, + "train_speed(iter/s)": 0.022392 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.0, + "completions/mean_length": 76.03125286102295, + "completions/min_length": 28.875, + "epoch": 0.7505584512285927, + "grad_norm": 0.006922013911813544, + "kl": 0.0616455078125, + "learning_rate": 9.893950219452926e-07, + "loss": 6.165902595967054e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 378, + "train_speed(iter/s)": 0.022397 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 70.03125190734863, + "completions/min_length": 34.25, + "epoch": 0.7525440555969223, + "grad_norm": 0.005846201649571549, + "kl": 0.064605712890625, + "learning_rate": 9.893302925453314e-07, + "loss": 6.460178701672703e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 379, + "train_speed(iter/s)": 0.022401 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 75.08333492279053, + "completions/min_length": 36.875, + "epoch": 0.7545296599652519, + "grad_norm": 0.008013367074850918, + "kl": 0.063568115234375, + "learning_rate": 9.892653683325292e-07, + "loss": 6.364649016177282e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 380, + "train_speed(iter/s)": 0.022402 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 73.63541889190674, + "completions/min_length": 32.625, + "epoch": 0.7565152643335815, + "grad_norm": 0.005479395627387796, + "kl": 0.06341552734375, + "learning_rate": 9.892002493327331e-07, + "loss": 6.33429444860667e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 381, + "train_speed(iter/s)": 0.02241 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.75, + "completions/mean_length": 73.01041841506958, + "completions/min_length": 32.625, + "epoch": 0.7585008687019111, + "grad_norm": 0.034901433273961564, + "kl": 0.066864013671875, + "learning_rate": 9.891349355718688e-07, + "loss": 6.686744745820761e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 382, + "train_speed(iter/s)": 0.022411 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.5, + "completions/mean_length": 74.30208539962769, + "completions/min_length": 35.375, + "epoch": 0.7604864730702408, + "grad_norm": 0.005921809752816739, + "kl": 0.048095703125, + "learning_rate": 9.89069427075939e-07, + "loss": 4.815867214347236e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 383, + "train_speed(iter/s)": 0.022409 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.75, + "completions/mean_length": 73.18750286102295, + "completions/min_length": 32.375, + "epoch": 0.7624720774385704, + "grad_norm": 0.04355059164827771, + "kl": 0.11383056640625, + "learning_rate": 9.89003723871024e-07, + "loss": 0.0001137763902079314, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 384, + "train_speed(iter/s)": 0.022408 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.75, + "completions/mean_length": 71.1354193687439, + "completions/min_length": 35.375, + "epoch": 0.7644576818069, + "grad_norm": 0.015617950505835214, + "kl": 0.065216064453125, + "learning_rate": 9.889378259832816e-07, + "loss": 6.520246097352356e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 385, + "train_speed(iter/s)": 0.022413 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 83.02083492279053, + "completions/min_length": 36.25, + "epoch": 0.7664432861752296, + "grad_norm": 0.0064691606105345845, + "kl": 0.07122802734375, + "learning_rate": 9.888717334389471e-07, + "loss": 7.12551482138224e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 386, + "train_speed(iter/s)": 0.022403 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 74.10416984558105, + "completions/min_length": 33.0, + "epoch": 0.7684288905435592, + "grad_norm": 0.005423592725224395, + "kl": 0.07147216796875, + "learning_rate": 9.888054462643336e-07, + "loss": 7.151537283789366e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 387, + "train_speed(iter/s)": 0.022401 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 73.09375286102295, + "completions/min_length": 34.5, + "epoch": 0.7704144949118888, + "grad_norm": 0.004434764158909642, + "kl": 0.071502685546875, + "learning_rate": 9.887389644858313e-07, + "loss": 7.159661618061364e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 388, + "train_speed(iter/s)": 0.022405 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.75, + "completions/mean_length": 83.32292032241821, + "completions/min_length": 35.875, + "epoch": 0.7724000992802185, + "grad_norm": 0.0043503700290423025, + "kl": 0.050018310546875, + "learning_rate": 9.88672288129908e-07, + "loss": 5.001589306630194e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 389, + "train_speed(iter/s)": 0.022396 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 84.36458587646484, + "completions/min_length": 41.375, + "epoch": 0.774385703648548, + "grad_norm": 0.004625194476448961, + "kl": 0.050048828125, + "learning_rate": 9.88605417223109e-07, + "loss": 5.003442493034527e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 390, + "train_speed(iter/s)": 0.022395 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 73.95833539962769, + "completions/min_length": 34.0, + "epoch": 0.7763713080168776, + "grad_norm": 0.006495049310969569, + "kl": 0.05206298828125, + "learning_rate": 9.88538351792057e-07, + "loss": 5.2074785344302654e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 391, + "train_speed(iter/s)": 0.022387 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 69.59375190734863, + "completions/min_length": 31.625, + "epoch": 0.7783569123852072, + "grad_norm": 0.005235384921360963, + "kl": 0.05841064453125, + "learning_rate": 9.884710918634523e-07, + "loss": 5.844328552484512e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 392, + "train_speed(iter/s)": 0.022387 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 83.56250286102295, + "completions/min_length": 36.375, + "epoch": 0.7803425167535368, + "grad_norm": 1.0439111989860015, + "kl": 0.050567626953125, + "learning_rate": 9.884036374640723e-07, + "loss": 0.00759144825860858, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.833333333954215, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 393, + "train_speed(iter/s)": 0.022389 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 212.0, + "completions/mean_length": 92.51041984558105, + "completions/min_length": 34.125, + "epoch": 0.7823281211218664, + "grad_norm": 0.005357530698709353, + "kl": 0.05413818359375, + "learning_rate": 9.883359886207723e-07, + "loss": 5.421918467618525e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 394, + "train_speed(iter/s)": 0.022383 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.375, + "completions/mean_length": 68.98958587646484, + "completions/min_length": 34.375, + "epoch": 0.7843137254901961, + "grad_norm": 1.9208803677712984, + "kl": 0.067413330078125, + "learning_rate": 9.882681453604844e-07, + "loss": 0.010571276769042015, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 395, + "train_speed(iter/s)": 0.02239 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.5, + "completions/mean_length": 69.54166841506958, + "completions/min_length": 31.875, + "epoch": 0.7862993298585257, + "grad_norm": 0.011602241931888259, + "kl": 0.063201904296875, + "learning_rate": 9.88200107710219e-07, + "loss": 6.322594708763063e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 396, + "train_speed(iter/s)": 0.022395 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.25, + "completions/mean_length": 70.70833539962769, + "completions/min_length": 36.625, + "epoch": 0.7882849342268553, + "grad_norm": 0.8977360123288795, + "kl": 0.055145263671875, + "learning_rate": 9.881318756970626e-07, + "loss": 0.005529012531042099, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 397, + "train_speed(iter/s)": 0.022405 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.5, + "completions/mean_length": 86.03125238418579, + "completions/min_length": 37.0, + "epoch": 0.7902705385951849, + "grad_norm": 0.004238869827726038, + "kl": 0.05126953125, + "learning_rate": 9.880634493481805e-07, + "loss": 5.128474367666058e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 398, + "train_speed(iter/s)": 0.022402 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 76.614586353302, + "completions/min_length": 34.375, + "epoch": 0.7922561429635145, + "grad_norm": 0.005037145006621605, + "kl": 0.061126708984375, + "learning_rate": 9.879948286908144e-07, + "loss": 6.111576658440754e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 399, + "train_speed(iter/s)": 0.022402 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 76.79166889190674, + "completions/min_length": 35.125, + "epoch": 0.7942417473318442, + "grad_norm": 0.004274011937062756, + "kl": 0.0633544921875, + "learning_rate": 9.879260137522835e-07, + "loss": 6.329896859824657e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 400, + "train_speed(iter/s)": 0.022403 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 70.63541889190674, + "completions/min_length": 33.875, + "epoch": 0.7962273517001738, + "grad_norm": 0.2554109089907897, + "kl": 0.20697021484375, + "learning_rate": 9.87857004559985e-07, + "loss": 0.00020729737298097461, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 401, + "train_speed(iter/s)": 0.022408 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 75.8229193687439, + "completions/min_length": 31.625, + "epoch": 0.7982129560685034, + "grad_norm": 0.011398283755190015, + "kl": 0.055694580078125, + "learning_rate": 9.877878011413922e-07, + "loss": 5.56960585527122e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 402, + "train_speed(iter/s)": 0.022403 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.125, + "completions/mean_length": 82.51041984558105, + "completions/min_length": 34.625, + "epoch": 0.8001985604368329, + "grad_norm": 1.8505796388499405, + "kl": 0.0501708984375, + "learning_rate": 9.877184035240572e-07, + "loss": 5.0192080379929394e-05, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 403, + "train_speed(iter/s)": 0.022396 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 80.73958444595337, + "completions/min_length": 41.125, + "epoch": 0.8021841648051625, + "grad_norm": 0.012820796671756323, + "kl": 0.057403564453125, + "learning_rate": 9.87648811735608e-07, + "loss": 5.742545909015462e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 404, + "train_speed(iter/s)": 0.022397 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 79.27083587646484, + "completions/min_length": 38.625, + "epoch": 0.8041697691734921, + "grad_norm": 0.014906282746045442, + "kl": 0.058197021484375, + "learning_rate": 9.875790258037514e-07, + "loss": 5.812738891108893e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 405, + "train_speed(iter/s)": 0.022401 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 77.302086353302, + "completions/min_length": 36.625, + "epoch": 0.8061553735418218, + "grad_norm": 0.011130849927422536, + "kl": 0.059295654296875, + "learning_rate": 9.875090457562697e-07, + "loss": 5.927743040956557e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 406, + "train_speed(iter/s)": 0.022406 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.25, + "completions/mean_length": 93.34375286102295, + "completions/min_length": 40.875, + "epoch": 0.8081409779101514, + "grad_norm": 0.11481029559933198, + "kl": 0.13702392578125, + "learning_rate": 9.874388716210242e-07, + "loss": 0.0001369929377688095, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 407, + "train_speed(iter/s)": 0.022406 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.75, + "completions/mean_length": 88.364586353302, + "completions/min_length": 39.0, + "epoch": 0.810126582278481, + "grad_norm": 0.003428084619563002, + "kl": 0.047027587890625, + "learning_rate": 9.873685034259524e-07, + "loss": 4.6960762119852006e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 408, + "train_speed(iter/s)": 0.022411 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 86.16666793823242, + "completions/min_length": 34.375, + "epoch": 0.8121121866468106, + "grad_norm": 0.00384555028749029, + "kl": 0.06048583984375, + "learning_rate": 9.872979411990694e-07, + "loss": 6.048551222193055e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 409, + "train_speed(iter/s)": 0.02241 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.875, + "completions/mean_length": 83.8229193687439, + "completions/min_length": 35.875, + "epoch": 0.8140977910151402, + "grad_norm": 0.003670618302961186, + "kl": 0.067596435546875, + "learning_rate": 9.872271849684674e-07, + "loss": 6.753490742994472e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 410, + "train_speed(iter/s)": 0.022412 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 85.27083492279053, + "completions/min_length": 36.125, + "epoch": 0.8160833953834699, + "grad_norm": 0.004061576867033558, + "kl": 0.056427001953125, + "learning_rate": 9.871562347623164e-07, + "loss": 5.638205766445026e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 411, + "train_speed(iter/s)": 0.022405 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 77.59375381469727, + "completions/min_length": 30.0, + "epoch": 0.8180689997517995, + "grad_norm": 0.00660518149892489, + "kl": 0.06072998046875, + "learning_rate": 9.870850906088627e-07, + "loss": 6.069945084163919e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 412, + "train_speed(iter/s)": 0.022407 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 83.90625190734863, + "completions/min_length": 40.0, + "epoch": 0.8200546041201291, + "grad_norm": 0.005380659804986027, + "kl": 0.063812255859375, + "learning_rate": 9.870137525364308e-07, + "loss": 6.376288365572691e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 413, + "train_speed(iter/s)": 0.022405 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 76.20833683013916, + "completions/min_length": 33.0, + "epoch": 0.8220402084884587, + "grad_norm": 0.004214593524566473, + "kl": 0.047576904296875, + "learning_rate": 9.869422205734215e-07, + "loss": 4.756057387567125e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 414, + "train_speed(iter/s)": 0.022402 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.625, + "completions/mean_length": 86.87500286102295, + "completions/min_length": 34.125, + "epoch": 0.8240258128567883, + "grad_norm": 0.004190381797998117, + "kl": 0.07012939453125, + "learning_rate": 9.868704947483133e-07, + "loss": 7.006079249549657e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 415, + "train_speed(iter/s)": 0.022405 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 79.07291841506958, + "completions/min_length": 33.375, + "epoch": 0.826011417225118, + "grad_norm": 0.003436649069030626, + "kl": 0.07763671875, + "learning_rate": 9.867985750896619e-07, + "loss": 7.758998981444165e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 416, + "train_speed(iter/s)": 0.022403 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.25, + "completions/mean_length": 83.48958492279053, + "completions/min_length": 37.125, + "epoch": 0.8279970215934475, + "grad_norm": 1.2588152892744315, + "kl": 0.240631103515625, + "learning_rate": 9.867264616261e-07, + "loss": 0.003334498032927513, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 417, + "train_speed(iter/s)": 0.022391 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.375, + "completions/mean_length": 81.20833492279053, + "completions/min_length": 32.125, + "epoch": 0.8299826259617771, + "grad_norm": 0.106336738579708, + "kl": 0.198455810546875, + "learning_rate": 9.866541543863374e-07, + "loss": 0.00019865986541844904, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 418, + "train_speed(iter/s)": 0.022386 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.875, + "completions/mean_length": 88.31250238418579, + "completions/min_length": 36.375, + "epoch": 0.8319682303301067, + "grad_norm": 0.0046822794961672055, + "kl": 0.05023193359375, + "learning_rate": 9.865816533991614e-07, + "loss": 5.0257076509296894e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 419, + "train_speed(iter/s)": 0.022374 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 76.0416693687439, + "completions/min_length": 34.25, + "epoch": 0.8339538346984363, + "grad_norm": 0.00622958021240744, + "kl": 0.066162109375, + "learning_rate": 9.86508958693436e-07, + "loss": 6.617772305617109e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 420, + "train_speed(iter/s)": 0.022378 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 86.37500190734863, + "completions/min_length": 43.875, + "epoch": 0.8359394390667659, + "grad_norm": 0.005137313718892348, + "kl": 0.052154541015625, + "learning_rate": 9.864360702981024e-07, + "loss": 5.2151775889797136e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 421, + "train_speed(iter/s)": 0.022382 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 85.05208492279053, + "completions/min_length": 39.0, + "epoch": 0.8379250434350956, + "grad_norm": 0.015260451196580099, + "kl": 0.054931640625, + "learning_rate": 9.863629882421792e-07, + "loss": 5.488552778842859e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 422, + "train_speed(iter/s)": 0.022376 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.5, + "completions/mean_length": 95.82291984558105, + "completions/min_length": 39.25, + "epoch": 0.8399106478034252, + "grad_norm": 0.0039593592890841064, + "kl": 0.048614501953125, + "learning_rate": 9.86289712554762e-07, + "loss": 4.861816341872327e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 423, + "train_speed(iter/s)": 0.022376 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 79.23958587646484, + "completions/min_length": 34.0, + "epoch": 0.8418962521717548, + "grad_norm": 0.9652327185822706, + "kl": 0.058807373046875, + "learning_rate": 9.862162432650234e-07, + "loss": 0.005081596784293652, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 424, + "train_speed(iter/s)": 0.022369 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.0, + "completions/mean_length": 82.72916889190674, + "completions/min_length": 32.375, + "epoch": 0.8438818565400844, + "grad_norm": 1.0403684241123083, + "kl": 0.057403564453125, + "learning_rate": 9.861425804022128e-07, + "loss": -0.004854725208133459, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 425, + "train_speed(iter/s)": 0.022362 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.25, + "completions/mean_length": 83.16666889190674, + "completions/min_length": 37.75, + "epoch": 0.845867460908414, + "grad_norm": 0.005497342568892283, + "kl": 0.051544189453125, + "learning_rate": 9.860687239956573e-07, + "loss": 5.15462743351236e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 426, + "train_speed(iter/s)": 0.022357 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 196.75, + "completions/mean_length": 91.81250286102295, + "completions/min_length": 35.375, + "epoch": 0.8478530652767436, + "grad_norm": 0.005472516329216914, + "kl": 0.04522705078125, + "learning_rate": 9.859946740747607e-07, + "loss": 4.515825639828108e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 427, + "train_speed(iter/s)": 0.022352 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.75, + "completions/mean_length": 91.36458683013916, + "completions/min_length": 38.125, + "epoch": 0.8498386696450733, + "grad_norm": 0.004497448013878643, + "kl": 0.0579833984375, + "learning_rate": 9.859204306690037e-07, + "loss": 5.7988228945760056e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 428, + "train_speed(iter/s)": 0.02234 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 196.375, + "completions/mean_length": 91.21875190734863, + "completions/min_length": 41.5, + "epoch": 0.8518242740134029, + "grad_norm": 0.004461792169384546, + "kl": 0.0748291015625, + "learning_rate": 9.858459938079439e-07, + "loss": 7.484655361622572e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 429, + "train_speed(iter/s)": 0.022337 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.5, + "completions/mean_length": 82.20833587646484, + "completions/min_length": 34.0, + "epoch": 0.8538098783817324, + "grad_norm": 0.006069683115572057, + "kl": 0.065399169921875, + "learning_rate": 9.85771363521217e-07, + "loss": 6.542236224049702e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 430, + "train_speed(iter/s)": 0.022337 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 84.65625286102295, + "completions/min_length": 39.125, + "epoch": 0.855795482750062, + "grad_norm": 0.007138089594645187, + "kl": 0.063385009765625, + "learning_rate": 9.856965398385342e-07, + "loss": 6.338374805636704e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 431, + "train_speed(iter/s)": 0.022337 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.875, + "completions/mean_length": 85.1354193687439, + "completions/min_length": 41.125, + "epoch": 0.8577810871183916, + "grad_norm": 0.003952132814534776, + "kl": 0.0693359375, + "learning_rate": 9.856215227896847e-07, + "loss": 6.939301238162443e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 432, + "train_speed(iter/s)": 0.022336 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.25, + "completions/mean_length": 79.28125286102295, + "completions/min_length": 34.25, + "epoch": 0.8597666914867212, + "grad_norm": 0.9416845816813911, + "kl": 0.087799072265625, + "learning_rate": 9.855463124045342e-07, + "loss": 0.003828667104244232, + "memory(GiB)": 94.21, + "reward": 1.4583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.45833333395421505, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 433, + "train_speed(iter/s)": 0.022339 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.375, + "completions/mean_length": 87.33333778381348, + "completions/min_length": 33.125, + "epoch": 0.8617522958550509, + "grad_norm": 0.006785789142508644, + "kl": 0.0637054443359375, + "learning_rate": 9.85470908713026e-07, + "loss": 6.379057595040649e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 434, + "train_speed(iter/s)": 0.022339 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 79.11458492279053, + "completions/min_length": 34.375, + "epoch": 0.8637379002233805, + "grad_norm": 0.0059735336974445635, + "kl": 0.059967041015625, + "learning_rate": 9.853953117451795e-07, + "loss": 5.999209679430351e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 435, + "train_speed(iter/s)": 0.022343 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 71.84375286102295, + "completions/min_length": 31.375, + "epoch": 0.8657235045917101, + "grad_norm": 0.005514369774256764, + "kl": 0.06683349609375, + "learning_rate": 9.853195215310915e-07, + "loss": 6.678507634205744e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 436, + "train_speed(iter/s)": 0.022343 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 74.83333587646484, + "completions/min_length": 32.5, + "epoch": 0.8677091089600397, + "grad_norm": 0.0035355475794871067, + "kl": 0.042266845703125, + "learning_rate": 9.85243538100936e-07, + "loss": 4.223368159728125e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 437, + "train_speed(iter/s)": 0.02234 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.0, + "completions/mean_length": 84.27083587646484, + "completions/min_length": 29.25, + "epoch": 0.8696947133283693, + "grad_norm": 0.004333809030999474, + "kl": 0.05462646484375, + "learning_rate": 9.851673614849632e-07, + "loss": 5.459976091515273e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 438, + "train_speed(iter/s)": 0.022332 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.125, + "completions/mean_length": 79.50000238418579, + "completions/min_length": 29.375, + "epoch": 0.871680317696699, + "grad_norm": 0.0051928642497738615, + "kl": 0.052764892578125, + "learning_rate": 9.850909917135009e-07, + "loss": 5.270736801321618e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 439, + "train_speed(iter/s)": 0.022322 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 73.25000095367432, + "completions/min_length": 35.125, + "epoch": 0.8736659220650286, + "grad_norm": 1.7041063182273106, + "kl": 0.066802978515625, + "learning_rate": 9.850144288169535e-07, + "loss": -0.007907523773610592, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 440, + "train_speed(iter/s)": 0.022327 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.625, + "completions/mean_length": 82.40625286102295, + "completions/min_length": 36.5, + "epoch": 0.8756515264333582, + "grad_norm": 0.006393325256835077, + "kl": 0.060028076171875, + "learning_rate": 9.849376728258022e-07, + "loss": 6.0007772844983265e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 441, + "train_speed(iter/s)": 0.022328 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.75, + "completions/mean_length": 81.51041984558105, + "completions/min_length": 36.125, + "epoch": 0.8776371308016878, + "grad_norm": 0.7432697453317926, + "kl": 0.056976318359375, + "learning_rate": 9.848607237706056e-07, + "loss": 0.007492425851523876, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 442, + "train_speed(iter/s)": 0.022329 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.25, + "completions/mean_length": 84.82291984558105, + "completions/min_length": 33.625, + "epoch": 0.8796227351700173, + "grad_norm": 0.9463608415933458, + "kl": 0.04425048828125, + "learning_rate": 9.847835816819982e-07, + "loss": -0.007214564364403486, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 443, + "train_speed(iter/s)": 0.022321 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.125, + "completions/mean_length": 82.13541889190674, + "completions/min_length": 38.25, + "epoch": 0.8816083395383469, + "grad_norm": 0.6500412661226062, + "kl": 0.055908203125, + "learning_rate": 9.847062465906925e-07, + "loss": -0.00016089281416498125, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 444, + "train_speed(iter/s)": 0.022318 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.5, + "completions/mean_length": 78.54166793823242, + "completions/min_length": 32.25, + "epoch": 0.8835939439066766, + "grad_norm": 0.006739439862654462, + "kl": 0.05487060546875, + "learning_rate": 9.846287185274767e-07, + "loss": 5.484447319759056e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 445, + "train_speed(iter/s)": 0.022322 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 69.00000238418579, + "completions/min_length": 32.75, + "epoch": 0.8855795482750062, + "grad_norm": 0.004097462078476503, + "kl": 0.06689453125, + "learning_rate": 9.845509975232168e-07, + "loss": 6.683926039841026e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 446, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 80.86458683013916, + "completions/min_length": 35.0, + "epoch": 0.8875651526433358, + "grad_norm": 0.9098200202030318, + "kl": 0.057952880859375, + "learning_rate": 9.844730836088548e-07, + "loss": 5.786865949630737e-05, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393530294299126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 447, + "train_speed(iter/s)": 0.022325 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 221.125, + "completions/mean_length": 93.62500476837158, + "completions/min_length": 37.875, + "epoch": 0.8895507570116654, + "grad_norm": 0.012737751849066711, + "kl": 0.0550994873046875, + "learning_rate": 9.843949768154103e-07, + "loss": 5.510202026925981e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 448, + "train_speed(iter/s)": 0.022316 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.25, + "completions/mean_length": 86.38541793823242, + "completions/min_length": 34.125, + "epoch": 0.891536361379995, + "grad_norm": 1.1000038347729983, + "kl": 0.085540771484375, + "learning_rate": 9.84316677173979e-07, + "loss": -0.013191180303692818, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 449, + "train_speed(iter/s)": 0.022316 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.5, + "completions/mean_length": 80.08333587646484, + "completions/min_length": 38.75, + "epoch": 0.8935219657483247, + "grad_norm": 0.9812317954361416, + "kl": 0.05841064453125, + "learning_rate": 9.842381847157338e-07, + "loss": 0.0026867142878472805, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 450, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 84.42708396911621, + "completions/min_length": 35.375, + "epoch": 0.8955075701166543, + "grad_norm": 1.0822573801673039, + "kl": 0.083709716796875, + "learning_rate": 9.841594994719242e-07, + "loss": -0.0019134258618578315, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.8229166679084301, + "rewards/CineAccuracyORM/std": 0.18335824459791183, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 451, + "train_speed(iter/s)": 0.022323 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 82.78125286102295, + "completions/min_length": 34.875, + "epoch": 0.8974931744849839, + "grad_norm": 0.010675053619692178, + "kl": 0.054962158203125, + "learning_rate": 9.840806214738763e-07, + "loss": 5.505572698893957e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 452, + "train_speed(iter/s)": 0.022329 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.125, + "completions/mean_length": 90.69792032241821, + "completions/min_length": 38.125, + "epoch": 0.8994787788533135, + "grad_norm": 0.004581280845179157, + "kl": 0.04107666015625, + "learning_rate": 9.840015507529936e-07, + "loss": 4.1029979911400005e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 453, + "train_speed(iter/s)": 0.022321 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.625, + "completions/mean_length": 99.42708587646484, + "completions/min_length": 44.125, + "epoch": 0.9014643832216431, + "grad_norm": 0.685749072217824, + "kl": 0.065582275390625, + "learning_rate": 9.839222873407553e-07, + "loss": 0.009730573743581772, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 454, + "train_speed(iter/s)": 0.02232 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.875, + "completions/mean_length": 90.40625381469727, + "completions/min_length": 37.5, + "epoch": 0.9034499875899727, + "grad_norm": 0.003985228831653713, + "kl": 0.061492919921875, + "learning_rate": 9.838428312687179e-07, + "loss": 6.147942622192204e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 455, + "train_speed(iter/s)": 0.022312 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.0, + "completions/mean_length": 88.73958492279053, + "completions/min_length": 34.5, + "epoch": 0.9054355919583023, + "grad_norm": 1.5560878386072767, + "kl": 0.05950927734375, + "learning_rate": 9.83763182568515e-07, + "loss": -0.011519490741193295, + "memory(GiB)": 94.21, + "reward": 1.7916666865348816, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.12309149652719498, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 456, + "train_speed(iter/s)": 0.022302 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.75, + "completions/mean_length": 93.26041889190674, + "completions/min_length": 43.125, + "epoch": 0.9074211963266319, + "grad_norm": 0.006232388259372655, + "kl": 0.0594482421875, + "learning_rate": 9.83683341271856e-07, + "loss": 5.939140828559175e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 457, + "train_speed(iter/s)": 0.022303 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 217.25, + "completions/mean_length": 99.54167079925537, + "completions/min_length": 42.0, + "epoch": 0.9094068006949615, + "grad_norm": 0.005745981046939235, + "kl": 0.06011962890625, + "learning_rate": 9.836033074105277e-07, + "loss": 6.0100726841483265e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 458, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 203.0, + "completions/mean_length": 95.10416793823242, + "completions/min_length": 30.25, + "epoch": 0.9113924050632911, + "grad_norm": 0.5701364923659495, + "kl": 0.057586669921875, + "learning_rate": 9.835230810163932e-07, + "loss": -0.004036252852529287, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 459, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 198.375, + "completions/mean_length": 88.8854193687439, + "completions/min_length": 34.875, + "epoch": 0.9133780094316207, + "grad_norm": 0.004460898395404908, + "kl": 0.04986572265625, + "learning_rate": 9.834426621213923e-07, + "loss": 4.981301026418805e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 460, + "train_speed(iter/s)": 0.022273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.75, + "completions/mean_length": 97.92708683013916, + "completions/min_length": 39.5, + "epoch": 0.9153636137999503, + "grad_norm": 0.9662989797226541, + "kl": 0.065399169921875, + "learning_rate": 9.833620507575415e-07, + "loss": -0.0023719172459095716, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 461, + "train_speed(iter/s)": 0.022274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.25, + "completions/mean_length": 85.98958492279053, + "completions/min_length": 34.875, + "epoch": 0.91734921816828, + "grad_norm": 0.005387381371872264, + "kl": 0.064361572265625, + "learning_rate": 9.83281246956934e-07, + "loss": 6.429507629945874e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 462, + "train_speed(iter/s)": 0.02227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 81.69791841506958, + "completions/min_length": 39.375, + "epoch": 0.9193348225366096, + "grad_norm": 0.8071229644781568, + "kl": 0.06201171875, + "learning_rate": 9.832002507517392e-07, + "loss": 6.19615166215226e-05, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 463, + "train_speed(iter/s)": 0.022273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.125, + "completions/mean_length": 90.82291984558105, + "completions/min_length": 35.5, + "epoch": 0.9213204269049392, + "grad_norm": 0.0058570233010228345, + "kl": 0.0528564453125, + "learning_rate": 9.831190621742038e-07, + "loss": 5.286728992359713e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 464, + "train_speed(iter/s)": 0.022275 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.125, + "completions/mean_length": 81.32291841506958, + "completions/min_length": 29.875, + "epoch": 0.9233060312732688, + "grad_norm": 0.8001698029224038, + "kl": 0.077239990234375, + "learning_rate": 9.830376812566507e-07, + "loss": -0.0023334722500294447, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 465, + "train_speed(iter/s)": 0.022275 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.625, + "completions/mean_length": 78.90625190734863, + "completions/min_length": 39.625, + "epoch": 0.9252916356415984, + "grad_norm": 0.9830800175828178, + "kl": 0.076934814453125, + "learning_rate": 9.829561080314793e-07, + "loss": -0.002045848174020648, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 466, + "train_speed(iter/s)": 0.022275 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.25, + "completions/mean_length": 91.01041793823242, + "completions/min_length": 32.25, + "epoch": 0.9272772400099281, + "grad_norm": 0.6224140283352512, + "kl": 0.064666748046875, + "learning_rate": 9.828743425311653e-07, + "loss": 0.0011969867628067732, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 467, + "train_speed(iter/s)": 0.022273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 88.89583683013916, + "completions/min_length": 36.625, + "epoch": 0.9292628443782577, + "grad_norm": 0.007733428656259304, + "kl": 0.068450927734375, + "learning_rate": 9.827923847882616e-07, + "loss": 6.8443245254457e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 468, + "train_speed(iter/s)": 0.022272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 68.41666841506958, + "completions/min_length": 30.375, + "epoch": 0.9312484487465872, + "grad_norm": 0.009634204070546371, + "kl": 0.09173583984375, + "learning_rate": 9.827102348353975e-07, + "loss": 9.169169061351568e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 469, + "train_speed(iter/s)": 0.022281 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.5, + "completions/mean_length": 79.47916889190674, + "completions/min_length": 32.125, + "epoch": 0.9332340531149168, + "grad_norm": 0.009121017923592918, + "kl": 0.0889892578125, + "learning_rate": 9.826278927052783e-07, + "loss": 8.899247040972114e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 470, + "train_speed(iter/s)": 0.022281 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.375, + "completions/mean_length": 70.67708539962769, + "completions/min_length": 30.875, + "epoch": 0.9352196574832464, + "grad_norm": 0.8894852280259735, + "kl": 0.10858154296875, + "learning_rate": 9.825453584306865e-07, + "loss": 0.0014512266498059034, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 471, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 74.69791889190674, + "completions/min_length": 34.875, + "epoch": 0.937205261851576, + "grad_norm": 0.7357905779175437, + "kl": 0.11627197265625, + "learning_rate": 9.824626320444803e-07, + "loss": 0.0033022775314748287, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 472, + "train_speed(iter/s)": 0.022287 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.875, + "completions/mean_length": 76.45833587646484, + "completions/min_length": 37.25, + "epoch": 0.9391908662199057, + "grad_norm": 0.009486380069485866, + "kl": 0.104736328125, + "learning_rate": 9.823797135795955e-07, + "loss": 0.00010473228758201003, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 473, + "train_speed(iter/s)": 0.022289 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 80.36458492279053, + "completions/min_length": 32.375, + "epoch": 0.9411764705882353, + "grad_norm": 0.009200236837578818, + "kl": 0.11175537109375, + "learning_rate": 9.82296603069043e-07, + "loss": 0.00011178151180502027, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 474, + "train_speed(iter/s)": 0.022291 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 75.43750190734863, + "completions/min_length": 30.375, + "epoch": 0.9431620749565649, + "grad_norm": 0.009447692408176854, + "kl": 0.1064453125, + "learning_rate": 9.822133005459115e-07, + "loss": 0.0001064460666384548, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 475, + "train_speed(iter/s)": 0.022291 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 72.09375238418579, + "completions/min_length": 31.125, + "epoch": 0.9451476793248945, + "grad_norm": 0.9053407087114659, + "kl": 0.11505126953125, + "learning_rate": 9.82129806043365e-07, + "loss": 0.00011500219989102334, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.04865618422627449, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 476, + "train_speed(iter/s)": 0.022296 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 71.66666746139526, + "completions/min_length": 34.625, + "epoch": 0.9471332836932241, + "grad_norm": 0.009712097149882143, + "kl": 0.15216064453125, + "learning_rate": 9.820461195946446e-07, + "loss": 0.00015226402319967747, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 477, + "train_speed(iter/s)": 0.022295 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.5, + "completions/mean_length": 67.01041793823242, + "completions/min_length": 29.875, + "epoch": 0.9491188880615538, + "grad_norm": 0.9097396548529905, + "kl": 0.12908935546875, + "learning_rate": 9.81962241233068e-07, + "loss": 0.017307542264461517, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 478, + "train_speed(iter/s)": 0.022293 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.875, + "completions/mean_length": 65.73958492279053, + "completions/min_length": 31.875, + "epoch": 0.9511044924298834, + "grad_norm": 0.010650313531835813, + "kl": 0.14459228515625, + "learning_rate": 9.818781709920283e-07, + "loss": 0.00014469510642811656, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 479, + "train_speed(iter/s)": 0.022299 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 73.03125190734863, + "completions/min_length": 35.625, + "epoch": 0.953090096798213, + "grad_norm": 0.009855928168650403, + "kl": 0.12811279296875, + "learning_rate": 9.817939089049965e-07, + "loss": 0.0001281301665585488, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 480, + "train_speed(iter/s)": 0.022298 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 73.45833492279053, + "completions/min_length": 29.375, + "epoch": 0.9550757011665426, + "grad_norm": 0.010171105548975897, + "kl": 0.12945556640625, + "learning_rate": 9.817094550055184e-07, + "loss": 0.00012945401249453425, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 481, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.875, + "completions/mean_length": 80.75000190734863, + "completions/min_length": 33.125, + "epoch": 0.9570613055348722, + "grad_norm": 0.009809432741606488, + "kl": 0.14013671875, + "learning_rate": 9.816248093272172e-07, + "loss": 0.00014013744657859206, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 482, + "train_speed(iter/s)": 0.022298 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 74.56250047683716, + "completions/min_length": 32.75, + "epoch": 0.9590469099032017, + "grad_norm": 1.1564833909047878, + "kl": 0.1185302734375, + "learning_rate": 9.815399719037923e-07, + "loss": -0.003725347574800253, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 483, + "train_speed(iter/s)": 0.022303 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 75.45833396911621, + "completions/min_length": 30.25, + "epoch": 0.9610325142715314, + "grad_norm": 0.0097016888727827, + "kl": 0.1087646484375, + "learning_rate": 9.81454942769019e-07, + "loss": 0.00010866991215152666, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 484, + "train_speed(iter/s)": 0.022294 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 68.06250190734863, + "completions/min_length": 33.625, + "epoch": 0.963018118639861, + "grad_norm": 0.8021611569373516, + "kl": 0.12957763671875, + "learning_rate": 9.813697219567491e-07, + "loss": -0.007320082746446133, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 485, + "train_speed(iter/s)": 0.022296 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.75, + "completions/mean_length": 77.09375143051147, + "completions/min_length": 28.125, + "epoch": 0.9650037230081906, + "grad_norm": 0.6919317084759832, + "kl": 0.12451171875, + "learning_rate": 9.812843095009115e-07, + "loss": 0.007042422890663147, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 486, + "train_speed(iter/s)": 0.022295 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.875, + "completions/mean_length": 64.88541889190674, + "completions/min_length": 31.625, + "epoch": 0.9669893273765202, + "grad_norm": 0.01100579170144626, + "kl": 0.13653564453125, + "learning_rate": 9.811987054355101e-07, + "loss": 0.00013647452578879893, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 487, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.75, + "completions/mean_length": 64.76041793823242, + "completions/min_length": 29.25, + "epoch": 0.9689749317448498, + "grad_norm": 1.2087116831368303, + "kl": 0.19085693359375, + "learning_rate": 9.81112909794626e-07, + "loss": -0.011763500049710274, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 488, + "train_speed(iter/s)": 0.022309 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.625, + "completions/mean_length": 72.67708539962769, + "completions/min_length": 34.5, + "epoch": 0.9709605361131795, + "grad_norm": 1.6311525741553783, + "kl": 0.1282958984375, + "learning_rate": 9.810269226124159e-07, + "loss": 0.005242710467427969, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 489, + "train_speed(iter/s)": 0.022314 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 68.51041841506958, + "completions/min_length": 33.625, + "epoch": 0.9729461404815091, + "grad_norm": 0.010023110660537278, + "kl": 0.107421875, + "learning_rate": 9.809407439231138e-07, + "loss": 0.00010748908971436322, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 490, + "train_speed(iter/s)": 0.022312 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 70.56250190734863, + "completions/min_length": 29.375, + "epoch": 0.9749317448498387, + "grad_norm": 0.009828444264579165, + "kl": 0.10894775390625, + "learning_rate": 9.808543737610286e-07, + "loss": 0.00010895646846620366, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 491, + "train_speed(iter/s)": 0.022317 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.875, + "completions/mean_length": 81.20833683013916, + "completions/min_length": 30.5, + "epoch": 0.9769173492181683, + "grad_norm": 0.8947125321222433, + "kl": 0.09814453125, + "learning_rate": 9.807678121605465e-07, + "loss": -0.0024527187924832106, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 492, + "train_speed(iter/s)": 0.022317 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 65.69791889190674, + "completions/min_length": 29.5, + "epoch": 0.9789029535864979, + "grad_norm": 0.010135246690629863, + "kl": 0.103271484375, + "learning_rate": 9.806810591561293e-07, + "loss": 0.00010322139132767916, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 493, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 69.72916841506958, + "completions/min_length": 30.25, + "epoch": 0.9808885579548275, + "grad_norm": 1.1786096067559, + "kl": 0.101806640625, + "learning_rate": 9.805941147823156e-07, + "loss": 0.00010183329868596047, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 494, + "train_speed(iter/s)": 0.022317 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 75.989586353302, + "completions/min_length": 38.0, + "epoch": 0.9828741623231572, + "grad_norm": 1.3332248847327404, + "kl": 0.066925048828125, + "learning_rate": 9.805069790737192e-07, + "loss": -0.014144557528197765, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.05974818021059036, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.15789688751101494, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 495, + "train_speed(iter/s)": 0.022314 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.5, + "completions/mean_length": 79.25000190734863, + "completions/min_length": 32.5, + "epoch": 0.9848597666914867, + "grad_norm": 0.008184887848023242, + "kl": 0.079193115234375, + "learning_rate": 9.804196520650315e-07, + "loss": 7.917750917840749e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 496, + "train_speed(iter/s)": 0.02231 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.25, + "completions/mean_length": 79.40625381469727, + "completions/min_length": 37.25, + "epoch": 0.9868453710598163, + "grad_norm": 0.008136075588755821, + "kl": 0.08233642578125, + "learning_rate": 9.803321337910184e-07, + "loss": 8.249451639130712e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 497, + "train_speed(iter/s)": 0.022309 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.75, + "completions/mean_length": 81.05208587646484, + "completions/min_length": 41.125, + "epoch": 0.9888309754281459, + "grad_norm": 0.914418444581033, + "kl": 0.084442138671875, + "learning_rate": 9.802444242865232e-07, + "loss": -0.004461756441742182, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 498, + "train_speed(iter/s)": 0.022306 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 71.48958492279053, + "completions/min_length": 26.5, + "epoch": 0.9908165797964755, + "grad_norm": 0.033248927385579524, + "kl": 0.119659423828125, + "learning_rate": 9.80156523586465e-07, + "loss": 0.00011982783325947821, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 499, + "train_speed(iter/s)": 0.02231 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 68.51041841506958, + "completions/min_length": 32.25, + "epoch": 0.9928021841648051, + "grad_norm": 0.013145718844948953, + "kl": 0.103851318359375, + "learning_rate": 9.80068431725839e-07, + "loss": 0.00010379446030128747, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 500, + "train_speed(iter/s)": 0.022314 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 70.00000238418579, + "completions/min_length": 27.875, + "epoch": 0.9947877885331348, + "grad_norm": 0.008375740313006693, + "kl": 0.078643798828125, + "learning_rate": 9.799801487397161e-07, + "loss": 7.863771315896884e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 501, + "train_speed(iter/s)": 0.022285 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.125, + "completions/mean_length": 74.84375333786011, + "completions/min_length": 32.875, + "epoch": 0.9967733929014644, + "grad_norm": 0.007199773473163106, + "kl": 0.0740966796875, + "learning_rate": 9.79891674663244e-07, + "loss": 7.400034519378096e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 502, + "train_speed(iter/s)": 0.02229 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 75.79166793823242, + "completions/min_length": 31.25, + "epoch": 0.998758997269794, + "grad_norm": 0.006870565108412514, + "kl": 0.063812255859375, + "learning_rate": 9.798030095316459e-07, + "loss": 6.379494880093262e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 503, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.25, + "completions/mean_length": 70.03125286102295, + "completions/min_length": 28.75, + "epoch": 1.0019856043683295, + "grad_norm": 0.005756176540661593, + "kl": 0.07269287109375, + "learning_rate": 9.797141533802213e-07, + "loss": 7.26907528587617e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 504, + "train_speed(iter/s)": 0.022281 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.75, + "completions/mean_length": 73.33333444595337, + "completions/min_length": 32.125, + "epoch": 1.0039712087366592, + "grad_norm": 0.007158366064493304, + "kl": 0.074371337890625, + "learning_rate": 9.796251062443458e-07, + "loss": 7.43550481274724e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 505, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 73.13541889190674, + "completions/min_length": 30.125, + "epoch": 1.0059568131049887, + "grad_norm": 0.018076724632159164, + "kl": 0.09466552734375, + "learning_rate": 9.795358681594709e-07, + "loss": 9.470425720792264e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 506, + "train_speed(iter/s)": 0.02228 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 74.0416693687439, + "completions/min_length": 28.875, + "epoch": 1.0079424174733185, + "grad_norm": 0.00504712541718879, + "kl": 0.071502685546875, + "learning_rate": 9.794464391611244e-07, + "loss": 7.15227797627449e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 507, + "train_speed(iter/s)": 0.022282 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 67.44791746139526, + "completions/min_length": 30.125, + "epoch": 1.009928021841648, + "grad_norm": 0.01076316741107328, + "kl": 0.079925537109375, + "learning_rate": 9.793568192849098e-07, + "loss": 7.990360609255731e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 508, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 79.14583539962769, + "completions/min_length": 31.0, + "epoch": 1.0119136262099777, + "grad_norm": 0.006282426776595067, + "kl": 0.0953369140625, + "learning_rate": 9.792670085665063e-07, + "loss": 9.538599260849878e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 509, + "train_speed(iter/s)": 0.022287 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 75.69791793823242, + "completions/min_length": 32.75, + "epoch": 1.0138992305783072, + "grad_norm": 0.01716490730911805, + "kl": 0.094696044921875, + "learning_rate": 9.7917700704167e-07, + "loss": 9.4724862719886e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 510, + "train_speed(iter/s)": 0.022287 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 69.45833492279053, + "completions/min_length": 28.0, + "epoch": 1.015884834946637, + "grad_norm": 0.004842520584587723, + "kl": 0.07867431640625, + "learning_rate": 9.790868147462321e-07, + "loss": 7.858409662730992e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 511, + "train_speed(iter/s)": 0.022291 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.0, + "completions/mean_length": 72.75000190734863, + "completions/min_length": 36.125, + "epoch": 1.0178704393149665, + "grad_norm": 2.162647037893844, + "kl": 0.10076904296875, + "learning_rate": 9.789964317161002e-07, + "loss": -0.0035934101324528456, + "memory(GiB)": 94.21, + "reward": 1.7500000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.7500000037252903, + "rewards/CineAccuracyORM/std": 0.2536497339606285, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 512, + "train_speed(iter/s)": 0.022291 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 73.79166984558105, + "completions/min_length": 37.25, + "epoch": 1.0198560436832962, + "grad_norm": 0.004328347207335489, + "kl": 0.0701904296875, + "learning_rate": 9.789058579872578e-07, + "loss": 7.007941894698888e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 513, + "train_speed(iter/s)": 0.022285 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.625, + "completions/mean_length": 89.18750286102295, + "completions/min_length": 35.5, + "epoch": 1.0218416480516257, + "grad_norm": 1.8372919943631991, + "kl": 0.064453125, + "learning_rate": 9.788150935957643e-07, + "loss": -0.004091219510883093, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 514, + "train_speed(iter/s)": 0.022278 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.75, + "completions/mean_length": 76.208336353302, + "completions/min_length": 34.625, + "epoch": 1.0238272524199554, + "grad_norm": 0.005050457461279664, + "kl": 0.071044921875, + "learning_rate": 9.787241385777545e-07, + "loss": 7.10004023858346e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 515, + "train_speed(iter/s)": 0.022281 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.375, + "completions/mean_length": 73.90625333786011, + "completions/min_length": 29.625, + "epoch": 1.025812856788285, + "grad_norm": 0.7704170526906402, + "kl": 0.086456298828125, + "learning_rate": 9.786329929694399e-07, + "loss": 8.654098201077431e-05, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 516, + "train_speed(iter/s)": 0.022282 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 69.11458492279053, + "completions/min_length": 30.75, + "epoch": 1.0277984611566144, + "grad_norm": 0.005801654499940909, + "kl": 0.061981201171875, + "learning_rate": 9.785416568071074e-07, + "loss": 6.214170571183786e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 517, + "train_speed(iter/s)": 0.022281 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 79.60416889190674, + "completions/min_length": 34.0, + "epoch": 1.0297840655249442, + "grad_norm": 0.004460711134197175, + "kl": 0.07073974609375, + "learning_rate": 9.7845013012712e-07, + "loss": 7.083349191816524e-05, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.45695383101701736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 518, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 72.54166841506958, + "completions/min_length": 34.25, + "epoch": 1.0317696698932737, + "grad_norm": 1.4908082648025123, + "kl": 0.07379150390625, + "learning_rate": 9.78358412965916e-07, + "loss": 0.004011109005659819, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 519, + "train_speed(iter/s)": 0.022282 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.25, + "completions/mean_length": 76.08333492279053, + "completions/min_length": 32.0, + "epoch": 1.0337552742616034, + "grad_norm": 0.0051476155769187475, + "kl": 0.056854248046875, + "learning_rate": 9.782665053600108e-07, + "loss": 5.6818025768734515e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 520, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.5, + "completions/mean_length": 89.37500190734863, + "completions/min_length": 45.875, + "epoch": 1.035740878629933, + "grad_norm": 0.004454868572791521, + "kl": 0.068878173828125, + "learning_rate": 9.781744073459941e-07, + "loss": 6.887844210723415e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 521, + "train_speed(iter/s)": 0.022276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.125, + "completions/mean_length": 80.09375381469727, + "completions/min_length": 36.625, + "epoch": 1.0377264829982626, + "grad_norm": 0.020376927241310686, + "kl": 0.103729248046875, + "learning_rate": 9.780821189605321e-07, + "loss": 0.00010364950867369771, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 522, + "train_speed(iter/s)": 0.022274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.625, + "completions/mean_length": 81.53125190734863, + "completions/min_length": 41.25, + "epoch": 1.0397120873665922, + "grad_norm": 0.004244578044599986, + "kl": 0.071319580078125, + "learning_rate": 9.779896402403673e-07, + "loss": 7.135018677217886e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 523, + "train_speed(iter/s)": 0.02227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 75.16666841506958, + "completions/min_length": 32.0, + "epoch": 1.0416976917349219, + "grad_norm": 0.9229648514430092, + "kl": 0.062896728515625, + "learning_rate": 9.778969712223168e-07, + "loss": -0.00029640336288139224, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 524, + "train_speed(iter/s)": 0.022272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 81.65625333786011, + "completions/min_length": 31.75, + "epoch": 1.0436832961032514, + "grad_norm": 0.005766362042818235, + "kl": 0.07427978515625, + "learning_rate": 9.778041119432745e-07, + "loss": 7.427332457154989e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 525, + "train_speed(iter/s)": 0.022269 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 70.98958587646484, + "completions/min_length": 33.5, + "epoch": 1.0456689004715811, + "grad_norm": 0.005632745793919945, + "kl": 0.08355712890625, + "learning_rate": 9.777110624402097e-07, + "loss": 8.361268555745482e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 526, + "train_speed(iter/s)": 0.022269 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.875, + "completions/mean_length": 77.302086353302, + "completions/min_length": 32.0, + "epoch": 1.0476545048399106, + "grad_norm": 0.788546399607603, + "kl": 0.07257080078125, + "learning_rate": 9.77617822750167e-07, + "loss": 0.00844595581293106, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 527, + "train_speed(iter/s)": 0.022271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.0, + "completions/mean_length": 78.57291793823242, + "completions/min_length": 33.625, + "epoch": 1.0496401092082404, + "grad_norm": 0.00458352611145281, + "kl": 0.080230712890625, + "learning_rate": 9.775243929102675e-07, + "loss": 8.02253489382565e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 528, + "train_speed(iter/s)": 0.022268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 84.77083492279053, + "completions/min_length": 35.375, + "epoch": 1.0516257135765699, + "grad_norm": 0.006605951247201382, + "kl": 0.07244873046875, + "learning_rate": 9.774307729577075e-07, + "loss": 7.250799535540864e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 529, + "train_speed(iter/s)": 0.022264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.25, + "completions/mean_length": 77.42708539962769, + "completions/min_length": 35.625, + "epoch": 1.0536113179448994, + "grad_norm": 0.7840239999005865, + "kl": 0.07177734375, + "learning_rate": 9.773369629297592e-07, + "loss": 0.013522615656256676, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 530, + "train_speed(iter/s)": 0.022264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 78.37500143051147, + "completions/min_length": 31.625, + "epoch": 1.055596922313229, + "grad_norm": 0.008212685233090624, + "kl": 0.074432373046875, + "learning_rate": 9.772429628637702e-07, + "loss": 7.443128561135381e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 531, + "train_speed(iter/s)": 0.022262 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.625, + "completions/mean_length": 90.52083587646484, + "completions/min_length": 39.5, + "epoch": 1.0575825266815586, + "grad_norm": 0.7164334416660307, + "kl": 0.089874267578125, + "learning_rate": 9.77148772797164e-07, + "loss": -0.00206756847910583, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 532, + "train_speed(iter/s)": 0.022256 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 76.44791984558105, + "completions/min_length": 35.25, + "epoch": 1.0595681310498883, + "grad_norm": 0.007644393277728821, + "kl": 0.068206787109375, + "learning_rate": 9.770543927674397e-07, + "loss": 6.809654587414116e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 533, + "train_speed(iter/s)": 0.022254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 69.43750143051147, + "completions/min_length": 33.625, + "epoch": 1.0615537354182178, + "grad_norm": 0.00816462313993516, + "kl": 0.079376220703125, + "learning_rate": 9.769598228121721e-07, + "loss": 7.938842463772744e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 534, + "train_speed(iter/s)": 0.022253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 69.57291793823242, + "completions/min_length": 33.0, + "epoch": 1.0635393397865476, + "grad_norm": 0.8271361955283226, + "kl": 0.071990966796875, + "learning_rate": 9.768650629690112e-07, + "loss": 0.006067268550395966, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 535, + "train_speed(iter/s)": 0.022255 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 73.57291841506958, + "completions/min_length": 34.5, + "epoch": 1.065524944154877, + "grad_norm": 0.005765702871034344, + "kl": 0.091094970703125, + "learning_rate": 9.767701132756832e-07, + "loss": 9.09459195099771e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 536, + "train_speed(iter/s)": 0.022253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 74.59375190734863, + "completions/min_length": 34.25, + "epoch": 1.0675105485232068, + "grad_norm": 0.0075379752580999665, + "kl": 0.06256103515625, + "learning_rate": 9.766749737699894e-07, + "loss": 6.25491957180202e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 537, + "train_speed(iter/s)": 0.022256 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 71.52083587646484, + "completions/min_length": 31.375, + "epoch": 1.0694961528915363, + "grad_norm": 0.007045632345683777, + "kl": 0.076202392578125, + "learning_rate": 9.765796444898073e-07, + "loss": 7.618629024364054e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 538, + "train_speed(iter/s)": 0.022253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 81.64583492279053, + "completions/min_length": 38.5, + "epoch": 1.071481757259866, + "grad_norm": 1.2930726323799988, + "kl": 0.0855712890625, + "learning_rate": 9.76484125473089e-07, + "loss": -0.0059394738636910915, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 539, + "train_speed(iter/s)": 0.022253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.0, + "completions/mean_length": 60.01041889190674, + "completions/min_length": 30.875, + "epoch": 1.0734673616281956, + "grad_norm": 1.6187815902552098, + "kl": 0.07476806640625, + "learning_rate": 9.76388416757863e-07, + "loss": 0.0015911355149000883, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 540, + "train_speed(iter/s)": 0.022259 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 68.67708539962769, + "completions/min_length": 32.125, + "epoch": 1.0754529659965253, + "grad_norm": 0.006886318912753367, + "kl": 0.088104248046875, + "learning_rate": 9.76292518382233e-07, + "loss": 8.805892139207572e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 541, + "train_speed(iter/s)": 0.02226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 70.92708396911621, + "completions/min_length": 30.75, + "epoch": 1.0774385703648548, + "grad_norm": 0.007738070130499908, + "kl": 0.087615966796875, + "learning_rate": 9.761964303843779e-07, + "loss": 8.774644084041938e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 542, + "train_speed(iter/s)": 0.022264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 79.00000143051147, + "completions/min_length": 34.25, + "epoch": 1.0794241747331843, + "grad_norm": 1.3135111635460766, + "kl": 0.08270263671875, + "learning_rate": 9.761001528025525e-07, + "loss": -0.00990224163979292, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.10518955811858177, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 543, + "train_speed(iter/s)": 0.022269 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 65.88541793823242, + "completions/min_length": 31.125, + "epoch": 1.081409779101514, + "grad_norm": 0.9530289209328595, + "kl": 0.087738037109375, + "learning_rate": 9.760036856750871e-07, + "loss": 0.008948824368417263, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 544, + "train_speed(iter/s)": 0.022268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.375, + "completions/mean_length": 76.25000286102295, + "completions/min_length": 35.75, + "epoch": 1.0833953834698435, + "grad_norm": 0.8307619189537885, + "kl": 0.082550048828125, + "learning_rate": 9.759070290403872e-07, + "loss": -0.001533027971163392, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 545, + "train_speed(iter/s)": 0.022267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 67.15625238418579, + "completions/min_length": 28.25, + "epoch": 1.0853809878381733, + "grad_norm": 0.007410270803046887, + "kl": 0.088897705078125, + "learning_rate": 9.758101829369338e-07, + "loss": 8.878414519131184e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 546, + "train_speed(iter/s)": 0.022272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.0, + "completions/mean_length": 64.50000047683716, + "completions/min_length": 33.25, + "epoch": 1.0873665922065028, + "grad_norm": 0.006274003877070703, + "kl": 0.094268798828125, + "learning_rate": 9.757131474032836e-07, + "loss": 9.4304108642973e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 547, + "train_speed(iter/s)": 0.022269 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 70.78125190734863, + "completions/min_length": 29.875, + "epoch": 1.0893521965748325, + "grad_norm": 0.008522866321777733, + "kl": 0.08306884765625, + "learning_rate": 9.756159224780685e-07, + "loss": 8.306295785587281e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 548, + "train_speed(iter/s)": 0.022269 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.125, + "completions/mean_length": 78.2291693687439, + "completions/min_length": 33.25, + "epoch": 1.091337800943162, + "grad_norm": 0.7510567466480369, + "kl": 0.09552001953125, + "learning_rate": 9.755185081999955e-07, + "loss": -0.007611400447785854, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 549, + "train_speed(iter/s)": 0.022268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 69.33333492279053, + "completions/min_length": 27.125, + "epoch": 1.0933234053114917, + "grad_norm": 0.7092548104403367, + "kl": 0.08740234375, + "learning_rate": 9.754209046078478e-07, + "loss": 0.011761273257434368, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 550, + "train_speed(iter/s)": 0.022267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 72.59375143051147, + "completions/min_length": 32.75, + "epoch": 1.0953090096798213, + "grad_norm": 0.004961644686553474, + "kl": 0.07257080078125, + "learning_rate": 9.75323111740483e-07, + "loss": 7.263156294357032e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 551, + "train_speed(iter/s)": 0.022273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.75, + "completions/mean_length": 70.0416693687439, + "completions/min_length": 29.625, + "epoch": 1.097294614048151, + "grad_norm": 0.0045129546953054335, + "kl": 0.069488525390625, + "learning_rate": 9.75225129636835e-07, + "loss": 6.942203617654741e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 552, + "train_speed(iter/s)": 0.022273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 66.03125238418579, + "completions/min_length": 33.25, + "epoch": 1.0992802184164805, + "grad_norm": 0.008681414916424587, + "kl": 0.069915771484375, + "learning_rate": 9.75126958335912e-07, + "loss": 6.986403604969382e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 553, + "train_speed(iter/s)": 0.022273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 66.64583492279053, + "completions/min_length": 28.375, + "epoch": 1.1012658227848102, + "grad_norm": 1.0271986211342294, + "kl": 0.093963623046875, + "learning_rate": 9.750285978767986e-07, + "loss": 9.381522977491841e-05, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666679084301, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 554, + "train_speed(iter/s)": 0.022273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 66.58333444595337, + "completions/min_length": 29.625, + "epoch": 1.1032514271531397, + "grad_norm": 0.005193049814199608, + "kl": 0.0740966796875, + "learning_rate": 9.749300482986542e-07, + "loss": 7.403695781249553e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 555, + "train_speed(iter/s)": 0.022277 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 68.97916841506958, + "completions/min_length": 26.0, + "epoch": 1.1052370315214692, + "grad_norm": 0.006816479913573796, + "kl": 0.08026123046875, + "learning_rate": 9.74831309640713e-07, + "loss": 8.033551421249285e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 556, + "train_speed(iter/s)": 0.02228 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.375, + "completions/mean_length": 71.0104193687439, + "completions/min_length": 30.125, + "epoch": 1.107222635889799, + "grad_norm": 0.007952116847060918, + "kl": 0.0810546875, + "learning_rate": 9.747323819422854e-07, + "loss": 8.10863493825309e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 557, + "train_speed(iter/s)": 0.022275 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.625, + "completions/mean_length": 66.62500190734863, + "completions/min_length": 33.125, + "epoch": 1.1092082402581285, + "grad_norm": 0.009222802985643283, + "kl": 0.07696533203125, + "learning_rate": 9.746332652427565e-07, + "loss": 7.687990000704303e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 558, + "train_speed(iter/s)": 0.022279 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.5, + "completions/mean_length": 69.54166889190674, + "completions/min_length": 30.25, + "epoch": 1.1111938446264582, + "grad_norm": 0.005154444981706478, + "kl": 0.07647705078125, + "learning_rate": 9.745339595815866e-07, + "loss": 7.652018393855542e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 559, + "train_speed(iter/s)": 0.022282 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.5, + "completions/mean_length": 75.03125286102295, + "completions/min_length": 27.75, + "epoch": 1.1131794489947877, + "grad_norm": 0.0054852679381394535, + "kl": 0.078277587890625, + "learning_rate": 9.744344649983118e-07, + "loss": 7.82212518970482e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 560, + "train_speed(iter/s)": 0.022282 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 74.20833539962769, + "completions/min_length": 37.75, + "epoch": 1.1151650533631174, + "grad_norm": 0.0046925295878660934, + "kl": 0.07037353515625, + "learning_rate": 9.743347815325427e-07, + "loss": 7.043230289127678e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 561, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 68.68750143051147, + "completions/min_length": 28.125, + "epoch": 1.117150657731447, + "grad_norm": 0.0064295972139579525, + "kl": 0.082794189453125, + "learning_rate": 9.742349092239657e-07, + "loss": 8.272120612673461e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 562, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 73.55208587646484, + "completions/min_length": 28.625, + "epoch": 1.1191362620997767, + "grad_norm": 0.006041683775244193, + "kl": 0.06353759765625, + "learning_rate": 9.741348481123417e-07, + "loss": 6.355245568556711e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 563, + "train_speed(iter/s)": 0.022288 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.875, + "completions/mean_length": 67.50000143051147, + "completions/min_length": 30.625, + "epoch": 1.1211218664681062, + "grad_norm": 0.005537210520376235, + "kl": 0.06201171875, + "learning_rate": 9.740345982375075e-07, + "loss": 6.209105777088553e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 564, + "train_speed(iter/s)": 0.02229 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.25, + "completions/mean_length": 87.48958539962769, + "completions/min_length": 31.5, + "epoch": 1.123107470836436, + "grad_norm": 0.004627467234734946, + "kl": 0.060821533203125, + "learning_rate": 9.739341596393744e-07, + "loss": 6.0824411775683984e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 565, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.125, + "completions/mean_length": 67.67708683013916, + "completions/min_length": 29.375, + "epoch": 1.1250930752047654, + "grad_norm": 0.005403305140264467, + "kl": 0.0694580078125, + "learning_rate": 9.738335323579296e-07, + "loss": 6.947192014195025e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 566, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 68.79166793823242, + "completions/min_length": 28.75, + "epoch": 1.1270786795730952, + "grad_norm": 0.008100067700864159, + "kl": 0.08172607421875, + "learning_rate": 9.737327164332345e-07, + "loss": 8.171075023710728e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 567, + "train_speed(iter/s)": 0.022287 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.125, + "completions/mean_length": 83.60417079925537, + "completions/min_length": 31.25, + "epoch": 1.1290642839414247, + "grad_norm": 0.005568777931783807, + "kl": 0.084014892578125, + "learning_rate": 9.736317119054266e-07, + "loss": 8.400918886763975e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 568, + "train_speed(iter/s)": 0.022286 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 73.78125143051147, + "completions/min_length": 29.375, + "epoch": 1.1310498883097542, + "grad_norm": 1.377599708980024, + "kl": 0.08050537109375, + "learning_rate": 9.735305188147174e-07, + "loss": -0.004226197954267263, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 569, + "train_speed(iter/s)": 0.022289 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 74.00000190734863, + "completions/min_length": 36.375, + "epoch": 1.133035492678084, + "grad_norm": 0.9006534554013635, + "kl": 0.079132080078125, + "learning_rate": 9.734291372013944e-07, + "loss": 0.004293154925107956, + "memory(GiB)": 94.21, + "reward": 1.6458333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6458333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 570, + "train_speed(iter/s)": 0.022286 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.625, + "completions/mean_length": 70.63541793823242, + "completions/min_length": 31.0, + "epoch": 1.1350210970464134, + "grad_norm": 0.006454065852785443, + "kl": 0.073272705078125, + "learning_rate": 9.733275671058194e-07, + "loss": 7.332953828154132e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 571, + "train_speed(iter/s)": 0.022283 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 65.14583539962769, + "completions/min_length": 28.375, + "epoch": 1.1370067014147431, + "grad_norm": 0.005973812324536165, + "kl": 0.075653076171875, + "learning_rate": 9.732258085684301e-07, + "loss": 7.564792758785188e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 572, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 63.47916841506958, + "completions/min_length": 27.375, + "epoch": 1.1389923057830726, + "grad_norm": 0.005360766270262526, + "kl": 0.0867919921875, + "learning_rate": 9.731238616297386e-07, + "loss": 8.681170584168285e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 573, + "train_speed(iter/s)": 0.022285 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 62.281250953674316, + "completions/min_length": 29.125, + "epoch": 1.1409779101514024, + "grad_norm": 0.0362138873978799, + "kl": 0.1376953125, + "learning_rate": 9.73021726330332e-07, + "loss": 0.00013740996655542403, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 574, + "train_speed(iter/s)": 0.022291 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.0, + "completions/mean_length": 67.67708587646484, + "completions/min_length": 31.25, + "epoch": 1.1429635145197319, + "grad_norm": 0.005293666035821966, + "kl": 0.063720703125, + "learning_rate": 9.729194027108727e-07, + "loss": 6.370741175487638e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 575, + "train_speed(iter/s)": 0.022294 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 67.43750238418579, + "completions/min_length": 29.75, + "epoch": 1.1449491188880616, + "grad_norm": 1.3119494316607527, + "kl": 0.06207275390625, + "learning_rate": 9.728168908120978e-07, + "loss": 6.201180804055184e-05, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 576, + "train_speed(iter/s)": 0.022298 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 249.5, + "completions/mean_length": 88.40625238418579, + "completions/min_length": 33.0, + "epoch": 1.1469347232563911, + "grad_norm": 0.3271877449936268, + "kl": 0.0828857421875, + "learning_rate": 9.727141906748195e-07, + "loss": 0.022682178765535355, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 577, + "train_speed(iter/s)": 0.022289 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.5, + "completions/mean_length": 70.32291793823242, + "completions/min_length": 27.75, + "epoch": 1.1489203276247208, + "grad_norm": 0.005576377585265308, + "kl": 0.09912109375, + "learning_rate": 9.726113023399248e-07, + "loss": 9.906635386869311e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 578, + "train_speed(iter/s)": 0.022285 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 72.19791793823242, + "completions/min_length": 31.25, + "epoch": 1.1509059319930504, + "grad_norm": 0.009045235726883746, + "kl": 0.09552001953125, + "learning_rate": 9.725082258483764e-07, + "loss": 9.553381096338853e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 579, + "train_speed(iter/s)": 0.022285 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.875, + "completions/mean_length": 79.12500286102295, + "completions/min_length": 29.875, + "epoch": 1.15289153636138, + "grad_norm": 1.0449247123185463, + "kl": 0.09832763671875, + "learning_rate": 9.724049612412103e-07, + "loss": -0.0012995228171348572, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 580, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 71.78125095367432, + "completions/min_length": 32.125, + "epoch": 1.1548771407297096, + "grad_norm": 0.00654337629304603, + "kl": 0.076019287109375, + "learning_rate": 9.72301508559539e-07, + "loss": 7.600930257467553e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 581, + "train_speed(iter/s)": 0.022287 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.0, + "completions/mean_length": 63.70833730697632, + "completions/min_length": 28.875, + "epoch": 1.156862745098039, + "grad_norm": 0.008240480651193175, + "kl": 0.073974609375, + "learning_rate": 9.72197867844549e-07, + "loss": 7.402162736980245e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 582, + "train_speed(iter/s)": 0.02229 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.75, + "completions/mean_length": 61.04166841506958, + "completions/min_length": 31.375, + "epoch": 1.1588483494663688, + "grad_norm": 0.010119890727403267, + "kl": 0.1004638671875, + "learning_rate": 9.720940391375017e-07, + "loss": 0.00010052209108835086, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 583, + "train_speed(iter/s)": 0.022295 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.375, + "completions/mean_length": 80.20833492279053, + "completions/min_length": 29.5, + "epoch": 1.1608339538346983, + "grad_norm": 0.8486786913735911, + "kl": 0.0975341796875, + "learning_rate": 9.71990022479734e-07, + "loss": -9.814029181143269e-05, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 584, + "train_speed(iter/s)": 0.022294 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 77.67708683013916, + "completions/min_length": 29.875, + "epoch": 1.162819558203028, + "grad_norm": 0.008552646877854078, + "kl": 0.081268310546875, + "learning_rate": 9.718858179126567e-07, + "loss": 8.129799243761227e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 585, + "train_speed(iter/s)": 0.022295 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.0, + "completions/mean_length": 76.46875190734863, + "completions/min_length": 27.75, + "epoch": 1.1648051625713576, + "grad_norm": 0.008366967018786422, + "kl": 0.08905029296875, + "learning_rate": 9.71781425477756e-07, + "loss": 8.896699728211388e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 586, + "train_speed(iter/s)": 0.022294 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.25, + "completions/mean_length": 81.80208587646484, + "completions/min_length": 34.0, + "epoch": 1.1667907669396873, + "grad_norm": 0.008196082771375898, + "kl": 0.085784912109375, + "learning_rate": 9.71676845216593e-07, + "loss": 8.579804853070527e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 587, + "train_speed(iter/s)": 0.022291 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 72.92708539962769, + "completions/min_length": 26.75, + "epoch": 1.1687763713080168, + "grad_norm": 0.005775455703144766, + "kl": 0.091156005859375, + "learning_rate": 9.715720771708031e-07, + "loss": 9.109506936511025e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 588, + "train_speed(iter/s)": 0.022292 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 74.88541984558105, + "completions/min_length": 33.375, + "epoch": 1.1707619756763465, + "grad_norm": 1.2103084833535742, + "kl": 0.09063720703125, + "learning_rate": 9.714671213820966e-07, + "loss": -0.0009210556745529175, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 589, + "train_speed(iter/s)": 0.022296 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 79.07291841506958, + "completions/min_length": 32.0, + "epoch": 1.172747580044676, + "grad_norm": 0.00727854680400535, + "kl": 0.095062255859375, + "learning_rate": 9.713619778922587e-07, + "loss": 9.491811943007633e-05, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 590, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.125, + "completions/mean_length": 79.66666984558105, + "completions/min_length": 30.375, + "epoch": 1.1747331844130058, + "grad_norm": 0.005139729517205997, + "kl": 0.07672119140625, + "learning_rate": 9.712566467431496e-07, + "loss": 7.6712341979146e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 591, + "train_speed(iter/s)": 0.022302 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 75.00000238418579, + "completions/min_length": 28.75, + "epoch": 1.1767187887813353, + "grad_norm": 0.005262161215713062, + "kl": 0.076171875, + "learning_rate": 9.711511279767035e-07, + "loss": 7.62054551159963e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 592, + "train_speed(iter/s)": 0.022301 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 75.43750238418579, + "completions/min_length": 35.625, + "epoch": 1.178704393149665, + "grad_norm": 0.0066197236244243075, + "kl": 0.08331298828125, + "learning_rate": 9.710454216349298e-07, + "loss": 8.333769801538438e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 593, + "train_speed(iter/s)": 0.022299 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.0, + "completions/mean_length": 71.7604193687439, + "completions/min_length": 31.875, + "epoch": 1.1806899975179945, + "grad_norm": 0.9440998961946998, + "kl": 0.073974609375, + "learning_rate": 9.709395277599124e-07, + "loss": -0.002237787004560232, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.833333333954215, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 594, + "train_speed(iter/s)": 0.022296 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 187.75, + "completions/mean_length": 91.32291793823242, + "completions/min_length": 36.75, + "epoch": 1.182675601886324, + "grad_norm": 0.005108966715773797, + "kl": 0.06787109375, + "learning_rate": 9.7083344639381e-07, + "loss": 6.787019810872152e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 595, + "train_speed(iter/s)": 0.022293 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.375, + "completions/mean_length": 80.78125190734863, + "completions/min_length": 33.5, + "epoch": 1.1846612062546538, + "grad_norm": 1.192715802963661, + "kl": 0.068511962890625, + "learning_rate": 9.707271775788558e-07, + "loss": -3.7482786865439266e-05, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 596, + "train_speed(iter/s)": 0.022296 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 64.06250190734863, + "completions/min_length": 24.75, + "epoch": 1.1866468106229835, + "grad_norm": 0.7680531624455892, + "kl": 0.0888671875, + "learning_rate": 9.706207213573579e-07, + "loss": 0.003137031104415655, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 597, + "train_speed(iter/s)": 0.022291 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 68.83333587646484, + "completions/min_length": 27.75, + "epoch": 1.188632414991313, + "grad_norm": 0.9941811488501888, + "kl": 0.106414794921875, + "learning_rate": 9.705140777716985e-07, + "loss": -0.003719897475093603, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 598, + "train_speed(iter/s)": 0.022295 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 78.93750381469727, + "completions/min_length": 35.375, + "epoch": 1.1906180193596425, + "grad_norm": 0.005860471067882116, + "kl": 0.06390380859375, + "learning_rate": 9.704072468643347e-07, + "loss": 6.389830377884209e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 599, + "train_speed(iter/s)": 0.022294 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 79.62500238418579, + "completions/min_length": 30.875, + "epoch": 1.1926036237279722, + "grad_norm": 0.00509660365868744, + "kl": 0.082275390625, + "learning_rate": 9.703002286777983e-07, + "loss": 8.220366726163775e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 600, + "train_speed(iter/s)": 0.022293 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 73.88541841506958, + "completions/min_length": 27.375, + "epoch": 1.1945892280963017, + "grad_norm": 0.04406790502517936, + "kl": 0.099945068359375, + "learning_rate": 9.701930232546954e-07, + "loss": 9.99331459752284e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 601, + "train_speed(iter/s)": 0.022293 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 73.29166889190674, + "completions/min_length": 32.5, + "epoch": 1.1965748324646315, + "grad_norm": 0.00804967431869538, + "kl": 0.07000732421875, + "learning_rate": 9.70085630637707e-07, + "loss": 6.998522439971566e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 602, + "train_speed(iter/s)": 0.022295 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 255.25, + "completions/mean_length": 81.33333444595337, + "completions/min_length": 29.0, + "epoch": 1.198560436832961, + "grad_norm": 0.532666801069856, + "kl": 0.096282958984375, + "learning_rate": 9.69978050869588e-07, + "loss": 0.02345992624759674, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 603, + "train_speed(iter/s)": 0.022288 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 70.64583587646484, + "completions/min_length": 30.625, + "epoch": 1.2005460412012907, + "grad_norm": 1.0674061726574062, + "kl": 0.0982666015625, + "learning_rate": 9.698702839931687e-07, + "loss": 0.009132467210292816, + "memory(GiB)": 94.21, + "reward": 1.6041666716337204, + "reward_std": 0.05974818021059036, + "rewards/CineAccuracyORM/mean": 0.6041666669771075, + "rewards/CineAccuracyORM/std": 0.2231760062277317, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 604, + "train_speed(iter/s)": 0.022288 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 67.56250190734863, + "completions/min_length": 30.0, + "epoch": 1.2025316455696202, + "grad_norm": 1.097897789618336, + "kl": 0.09222412109375, + "learning_rate": 9.69762330051353e-07, + "loss": 0.01471245288848877, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8333333432674408, + "rewards/CineAccuracyORM/std": 0.22787059843540192, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 605, + "train_speed(iter/s)": 0.022289 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 71.45833587646484, + "completions/min_length": 28.25, + "epoch": 1.20451724993795, + "grad_norm": 0.009278574148672496, + "kl": 0.0850830078125, + "learning_rate": 9.696541890871198e-07, + "loss": 8.51130680530332e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 606, + "train_speed(iter/s)": 0.022289 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 71.94791889190674, + "completions/min_length": 29.25, + "epoch": 1.2065028543062795, + "grad_norm": 0.009320926369159988, + "kl": 0.0885009765625, + "learning_rate": 9.695458611435228e-07, + "loss": 8.853545296005905e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 607, + "train_speed(iter/s)": 0.02229 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.5, + "completions/mean_length": 81.37500095367432, + "completions/min_length": 31.5, + "epoch": 1.208488458674609, + "grad_norm": 1.4641006828415697, + "kl": 0.10943603515625, + "learning_rate": 9.694373462636887e-07, + "loss": 0.006353606935590506, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 608, + "train_speed(iter/s)": 0.022284 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.0, + "completions/mean_length": 64.76041889190674, + "completions/min_length": 30.25, + "epoch": 1.2104740630429387, + "grad_norm": 0.009977849519231516, + "kl": 0.097137451171875, + "learning_rate": 9.693286444908201e-07, + "loss": 9.709088772069663e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 609, + "train_speed(iter/s)": 0.022289 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.0, + "completions/mean_length": 59.96875238418579, + "completions/min_length": 32.0, + "epoch": 1.2124596674112684, + "grad_norm": 0.011079030117479677, + "kl": 0.124267578125, + "learning_rate": 9.69219755868194e-07, + "loss": 0.00012425723252817988, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 610, + "train_speed(iter/s)": 0.022297 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 71.08333587646484, + "completions/min_length": 29.0, + "epoch": 1.214445271779598, + "grad_norm": 0.009538366947412816, + "kl": 0.1038818359375, + "learning_rate": 9.691106804391603e-07, + "loss": 0.00010387604561401531, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 611, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 72.29166841506958, + "completions/min_length": 32.375, + "epoch": 1.2164308761479274, + "grad_norm": 0.009378721437578636, + "kl": 0.10986328125, + "learning_rate": 9.690014182471447e-07, + "loss": 0.00010981389641528949, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 612, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.5, + "completions/mean_length": 83.25000190734863, + "completions/min_length": 27.75, + "epoch": 1.2184164805162572, + "grad_norm": 0.00875745529021343, + "kl": 0.0936279296875, + "learning_rate": 9.688919693356471e-07, + "loss": 9.357710223412141e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 613, + "train_speed(iter/s)": 0.022302 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 70.10416889190674, + "completions/min_length": 32.875, + "epoch": 1.2204020848845867, + "grad_norm": 0.02622649092908211, + "kl": 0.096343994140625, + "learning_rate": 9.68782333748241e-07, + "loss": 9.625627717468888e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 614, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.375, + "completions/mean_length": 70.08333539962769, + "completions/min_length": 31.25, + "epoch": 1.2223876892529164, + "grad_norm": 0.009778015591590647, + "kl": 0.096923828125, + "learning_rate": 9.686725115285753e-07, + "loss": 9.710974700283259e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 615, + "train_speed(iter/s)": 0.022301 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 68.37500286102295, + "completions/min_length": 27.625, + "epoch": 1.224373293621246, + "grad_norm": 0.008639201113997063, + "kl": 0.069488525390625, + "learning_rate": 9.685625027203717e-07, + "loss": 6.949243106646463e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 616, + "train_speed(iter/s)": 0.022302 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.125, + "completions/mean_length": 76.88541889190674, + "completions/min_length": 29.5, + "epoch": 1.2263588979895756, + "grad_norm": 0.7601950132829342, + "kl": 0.092315673828125, + "learning_rate": 9.684523073674279e-07, + "loss": 0.00995372049510479, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 617, + "train_speed(iter/s)": 0.0223 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.5, + "completions/mean_length": 82.02083492279053, + "completions/min_length": 27.375, + "epoch": 1.2283445023579052, + "grad_norm": 0.006405114926944636, + "kl": 0.074371337890625, + "learning_rate": 9.683419255136144e-07, + "loss": 7.436737359967083e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 618, + "train_speed(iter/s)": 0.022299 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 61.82291793823242, + "completions/min_length": 24.75, + "epoch": 1.2303301067262349, + "grad_norm": 0.008953339752428532, + "kl": 0.0848388671875, + "learning_rate": 9.682313572028767e-07, + "loss": 8.475745562463999e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 619, + "train_speed(iter/s)": 0.022304 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 71.10416841506958, + "completions/min_length": 30.875, + "epoch": 1.2323157110945644, + "grad_norm": 1.2336909977040258, + "kl": 0.095855712890625, + "learning_rate": 9.681206024792346e-07, + "loss": 0.01599309965968132, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 620, + "train_speed(iter/s)": 0.022308 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 66.40625143051147, + "completions/min_length": 25.75, + "epoch": 1.234301315462894, + "grad_norm": 0.0070794522303797664, + "kl": 0.08160400390625, + "learning_rate": 9.680096613867818e-07, + "loss": 8.146717300405726e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 621, + "train_speed(iter/s)": 0.022305 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.875, + "completions/mean_length": 80.18750190734863, + "completions/min_length": 27.125, + "epoch": 1.2362869198312236, + "grad_norm": 0.005021899244058689, + "kl": 0.08172607421875, + "learning_rate": 9.678985339696864e-07, + "loss": 8.161667210515589e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 622, + "train_speed(iter/s)": 0.022304 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 65.94791841506958, + "completions/min_length": 25.625, + "epoch": 1.2382725241995534, + "grad_norm": 2.2559389874404454, + "kl": 0.101318359375, + "learning_rate": 9.677872202721903e-07, + "loss": 0.015005329623818398, + "memory(GiB)": 94.21, + "reward": 1.9062500149011612, + "reward_std": 0.09878238290548325, + "rewards/CineAccuracyORM/mean": 0.9062500074505806, + "rewards/CineAccuracyORM/std": 0.16673530638217926, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 623, + "train_speed(iter/s)": 0.022306 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 67.87500333786011, + "completions/min_length": 27.5, + "epoch": 1.2402581285678829, + "grad_norm": 0.9838700404438614, + "kl": 0.10595703125, + "learning_rate": 9.676757203386106e-07, + "loss": 0.009951414540410042, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 624, + "train_speed(iter/s)": 0.022308 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 68.48958444595337, + "completions/min_length": 30.25, + "epoch": 1.2422437329362124, + "grad_norm": 0.012438820543306112, + "kl": 0.076507568359375, + "learning_rate": 9.67564034213337e-07, + "loss": 7.641559932380915e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 625, + "train_speed(iter/s)": 0.022312 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.75, + "completions/mean_length": 61.42708492279053, + "completions/min_length": 28.75, + "epoch": 1.244229337304542, + "grad_norm": 0.02635202543200512, + "kl": 0.090057373046875, + "learning_rate": 9.674521619408345e-07, + "loss": 9.003737068269402e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 626, + "train_speed(iter/s)": 0.022316 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 72.395836353302, + "completions/min_length": 24.625, + "epoch": 1.2462149416728716, + "grad_norm": 0.04149972290410046, + "kl": 0.112823486328125, + "learning_rate": 9.673401035656418e-07, + "loss": 0.00011268883099546656, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 627, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 65.80208492279053, + "completions/min_length": 28.5, + "epoch": 1.2482005460412013, + "grad_norm": 0.8450286055869382, + "kl": 0.103271484375, + "learning_rate": 9.672278591323715e-07, + "loss": -0.00820427667349577, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 628, + "train_speed(iter/s)": 0.022321 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 60.72916793823242, + "completions/min_length": 28.625, + "epoch": 1.2501861504095308, + "grad_norm": 0.008435497557858766, + "kl": 0.079864501953125, + "learning_rate": 9.67115428685711e-07, + "loss": 7.984022522578016e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 629, + "train_speed(iter/s)": 0.022323 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 66.71875143051147, + "completions/min_length": 31.125, + "epoch": 1.2521717547778606, + "grad_norm": 2.9675699842817203, + "kl": 0.09912109375, + "learning_rate": 9.670028122704208e-07, + "loss": 0.001313959015533328, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166669771075, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 630, + "train_speed(iter/s)": 0.022322 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 72.27083539962769, + "completions/min_length": 29.375, + "epoch": 1.25415735914619, + "grad_norm": 0.008018207494434805, + "kl": 0.07928466796875, + "learning_rate": 9.668900099313363e-07, + "loss": 7.921218639239669e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 631, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 240.625, + "completions/mean_length": 74.53125381469727, + "completions/min_length": 26.125, + "epoch": 1.2561429635145198, + "grad_norm": 0.8151050059746938, + "kl": 0.11920166015625, + "learning_rate": 9.667770217133662e-07, + "loss": 0.027313342317938805, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.07654655165970325, + "rewards/CineAccuracyORM/mean": 0.9375000074505806, + "rewards/CineAccuracyORM/std": 0.10045047104358673, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 632, + "train_speed(iter/s)": 0.022312 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 64.94791793823242, + "completions/min_length": 31.25, + "epoch": 1.2581285678828493, + "grad_norm": 0.010397980907964339, + "kl": 0.077301025390625, + "learning_rate": 9.666638476614936e-07, + "loss": 7.72877101553604e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 633, + "train_speed(iter/s)": 0.022313 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 70.40625238418579, + "completions/min_length": 31.625, + "epoch": 1.2601141722511788, + "grad_norm": 1.4435889506206374, + "kl": 0.09027099609375, + "learning_rate": 9.665504878207756e-07, + "loss": -0.0059862323105335236, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 634, + "train_speed(iter/s)": 0.022315 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.25, + "completions/mean_length": 70.85416841506958, + "completions/min_length": 25.875, + "epoch": 1.2620997766195086, + "grad_norm": 1.2504565080965673, + "kl": 0.083251953125, + "learning_rate": 9.664369422363429e-07, + "loss": 0.008449604734778404, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 635, + "train_speed(iter/s)": 0.022315 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 67.39583539962769, + "completions/min_length": 29.0, + "epoch": 1.2640853809878383, + "grad_norm": 0.007656929688250345, + "kl": 0.10076904296875, + "learning_rate": 9.663232109534009e-07, + "loss": 0.00010077543265651911, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 636, + "train_speed(iter/s)": 0.022318 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 65.04166841506958, + "completions/min_length": 30.0, + "epoch": 1.2660709853561678, + "grad_norm": 1.46198711903342, + "kl": 0.101531982421875, + "learning_rate": 9.662092940172282e-07, + "loss": 0.0019501249771565199, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 637, + "train_speed(iter/s)": 0.022324 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 68.06250190734863, + "completions/min_length": 30.0, + "epoch": 1.2680565897244973, + "grad_norm": 0.8390499127228502, + "kl": 0.099578857421875, + "learning_rate": 9.660951914731774e-07, + "loss": -0.005982631351798773, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 638, + "train_speed(iter/s)": 0.022323 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.5, + "completions/mean_length": 68.42708587646484, + "completions/min_length": 31.25, + "epoch": 1.270042194092827, + "grad_norm": 1.6806593559938343, + "kl": 0.07940673828125, + "learning_rate": 9.659809033666753e-07, + "loss": -0.0035557765513658524, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 639, + "train_speed(iter/s)": 0.022322 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.125, + "completions/mean_length": 76.55208492279053, + "completions/min_length": 33.875, + "epoch": 1.2720277984611565, + "grad_norm": 0.8663757649264218, + "kl": 0.078521728515625, + "learning_rate": 9.658664297432225e-07, + "loss": 7.8544020652771e-05, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 640, + "train_speed(iter/s)": 0.022317 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.75, + "completions/mean_length": 61.333335399627686, + "completions/min_length": 27.75, + "epoch": 1.2740134028294863, + "grad_norm": 0.008434956470041294, + "kl": 0.07586669921875, + "learning_rate": 9.657517706483934e-07, + "loss": 7.581163663417101e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 641, + "train_speed(iter/s)": 0.02232 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 62.70833396911621, + "completions/min_length": 30.0, + "epoch": 1.2759990071978158, + "grad_norm": 0.010045195652376267, + "kl": 0.085357666015625, + "learning_rate": 9.656369261278361e-07, + "loss": 8.534900553058833e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 642, + "train_speed(iter/s)": 0.022321 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 68.27083444595337, + "completions/min_length": 26.125, + "epoch": 1.2779846115661455, + "grad_norm": 1.0554497550881037, + "kl": 0.11859130859375, + "learning_rate": 9.655218962272728e-07, + "loss": -0.010795101523399353, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 643, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 75.36458539962769, + "completions/min_length": 27.375, + "epoch": 1.279970215934475, + "grad_norm": 0.009788075197411747, + "kl": 0.11468505859375, + "learning_rate": 9.654066809924992e-07, + "loss": 0.00011463784903753549, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 644, + "train_speed(iter/s)": 0.022323 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 105.375, + "completions/mean_length": 57.156251430511475, + "completions/min_length": 25.375, + "epoch": 1.2819558203028047, + "grad_norm": 0.010643797160266798, + "kl": 0.094451904296875, + "learning_rate": 9.65291280469385e-07, + "loss": 9.443907765671611e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 645, + "train_speed(iter/s)": 0.022326 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.25, + "completions/mean_length": 55.677085399627686, + "completions/min_length": 26.5, + "epoch": 1.2839414246711343, + "grad_norm": 0.011833210739744362, + "kl": 0.13201904296875, + "learning_rate": 9.651756947038738e-07, + "loss": 0.00013195353676564991, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 646, + "train_speed(iter/s)": 0.022328 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 60.9791693687439, + "completions/min_length": 28.5, + "epoch": 1.2859270290394638, + "grad_norm": 0.011210033476712178, + "kl": 0.114990234375, + "learning_rate": 9.650599237419827e-07, + "loss": 0.00011511107732076198, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 647, + "train_speed(iter/s)": 0.02233 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.625, + "completions/mean_length": 72.83333444595337, + "completions/min_length": 27.125, + "epoch": 1.2879126334077935, + "grad_norm": 0.010959313179157357, + "kl": 0.12579345703125, + "learning_rate": 9.649439676298022e-07, + "loss": 0.0001258813717868179, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 648, + "train_speed(iter/s)": 0.022331 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 62.77083492279053, + "completions/min_length": 25.75, + "epoch": 1.2898982377761232, + "grad_norm": 2.044842511411833, + "kl": 0.23211669921875, + "learning_rate": 9.648278264134975e-07, + "loss": 0.008665397763252258, + "memory(GiB)": 94.21, + "reward": 1.7916666865348816, + "reward_std": 0.12429790198802948, + "rewards/CineAccuracyORM/mean": 0.7916666753590107, + "rewards/CineAccuracyORM/std": 0.2680988162755966, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 649, + "train_speed(iter/s)": 0.022336 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.875, + "completions/mean_length": 59.32291889190674, + "completions/min_length": 26.25, + "epoch": 1.2918838421444527, + "grad_norm": 0.011490252626536829, + "kl": 0.11663818359375, + "learning_rate": 9.647115001393065e-07, + "loss": 0.00011668381921481341, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 650, + "train_speed(iter/s)": 0.022339 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 59.2604193687439, + "completions/min_length": 24.25, + "epoch": 1.2938694465127822, + "grad_norm": 0.012767910335676542, + "kl": 0.15509033203125, + "learning_rate": 9.645949888535412e-07, + "loss": 0.00015494822582695633, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 651, + "train_speed(iter/s)": 0.022345 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 105.75, + "completions/mean_length": 58.35416841506958, + "completions/min_length": 33.0, + "epoch": 1.295855050881112, + "grad_norm": 1.2624373647543505, + "kl": 0.1265869140625, + "learning_rate": 9.644782926025876e-07, + "loss": -0.0012904548784717917, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 652, + "train_speed(iter/s)": 0.022347 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.625, + "completions/mean_length": 59.937501430511475, + "completions/min_length": 22.875, + "epoch": 1.2978406552494415, + "grad_norm": 0.011365666925268391, + "kl": 0.1207275390625, + "learning_rate": 9.643614114329045e-07, + "loss": 0.00012068400246789679, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 653, + "train_speed(iter/s)": 0.022347 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 241.875, + "completions/mean_length": 68.31250190734863, + "completions/min_length": 26.625, + "epoch": 1.2998262596177712, + "grad_norm": 0.44431536708994884, + "kl": 0.26446533203125, + "learning_rate": 9.642443453910248e-07, + "loss": 0.022468041628599167, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 654, + "train_speed(iter/s)": 0.022341 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 56.14583492279053, + "completions/min_length": 27.25, + "epoch": 1.3018118639861007, + "grad_norm": 2.4951135790609826, + "kl": 0.13372802734375, + "learning_rate": 9.641270945235553e-07, + "loss": -0.005837704055011272, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.09202304109930992, + "rewards/CineAccuracyORM/mean": 0.7708333395421505, + "rewards/CineAccuracyORM/std": 0.2407601661980152, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 655, + "train_speed(iter/s)": 0.022338 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.125, + "completions/mean_length": 58.32291793823242, + "completions/min_length": 29.5, + "epoch": 1.3037974683544304, + "grad_norm": 0.7090318736946667, + "kl": 0.1480712890625, + "learning_rate": 9.64009658877176e-07, + "loss": -0.018358584493398666, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 656, + "train_speed(iter/s)": 0.022341 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 101.875, + "completions/mean_length": 56.489585399627686, + "completions/min_length": 27.625, + "epoch": 1.30578307272276, + "grad_norm": 0.012592383066213889, + "kl": 0.140380859375, + "learning_rate": 9.638920384986406e-07, + "loss": 0.00014031633327249438, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 657, + "train_speed(iter/s)": 0.022349 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.0, + "completions/mean_length": 62.98958492279053, + "completions/min_length": 28.0, + "epoch": 1.3077686770910897, + "grad_norm": 0.6508886062228177, + "kl": 0.1754150390625, + "learning_rate": 9.63774233434776e-07, + "loss": -0.000887518166564405, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 658, + "train_speed(iter/s)": 0.02235 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.625, + "completions/mean_length": 57.90625238418579, + "completions/min_length": 25.75, + "epoch": 1.3097542814594192, + "grad_norm": 1.953887439658314, + "kl": 0.2039794921875, + "learning_rate": 9.636562437324831e-07, + "loss": -0.02238425426185131, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.8020833432674408, + "rewards/CineAccuracyORM/std": 0.24358049780130386, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 659, + "train_speed(iter/s)": 0.022354 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 61.87500047683716, + "completions/min_length": 24.125, + "epoch": 1.3117398858277487, + "grad_norm": 1.4339302867240842, + "kl": 0.1761474609375, + "learning_rate": 9.63538069438736e-07, + "loss": 0.0018901865696534514, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.833333333954215, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 660, + "train_speed(iter/s)": 0.022355 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.75, + "completions/mean_length": 53.97916793823242, + "completions/min_length": 22.5, + "epoch": 1.3137254901960784, + "grad_norm": 0.013851973952639802, + "kl": 0.205810546875, + "learning_rate": 9.634197106005829e-07, + "loss": 0.00020593572116922587, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 661, + "train_speed(iter/s)": 0.022355 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 64.26041746139526, + "completions/min_length": 29.25, + "epoch": 1.3157110945644082, + "grad_norm": 0.7603876055571326, + "kl": 0.1680908203125, + "learning_rate": 9.633011672651442e-07, + "loss": -0.015636317431926727, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666679084301, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 662, + "train_speed(iter/s)": 0.022355 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 103.25, + "completions/mean_length": 52.60416793823242, + "completions/min_length": 27.0, + "epoch": 1.3176966989327377, + "grad_norm": 0.7957485608138803, + "kl": 0.18548583984375, + "learning_rate": 9.631824394796151e-07, + "loss": 0.009681493043899536, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 663, + "train_speed(iter/s)": 0.022355 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.0, + "completions/mean_length": 60.11458492279053, + "completions/min_length": 27.375, + "epoch": 1.3196823033010672, + "grad_norm": 0.01204786438804525, + "kl": 0.1971435546875, + "learning_rate": 9.630635272912634e-07, + "loss": 0.00019730752683244646, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 664, + "train_speed(iter/s)": 0.022354 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 93.375, + "completions/mean_length": 54.739585399627686, + "completions/min_length": 28.25, + "epoch": 1.321667907669397, + "grad_norm": 0.013205136110286331, + "kl": 0.2052001953125, + "learning_rate": 9.629444307474307e-07, + "loss": 0.00020542668062262237, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 665, + "train_speed(iter/s)": 0.022356 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.375, + "completions/mean_length": 60.54166889190674, + "completions/min_length": 28.75, + "epoch": 1.3236535120377264, + "grad_norm": 0.012281741987093421, + "kl": 0.1839599609375, + "learning_rate": 9.62825149895532e-07, + "loss": 0.0001841779303504154, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 666, + "train_speed(iter/s)": 0.022357 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.0, + "completions/mean_length": 55.83333492279053, + "completions/min_length": 28.375, + "epoch": 1.3256391164060561, + "grad_norm": 0.013486519715070807, + "kl": 0.23095703125, + "learning_rate": 9.627056847830553e-07, + "loss": 0.00023079430684447289, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 667, + "train_speed(iter/s)": 0.022363 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 70.35416793823242, + "completions/min_length": 30.75, + "epoch": 1.3276247207743856, + "grad_norm": 0.011119500547131964, + "kl": 0.181396484375, + "learning_rate": 9.625860354575623e-07, + "loss": 0.00018149535753764212, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 668, + "train_speed(iter/s)": 0.022363 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.125, + "completions/mean_length": 56.64583492279053, + "completions/min_length": 32.625, + "epoch": 1.3296103251427154, + "grad_norm": 1.0749158168837043, + "kl": 0.1949462890625, + "learning_rate": 9.62466201966688e-07, + "loss": 0.011314323171973228, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 669, + "train_speed(iter/s)": 0.022367 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.5, + "completions/mean_length": 54.92708492279053, + "completions/min_length": 24.75, + "epoch": 1.3315959295110449, + "grad_norm": 0.01345581265965735, + "kl": 0.1976318359375, + "learning_rate": 9.623461843581407e-07, + "loss": 0.00019731343491002917, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 670, + "train_speed(iter/s)": 0.022371 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.625, + "completions/mean_length": 58.27083492279053, + "completions/min_length": 28.125, + "epoch": 1.3335815338793746, + "grad_norm": 2.2330408127778196, + "kl": 0.207763671875, + "learning_rate": 9.622259826797017e-07, + "loss": 0.030535360798239708, + "memory(GiB)": 94.21, + "reward": 1.8541666865348816, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.18837061524391174, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 671, + "train_speed(iter/s)": 0.022376 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.625, + "completions/mean_length": 53.78125190734863, + "completions/min_length": 28.25, + "epoch": 1.3355671382477041, + "grad_norm": 0.8557632294020242, + "kl": 0.204345703125, + "learning_rate": 9.621055969792262e-07, + "loss": -0.0044748904183506966, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 672, + "train_speed(iter/s)": 0.022381 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.0, + "completions/mean_length": 57.927085399627686, + "completions/min_length": 25.125, + "epoch": 1.3375527426160336, + "grad_norm": 1.5076789219125566, + "kl": 0.24554443359375, + "learning_rate": 9.619850273046425e-07, + "loss": -0.007958360016345978, + "memory(GiB)": 94.21, + "reward": 1.6458333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.645833333954215, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 673, + "train_speed(iter/s)": 0.022381 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.125, + "completions/mean_length": 58.677085399627686, + "completions/min_length": 24.625, + "epoch": 1.3395383469843634, + "grad_norm": 0.012931793869248696, + "kl": 0.207763671875, + "learning_rate": 9.618642737039512e-07, + "loss": 0.0002075552474707365, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 674, + "train_speed(iter/s)": 0.022386 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.25, + "completions/mean_length": 51.82291841506958, + "completions/min_length": 25.375, + "epoch": 1.341523951352693, + "grad_norm": 1.452857268047448, + "kl": 0.1881103515625, + "learning_rate": 9.617433362252277e-07, + "loss": 0.0001881470379885286, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 675, + "train_speed(iter/s)": 0.022389 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 87.375, + "completions/mean_length": 47.687500953674316, + "completions/min_length": 25.125, + "epoch": 1.3435095557210226, + "grad_norm": 0.014470825863822586, + "kl": 0.235595703125, + "learning_rate": 9.616222149166192e-07, + "loss": 0.00023564421280752867, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 676, + "train_speed(iter/s)": 0.022395 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.25, + "completions/mean_length": 53.437501430511475, + "completions/min_length": 25.75, + "epoch": 1.345495160089352, + "grad_norm": 0.013884520631398625, + "kl": 0.2369384765625, + "learning_rate": 9.61500909826347e-07, + "loss": 0.00023682885512243956, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 677, + "train_speed(iter/s)": 0.0224 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 91.625, + "completions/mean_length": 48.10416793823242, + "completions/min_length": 25.375, + "epoch": 1.3474807644576818, + "grad_norm": 1.9511758493697813, + "kl": 0.2664794921875, + "learning_rate": 9.61379421002705e-07, + "loss": 0.003057637019082904, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8333333432674408, + "rewards/CineAccuracyORM/std": 0.22787059843540192, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 678, + "train_speed(iter/s)": 0.022407 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.25, + "completions/mean_length": 52.458335876464844, + "completions/min_length": 24.75, + "epoch": 1.3494663688260113, + "grad_norm": 1.0348479635751253, + "kl": 0.2271728515625, + "learning_rate": 9.61257748494061e-07, + "loss": 0.009338478557765484, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 679, + "train_speed(iter/s)": 0.022412 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 88.625, + "completions/mean_length": 48.062500953674316, + "completions/min_length": 25.25, + "epoch": 1.351451973194341, + "grad_norm": 2.2576541116933613, + "kl": 0.2552490234375, + "learning_rate": 9.61135892348855e-07, + "loss": 0.012460976839065552, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.05974818021059036, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.15789688751101494, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 680, + "train_speed(iter/s)": 0.022392 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.875, + "completions/mean_length": 55.84375190734863, + "completions/min_length": 25.25, + "epoch": 1.3534375775626706, + "grad_norm": 0.40038093795369606, + "kl": 0.5283203125, + "learning_rate": 9.610138526156005e-07, + "loss": 0.0005270745605230331, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 681, + "train_speed(iter/s)": 0.02237 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 99.75, + "completions/mean_length": 55.281250953674316, + "completions/min_length": 31.75, + "epoch": 1.3554231819310003, + "grad_norm": 0.01284877358526103, + "kl": 0.21484375, + "learning_rate": 9.608916293428842e-07, + "loss": 0.00021472698426805437, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 682, + "train_speed(iter/s)": 0.022347 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 102.5, + "completions/mean_length": 54.14583444595337, + "completions/min_length": 29.0, + "epoch": 1.3574087862993298, + "grad_norm": 0.013208244398555247, + "kl": 0.218017578125, + "learning_rate": 9.60769222579366e-07, + "loss": 0.00021804519928991795, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 683, + "train_speed(iter/s)": 0.022326 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.875, + "completions/mean_length": 53.06250190734863, + "completions/min_length": 26.375, + "epoch": 1.3593943906676595, + "grad_norm": 0.0136822725478887, + "kl": 0.25439453125, + "learning_rate": 9.606466323737784e-07, + "loss": 0.00025410568923689425, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 684, + "train_speed(iter/s)": 0.022301 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 55.22916841506958, + "completions/min_length": 27.0, + "epoch": 1.361379995035989, + "grad_norm": 1.6210194059118768, + "kl": 0.2738037109375, + "learning_rate": 9.605238587749275e-07, + "loss": -0.02679913304746151, + "memory(GiB)": 94.21, + "reward": 1.7500000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.7500000074505806, + "rewards/CineAccuracyORM/std": 0.306039284914732, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 685, + "train_speed(iter/s)": 0.022292 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.75, + "completions/mean_length": 57.531251430511475, + "completions/min_length": 27.875, + "epoch": 1.3633655994043186, + "grad_norm": 1.1716004831406452, + "kl": 0.2353515625, + "learning_rate": 9.604009018316914e-07, + "loss": -0.0020323614589869976, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 686, + "train_speed(iter/s)": 0.022297 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 91.75, + "completions/mean_length": 49.916667461395264, + "completions/min_length": 26.75, + "epoch": 1.3653512037726483, + "grad_norm": 1.9434426537127008, + "kl": 0.2640380859375, + "learning_rate": 9.602777615930226e-07, + "loss": 0.000371312111383304, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.05974818021059036, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.3820159323513508, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 687, + "train_speed(iter/s)": 0.022305 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.625, + "completions/mean_length": 54.08333444595337, + "completions/min_length": 26.625, + "epoch": 1.367336808140978, + "grad_norm": 1.5204574081738305, + "kl": 0.1988525390625, + "learning_rate": 9.601544381079457e-07, + "loss": -0.010278332978487015, + "memory(GiB)": 94.21, + "reward": 1.7812500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.7812500074505806, + "rewards/CineAccuracyORM/std": 0.15001969039440155, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 688, + "train_speed(iter/s)": 0.022311 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.625, + "completions/mean_length": 52.10416841506958, + "completions/min_length": 25.625, + "epoch": 1.3693224125093075, + "grad_norm": 1.2317309898053894, + "kl": 0.2557373046875, + "learning_rate": 9.600309314255582e-07, + "loss": -0.004393937066197395, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 689, + "train_speed(iter/s)": 0.022316 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.0, + "completions/mean_length": 56.86458492279053, + "completions/min_length": 27.875, + "epoch": 1.371308016877637, + "grad_norm": 0.012609256713666556, + "kl": 0.2109375, + "learning_rate": 9.59907241595031e-07, + "loss": 0.00021082788589410484, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 690, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 56.91666841506958, + "completions/min_length": 27.125, + "epoch": 1.3732936212459668, + "grad_norm": 1.1874806828738682, + "kl": 0.2598876953125, + "learning_rate": 9.59783368665607e-07, + "loss": -0.007028396241366863, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 691, + "train_speed(iter/s)": 0.022319 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 56.15625190734863, + "completions/min_length": 28.25, + "epoch": 1.3752792256142963, + "grad_norm": 0.012626782650926477, + "kl": 0.197509765625, + "learning_rate": 9.596593126866037e-07, + "loss": 0.00019758034613914788, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 692, + "train_speed(iter/s)": 0.022322 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.125, + "completions/mean_length": 56.55208492279053, + "completions/min_length": 27.25, + "epoch": 1.377264829982626, + "grad_norm": 0.01387648969366496, + "kl": 0.2310791015625, + "learning_rate": 9.595350737074099e-07, + "loss": 0.00023103014973457903, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 693, + "train_speed(iter/s)": 0.022322 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.875, + "completions/mean_length": 56.47916793823242, + "completions/min_length": 28.75, + "epoch": 1.3792504343509555, + "grad_norm": 0.01281697974194212, + "kl": 0.224853515625, + "learning_rate": 9.594106517774878e-07, + "loss": 0.00022499411716125906, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 694, + "train_speed(iter/s)": 0.022326 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.75, + "completions/mean_length": 59.8229193687439, + "completions/min_length": 30.75, + "epoch": 1.3812360387192852, + "grad_norm": 0.011790021274502231, + "kl": 0.2052001953125, + "learning_rate": 9.592860469463724e-07, + "loss": 0.000205205287784338, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 695, + "train_speed(iter/s)": 0.02233 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.0, + "completions/mean_length": 53.59375190734863, + "completions/min_length": 25.75, + "epoch": 1.3832216430876147, + "grad_norm": 1.4718861644210572, + "kl": 0.36279296875, + "learning_rate": 9.591612592636714e-07, + "loss": -0.004776414483785629, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 696, + "train_speed(iter/s)": 0.022334 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 53.94791793823242, + "completions/min_length": 25.25, + "epoch": 1.3852072474559445, + "grad_norm": 0.013371487625210303, + "kl": 0.2022705078125, + "learning_rate": 9.59036288779066e-07, + "loss": 0.0002022602129727602, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 697, + "train_speed(iter/s)": 0.022337 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 105.25, + "completions/mean_length": 55.48958492279053, + "completions/min_length": 27.5, + "epoch": 1.387192851824274, + "grad_norm": 0.012872637534962036, + "kl": 0.242919921875, + "learning_rate": 9.58911135542309e-07, + "loss": 0.00024293379101436585, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 698, + "train_speed(iter/s)": 0.022343 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 95.75, + "completions/mean_length": 53.47916793823242, + "completions/min_length": 27.5, + "epoch": 1.3891784561926035, + "grad_norm": 0.012993284476608873, + "kl": 0.174560546875, + "learning_rate": 9.587857996032269e-07, + "loss": 0.0001747296191751957, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 699, + "train_speed(iter/s)": 0.022343 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.375, + "completions/mean_length": 53.02083444595337, + "completions/min_length": 26.0, + "epoch": 1.3911640605609332, + "grad_norm": 0.013533895523861405, + "kl": 0.18487548828125, + "learning_rate": 9.586602810117185e-07, + "loss": 0.0001847328821895644, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 700, + "train_speed(iter/s)": 0.02235 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 59.052085399627686, + "completions/min_length": 27.625, + "epoch": 1.393149664929263, + "grad_norm": 1.1541233708292815, + "kl": 0.1884765625, + "learning_rate": 9.585345798177554e-07, + "loss": -0.014513371512293816, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.05974817834794521, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.15789688751101494, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 701, + "train_speed(iter/s)": 0.022349 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 98.875, + "completions/mean_length": 58.93750286102295, + "completions/min_length": 29.125, + "epoch": 1.3951352692975925, + "grad_norm": 0.012091887388755121, + "kl": 0.16741943359375, + "learning_rate": 9.584086960713822e-07, + "loss": 0.0001672252401476726, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 702, + "train_speed(iter/s)": 0.022348 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.75, + "completions/mean_length": 61.218751430511475, + "completions/min_length": 27.625, + "epoch": 1.397120873665922, + "grad_norm": 0.01223138437901507, + "kl": 0.1796875, + "learning_rate": 9.582826298227157e-07, + "loss": 0.00017950995243154466, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 703, + "train_speed(iter/s)": 0.022352 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.625, + "completions/mean_length": 54.343751430511475, + "completions/min_length": 27.25, + "epoch": 1.3991064780342517, + "grad_norm": 0.012855877867865953, + "kl": 0.1754150390625, + "learning_rate": 9.581563811219453e-07, + "loss": 0.00017559510888531804, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 704, + "train_speed(iter/s)": 0.022355 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.125, + "completions/mean_length": 51.04166793823242, + "completions/min_length": 26.25, + "epoch": 1.4010920824025812, + "grad_norm": 0.013785684263398509, + "kl": 0.14404296875, + "learning_rate": 9.58029950019334e-07, + "loss": 0.0001441091881133616, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 705, + "train_speed(iter/s)": 0.022352 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 89.75, + "completions/mean_length": 49.92708444595337, + "completions/min_length": 25.625, + "epoch": 1.403077686770911, + "grad_norm": 0.014177132716538797, + "kl": 0.1724853515625, + "learning_rate": 9.579033365652158e-07, + "loss": 0.00017262960318475962, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 706, + "train_speed(iter/s)": 0.022356 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.0, + "completions/mean_length": 53.687500953674316, + "completions/min_length": 31.0, + "epoch": 1.4050632911392404, + "grad_norm": 0.9219507527625238, + "kl": 0.2100830078125, + "learning_rate": 9.577765408099992e-07, + "loss": -0.010596505366265774, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 707, + "train_speed(iter/s)": 0.02236 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.875, + "completions/mean_length": 56.604166984558105, + "completions/min_length": 29.375, + "epoch": 1.4070488955075702, + "grad_norm": 0.012459113957852184, + "kl": 0.1717529296875, + "learning_rate": 9.576495628041635e-07, + "loss": 0.00017167648184113204, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 708, + "train_speed(iter/s)": 0.022365 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 95.75, + "completions/mean_length": 53.58333444595337, + "completions/min_length": 29.875, + "epoch": 1.4090344998758997, + "grad_norm": 1.1965680565612369, + "kl": 0.14117431640625, + "learning_rate": 9.575224025982618e-07, + "loss": -0.007983053103089333, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 709, + "train_speed(iter/s)": 0.022371 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.75, + "completions/mean_length": 55.27083492279053, + "completions/min_length": 32.0, + "epoch": 1.4110201042442294, + "grad_norm": 0.01235083862781987, + "kl": 0.135986328125, + "learning_rate": 9.573950602429191e-07, + "loss": 0.00013611519534606487, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 710, + "train_speed(iter/s)": 0.022373 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.25, + "completions/mean_length": 50.78125190734863, + "completions/min_length": 24.375, + "epoch": 1.413005708612559, + "grad_norm": 0.014431757532512577, + "kl": 0.152587890625, + "learning_rate": 9.572675357888333e-07, + "loss": 0.00015238014748319983, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 711, + "train_speed(iter/s)": 0.022379 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 101.875, + "completions/mean_length": 50.479167461395264, + "completions/min_length": 27.375, + "epoch": 1.4149913129808884, + "grad_norm": 1.4901548759857677, + "kl": 0.14459228515625, + "learning_rate": 9.571398292867745e-07, + "loss": 0.0016813030233606696, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 712, + "train_speed(iter/s)": 0.022387 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.5, + "completions/mean_length": 56.32291793823242, + "completions/min_length": 30.5, + "epoch": 1.4169769173492182, + "grad_norm": 0.011463893774063328, + "kl": 0.1517333984375, + "learning_rate": 9.570119407875852e-07, + "loss": 0.0001517429482191801, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 713, + "train_speed(iter/s)": 0.022389 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.75, + "completions/mean_length": 54.98958492279053, + "completions/min_length": 24.25, + "epoch": 1.4189625217175479, + "grad_norm": 0.010824340381178518, + "kl": 0.12164306640625, + "learning_rate": 9.568838703421808e-07, + "loss": 0.0001216636155731976, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 714, + "train_speed(iter/s)": 0.022393 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.5, + "completions/mean_length": 56.72916889190674, + "completions/min_length": 29.25, + "epoch": 1.4209481260858774, + "grad_norm": 1.3064436753768311, + "kl": 0.137939453125, + "learning_rate": 9.56755618001549e-07, + "loss": -0.004561188630759716, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 715, + "train_speed(iter/s)": 0.022399 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 89.125, + "completions/mean_length": 50.92708444595337, + "completions/min_length": 23.5, + "epoch": 1.422933730454207, + "grad_norm": 0.01112279629375603, + "kl": 0.117431640625, + "learning_rate": 9.566271838167495e-07, + "loss": 0.00011737256863852963, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 716, + "train_speed(iter/s)": 0.022406 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.75, + "completions/mean_length": 54.177085876464844, + "completions/min_length": 27.375, + "epoch": 1.4249193348225366, + "grad_norm": 1.0983939192808494, + "kl": 0.26483154296875, + "learning_rate": 9.564985678389146e-07, + "loss": 0.0002649997768457979, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 717, + "train_speed(iter/s)": 0.022412 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 82.625, + "completions/mean_length": 47.11458444595337, + "completions/min_length": 24.5, + "epoch": 1.4269049391908661, + "grad_norm": 0.011622590056280616, + "kl": 0.10986328125, + "learning_rate": 9.563697701192494e-07, + "loss": 0.00010993148316629231, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 718, + "train_speed(iter/s)": 0.022419 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.625, + "completions/mean_length": 54.93750190734863, + "completions/min_length": 29.125, + "epoch": 1.4288905435591959, + "grad_norm": 1.6695279450262681, + "kl": 0.10980224609375, + "learning_rate": 9.562407907090312e-07, + "loss": 0.0017443995457142591, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 719, + "train_speed(iter/s)": 0.022418 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.875, + "completions/mean_length": 51.89583444595337, + "completions/min_length": 26.25, + "epoch": 1.4308761479275254, + "grad_norm": 2.056668722315016, + "kl": 0.13079833984375, + "learning_rate": 9.561116296596085e-07, + "loss": -0.0019818730652332306, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 720, + "train_speed(iter/s)": 0.022426 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 92.5, + "completions/mean_length": 50.947917461395264, + "completions/min_length": 25.875, + "epoch": 1.432861752295855, + "grad_norm": 0.9152808274658376, + "kl": 0.13629150390625, + "learning_rate": 9.55982287022404e-07, + "loss": -0.0006033803219906986, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 721, + "train_speed(iter/s)": 0.022433 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 99.375, + "completions/mean_length": 53.72916793823242, + "completions/min_length": 29.0, + "epoch": 1.4348473566641846, + "grad_norm": 0.988844908675142, + "kl": 0.11163330078125, + "learning_rate": 9.558527628489117e-07, + "loss": 0.0013558641076087952, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 722, + "train_speed(iter/s)": 0.022439 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.125, + "completions/mean_length": 53.95833444595337, + "completions/min_length": 30.5, + "epoch": 1.4368329610325143, + "grad_norm": 0.008405923776666074, + "kl": 0.1568603515625, + "learning_rate": 9.557230571906975e-07, + "loss": 0.00015692139277234674, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 723, + "train_speed(iter/s)": 0.022444 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 99.5, + "completions/mean_length": 51.208335399627686, + "completions/min_length": 26.625, + "epoch": 1.4388185654008439, + "grad_norm": 0.0078047463549477045, + "kl": 0.1087646484375, + "learning_rate": 9.555931700994004e-07, + "loss": 0.00010875804582610726, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 724, + "train_speed(iter/s)": 0.022444 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 59.73958444595337, + "completions/min_length": 28.75, + "epoch": 1.4408041697691734, + "grad_norm": 0.021594558458315073, + "kl": 0.13958740234375, + "learning_rate": 9.554631016267308e-07, + "loss": 0.00013938584015704691, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 725, + "train_speed(iter/s)": 0.022445 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.125, + "completions/mean_length": 52.312501430511475, + "completions/min_length": 21.25, + "epoch": 1.442789774137503, + "grad_norm": 0.015217344707581466, + "kl": 0.16424560546875, + "learning_rate": 9.55332851824472e-07, + "loss": 0.0001644698641030118, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 726, + "train_speed(iter/s)": 0.02245 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.125, + "completions/mean_length": 55.10416793823242, + "completions/min_length": 30.375, + "epoch": 1.4447753785058328, + "grad_norm": 0.8643791074440206, + "kl": 0.18280029296875, + "learning_rate": 9.552024207444794e-07, + "loss": -0.005577507428824902, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 727, + "train_speed(iter/s)": 0.022455 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 95.25, + "completions/mean_length": 50.85416793823242, + "completions/min_length": 25.625, + "epoch": 1.4467609828741623, + "grad_norm": 1.1838750787173362, + "kl": 0.11004638671875, + "learning_rate": 9.5507180843868e-07, + "loss": -0.0038419279735535383, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 728, + "train_speed(iter/s)": 0.022458 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.5, + "completions/mean_length": 56.19791793823242, + "completions/min_length": 25.25, + "epoch": 1.4487465872424918, + "grad_norm": 0.8455386843974868, + "kl": 0.13897705078125, + "learning_rate": 9.549410149590737e-07, + "loss": 0.000139145806315355, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 729, + "train_speed(iter/s)": 0.022459 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 61.56250190734863, + "completions/min_length": 28.5, + "epoch": 1.4507321916108216, + "grad_norm": 0.009484025716301098, + "kl": 0.09088134765625, + "learning_rate": 9.54810040357732e-07, + "loss": 9.089943341678008e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 730, + "train_speed(iter/s)": 0.022455 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.25, + "completions/mean_length": 54.593751430511475, + "completions/min_length": 26.25, + "epoch": 1.452717795979151, + "grad_norm": 0.00839230345880413, + "kl": 0.145782470703125, + "learning_rate": 9.546788846867987e-07, + "loss": 0.0001458572514820844, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 731, + "train_speed(iter/s)": 0.022457 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.75, + "completions/mean_length": 55.67708492279053, + "completions/min_length": 27.0, + "epoch": 1.4547034003474808, + "grad_norm": 0.007607238266323172, + "kl": 0.12261962890625, + "learning_rate": 9.545475479984898e-07, + "loss": 0.0001225980813615024, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 732, + "train_speed(iter/s)": 0.022461 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.25, + "completions/mean_length": 59.11458492279053, + "completions/min_length": 31.5, + "epoch": 1.4566890047158103, + "grad_norm": 0.007020407001812986, + "kl": 0.088836669921875, + "learning_rate": 9.544160303450927e-07, + "loss": 8.882784459274262e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 733, + "train_speed(iter/s)": 0.022466 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.5, + "completions/mean_length": 55.541667461395264, + "completions/min_length": 29.5, + "epoch": 1.45867460908414, + "grad_norm": 0.008425255303198686, + "kl": 0.14459228515625, + "learning_rate": 9.542843317789683e-07, + "loss": 0.00014448503497987986, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 734, + "train_speed(iter/s)": 0.022471 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.75, + "completions/mean_length": 63.09375238418579, + "completions/min_length": 31.0, + "epoch": 1.4606602134524695, + "grad_norm": 0.007488147097384869, + "kl": 0.12017822265625, + "learning_rate": 9.54152452352548e-07, + "loss": 0.00012000129208900034, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 735, + "train_speed(iter/s)": 0.022473 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.75, + "completions/mean_length": 54.468750953674316, + "completions/min_length": 24.0, + "epoch": 1.4626458178207993, + "grad_norm": 1.0455099198461375, + "kl": 0.11578369140625, + "learning_rate": 9.540203921183358e-07, + "loss": -0.0001914985477924347, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 736, + "train_speed(iter/s)": 0.022473 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 98.125, + "completions/mean_length": 57.09375190734863, + "completions/min_length": 29.125, + "epoch": 1.4646314221891288, + "grad_norm": 2.284600818709652, + "kl": 0.08966064453125, + "learning_rate": 9.538881511289078e-07, + "loss": 0.0029439865611493587, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 737, + "train_speed(iter/s)": 0.022481 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.0, + "completions/mean_length": 56.66666793823242, + "completions/min_length": 28.375, + "epoch": 1.4666170265574583, + "grad_norm": 1.5075898859184218, + "kl": 0.1265869140625, + "learning_rate": 9.537557294369122e-07, + "loss": 0.0031701817642897367, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 738, + "train_speed(iter/s)": 0.022481 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.5, + "completions/mean_length": 54.81250190734863, + "completions/min_length": 30.625, + "epoch": 1.468602630925788, + "grad_norm": 0.01026436959963118, + "kl": 0.1094970703125, + "learning_rate": 9.536231270950688e-07, + "loss": 0.00010945153189823031, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 739, + "train_speed(iter/s)": 0.022483 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 97.75, + "completions/mean_length": 57.13541841506958, + "completions/min_length": 26.125, + "epoch": 1.4705882352941178, + "grad_norm": 0.9306134003713924, + "kl": 0.12677001953125, + "learning_rate": 9.534903441561692e-07, + "loss": -0.0013227922609075904, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 740, + "train_speed(iter/s)": 0.022491 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 102.25, + "completions/mean_length": 54.27083444595337, + "completions/min_length": 30.625, + "epoch": 1.4725738396624473, + "grad_norm": 0.010444108516729786, + "kl": 0.1226806640625, + "learning_rate": 9.533573806730773e-07, + "loss": 0.00012264520046301186, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 741, + "train_speed(iter/s)": 0.022496 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.0, + "completions/mean_length": 56.531250953674316, + "completions/min_length": 28.75, + "epoch": 1.4745594440307768, + "grad_norm": 0.009595558594323825, + "kl": 0.096923828125, + "learning_rate": 9.532242366987286e-07, + "loss": 9.700939699541777e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 742, + "train_speed(iter/s)": 0.022501 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.625, + "completions/mean_length": 56.250001430511475, + "completions/min_length": 29.5, + "epoch": 1.4765450483991065, + "grad_norm": 0.007014834369061131, + "kl": 0.10565185546875, + "learning_rate": 9.530909122861306e-07, + "loss": 0.00010564276453806087, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 743, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.75, + "completions/mean_length": 60.10416841506958, + "completions/min_length": 30.25, + "epoch": 1.4785306527674362, + "grad_norm": 0.007026090773405848, + "kl": 0.0994873046875, + "learning_rate": 9.529574074883627e-07, + "loss": 9.951293759513646e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 744, + "train_speed(iter/s)": 0.02251 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.625, + "completions/mean_length": 59.06250047683716, + "completions/min_length": 26.875, + "epoch": 1.4805162571357657, + "grad_norm": 0.009650444704665776, + "kl": 0.14251708984375, + "learning_rate": 9.528237223585759e-07, + "loss": 0.00014239756274037063, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 745, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.875, + "completions/mean_length": 55.15625047683716, + "completions/min_length": 28.25, + "epoch": 1.4825018615040952, + "grad_norm": 0.009541456569087016, + "kl": 0.12774658203125, + "learning_rate": 9.526898569499931e-07, + "loss": 0.00012780143879354, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 746, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.875, + "completions/mean_length": 58.93750047683716, + "completions/min_length": 28.25, + "epoch": 1.484487465872425, + "grad_norm": 0.93812284308122, + "kl": 0.11083984375, + "learning_rate": 9.525558113159091e-07, + "loss": 0.0067501007579267025, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 747, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.125, + "completions/mean_length": 58.510417461395264, + "completions/min_length": 28.5, + "epoch": 1.4864730702407545, + "grad_norm": 0.008629387575274932, + "kl": 0.12152099609375, + "learning_rate": 9.524215855096903e-07, + "loss": 0.00012160909682279453, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 748, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.875, + "completions/mean_length": 58.94791889190674, + "completions/min_length": 27.75, + "epoch": 1.4884586746090842, + "grad_norm": 1.5815851591475962, + "kl": 0.1556396484375, + "learning_rate": 9.522871795847747e-07, + "loss": -0.01076371781527996, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.05974817834794521, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.18617857620120049, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 749, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 93.125, + "completions/mean_length": 53.20833396911621, + "completions/min_length": 28.875, + "epoch": 1.4904442789774137, + "grad_norm": 0.009283020085570033, + "kl": 0.1112060546875, + "learning_rate": 9.521525935946722e-07, + "loss": 0.00011112240463262424, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 750, + "train_speed(iter/s)": 0.022534 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.0, + "completions/mean_length": 61.812500953674316, + "completions/min_length": 27.875, + "epoch": 1.4924298833457432, + "grad_norm": 0.009667783486176394, + "kl": 0.12408447265625, + "learning_rate": 9.520178275929647e-07, + "loss": 0.00012400734703987837, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 751, + "train_speed(iter/s)": 0.022535 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.125, + "completions/mean_length": 62.20833492279053, + "completions/min_length": 26.0, + "epoch": 1.494415487714073, + "grad_norm": 0.009630252798554353, + "kl": 0.11883544921875, + "learning_rate": 9.518828816333049e-07, + "loss": 0.00011870354501297697, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 752, + "train_speed(iter/s)": 0.022533 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 65.60416889190674, + "completions/min_length": 30.625, + "epoch": 1.4964010920824027, + "grad_norm": 0.007012520130989951, + "kl": 0.11419677734375, + "learning_rate": 9.51747755769418e-07, + "loss": 0.00011400604125810787, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 753, + "train_speed(iter/s)": 0.022535 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.0, + "completions/mean_length": 60.791667461395264, + "completions/min_length": 29.0, + "epoch": 1.4983866964507322, + "grad_norm": 0.015739227950615216, + "kl": 0.113037109375, + "learning_rate": 9.516124500551004e-07, + "loss": 0.00011308235116302967, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 754, + "train_speed(iter/s)": 0.022533 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.875, + "completions/mean_length": 61.89583444595337, + "completions/min_length": 30.125, + "epoch": 1.5003723008190617, + "grad_norm": 0.007121488855440908, + "kl": 0.11822509765625, + "learning_rate": 9.514769645442202e-07, + "loss": 0.00011818177154054865, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 755, + "train_speed(iter/s)": 0.022536 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 67.68750190734863, + "completions/min_length": 33.625, + "epoch": 1.5023579051873914, + "grad_norm": 0.6177844241306097, + "kl": 0.1590576171875, + "learning_rate": 9.513412992907173e-07, + "loss": 0.011229190975427628, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 756, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 65.46875143051147, + "completions/min_length": 31.875, + "epoch": 1.5043435095557212, + "grad_norm": 0.9274971643782269, + "kl": 0.122314453125, + "learning_rate": 9.512054543486025e-07, + "loss": -0.00040559968329034746, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 757, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.25, + "completions/mean_length": 63.697919845581055, + "completions/min_length": 31.5, + "epoch": 1.5063291139240507, + "grad_norm": 0.901867771077204, + "kl": 0.11737060546875, + "learning_rate": 9.510694297719588e-07, + "loss": -0.004113540053367615, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 758, + "train_speed(iter/s)": 0.022543 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.875, + "completions/mean_length": 59.7604193687439, + "completions/min_length": 30.375, + "epoch": 1.5083147182923802, + "grad_norm": 0.009326167634635434, + "kl": 0.13714599609375, + "learning_rate": 9.509332256149406e-07, + "loss": 0.0001371256948914379, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 759, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.25, + "completions/mean_length": 63.15625190734863, + "completions/min_length": 25.25, + "epoch": 1.51030032266071, + "grad_norm": 0.9037795662553252, + "kl": 0.111083984375, + "learning_rate": 9.507968419317736e-07, + "loss": -0.004540104418992996, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 760, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.75, + "completions/mean_length": 61.89583492279053, + "completions/min_length": 29.875, + "epoch": 1.5122859270290394, + "grad_norm": 1.4719736831373125, + "kl": 0.12567138671875, + "learning_rate": 9.506602787767549e-07, + "loss": -0.008734042756259441, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.6979166679084301, + "rewards/CineAccuracyORM/std": 0.31391648203134537, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 761, + "train_speed(iter/s)": 0.02255 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 63.96875190734863, + "completions/min_length": 30.75, + "epoch": 1.5142715313973691, + "grad_norm": 0.014517003826497215, + "kl": 0.12335205078125, + "learning_rate": 9.505235362042534e-07, + "loss": 0.00012353870260994881, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 762, + "train_speed(iter/s)": 0.02255 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 101.125, + "completions/mean_length": 55.04166841506958, + "completions/min_length": 29.125, + "epoch": 1.5162571357656986, + "grad_norm": 1.0381968233899606, + "kl": 0.111572265625, + "learning_rate": 9.503866142687091e-07, + "loss": -0.002629011869430542, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 763, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 64.32291889190674, + "completions/min_length": 29.375, + "epoch": 1.5182427401340282, + "grad_norm": 0.042451780322132066, + "kl": 0.1676025390625, + "learning_rate": 9.502495130246338e-07, + "loss": 0.00016781894373707473, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 764, + "train_speed(iter/s)": 0.022555 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.375, + "completions/mean_length": 61.552085399627686, + "completions/min_length": 29.875, + "epoch": 1.5202283445023579, + "grad_norm": 0.0132875276350042, + "kl": 0.1226806640625, + "learning_rate": 9.501122325266103e-07, + "loss": 0.00012267596321180463, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 765, + "train_speed(iter/s)": 0.022558 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 63.85416889190674, + "completions/min_length": 31.25, + "epoch": 1.5222139488706876, + "grad_norm": 0.017579931638466623, + "kl": 0.14801025390625, + "learning_rate": 9.499747728292927e-07, + "loss": 0.0001481170766055584, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 766, + "train_speed(iter/s)": 0.022559 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.375, + "completions/mean_length": 67.6354193687439, + "completions/min_length": 36.25, + "epoch": 1.5241995532390171, + "grad_norm": 0.017843904723149635, + "kl": 0.1181640625, + "learning_rate": 9.498371339874068e-07, + "loss": 0.0001180471372208558, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 767, + "train_speed(iter/s)": 0.022559 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.375, + "completions/mean_length": 60.78125047683716, + "completions/min_length": 30.125, + "epoch": 1.5261851576073466, + "grad_norm": 1.0855973909512628, + "kl": 0.158935546875, + "learning_rate": 9.496993160557494e-07, + "loss": 0.00015877436089795083, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 768, + "train_speed(iter/s)": 0.02256 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 63.177085399627686, + "completions/min_length": 29.875, + "epoch": 1.5281707619756764, + "grad_norm": 0.013352749438859125, + "kl": 0.12994384765625, + "learning_rate": 9.495613190891891e-07, + "loss": 0.00013011903502047062, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 769, + "train_speed(iter/s)": 0.022562 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.5, + "completions/mean_length": 56.41666841506958, + "completions/min_length": 26.625, + "epoch": 1.530156366344006, + "grad_norm": 0.02242754711233612, + "kl": 0.12506103515625, + "learning_rate": 9.494231431426654e-07, + "loss": 0.00012502526806201786, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 770, + "train_speed(iter/s)": 0.022564 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 71.01041746139526, + "completions/min_length": 34.375, + "epoch": 1.5321419707123356, + "grad_norm": 0.00971207049700734, + "kl": 0.14654541015625, + "learning_rate": 9.492847882711888e-07, + "loss": 0.00014651428500656039, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 771, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 64.22916793823242, + "completions/min_length": 30.875, + "epoch": 1.534127575080665, + "grad_norm": 0.8385842916742702, + "kl": 0.1278076171875, + "learning_rate": 9.491462545298415e-07, + "loss": 0.0051539079286158085, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.708333333954215, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 772, + "train_speed(iter/s)": 0.022567 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 69.87500143051147, + "completions/min_length": 31.5, + "epoch": 1.5361131794489948, + "grad_norm": 0.7089185559306522, + "kl": 0.092315673828125, + "learning_rate": 9.490075419737767e-07, + "loss": -0.00862202700227499, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 773, + "train_speed(iter/s)": 0.022569 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.625, + "completions/mean_length": 71.97916841506958, + "completions/min_length": 34.625, + "epoch": 1.5380987838173243, + "grad_norm": 0.009889296254899278, + "kl": 0.13079833984375, + "learning_rate": 9.488686506582188e-07, + "loss": 0.0001309265790041536, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 774, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 65.6666693687439, + "completions/min_length": 33.75, + "epoch": 1.540084388185654, + "grad_norm": 0.011404412506493187, + "kl": 0.138671875, + "learning_rate": 9.487295806384636e-07, + "loss": 0.00013863175990991294, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 775, + "train_speed(iter/s)": 0.022571 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.375, + "completions/mean_length": 58.78125190734863, + "completions/min_length": 33.75, + "epoch": 1.5420699925539836, + "grad_norm": 0.011675313289628312, + "kl": 0.12164306640625, + "learning_rate": 9.485903319698776e-07, + "loss": 0.00012157539458712563, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 776, + "train_speed(iter/s)": 0.022572 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 70.02083444595337, + "completions/min_length": 35.0, + "epoch": 1.544055596922313, + "grad_norm": 0.009008345408477022, + "kl": 0.1031494140625, + "learning_rate": 9.484509047078989e-07, + "loss": 0.00010315240069758147, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 777, + "train_speed(iter/s)": 0.022573 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.25, + "completions/mean_length": 66.42708444595337, + "completions/min_length": 31.875, + "epoch": 1.5460412012906428, + "grad_norm": 0.037744752841359024, + "kl": 0.15655517578125, + "learning_rate": 9.483112989080363e-07, + "loss": 0.00015669949061702937, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 778, + "train_speed(iter/s)": 0.022576 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.625, + "completions/mean_length": 66.51041841506958, + "completions/min_length": 33.75, + "epoch": 1.5480268056589725, + "grad_norm": 0.8543397654982704, + "kl": 0.110107421875, + "learning_rate": 9.481715146258699e-07, + "loss": 0.00011021520185749978, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 779, + "train_speed(iter/s)": 0.022573 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 67.80208492279053, + "completions/min_length": 33.75, + "epoch": 1.550012410027302, + "grad_norm": 1.0463599205333745, + "kl": 0.16094970703125, + "learning_rate": 9.480315519170508e-07, + "loss": -0.01297999732196331, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.39076167345046997, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 780, + "train_speed(iter/s)": 0.022572 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 75.21875190734863, + "completions/min_length": 35.75, + "epoch": 1.5519980143956316, + "grad_norm": 0.9815733350719363, + "kl": 0.151611328125, + "learning_rate": 9.478914108373011e-07, + "loss": 0.006083416752517223, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 781, + "train_speed(iter/s)": 0.022572 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 66.48958492279053, + "completions/min_length": 33.875, + "epoch": 1.5539836187639613, + "grad_norm": 0.008523084624338484, + "kl": 0.107421875, + "learning_rate": 9.477510914424141e-07, + "loss": 0.00010765058686956763, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 782, + "train_speed(iter/s)": 0.022572 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.5, + "completions/mean_length": 72.47916793823242, + "completions/min_length": 32.5, + "epoch": 1.555969223132291, + "grad_norm": 0.010365001287214036, + "kl": 0.1300048828125, + "learning_rate": 9.476105937882537e-07, + "loss": 0.00012992092524655163, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 783, + "train_speed(iter/s)": 0.022571 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.375, + "completions/mean_length": 57.500001430511475, + "completions/min_length": 30.625, + "epoch": 1.5579548275006205, + "grad_norm": 0.010265576048434191, + "kl": 0.1270751953125, + "learning_rate": 9.474699179307552e-07, + "loss": 0.00012720597442239523, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 784, + "train_speed(iter/s)": 0.022571 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 74.21875286102295, + "completions/min_length": 37.875, + "epoch": 1.55994043186895, + "grad_norm": 0.009511907231798171, + "kl": 0.1357421875, + "learning_rate": 9.473290639259248e-07, + "loss": 0.00013579762890003622, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 785, + "train_speed(iter/s)": 0.022574 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.125, + "completions/mean_length": 71.56250143051147, + "completions/min_length": 28.875, + "epoch": 1.5619260362372798, + "grad_norm": 1.0826211721848011, + "kl": 0.13836669921875, + "learning_rate": 9.471880318298393e-07, + "loss": -0.026279527693986893, + "memory(GiB)": 94.21, + "reward": 1.7812500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.7812500074505806, + "rewards/CineAccuracyORM/std": 0.2805779278278351, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 786, + "train_speed(iter/s)": 0.022571 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 68.86458587646484, + "completions/min_length": 33.5, + "epoch": 1.5639116406056095, + "grad_norm": 0.00893109141753033, + "kl": 0.12847900390625, + "learning_rate": 9.470468216986464e-07, + "loss": 0.00012845388846471906, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 787, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.0, + "completions/mean_length": 70.739586353302, + "completions/min_length": 34.75, + "epoch": 1.565897244973939, + "grad_norm": 0.5305438404485602, + "kl": 0.13238525390625, + "learning_rate": 9.469054335885653e-07, + "loss": 0.01100987195968628, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 788, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.125, + "completions/mean_length": 77.94791889190674, + "completions/min_length": 37.625, + "epoch": 1.5678828493422685, + "grad_norm": 1.5412602990359585, + "kl": 0.1397705078125, + "learning_rate": 9.467638675558854e-07, + "loss": 0.009466740302741528, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 789, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 66.70833539962769, + "completions/min_length": 32.125, + "epoch": 1.569868453710598, + "grad_norm": 0.008285440752085574, + "kl": 0.128692626953125, + "learning_rate": 9.466221236569672e-07, + "loss": 0.00012855028035119176, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 790, + "train_speed(iter/s)": 0.022567 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.0, + "completions/mean_length": 65.08333539962769, + "completions/min_length": 30.75, + "epoch": 1.5718540580789278, + "grad_norm": 1.1063484598804467, + "kl": 0.13690185546875, + "learning_rate": 9.464802019482418e-07, + "loss": -0.0007010664558038116, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 791, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 67.94791793823242, + "completions/min_length": 31.375, + "epoch": 1.5738396624472575, + "grad_norm": 0.009371043807297407, + "kl": 0.11846923828125, + "learning_rate": 9.463381024862114e-07, + "loss": 0.00011852764873765409, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 792, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 73.87500286102295, + "completions/min_length": 37.125, + "epoch": 1.575825266815587, + "grad_norm": 1.6962383506131065, + "kl": 0.12066650390625, + "learning_rate": 9.461958253274489e-07, + "loss": 0.020231738686561584, + "memory(GiB)": 94.21, + "reward": 1.7604166865348816, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.2934674955904484, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 793, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.25, + "completions/mean_length": 65.73958492279053, + "completions/min_length": 33.5, + "epoch": 1.5778108711839165, + "grad_norm": 0.019906395330733715, + "kl": 0.1201171875, + "learning_rate": 9.460533705285978e-07, + "loss": 0.00012027585034957156, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 794, + "train_speed(iter/s)": 0.022569 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.0, + "completions/mean_length": 62.114585399627686, + "completions/min_length": 32.375, + "epoch": 1.5797964755522462, + "grad_norm": 0.011294376926733699, + "kl": 0.12542724609375, + "learning_rate": 9.459107381463725e-07, + "loss": 0.0001254882081411779, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 795, + "train_speed(iter/s)": 0.022573 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 75.10416889190674, + "completions/min_length": 33.25, + "epoch": 1.581782079920576, + "grad_norm": 0.00795684719757277, + "kl": 0.12921142578125, + "learning_rate": 9.457679282375578e-07, + "loss": 0.00012897045235149562, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 796, + "train_speed(iter/s)": 0.02257 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 205.125, + "completions/mean_length": 74.08333492279053, + "completions/min_length": 30.125, + "epoch": 1.5837676842889055, + "grad_norm": 0.8817028583267403, + "kl": 0.18048095703125, + "learning_rate": 9.456249408590096e-07, + "loss": -0.006961992010474205, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 797, + "train_speed(iter/s)": 0.022566 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 68.84375238418579, + "completions/min_length": 33.625, + "epoch": 1.585753288657235, + "grad_norm": 1.0506957066411309, + "kl": 0.1197509765625, + "learning_rate": 9.45481776067654e-07, + "loss": 0.0053444355726242065, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 798, + "train_speed(iter/s)": 0.022564 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 73.73958444595337, + "completions/min_length": 37.875, + "epoch": 1.5877388930255647, + "grad_norm": 0.5890124463293395, + "kl": 0.1383056640625, + "learning_rate": 9.453384339204882e-07, + "loss": -0.00964347179979086, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 799, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 68.36458539962769, + "completions/min_length": 30.25, + "epoch": 1.5897244973938944, + "grad_norm": 0.017975294219367728, + "kl": 0.11297607421875, + "learning_rate": 9.451949144745795e-07, + "loss": 0.00011297940363874659, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 800, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 74.3854193687439, + "completions/min_length": 37.0, + "epoch": 1.591710101762224, + "grad_norm": 0.026791448363209736, + "kl": 0.1650390625, + "learning_rate": 9.450512177870662e-07, + "loss": 0.00016505643725395203, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 801, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.75, + "completions/mean_length": 77.29166793823242, + "completions/min_length": 33.0, + "epoch": 1.5936957061305534, + "grad_norm": 2.4540021883543295, + "kl": 0.134033203125, + "learning_rate": 9.449073439151572e-07, + "loss": 0.0023204542230814695, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 802, + "train_speed(iter/s)": 0.022562 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 210.875, + "completions/mean_length": 86.13541984558105, + "completions/min_length": 31.25, + "epoch": 1.595681310498883, + "grad_norm": 0.010410936371844324, + "kl": 0.149627685546875, + "learning_rate": 9.447632929161314e-07, + "loss": 0.0001496023323852569, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 803, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 69.63541889190674, + "completions/min_length": 30.5, + "epoch": 1.5976669148672127, + "grad_norm": 0.008618382565457883, + "kl": 0.13995361328125, + "learning_rate": 9.446190648473389e-07, + "loss": 0.00013988380669616163, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 804, + "train_speed(iter/s)": 0.022558 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.020833333333333332, + "completions/max_length": 345.625, + "completions/mean_length": 92.69791889190674, + "completions/min_length": 28.0, + "epoch": 1.5996525192355424, + "grad_norm": 1.3339048678842351, + "kl": 1.57891845703125, + "learning_rate": 9.444746597661997e-07, + "loss": 0.03770984709262848, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.10206206887960434, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.29628782719373703, + "rewards/Format/mean": 0.9791666716337204, + "rewards/Format/std": 0.07216878235340118, + "step": 805, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 68.66666841506958, + "completions/min_length": 32.75, + "epoch": 1.601638123603872, + "grad_norm": 0.008956449233589486, + "kl": 0.11895751953125, + "learning_rate": 9.443300777302049e-07, + "loss": 0.00011887378059327602, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 806, + "train_speed(iter/s)": 0.02255 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.5, + "completions/mean_length": 75.47916889190674, + "completions/min_length": 32.75, + "epoch": 1.6036237279722014, + "grad_norm": 1.615914651645132, + "kl": 0.1416015625, + "learning_rate": 9.441853187969153e-07, + "loss": -0.00377776101231575, + "memory(GiB)": 94.21, + "reward": 1.6666666865348816, + "reward_std": 0.08330589532852173, + "rewards/CineAccuracyORM/mean": 0.666666679084301, + "rewards/CineAccuracyORM/std": 0.38611526414752007, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 807, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 78.29166889190674, + "completions/min_length": 38.75, + "epoch": 1.6056093323405312, + "grad_norm": 0.00690676209294394, + "kl": 0.117431640625, + "learning_rate": 9.440403830239628e-07, + "loss": 0.00011741237540263683, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 808, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.25, + "completions/mean_length": 74.20833587646484, + "completions/min_length": 30.0, + "epoch": 1.6075949367088609, + "grad_norm": 0.008620998577279193, + "kl": 0.1343994140625, + "learning_rate": 9.438952704690492e-07, + "loss": 0.0001343963813269511, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 809, + "train_speed(iter/s)": 0.022545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 69.03125143051147, + "completions/min_length": 31.625, + "epoch": 1.6095805410771904, + "grad_norm": 0.00776802004671236, + "kl": 0.12030029296875, + "learning_rate": 9.437499811899472e-07, + "loss": 0.00012033487291773781, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 810, + "train_speed(iter/s)": 0.02255 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 77.46875333786011, + "completions/min_length": 35.0, + "epoch": 1.61156614544552, + "grad_norm": 0.00893684517133934, + "kl": 0.12255859375, + "learning_rate": 9.436045152444995e-07, + "loss": 0.00012254255125299096, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 811, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.5, + "completions/mean_length": 72.34375286102295, + "completions/min_length": 31.75, + "epoch": 1.6135517498138496, + "grad_norm": 0.007839196633365264, + "kl": 0.09814453125, + "learning_rate": 9.434588726906189e-07, + "loss": 9.823910659179091e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 812, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 65.08333683013916, + "completions/min_length": 28.25, + "epoch": 1.6155373541821794, + "grad_norm": 0.00821263143516481, + "kl": 0.1158447265625, + "learning_rate": 9.43313053586289e-07, + "loss": 0.00011564564192667603, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 813, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.625, + "completions/mean_length": 73.76042032241821, + "completions/min_length": 35.375, + "epoch": 1.6175229585505089, + "grad_norm": 0.01971462604692211, + "kl": 0.130859375, + "learning_rate": 9.431670579895637e-07, + "loss": 0.00013073688023723662, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 814, + "train_speed(iter/s)": 0.02255 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 72.55208587646484, + "completions/min_length": 35.75, + "epoch": 1.6195085629188384, + "grad_norm": 0.008274004499679967, + "kl": 0.104248046875, + "learning_rate": 9.430208859585666e-07, + "loss": 0.00010424414358567446, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 815, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.625, + "completions/mean_length": 75.75000238418579, + "completions/min_length": 37.125, + "epoch": 1.6214941672871679, + "grad_norm": 1.395976126387643, + "kl": 0.11334228515625, + "learning_rate": 9.428745375514924e-07, + "loss": 0.009927384555339813, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 816, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 71.34375286102295, + "completions/min_length": 27.125, + "epoch": 1.6234797716554976, + "grad_norm": 0.00677818219589261, + "kl": 0.11260986328125, + "learning_rate": 9.427280128266049e-07, + "loss": 0.00011265358625678346, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 817, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 74.48958539962769, + "completions/min_length": 34.25, + "epoch": 1.6254653760238273, + "grad_norm": 0.008810646902532714, + "kl": 0.1170654296875, + "learning_rate": 9.425813118422392e-07, + "loss": 0.00011714122956618667, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 818, + "train_speed(iter/s)": 0.022557 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 73.87500381469727, + "completions/min_length": 33.75, + "epoch": 1.6274509803921569, + "grad_norm": 0.8717749467815266, + "kl": 0.115447998046875, + "learning_rate": 9.424344346567999e-07, + "loss": 0.004413970746099949, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 819, + "train_speed(iter/s)": 0.02256 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 74.708336353302, + "completions/min_length": 34.125, + "epoch": 1.6294365847604864, + "grad_norm": 0.00917564527205472, + "kl": 0.1219482421875, + "learning_rate": 9.42287381328762e-07, + "loss": 0.00012195239833090454, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 820, + "train_speed(iter/s)": 0.022563 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.25, + "completions/mean_length": 72.34375238418579, + "completions/min_length": 36.625, + "epoch": 1.631422189128816, + "grad_norm": 1.1646514068054032, + "kl": 0.14031982421875, + "learning_rate": 9.421401519166705e-07, + "loss": -0.00496150366961956, + "memory(GiB)": 94.21, + "reward": 1.4479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.4479166669771075, + "rewards/CineAccuracyORM/std": 0.3624799847602844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 821, + "train_speed(iter/s)": 0.022561 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 77.63541841506958, + "completions/min_length": 39.25, + "epoch": 1.6334077934971458, + "grad_norm": 1.0734429124995073, + "kl": 0.13720703125, + "learning_rate": 9.419927464791406e-07, + "loss": -0.017771240323781967, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 822, + "train_speed(iter/s)": 0.022564 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 66.03125238418579, + "completions/min_length": 30.125, + "epoch": 1.6353933978654753, + "grad_norm": 0.009837349457302495, + "kl": 0.1162109375, + "learning_rate": 9.418451650748576e-07, + "loss": 0.0001161246545962058, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 823, + "train_speed(iter/s)": 0.022567 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.75, + "completions/mean_length": 76.177086353302, + "completions/min_length": 35.125, + "epoch": 1.6373790022338048, + "grad_norm": 1.2371917012477311, + "kl": 0.12396240234375, + "learning_rate": 9.416974077625768e-07, + "loss": 0.006578117609024048, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 824, + "train_speed(iter/s)": 0.022569 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.625, + "completions/mean_length": 70.69791889190674, + "completions/min_length": 34.5, + "epoch": 1.6393646066021346, + "grad_norm": 0.008997371818953376, + "kl": 0.13775634765625, + "learning_rate": 9.415494746011236e-07, + "loss": 0.00013793342805001885, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 825, + "train_speed(iter/s)": 0.022572 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 74.77083539962769, + "completions/min_length": 33.75, + "epoch": 1.6413502109704643, + "grad_norm": 0.008095205405073396, + "kl": 0.13104248046875, + "learning_rate": 9.414013656493933e-07, + "loss": 0.00013096435577608645, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 826, + "train_speed(iter/s)": 0.022572 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 287.125, + "completions/mean_length": 85.65625143051147, + "completions/min_length": 36.875, + "epoch": 1.6433358153387938, + "grad_norm": 0.4205598680692579, + "kl": 0.12908935546875, + "learning_rate": 9.412530809663511e-07, + "loss": 0.02198687568306923, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.05103103816509247, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 827, + "train_speed(iter/s)": 0.022564 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 185.75, + "completions/mean_length": 84.09375190734863, + "completions/min_length": 36.5, + "epoch": 1.6453214197071233, + "grad_norm": 0.7356170916547373, + "kl": 0.12030029296875, + "learning_rate": 9.411046206110324e-07, + "loss": -5.795123797724955e-05, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 828, + "train_speed(iter/s)": 0.02256 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 74.77083492279053, + "completions/min_length": 31.875, + "epoch": 1.6473070240754528, + "grad_norm": 0.010240680763762378, + "kl": 0.15985107421875, + "learning_rate": 9.409559846425425e-07, + "loss": 0.0001596625952515751, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 829, + "train_speed(iter/s)": 0.022557 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 77.8854193687439, + "completions/min_length": 31.875, + "epoch": 1.6492926284437825, + "grad_norm": 0.011050179391073294, + "kl": 0.16876220703125, + "learning_rate": 9.408071731200567e-07, + "loss": 0.00016890847473405302, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 830, + "train_speed(iter/s)": 0.022553 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 199.0, + "completions/mean_length": 79.614586353302, + "completions/min_length": 36.875, + "epoch": 1.6512782328121123, + "grad_norm": 0.009944493614827271, + "kl": 0.13702392578125, + "learning_rate": 9.406581861028196e-07, + "loss": 0.00013694663357455283, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 831, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 69.364586353302, + "completions/min_length": 30.125, + "epoch": 1.6532638371804418, + "grad_norm": 0.010700740199548475, + "kl": 0.13641357421875, + "learning_rate": 9.405090236501465e-07, + "loss": 0.00013627774023916572, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 832, + "train_speed(iter/s)": 0.02255 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 71.77083492279053, + "completions/min_length": 35.125, + "epoch": 1.6552494415487713, + "grad_norm": 0.010773221818202457, + "kl": 0.154296875, + "learning_rate": 9.40359685821422e-07, + "loss": 0.00015416624955832958, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 833, + "train_speed(iter/s)": 0.022545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.75, + "completions/mean_length": 75.82291841506958, + "completions/min_length": 32.125, + "epoch": 1.657235045917101, + "grad_norm": 0.8533079746770784, + "kl": 0.1595458984375, + "learning_rate": 9.402101726761007e-07, + "loss": 0.00359630910679698, + "memory(GiB)": 94.21, + "reward": 1.59375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.59375, + "rewards/CineAccuracyORM/std": 0.4482080899178982, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 834, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 70.01041841506958, + "completions/min_length": 32.5, + "epoch": 1.6592206502854308, + "grad_norm": 0.010378373688934767, + "kl": 0.13818359375, + "learning_rate": 9.400604842737071e-07, + "loss": 0.0001382166228722781, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 835, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 66.92708492279053, + "completions/min_length": 34.625, + "epoch": 1.6612062546537603, + "grad_norm": 0.011075477637148815, + "kl": 0.15032958984375, + "learning_rate": 9.399106206738352e-07, + "loss": 0.0001504713000031188, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 836, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 67.66666841506958, + "completions/min_length": 35.875, + "epoch": 1.6631918590220898, + "grad_norm": 0.0470884733661479, + "kl": 0.169677734375, + "learning_rate": 9.397605819361488e-07, + "loss": 0.00016952768783085048, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 837, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 71.20833587646484, + "completions/min_length": 31.5, + "epoch": 1.6651774633904195, + "grad_norm": 0.010485424927340087, + "kl": 0.148681640625, + "learning_rate": 9.396103681203818e-07, + "loss": 0.00014874427870381624, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 838, + "train_speed(iter/s)": 0.022549 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.5, + "completions/mean_length": 68.83333492279053, + "completions/min_length": 34.875, + "epoch": 1.6671630677587492, + "grad_norm": 1.5172244051541726, + "kl": 0.16845703125, + "learning_rate": 9.394599792863373e-07, + "loss": -0.0060480027459561825, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 839, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.75, + "completions/mean_length": 76.43750286102295, + "completions/min_length": 37.375, + "epoch": 1.6691486721270787, + "grad_norm": 0.010490664202563641, + "kl": 0.15625, + "learning_rate": 9.393094154938884e-07, + "loss": 0.0001561249082442373, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 840, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 73.85416841506958, + "completions/min_length": 35.5, + "epoch": 1.6711342764954082, + "grad_norm": 0.010349280649444464, + "kl": 0.13507080078125, + "learning_rate": 9.391586768029778e-07, + "loss": 0.00013509612472262233, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 841, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 68.239586353302, + "completions/min_length": 32.125, + "epoch": 1.6731198808637378, + "grad_norm": 0.010679222826676917, + "kl": 0.14532470703125, + "learning_rate": 9.390077632736177e-07, + "loss": 0.00014528073370456696, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 842, + "train_speed(iter/s)": 0.02255 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 69.85416841506958, + "completions/min_length": 41.5, + "epoch": 1.6751054852320675, + "grad_norm": 0.010478496725245676, + "kl": 0.1351318359375, + "learning_rate": 9.388566749658902e-07, + "loss": 0.00013509014388546348, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 843, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 69.51041841506958, + "completions/min_length": 32.375, + "epoch": 1.6770910896003972, + "grad_norm": 0.010293932236527227, + "kl": 0.12689208984375, + "learning_rate": 9.387054119399465e-07, + "loss": 0.00012689763389062136, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 844, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.625, + "completions/mean_length": 65.39583492279053, + "completions/min_length": 37.125, + "epoch": 1.6790766939687267, + "grad_norm": 0.009183468936833618, + "kl": 0.11871337890625, + "learning_rate": 9.385539742560078e-07, + "loss": 0.00011860028462251648, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 845, + "train_speed(iter/s)": 0.02256 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 71.59375095367432, + "completions/min_length": 32.625, + "epoch": 1.6810622983370562, + "grad_norm": 0.012968932210123681, + "kl": 0.143798828125, + "learning_rate": 9.384023619743646e-07, + "loss": 0.00014387266128323972, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 846, + "train_speed(iter/s)": 0.02256 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 74.05208539962769, + "completions/min_length": 35.25, + "epoch": 1.683047902705386, + "grad_norm": 1.232171409556621, + "kl": 0.1568603515625, + "learning_rate": 9.382505751553771e-07, + "loss": -0.021087627857923508, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 847, + "train_speed(iter/s)": 0.022556 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.875, + "completions/mean_length": 65.69791889190674, + "completions/min_length": 31.375, + "epoch": 1.6850335070737157, + "grad_norm": 0.9966612978467505, + "kl": 0.12884521484375, + "learning_rate": 9.380986138594748e-07, + "loss": -0.0031176875345408916, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 848, + "train_speed(iter/s)": 0.022558 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.125, + "completions/mean_length": 73.96875190734863, + "completions/min_length": 31.75, + "epoch": 1.6870191114420452, + "grad_norm": 0.008043221089977237, + "kl": 0.11553955078125, + "learning_rate": 9.379464781471569e-07, + "loss": 0.0001155805221060291, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 849, + "train_speed(iter/s)": 0.022563 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.875, + "completions/mean_length": 63.781251430511475, + "completions/min_length": 30.875, + "epoch": 1.6890047158103747, + "grad_norm": 0.04178812752756811, + "kl": 0.13671875, + "learning_rate": 9.377941680789915e-07, + "loss": 0.00013680793927051127, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 850, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 70.19791841506958, + "completions/min_length": 33.5, + "epoch": 1.6909903201787044, + "grad_norm": 0.010513364300764605, + "kl": 0.1287841796875, + "learning_rate": 9.376416837156169e-07, + "loss": 0.00012870121281594038, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 851, + "train_speed(iter/s)": 0.022569 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 82.21875238418579, + "completions/min_length": 37.25, + "epoch": 1.6929759245470342, + "grad_norm": 0.8661182400745436, + "kl": 0.134521484375, + "learning_rate": 9.3748902511774e-07, + "loss": 0.015169864520430565, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 852, + "train_speed(iter/s)": 0.022566 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 78.62500381469727, + "completions/min_length": 37.125, + "epoch": 1.6949615289153637, + "grad_norm": 1.0640528014409796, + "kl": 0.117431640625, + "learning_rate": 9.373361923461378e-07, + "loss": -0.004249073565006256, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166669771075, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 853, + "train_speed(iter/s)": 0.022563 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 69.64583539962769, + "completions/min_length": 32.625, + "epoch": 1.6969471332836932, + "grad_norm": 0.04133701901778342, + "kl": 0.178466796875, + "learning_rate": 9.371831854616561e-07, + "loss": 0.00017831040895543993, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 854, + "train_speed(iter/s)": 0.022562 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 78.16666984558105, + "completions/min_length": 40.875, + "epoch": 1.6989327376520227, + "grad_norm": 0.8444156214137051, + "kl": 0.15545654296875, + "learning_rate": 9.370300045252103e-07, + "loss": -0.007017943542450666, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 855, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.25, + "completions/mean_length": 68.03125143051147, + "completions/min_length": 31.75, + "epoch": 1.7009183420203524, + "grad_norm": 0.028400781178857627, + "kl": 0.11895751953125, + "learning_rate": 9.368766495977849e-07, + "loss": 0.00011885771527886391, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 856, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.375, + "completions/mean_length": 75.25000286102295, + "completions/min_length": 40.625, + "epoch": 1.7029039463886821, + "grad_norm": 1.0036904740880648, + "kl": 0.14837646484375, + "learning_rate": 9.367231207404339e-07, + "loss": 0.009655861184000969, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 857, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 202.75, + "completions/mean_length": 88.52083587646484, + "completions/min_length": 32.625, + "epoch": 1.7048895507570117, + "grad_norm": 1.3447539571216185, + "kl": 0.14324951171875, + "learning_rate": 9.365694180142802e-07, + "loss": 0.0016064762603491545, + "memory(GiB)": 94.21, + "reward": 1.9062500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.9062500074505806, + "rewards/CineAccuracyORM/std": 0.12591182813048363, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 858, + "train_speed(iter/s)": 0.022564 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 79.32291793823242, + "completions/min_length": 35.5, + "epoch": 1.7068751551253412, + "grad_norm": 0.009023147790031025, + "kl": 0.13037109375, + "learning_rate": 9.364155414805164e-07, + "loss": 0.0001304107572650537, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 859, + "train_speed(iter/s)": 0.022565 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.0, + "completions/mean_length": 77.84375286102295, + "completions/min_length": 34.5, + "epoch": 1.7088607594936709, + "grad_norm": 1.0928371921396525, + "kl": 0.1142578125, + "learning_rate": 9.362614912004039e-07, + "loss": -0.005867685191333294, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 860, + "train_speed(iter/s)": 0.022563 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.25, + "completions/mean_length": 78.37500190734863, + "completions/min_length": 34.25, + "epoch": 1.7108463638620006, + "grad_norm": 0.0068493316499581075, + "kl": 0.11376953125, + "learning_rate": 9.361072672352735e-07, + "loss": 0.00011375833855709061, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 861, + "train_speed(iter/s)": 0.022564 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.75, + "completions/mean_length": 84.4791693687439, + "completions/min_length": 38.5, + "epoch": 1.7128319682303301, + "grad_norm": 0.007296132282546416, + "kl": 0.09765625, + "learning_rate": 9.359528696465248e-07, + "loss": 9.752794721862301e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 862, + "train_speed(iter/s)": 0.022558 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.25, + "completions/mean_length": 70.01041841506958, + "completions/min_length": 30.0, + "epoch": 1.7148175725986596, + "grad_norm": 0.006151935780164156, + "kl": 0.0947265625, + "learning_rate": 9.357982984956271e-07, + "loss": 9.478656284045428e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 863, + "train_speed(iter/s)": 0.022558 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 78.239586353302, + "completions/min_length": 37.25, + "epoch": 1.7168031769669894, + "grad_norm": 1.186740585060766, + "kl": 0.13818359375, + "learning_rate": 9.356435538441183e-07, + "loss": 0.006953645497560501, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 864, + "train_speed(iter/s)": 0.022559 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.125, + "completions/mean_length": 85.62500286102295, + "completions/min_length": 35.25, + "epoch": 1.718788781335319, + "grad_norm": 0.008003721739039711, + "kl": 0.1339111328125, + "learning_rate": 9.354886357536056e-07, + "loss": 0.00013405291247181594, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 865, + "train_speed(iter/s)": 0.022557 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 82.90625143051147, + "completions/min_length": 36.875, + "epoch": 1.7207743857036486, + "grad_norm": 2.0441648839302324, + "kl": 0.11358642578125, + "learning_rate": 9.353335442857651e-07, + "loss": -0.002224589465186, + "memory(GiB)": 94.21, + "reward": 1.8125000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8125000055879354, + "rewards/CineAccuracyORM/std": 0.16259148344397545, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 866, + "train_speed(iter/s)": 0.022556 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.875, + "completions/mean_length": 82.44791793823242, + "completions/min_length": 39.75, + "epoch": 1.722759990071978, + "grad_norm": 0.00844535637371365, + "kl": 0.12396240234375, + "learning_rate": 9.351782795023421e-07, + "loss": 0.00012400794366840273, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 867, + "train_speed(iter/s)": 0.022555 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 75.770836353302, + "completions/min_length": 38.125, + "epoch": 1.7247455944403076, + "grad_norm": 1.4014405933264378, + "kl": 0.1292724609375, + "learning_rate": 9.350228414651509e-07, + "loss": 0.005324948579072952, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 868, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 70.90625238418579, + "completions/min_length": 34.125, + "epoch": 1.7267311988086373, + "grad_norm": 1.2667894439507483, + "kl": 0.1318359375, + "learning_rate": 9.348672302360747e-07, + "loss": 0.007825274020433426, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 869, + "train_speed(iter/s)": 0.022555 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.625, + "completions/mean_length": 81.5104193687439, + "completions/min_length": 37.5, + "epoch": 1.728716803176967, + "grad_norm": 0.008836764003010587, + "kl": 0.121337890625, + "learning_rate": 9.347114458770655e-07, + "loss": 0.00012140587205067277, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 870, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 82.84375143051147, + "completions/min_length": 31.875, + "epoch": 1.7307024075452966, + "grad_norm": 0.6426275280785991, + "kl": 0.1240234375, + "learning_rate": 9.345554884501446e-07, + "loss": -0.015416160225868225, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 871, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.625, + "completions/mean_length": 87.37500381469727, + "completions/min_length": 41.0, + "epoch": 1.732688011913626, + "grad_norm": 0.008714394265764763, + "kl": 0.13043212890625, + "learning_rate": 9.343993580174017e-07, + "loss": 0.00013047503307461739, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 872, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 78.2916693687439, + "completions/min_length": 33.875, + "epoch": 1.7346736162819558, + "grad_norm": 0.00848953472067005, + "kl": 0.11932373046875, + "learning_rate": 9.342430546409959e-07, + "loss": 0.00011928447929676622, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 873, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.75, + "completions/mean_length": 80.44791841506958, + "completions/min_length": 38.5, + "epoch": 1.7366592206502856, + "grad_norm": 0.006718660070962197, + "kl": 0.1209716796875, + "learning_rate": 9.340865783831548e-07, + "loss": 0.0001210131958941929, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 874, + "train_speed(iter/s)": 0.022549 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 75.83333492279053, + "completions/min_length": 36.5, + "epoch": 1.738644825018615, + "grad_norm": 0.9307086120262128, + "kl": 0.122802734375, + "learning_rate": 9.33929929306175e-07, + "loss": 0.00012271106243133545, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 875, + "train_speed(iter/s)": 0.022553 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.875, + "completions/mean_length": 86.46875190734863, + "completions/min_length": 34.875, + "epoch": 1.7406304293869446, + "grad_norm": 0.006989194255262667, + "kl": 0.1280517578125, + "learning_rate": 9.337731074724217e-07, + "loss": 0.00012786986189894378, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 876, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 76.50000238418579, + "completions/min_length": 33.625, + "epoch": 1.7426160337552743, + "grad_norm": 0.8613938715692814, + "kl": 0.12774658203125, + "learning_rate": 9.336161129443294e-07, + "loss": -0.002113398164510727, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 877, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 226.0, + "completions/mean_length": 89.64583587646484, + "completions/min_length": 37.0, + "epoch": 1.744601638123604, + "grad_norm": 0.006956607435981466, + "kl": 0.13836669921875, + "learning_rate": 9.334589457844005e-07, + "loss": 0.00013832177501171827, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 878, + "train_speed(iter/s)": 0.022549 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 75.89583587646484, + "completions/min_length": 36.0, + "epoch": 1.7465872424919335, + "grad_norm": 0.018903615038227694, + "kl": 0.12786865234375, + "learning_rate": 9.333016060552068e-07, + "loss": 0.00012793888163287193, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 879, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 79.77083539962769, + "completions/min_length": 42.375, + "epoch": 1.748572846860263, + "grad_norm": 0.009768558421778843, + "kl": 0.13153076171875, + "learning_rate": 9.331440938193886e-07, + "loss": 0.00013149468577466905, + "memory(GiB)": 94.21, + "reward": 1.5, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 880, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 75.87500190734863, + "completions/min_length": 38.25, + "epoch": 1.7505584512285925, + "grad_norm": 0.007302215714605456, + "kl": 0.1165771484375, + "learning_rate": 9.329864091396551e-07, + "loss": 0.00011654042464215308, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 881, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 80.26041841506958, + "completions/min_length": 35.0, + "epoch": 1.7525440555969223, + "grad_norm": 0.008200742112699761, + "kl": 0.1165771484375, + "learning_rate": 9.328285520787836e-07, + "loss": 0.00011656450806185603, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 882, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 69.63541889190674, + "completions/min_length": 36.375, + "epoch": 1.754529659965252, + "grad_norm": 0.9512648471477639, + "kl": 0.13714599609375, + "learning_rate": 9.326705226996205e-07, + "loss": -0.004535972140729427, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 883, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 75.4791693687439, + "completions/min_length": 36.0, + "epoch": 1.7565152643335815, + "grad_norm": 0.010957816091449385, + "kl": 0.12603759765625, + "learning_rate": 9.325123210650808e-07, + "loss": 0.00012580420298036188, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 884, + "train_speed(iter/s)": 0.022555 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 76.67708587646484, + "completions/min_length": 35.625, + "epoch": 1.758500868701911, + "grad_norm": 0.7032466702633126, + "kl": 0.11761474609375, + "learning_rate": 9.323539472381478e-07, + "loss": 0.00044058263301849365, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 885, + "train_speed(iter/s)": 0.022555 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.75, + "completions/mean_length": 81.71875286102295, + "completions/min_length": 36.375, + "epoch": 1.7604864730702408, + "grad_norm": 0.007635854293946776, + "kl": 0.10650634765625, + "learning_rate": 9.321954012818736e-07, + "loss": 0.00010652549099177122, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 886, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 73.52083587646484, + "completions/min_length": 36.875, + "epoch": 1.7624720774385705, + "grad_norm": 0.012792755398809138, + "kl": 0.127197265625, + "learning_rate": 9.320366832593784e-07, + "loss": 0.00012726319255307317, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 887, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 75.42708587646484, + "completions/min_length": 33.625, + "epoch": 1.7644576818069, + "grad_norm": 0.05229859065893468, + "kl": 0.133056640625, + "learning_rate": 9.318777932338518e-07, + "loss": 0.00013305262837093323, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 888, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.25, + "completions/mean_length": 83.645836353302, + "completions/min_length": 37.125, + "epoch": 1.7664432861752295, + "grad_norm": 0.0062669782192380655, + "kl": 0.1102294921875, + "learning_rate": 9.317187312685508e-07, + "loss": 0.00011017799261026084, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 889, + "train_speed(iter/s)": 0.022553 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.5, + "completions/mean_length": 72.34375190734863, + "completions/min_length": 33.125, + "epoch": 1.7684288905435592, + "grad_norm": 0.9103954532426873, + "kl": 0.12982177734375, + "learning_rate": 9.315594974268017e-07, + "loss": 0.001666487893089652, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 890, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 79.53125190734863, + "completions/min_length": 35.125, + "epoch": 1.770414494911889, + "grad_norm": 0.0077098493005936025, + "kl": 0.1195068359375, + "learning_rate": 9.314000917719989e-07, + "loss": 0.0001194100477732718, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 891, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.25, + "completions/mean_length": 76.677086353302, + "completions/min_length": 39.75, + "epoch": 1.7724000992802185, + "grad_norm": 1.7381051796500595, + "kl": 0.13616943359375, + "learning_rate": 9.312405143676049e-07, + "loss": 0.00013609975576400757, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 892, + "train_speed(iter/s)": 0.022555 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 77.083336353302, + "completions/min_length": 39.875, + "epoch": 1.774385703648548, + "grad_norm": 0.008840746026892974, + "kl": 0.12017822265625, + "learning_rate": 9.31080765277151e-07, + "loss": 0.00012022841110592708, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 893, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 79.97916889190674, + "completions/min_length": 37.25, + "epoch": 1.7763713080168775, + "grad_norm": 0.9613325652328614, + "kl": 0.107177734375, + "learning_rate": 9.309208445642369e-07, + "loss": 0.0014907767763361335, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 894, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 85.91666889190674, + "completions/min_length": 43.375, + "epoch": 1.7783569123852072, + "grad_norm": 0.6204390112258411, + "kl": 0.10943603515625, + "learning_rate": 9.307607522925302e-07, + "loss": 0.007937220856547356, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 895, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.375, + "completions/mean_length": 84.09375381469727, + "completions/min_length": 39.0, + "epoch": 1.780342516753537, + "grad_norm": 0.006596129050990661, + "kl": 0.12164306640625, + "learning_rate": 9.306004885257673e-07, + "loss": 0.00012155827425885946, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 896, + "train_speed(iter/s)": 0.022545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 75.9791693687439, + "completions/min_length": 33.125, + "epoch": 1.7823281211218664, + "grad_norm": 0.8419564927483318, + "kl": 0.1160888671875, + "learning_rate": 9.304400533277526e-07, + "loss": -0.005623520817607641, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 897, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.375, + "completions/mean_length": 88.34375286102295, + "completions/min_length": 39.875, + "epoch": 1.784313725490196, + "grad_norm": 0.007441250195927309, + "kl": 0.150390625, + "learning_rate": 9.302794467623584e-07, + "loss": 0.00015034168609417975, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 898, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 77.03125190734863, + "completions/min_length": 42.625, + "epoch": 1.7862993298585257, + "grad_norm": 0.007088094241563425, + "kl": 0.1243896484375, + "learning_rate": 9.301186688935261e-07, + "loss": 0.00012434719246812165, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 899, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.0, + "completions/mean_length": 73.864586353302, + "completions/min_length": 32.0, + "epoch": 1.7882849342268554, + "grad_norm": 1.5123569867386064, + "kl": 0.112548828125, + "learning_rate": 9.299577197852644e-07, + "loss": 0.008591524325311184, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.833333333954215, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 900, + "train_speed(iter/s)": 0.022539 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.625, + "completions/mean_length": 83.61458587646484, + "completions/min_length": 37.375, + "epoch": 1.790270538595185, + "grad_norm": 0.8705223897291671, + "kl": 0.11187744140625, + "learning_rate": 9.297965995016511e-07, + "loss": 0.013531377539038658, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 901, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 79.28125190734863, + "completions/min_length": 39.5, + "epoch": 1.7922561429635144, + "grad_norm": 0.00771821969783833, + "kl": 0.11553955078125, + "learning_rate": 9.296353081068309e-07, + "loss": 0.00011562125291675329, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 902, + "train_speed(iter/s)": 0.022538 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 78.25000286102295, + "completions/min_length": 35.75, + "epoch": 1.7942417473318442, + "grad_norm": 1.0038647966048213, + "kl": 0.12664794921875, + "learning_rate": 9.29473845665018e-07, + "loss": -0.008175798691809177, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 903, + "train_speed(iter/s)": 0.022537 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.5, + "completions/mean_length": 83.48958778381348, + "completions/min_length": 36.875, + "epoch": 1.796227351700174, + "grad_norm": 0.6299893745867365, + "kl": 0.126953125, + "learning_rate": 9.293122122404937e-07, + "loss": -0.005444802343845367, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666679084301, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 904, + "train_speed(iter/s)": 0.022534 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 76.0729193687439, + "completions/min_length": 36.125, + "epoch": 1.7982129560685034, + "grad_norm": 0.006683691544014729, + "kl": 0.1065673828125, + "learning_rate": 9.291504078976078e-07, + "loss": 0.00010656008817022666, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 905, + "train_speed(iter/s)": 0.022531 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 74.89583539962769, + "completions/min_length": 33.125, + "epoch": 1.800198560436833, + "grad_norm": 0.8765895081658152, + "kl": 0.12567138671875, + "learning_rate": 9.289884327007782e-07, + "loss": -0.006656238343566656, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 906, + "train_speed(iter/s)": 0.022531 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.25, + "completions/mean_length": 80.28125238418579, + "completions/min_length": 34.25, + "epoch": 1.8021841648051624, + "grad_norm": 0.6817033388841132, + "kl": 0.121429443359375, + "learning_rate": 9.288262867144905e-07, + "loss": 0.013028094545006752, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 907, + "train_speed(iter/s)": 0.022532 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.875, + "completions/mean_length": 75.08333492279053, + "completions/min_length": 33.125, + "epoch": 1.8041697691734921, + "grad_norm": 0.009188903603812635, + "kl": 0.12896728515625, + "learning_rate": 9.286639700032984e-07, + "loss": 0.00012894233805127442, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 908, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.875, + "completions/mean_length": 95.34375286102295, + "completions/min_length": 36.0, + "epoch": 1.8061553735418219, + "grad_norm": 0.021478704084264718, + "kl": 0.16107177734375, + "learning_rate": 9.28501482631824e-07, + "loss": 0.00016091513680294156, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 909, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 75.58333492279053, + "completions/min_length": 33.5, + "epoch": 1.8081409779101514, + "grad_norm": 1.4350633322121908, + "kl": 0.122802734375, + "learning_rate": 9.283388246647565e-07, + "loss": -0.004109155386686325, + "memory(GiB)": 94.21, + "reward": 1.8125000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8125000074505806, + "rewards/CineAccuracyORM/std": 0.2407601661980152, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 910, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 77.41666793823242, + "completions/min_length": 32.75, + "epoch": 1.810126582278481, + "grad_norm": 0.009620782843549312, + "kl": 0.14208984375, + "learning_rate": 9.281759961668541e-07, + "loss": 0.00014202986494638026, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 911, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 78.75000190734863, + "completions/min_length": 36.375, + "epoch": 1.8121121866468106, + "grad_norm": 0.008964483892672192, + "kl": 0.115234375, + "learning_rate": 9.280129972029418e-07, + "loss": 0.00011530831397976726, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 912, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.625, + "completions/mean_length": 77.364586353302, + "completions/min_length": 36.375, + "epoch": 1.8140977910151403, + "grad_norm": 0.009324073321952835, + "kl": 0.13043212890625, + "learning_rate": 9.278498278379134e-07, + "loss": 0.0001303391472902149, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 913, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 70.73958492279053, + "completions/min_length": 35.5, + "epoch": 1.8160833953834699, + "grad_norm": 0.008782209003762748, + "kl": 0.13873291015625, + "learning_rate": 9.276864881367297e-07, + "loss": 0.00013890663103666157, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 914, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 74.28125190734863, + "completions/min_length": 33.125, + "epoch": 1.8180689997517994, + "grad_norm": 1.2257257286392702, + "kl": 0.12548828125, + "learning_rate": 9.275229781644199e-07, + "loss": -0.010700320824980736, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 915, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.875, + "completions/mean_length": 66.36458587646484, + "completions/min_length": 35.125, + "epoch": 1.820054604120129, + "grad_norm": 0.015152993301309597, + "kl": 0.141845703125, + "learning_rate": 9.273592979860808e-07, + "loss": 0.0001418372557964176, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 916, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 79.78125190734863, + "completions/min_length": 36.75, + "epoch": 1.8220402084884588, + "grad_norm": 1.164141743224586, + "kl": 0.1375732421875, + "learning_rate": 9.271954476668771e-07, + "loss": -0.013721026480197906, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 917, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 76.08333587646484, + "completions/min_length": 34.75, + "epoch": 1.8240258128567883, + "grad_norm": 0.9718368314130161, + "kl": 0.11944580078125, + "learning_rate": 9.27031427272041e-07, + "loss": -0.0009028440108522773, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 918, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 81.333336353302, + "completions/min_length": 36.625, + "epoch": 1.8260114172251178, + "grad_norm": 0.008453763390296949, + "kl": 0.12518310546875, + "learning_rate": 9.268672368668724e-07, + "loss": 0.0001252280198968947, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 919, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 79.92708587646484, + "completions/min_length": 35.75, + "epoch": 1.8279970215934473, + "grad_norm": 0.015727045827087466, + "kl": 0.13104248046875, + "learning_rate": 9.267028765167391e-07, + "loss": 0.00013102087541483343, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 920, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.125, + "completions/mean_length": 78.92708539962769, + "completions/min_length": 34.0, + "epoch": 1.829982625961777, + "grad_norm": 0.007909932223443645, + "kl": 0.125732421875, + "learning_rate": 9.265383462870766e-07, + "loss": 0.00012571165279950947, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 921, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 82.93750190734863, + "completions/min_length": 36.75, + "epoch": 1.8319682303301068, + "grad_norm": 0.0077653537751326725, + "kl": 0.10980224609375, + "learning_rate": 9.263736462433878e-07, + "loss": 0.00010963292879750952, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 922, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 80.95833587646484, + "completions/min_length": 35.75, + "epoch": 1.8339538346984363, + "grad_norm": 0.007045395069392264, + "kl": 0.11553955078125, + "learning_rate": 9.262087764512432e-07, + "loss": 0.00011566970351850614, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 923, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 80.20833492279053, + "completions/min_length": 39.0, + "epoch": 1.8359394390667658, + "grad_norm": 0.7339331511708622, + "kl": 0.12884521484375, + "learning_rate": 9.260437369762812e-07, + "loss": 0.010075999423861504, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 924, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 69.0104193687439, + "completions/min_length": 31.25, + "epoch": 1.8379250434350956, + "grad_norm": 0.7720731741630739, + "kl": 0.11944580078125, + "learning_rate": 9.258785278842074e-07, + "loss": 0.0039865667931735516, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 925, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 77.78125333786011, + "completions/min_length": 34.5, + "epoch": 1.8399106478034253, + "grad_norm": 0.006722094555020101, + "kl": 0.125244140625, + "learning_rate": 9.257131492407951e-07, + "loss": 0.00012531019456218928, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 926, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.625, + "completions/mean_length": 72.2916693687439, + "completions/min_length": 38.375, + "epoch": 1.8418962521717548, + "grad_norm": 1.0453864667150048, + "kl": 0.1173095703125, + "learning_rate": 9.255476011118851e-07, + "loss": 0.007351234555244446, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 927, + "train_speed(iter/s)": 0.022532 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.25, + "completions/mean_length": 79.70833587646484, + "completions/min_length": 35.375, + "epoch": 1.8438818565400843, + "grad_norm": 0.007729291555005753, + "kl": 0.12371826171875, + "learning_rate": 9.253818835633855e-07, + "loss": 0.00012368014722596854, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 928, + "train_speed(iter/s)": 0.022532 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 76.55208683013916, + "completions/min_length": 33.75, + "epoch": 1.845867460908414, + "grad_norm": 0.008353250396239633, + "kl": 0.11431884765625, + "learning_rate": 9.252159966612722e-07, + "loss": 0.0001142207402153872, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 929, + "train_speed(iter/s)": 0.022534 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 74.30208587646484, + "completions/min_length": 33.875, + "epoch": 1.8478530652767438, + "grad_norm": 0.008869208536607625, + "kl": 0.12701416015625, + "learning_rate": 9.250499404715882e-07, + "loss": 0.00012701813830062747, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 930, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.875, + "completions/mean_length": 88.63541984558105, + "completions/min_length": 37.875, + "epoch": 1.8498386696450733, + "grad_norm": 0.007087772849495382, + "kl": 0.13250732421875, + "learning_rate": 9.248837150604441e-07, + "loss": 0.00013244128786027431, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 931, + "train_speed(iter/s)": 0.022531 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 77.92708587646484, + "completions/min_length": 40.125, + "epoch": 1.8518242740134028, + "grad_norm": 0.0065413932307173605, + "kl": 0.11614990234375, + "learning_rate": 9.247173204940176e-07, + "loss": 0.00011607255146373063, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 932, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 81.83333396911621, + "completions/min_length": 35.25, + "epoch": 1.8538098783817323, + "grad_norm": 0.007473283230136595, + "kl": 0.12152099609375, + "learning_rate": 9.245507568385541e-07, + "loss": 0.00012152847921242937, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 933, + "train_speed(iter/s)": 0.022532 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.375, + "completions/mean_length": 74.65625190734863, + "completions/min_length": 29.125, + "epoch": 1.855795482750062, + "grad_norm": 1.1503934173831112, + "kl": 0.13226318359375, + "learning_rate": 9.243840241603662e-07, + "loss": 0.007513184100389481, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 934, + "train_speed(iter/s)": 0.022531 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 71.38541889190674, + "completions/min_length": 33.875, + "epoch": 1.8577810871183917, + "grad_norm": 0.8074052184135067, + "kl": 0.1397705078125, + "learning_rate": 9.242171225258335e-07, + "loss": -0.0060031963512301445, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 935, + "train_speed(iter/s)": 0.022533 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 68.11458587646484, + "completions/min_length": 37.0, + "epoch": 1.8597666914867212, + "grad_norm": 0.007230071813683354, + "kl": 0.12274169921875, + "learning_rate": 9.240500520014034e-07, + "loss": 0.0001225871965289116, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 936, + "train_speed(iter/s)": 0.022536 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 69.83333444595337, + "completions/min_length": 35.625, + "epoch": 1.8617522958550508, + "grad_norm": 0.812551116027472, + "kl": 0.1329345703125, + "learning_rate": 9.238828126535901e-07, + "loss": 0.007950139231979847, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 937, + "train_speed(iter/s)": 0.022537 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 71.45833539962769, + "completions/min_length": 34.375, + "epoch": 1.8637379002233805, + "grad_norm": 0.007693412059398112, + "kl": 0.13232421875, + "learning_rate": 9.23715404548975e-07, + "loss": 0.0001322162861470133, + "memory(GiB)": 94.21, + "reward": 1.5, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 938, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 70.93750143051147, + "completions/min_length": 37.375, + "epoch": 1.8657235045917102, + "grad_norm": 1.3189935162431023, + "kl": 0.1307373046875, + "learning_rate": 9.23547827754207e-07, + "loss": 0.0037108275573700666, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 939, + "train_speed(iter/s)": 0.022543 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.75, + "completions/mean_length": 75.17708539962769, + "completions/min_length": 35.875, + "epoch": 1.8677091089600397, + "grad_norm": 0.007031096849271503, + "kl": 0.12457275390625, + "learning_rate": 9.233800823360022e-07, + "loss": 0.00012451031943783164, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 940, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 71.17708587646484, + "completions/min_length": 34.125, + "epoch": 1.8696947133283692, + "grad_norm": 1.5032450276054758, + "kl": 0.1480712890625, + "learning_rate": 9.232121683611434e-07, + "loss": 0.005250070244073868, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 941, + "train_speed(iter/s)": 0.022539 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 73.73958587646484, + "completions/min_length": 34.25, + "epoch": 1.871680317696699, + "grad_norm": 0.006310721623254409, + "kl": 0.0950927734375, + "learning_rate": 9.230440858964805e-07, + "loss": 9.510396921541542e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 942, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 76.10416889190674, + "completions/min_length": 39.375, + "epoch": 1.8736659220650287, + "grad_norm": 0.9377452971936192, + "kl": 0.110595703125, + "learning_rate": 9.228758350089313e-07, + "loss": 0.00011061008262913674, + "memory(GiB)": 94.21, + "reward": 1.9791666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9791666716337204, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 943, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 67.77083492279053, + "completions/min_length": 33.5, + "epoch": 1.8756515264333582, + "grad_norm": 0.013447880880508639, + "kl": 0.114990234375, + "learning_rate": 9.227074157654796e-07, + "loss": 0.00011495831131469458, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 944, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.875, + "completions/mean_length": 74.95833539962769, + "completions/min_length": 32.125, + "epoch": 1.8776371308016877, + "grad_norm": 0.010348588107422185, + "kl": 0.1312255859375, + "learning_rate": 9.225388282331769e-07, + "loss": 0.00013122425298206508, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 945, + "train_speed(iter/s)": 0.022543 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 77.81250190734863, + "completions/min_length": 35.125, + "epoch": 1.8796227351700172, + "grad_norm": 0.012148346145160555, + "kl": 0.14202880859375, + "learning_rate": 9.223700724791416e-07, + "loss": 0.00014211777306627482, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 946, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 70.677086353302, + "completions/min_length": 32.5, + "epoch": 1.881608339538347, + "grad_norm": 1.1922451130482545, + "kl": 0.1201171875, + "learning_rate": 9.222011485705585e-07, + "loss": -0.0010681685525923967, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 947, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.125, + "completions/mean_length": 84.36458587646484, + "completions/min_length": 37.125, + "epoch": 1.8835939439066767, + "grad_norm": 0.0073728058023891, + "kl": 0.15185546875, + "learning_rate": 9.220320565746805e-07, + "loss": 0.0001520304795121774, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 948, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.625, + "completions/mean_length": 83.90625333786011, + "completions/min_length": 37.5, + "epoch": 1.8855795482750062, + "grad_norm": 2.933732312981029, + "kl": 0.10418701171875, + "learning_rate": 9.218627965588261e-07, + "loss": 0.010607926174998283, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 949, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 71.770836353302, + "completions/min_length": 35.625, + "epoch": 1.8875651526433357, + "grad_norm": 1.3556053830761863, + "kl": 0.11370849609375, + "learning_rate": 9.216933685903818e-07, + "loss": 0.005140597932040691, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 950, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.0, + "completions/mean_length": 75.56250143051147, + "completions/min_length": 35.625, + "epoch": 1.8895507570116654, + "grad_norm": 0.01602748227186556, + "kl": 0.12286376953125, + "learning_rate": 9.215237727368002e-07, + "loss": 0.00012275311746634543, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 951, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 69.34375190734863, + "completions/min_length": 31.5, + "epoch": 1.8915363613799951, + "grad_norm": 0.008974914022572961, + "kl": 0.1324462890625, + "learning_rate": 9.213540090656013e-07, + "loss": 0.00013249943731352687, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 952, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.0, + "completions/mean_length": 68.89583539962769, + "completions/min_length": 35.75, + "epoch": 1.8935219657483247, + "grad_norm": 0.00667277933473568, + "kl": 0.11700439453125, + "learning_rate": 9.211840776443713e-07, + "loss": 0.00011695442663040012, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 953, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 68.55208539962769, + "completions/min_length": 29.625, + "epoch": 1.8955075701166542, + "grad_norm": 0.007696458386247361, + "kl": 0.11077880859375, + "learning_rate": 9.210139785407638e-07, + "loss": 0.00011075345537392423, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 954, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.125, + "completions/mean_length": 67.79166841506958, + "completions/min_length": 36.0, + "epoch": 1.897493174484984, + "grad_norm": 1.3722402511075555, + "kl": 0.1376953125, + "learning_rate": 9.208437118224987e-07, + "loss": -0.003108623204752803, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 955, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.5, + "completions/mean_length": 75.61458539962769, + "completions/min_length": 36.125, + "epoch": 1.8994787788533136, + "grad_norm": 0.060150345637783584, + "kl": 0.13623046875, + "learning_rate": 9.20673277557363e-07, + "loss": 0.0001361073082080111, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 956, + "train_speed(iter/s)": 0.022549 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.375, + "completions/mean_length": 79.25000238418579, + "completions/min_length": 34.0, + "epoch": 1.9014643832216431, + "grad_norm": 0.7202040623211893, + "kl": 0.12237548828125, + "learning_rate": 9.205026758132102e-07, + "loss": -0.00564401363953948, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 957, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.375, + "completions/mean_length": 75.70833683013916, + "completions/min_length": 35.5, + "epoch": 1.9034499875899726, + "grad_norm": 0.009994373339757778, + "kl": 0.10723876953125, + "learning_rate": 9.203319066579603e-07, + "loss": 0.00010721605212893337, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 958, + "train_speed(iter/s)": 0.022549 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.0, + "completions/mean_length": 64.51041793823242, + "completions/min_length": 32.75, + "epoch": 1.9054355919583021, + "grad_norm": 0.012637668678729314, + "kl": 0.1270751953125, + "learning_rate": 9.201609701596003e-07, + "loss": 0.00012694580073002726, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 959, + "train_speed(iter/s)": 0.022553 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.875, + "completions/mean_length": 75.42708492279053, + "completions/min_length": 35.25, + "epoch": 1.9074211963266319, + "grad_norm": 0.7784184826385184, + "kl": 0.11529541015625, + "learning_rate": 9.199898663861836e-07, + "loss": -0.004092290066182613, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 960, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 74.32291889190674, + "completions/min_length": 36.625, + "epoch": 1.9094068006949616, + "grad_norm": 1.3496105805474334, + "kl": 0.131561279296875, + "learning_rate": 9.198185954058304e-07, + "loss": 0.008745516650378704, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 961, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 70.59375143051147, + "completions/min_length": 36.5, + "epoch": 1.9113924050632911, + "grad_norm": 0.031016430453186167, + "kl": 0.116455078125, + "learning_rate": 9.196471572867272e-07, + "loss": 0.00011654614354483783, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 962, + "train_speed(iter/s)": 0.022553 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 71.30208444595337, + "completions/min_length": 34.0, + "epoch": 1.9133780094316206, + "grad_norm": 0.008761079412151937, + "kl": 0.11737060546875, + "learning_rate": 9.194755520971272e-07, + "loss": 0.00011732772691175342, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 963, + "train_speed(iter/s)": 0.022554 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 74.95833539962769, + "completions/min_length": 35.75, + "epoch": 1.9153636137999503, + "grad_norm": 0.007919570470525118, + "kl": 0.1171875, + "learning_rate": 9.193037799053502e-07, + "loss": 0.00011698844900820404, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 964, + "train_speed(iter/s)": 0.022555 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 81.46875286102295, + "completions/min_length": 42.25, + "epoch": 1.91734921816828, + "grad_norm": 1.0164432816307807, + "kl": 0.13189697265625, + "learning_rate": 9.191318407797823e-07, + "loss": 0.004111527930945158, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 965, + "train_speed(iter/s)": 0.022553 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 74.94791984558105, + "completions/min_length": 32.375, + "epoch": 1.9193348225366096, + "grad_norm": 0.0062646852844806585, + "kl": 0.11651611328125, + "learning_rate": 9.189597347888761e-07, + "loss": 0.00011658138100756332, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 966, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.625, + "completions/mean_length": 76.32291889190674, + "completions/min_length": 33.875, + "epoch": 1.921320426904939, + "grad_norm": 0.9086865320292101, + "kl": 0.13519287109375, + "learning_rate": 9.187874620011506e-07, + "loss": -0.0014109518378973007, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 967, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 72.81250190734863, + "completions/min_length": 32.875, + "epoch": 1.9233060312732688, + "grad_norm": 0.006868939690706016, + "kl": 0.10931396484375, + "learning_rate": 9.186150224851916e-07, + "loss": 0.00010914703307207674, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 968, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 72.18750143051147, + "completions/min_length": 34.0, + "epoch": 1.9252916356415986, + "grad_norm": 0.006124398119808875, + "kl": 0.1202392578125, + "learning_rate": 9.184424163096507e-07, + "loss": 0.0001203574865940027, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 969, + "train_speed(iter/s)": 0.022551 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.625, + "completions/mean_length": 71.98958539962769, + "completions/min_length": 36.25, + "epoch": 1.927277240009928, + "grad_norm": 1.106137844984077, + "kl": 0.12115478515625, + "learning_rate": 9.18269643543246e-07, + "loss": -0.01140589639544487, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 970, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 75.70833587646484, + "completions/min_length": 37.5, + "epoch": 1.9292628443782576, + "grad_norm": 1.408283715374188, + "kl": 0.12353515625, + "learning_rate": 9.180967042547623e-07, + "loss": -0.0031010694801807404, + "memory(GiB)": 94.21, + "reward": 1.6250000149011612, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.6250000027939677, + "rewards/CineAccuracyORM/std": 0.29628782719373703, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 971, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 85.0104193687439, + "completions/min_length": 33.75, + "epoch": 1.931248448746587, + "grad_norm": 0.00550594396251065, + "kl": 0.10125732421875, + "learning_rate": 9.179235985130503e-07, + "loss": 0.00010118850332219154, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 972, + "train_speed(iter/s)": 0.022545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 68.72916793823242, + "completions/min_length": 34.0, + "epoch": 1.9332340531149168, + "grad_norm": 0.005789034473788149, + "kl": 0.09600830078125, + "learning_rate": 9.17750326387027e-07, + "loss": 9.600781777407974e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 973, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 80.53125286102295, + "completions/min_length": 35.625, + "epoch": 1.9352196574832465, + "grad_norm": 0.006653491650915339, + "kl": 0.11248779296875, + "learning_rate": 9.175768879456758e-07, + "loss": 0.00011252841068198904, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 974, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 85.53125286102295, + "completions/min_length": 42.125, + "epoch": 1.937205261851576, + "grad_norm": 1.534650739331518, + "kl": 0.1253662109375, + "learning_rate": 9.174032832580464e-07, + "loss": -0.008691448718309402, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 975, + "train_speed(iter/s)": 0.022545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 75.3229193687439, + "completions/min_length": 32.625, + "epoch": 1.9391908662199056, + "grad_norm": 1.0375136763787156, + "kl": 0.10107421875, + "learning_rate": 9.172295123932543e-07, + "loss": 0.006832679267972708, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 976, + "train_speed(iter/s)": 0.022545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 187.125, + "completions/mean_length": 90.90625286102295, + "completions/min_length": 32.125, + "epoch": 1.9411764705882353, + "grad_norm": 0.014643125295771123, + "kl": 0.1373291015625, + "learning_rate": 9.170555754204816e-07, + "loss": 0.0001373680424876511, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 977, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 82.4166693687439, + "completions/min_length": 30.0, + "epoch": 1.943162074956565, + "grad_norm": 0.006284893212359794, + "kl": 0.10443115234375, + "learning_rate": 9.168814724089762e-07, + "loss": 0.00010449031105963513, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 978, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.25, + "completions/mean_length": 85.84375190734863, + "completions/min_length": 34.875, + "epoch": 1.9451476793248945, + "grad_norm": 0.01115706204516085, + "kl": 0.123779296875, + "learning_rate": 9.167072034280521e-07, + "loss": 0.00012373040954116732, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 979, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 80.56250286102295, + "completions/min_length": 38.0, + "epoch": 1.947133283693224, + "grad_norm": 0.006286787971024888, + "kl": 0.1273193359375, + "learning_rate": 9.165327685470898e-07, + "loss": 0.00012719148071482778, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 980, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.125, + "completions/mean_length": 78.40625190734863, + "completions/min_length": 32.125, + "epoch": 1.9491188880615538, + "grad_norm": 0.01634320920442587, + "kl": 0.1168212890625, + "learning_rate": 9.163581678355354e-07, + "loss": 0.00011691331019392237, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 981, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.25, + "completions/mean_length": 78.06250333786011, + "completions/min_length": 34.5, + "epoch": 1.9511044924298835, + "grad_norm": 0.058598749258318386, + "kl": 0.1265869140625, + "learning_rate": 9.161834013629013e-07, + "loss": 0.00012664358655456454, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 982, + "train_speed(iter/s)": 0.022549 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.375, + "completions/mean_length": 83.81250286102295, + "completions/min_length": 36.0, + "epoch": 1.953090096798213, + "grad_norm": 0.0061883987722713436, + "kl": 0.1102294921875, + "learning_rate": 9.160084691987655e-07, + "loss": 0.00011022534454241395, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 983, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.75, + "completions/mean_length": 87.33333587646484, + "completions/min_length": 41.125, + "epoch": 1.9550757011665425, + "grad_norm": 0.005076111608750147, + "kl": 0.11309814453125, + "learning_rate": 9.158333714127724e-07, + "loss": 0.00011316935706418008, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 984, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 76.03125190734863, + "completions/min_length": 33.75, + "epoch": 1.9570613055348722, + "grad_norm": 0.02151332621758537, + "kl": 0.10296630859375, + "learning_rate": 9.15658108074632e-07, + "loss": 0.00010303198359906673, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 985, + "train_speed(iter/s)": 0.022547 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 87.80208587646484, + "completions/min_length": 43.25, + "epoch": 1.9590469099032017, + "grad_norm": 0.7284794464559009, + "kl": 0.1131591796875, + "learning_rate": 9.154826792541208e-07, + "loss": 0.005956694483757019, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 986, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 75.23958539962769, + "completions/min_length": 37.875, + "epoch": 1.9610325142715315, + "grad_norm": 0.07276071906342804, + "kl": 0.167724609375, + "learning_rate": 9.153070850210802e-07, + "loss": 0.00016750108625274152, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 987, + "train_speed(iter/s)": 0.022549 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 187.875, + "completions/mean_length": 93.04166889190674, + "completions/min_length": 42.625, + "epoch": 1.963018118639861, + "grad_norm": 0.8704256667366844, + "kl": 0.130126953125, + "learning_rate": 9.151313254454185e-07, + "loss": 0.0001302175223827362, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 988, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.625, + "completions/mean_length": 89.47916984558105, + "completions/min_length": 43.375, + "epoch": 1.9650037230081905, + "grad_norm": 0.005230738743073667, + "kl": 0.0809326171875, + "learning_rate": 9.149554005971092e-07, + "loss": 8.098968828562647e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 989, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 83.302086353302, + "completions/min_length": 37.5, + "epoch": 1.9669893273765202, + "grad_norm": 0.0058885007571412495, + "kl": 0.09527587890625, + "learning_rate": 9.147793105461915e-07, + "loss": 9.51946058194153e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 990, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 82.6666693687439, + "completions/min_length": 35.75, + "epoch": 1.96897493174485, + "grad_norm": 0.005708314042677562, + "kl": 0.10791015625, + "learning_rate": 9.146030553627708e-07, + "loss": 0.00010792820830829442, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 991, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.625, + "completions/mean_length": 85.27083492279053, + "completions/min_length": 37.875, + "epoch": 1.9709605361131795, + "grad_norm": 1.1799515770943712, + "kl": 0.13043212890625, + "learning_rate": 9.144266351170183e-07, + "loss": -0.00883845891803503, + "memory(GiB)": 94.21, + "reward": 1.6041666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6041666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 992, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 74.79166889190674, + "completions/min_length": 34.25, + "epoch": 1.972946140481509, + "grad_norm": 0.007826950674859187, + "kl": 0.11181640625, + "learning_rate": 9.142500498791701e-07, + "loss": 0.00011182423622813076, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 993, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 82.84375286102295, + "completions/min_length": 35.0, + "epoch": 1.9749317448498387, + "grad_norm": 1.2607068242976633, + "kl": 0.12030029296875, + "learning_rate": 9.14073299719529e-07, + "loss": -0.004849262535572052, + "memory(GiB)": 94.21, + "reward": 1.7291666865348816, + "reward_std": 0.05103103630244732, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.33328525722026825, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 994, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 76.88541984558105, + "completions/min_length": 31.5, + "epoch": 1.9769173492181684, + "grad_norm": 0.007823724574330898, + "kl": 0.12518310546875, + "learning_rate": 9.138963847084629e-07, + "loss": 0.00012523704208433628, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 995, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.375, + "completions/mean_length": 69.83333539962769, + "completions/min_length": 31.125, + "epoch": 1.978902953586498, + "grad_norm": 0.008872477191515333, + "kl": 0.12982177734375, + "learning_rate": 9.137193049164053e-07, + "loss": 0.00012983387568965554, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 996, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 74.97916841506958, + "completions/min_length": 32.125, + "epoch": 1.9808885579548274, + "grad_norm": 0.00861972477172187, + "kl": 0.1126708984375, + "learning_rate": 9.135420604138557e-07, + "loss": 0.00011255948629695922, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 997, + "train_speed(iter/s)": 0.022539 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 79.17708444595337, + "completions/min_length": 36.125, + "epoch": 1.9828741623231572, + "grad_norm": 0.00820892144127085, + "kl": 0.1422119140625, + "learning_rate": 9.133646512713787e-07, + "loss": 0.00014218440628610551, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 998, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 77.16666793823242, + "completions/min_length": 37.75, + "epoch": 1.9848597666914867, + "grad_norm": 0.008523308134671392, + "kl": 0.120849609375, + "learning_rate": 9.131870775596049e-07, + "loss": 0.00012071570381522179, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 999, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 83.71875333786011, + "completions/min_length": 35.25, + "epoch": 1.9868453710598164, + "grad_norm": 0.007716056017196739, + "kl": 0.11724853515625, + "learning_rate": 9.1300933934923e-07, + "loss": 0.00011719940084731206, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1000, + "train_speed(iter/s)": 0.022539 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 79.3854193687439, + "completions/min_length": 33.625, + "epoch": 1.988830975428146, + "grad_norm": 0.08496033932824887, + "kl": 0.2257080078125, + "learning_rate": 9.128314367110153e-07, + "loss": 0.00022642673866357654, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1001, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 81.94791984558105, + "completions/min_length": 39.0, + "epoch": 1.9908165797964754, + "grad_norm": 0.7926007549032419, + "kl": 0.1217041015625, + "learning_rate": 9.126533697157878e-07, + "loss": 0.0036898914258927107, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1002, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 72.55208492279053, + "completions/min_length": 34.125, + "epoch": 1.9928021841648051, + "grad_norm": 0.008641688889488621, + "kl": 0.1109619140625, + "learning_rate": 9.124751384344399e-07, + "loss": 0.00011089236795669422, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1003, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.125, + "completions/mean_length": 83.1666669845581, + "completions/min_length": 40.25, + "epoch": 1.9947877885331349, + "grad_norm": 0.008197931670343743, + "kl": 0.1312255859375, + "learning_rate": 9.122967429379291e-07, + "loss": 0.00013110280269756913, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1004, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.0, + "completions/mean_length": 85.54166841506958, + "completions/min_length": 37.0, + "epoch": 1.9967733929014644, + "grad_norm": 0.007737891161198206, + "kl": 0.118408203125, + "learning_rate": 9.121181832972784e-07, + "loss": 0.00011833791359094903, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1005, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 72.94791841506958, + "completions/min_length": 37.625, + "epoch": 1.998758997269794, + "grad_norm": 0.008068687150658662, + "kl": 0.11572265625, + "learning_rate": 9.119394595835764e-07, + "loss": 0.00011578643170651048, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1006, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 69.75000190734863, + "completions/min_length": 35.625, + "epoch": 2.0019856043683295, + "grad_norm": 0.007674557960704678, + "kl": 0.119384765625, + "learning_rate": 9.117605718679765e-07, + "loss": 0.00011955931404372677, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1007, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 73.46875190734863, + "completions/min_length": 30.75, + "epoch": 2.003971208736659, + "grad_norm": 0.006536672252896576, + "kl": 0.1129150390625, + "learning_rate": 9.115815202216981e-07, + "loss": 0.00011291628470644355, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1008, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.875, + "completions/mean_length": 75.41666889190674, + "completions/min_length": 30.25, + "epoch": 2.005956813104989, + "grad_norm": 0.8521227456772538, + "kl": 0.1092529296875, + "learning_rate": 9.114023047160253e-07, + "loss": 0.002899589715525508, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1009, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 77.03125190734863, + "completions/min_length": 38.125, + "epoch": 2.0079424174733185, + "grad_norm": 0.006690528016540262, + "kl": 0.1190185546875, + "learning_rate": 9.112229254223077e-07, + "loss": 0.00011910950706806034, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1010, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 67.83333539962769, + "completions/min_length": 27.25, + "epoch": 2.009928021841648, + "grad_norm": 0.0056127323151639675, + "kl": 0.12060546875, + "learning_rate": 9.110433824119598e-07, + "loss": 0.00012048385542584583, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1011, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 74.37500190734863, + "completions/min_length": 31.625, + "epoch": 2.0119136262099775, + "grad_norm": 3.354626009773243, + "kl": 0.16973876953125, + "learning_rate": 9.108636757564618e-07, + "loss": 0.00016950443387031555, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.375051774084568, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1012, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 79.09375333786011, + "completions/min_length": 35.0, + "epoch": 2.0138992305783074, + "grad_norm": 0.00575384200275158, + "kl": 0.1220703125, + "learning_rate": 9.106838055273587e-07, + "loss": 0.00012213800800964236, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1013, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 73.59375286102295, + "completions/min_length": 33.0, + "epoch": 2.015884834946637, + "grad_norm": 0.006643985887206167, + "kl": 0.11700439453125, + "learning_rate": 9.105037717962604e-07, + "loss": 0.00011702807387337089, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1014, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 74.78125286102295, + "completions/min_length": 34.375, + "epoch": 2.0178704393149665, + "grad_norm": 0.006479767740960207, + "kl": 0.117919921875, + "learning_rate": 9.103235746348426e-07, + "loss": 0.00011776233441196382, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1015, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.25, + "completions/mean_length": 75.37500286102295, + "completions/min_length": 30.625, + "epoch": 2.019856043683296, + "grad_norm": 0.005070033278772015, + "kl": 0.10150146484375, + "learning_rate": 9.101432141148453e-07, + "loss": 0.00010151336755370721, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1016, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 73.51041793823242, + "completions/min_length": 37.0, + "epoch": 2.021841648051626, + "grad_norm": 0.005564099290104957, + "kl": 0.08929443359375, + "learning_rate": 9.099626903080742e-07, + "loss": 8.932495256885886e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1017, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 70.98958587646484, + "completions/min_length": 35.125, + "epoch": 2.0238272524199554, + "grad_norm": 0.006085392073596954, + "kl": 0.12060546875, + "learning_rate": 9.097820032863992e-07, + "loss": 0.00012077610881533474, + "memory(GiB)": 94.21, + "reward": 1.5, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1018, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 75.84375143051147, + "completions/min_length": 32.875, + "epoch": 2.025812856788285, + "grad_norm": 0.005497172903734264, + "kl": 0.0960693359375, + "learning_rate": 9.096011531217561e-07, + "loss": 9.60447359830141e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1019, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 78.83333587646484, + "completions/min_length": 34.375, + "epoch": 2.0277984611566144, + "grad_norm": 0.923182384940218, + "kl": 0.11322021484375, + "learning_rate": 9.094201398861451e-07, + "loss": -0.005555596202611923, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1020, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 87.31250286102295, + "completions/min_length": 43.0, + "epoch": 2.029784065524944, + "grad_norm": 0.8479972341631841, + "kl": 0.1124267578125, + "learning_rate": 9.092389636516313e-07, + "loss": -0.00932252500206232, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1021, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.0, + "completions/mean_length": 73.57291793823242, + "completions/min_length": 32.125, + "epoch": 2.031769669893274, + "grad_norm": 2.0600097902769576, + "kl": 0.15057373046875, + "learning_rate": 9.090576244903452e-07, + "loss": 0.003625646000728011, + "memory(GiB)": 94.21, + "reward": 1.59375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.59375, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1022, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 83.42708492279053, + "completions/min_length": 33.25, + "epoch": 2.0337552742616034, + "grad_norm": 0.8182778996245744, + "kl": 0.1546630859375, + "learning_rate": 9.088761224744812e-07, + "loss": 0.007634392939507961, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1023, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.625, + "completions/mean_length": 87.76041793823242, + "completions/min_length": 36.0, + "epoch": 2.035740878629933, + "grad_norm": 0.913421053950445, + "kl": 0.522216796875, + "learning_rate": 9.086944576762996e-07, + "loss": -0.009745832532644272, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1024, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 77.83333444595337, + "completions/min_length": 35.875, + "epoch": 2.0377264829982624, + "grad_norm": 0.0074229322061054245, + "kl": 0.09735107421875, + "learning_rate": 9.085126301681247e-07, + "loss": 9.729337762109935e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1025, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.625, + "completions/mean_length": 81.39583587646484, + "completions/min_length": 31.5, + "epoch": 2.0397120873665924, + "grad_norm": 0.011784362958371148, + "kl": 0.10107421875, + "learning_rate": 9.083306400223463e-07, + "loss": 0.00010111034498549998, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1026, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 72.42708396911621, + "completions/min_length": 35.625, + "epoch": 2.041697691734922, + "grad_norm": 0.007941043755597331, + "kl": 0.09197998046875, + "learning_rate": 9.081484873114185e-07, + "loss": 9.204033995047212e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1027, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 82.03125190734863, + "completions/min_length": 41.875, + "epoch": 2.0436832961032514, + "grad_norm": 0.7966503026283959, + "kl": 0.10577392578125, + "learning_rate": 9.079661721078597e-07, + "loss": -0.0001545312552480027, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166669771075, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1028, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 79.12500190734863, + "completions/min_length": 36.5, + "epoch": 2.045668900471581, + "grad_norm": 0.006309244666020647, + "kl": 0.09771728515625, + "learning_rate": 9.077836944842541e-07, + "loss": 9.768415475264192e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1029, + "train_speed(iter/s)": 0.022516 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.875, + "completions/mean_length": 76.51041841506958, + "completions/min_length": 33.375, + "epoch": 2.047654504839911, + "grad_norm": 0.004705715962469841, + "kl": 0.12359619140625, + "learning_rate": 9.076010545132496e-07, + "loss": 0.00012360823166090995, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1030, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.5, + "completions/mean_length": 80.72916984558105, + "completions/min_length": 35.625, + "epoch": 2.0496401092082404, + "grad_norm": 0.006474807694392289, + "kl": 0.107666015625, + "learning_rate": 9.074182522675591e-07, + "loss": 0.00010779083822853863, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1031, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.75, + "completions/mean_length": 82.38541793823242, + "completions/min_length": 34.5, + "epoch": 2.05162571357657, + "grad_norm": 0.006962552036572687, + "kl": 0.1025390625, + "learning_rate": 9.0723528781996e-07, + "loss": 0.0001025118981488049, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1032, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.125, + "completions/mean_length": 89.45833587646484, + "completions/min_length": 39.25, + "epoch": 2.0536113179448994, + "grad_norm": 0.004949851198072381, + "kl": 0.1192626953125, + "learning_rate": 9.070521612432946e-07, + "loss": 0.00011945145524805412, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1033, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 84.56250190734863, + "completions/min_length": 33.0, + "epoch": 2.055596922313229, + "grad_norm": 0.6804908403865937, + "kl": 0.11773681640625, + "learning_rate": 9.068688726104696e-07, + "loss": 0.0001176235600723885, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1034, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 84.18750238418579, + "completions/min_length": 41.875, + "epoch": 2.057582526681559, + "grad_norm": 0.005253636511616478, + "kl": 0.12042236328125, + "learning_rate": 9.066854219944555e-07, + "loss": 0.00012024200987070799, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1035, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 88.39583587646484, + "completions/min_length": 32.25, + "epoch": 2.0595681310498883, + "grad_norm": 0.84495968668063, + "kl": 0.11614990234375, + "learning_rate": 9.065018094682885e-07, + "loss": -0.004611310549080372, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1036, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.75, + "completions/mean_length": 97.18750190734863, + "completions/min_length": 42.5, + "epoch": 2.061553735418218, + "grad_norm": 0.7685448506632714, + "kl": 0.1378173828125, + "learning_rate": 9.063180351050685e-07, + "loss": 0.0009650582214817405, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.708333333954215, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1037, + "train_speed(iter/s)": 0.022516 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.25, + "completions/mean_length": 78.1354193687439, + "completions/min_length": 32.75, + "epoch": 2.0635393397865474, + "grad_norm": 0.008468823906944947, + "kl": 0.11431884765625, + "learning_rate": 9.0613409897796e-07, + "loss": 0.00011438851652201265, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1038, + "train_speed(iter/s)": 0.022516 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.0, + "completions/mean_length": 90.63541793823242, + "completions/min_length": 36.375, + "epoch": 2.0655249441548773, + "grad_norm": 0.007569330285043941, + "kl": 0.107666015625, + "learning_rate": 9.059500011601917e-07, + "loss": 0.00010769099753815681, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1039, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.375, + "completions/mean_length": 82.1979193687439, + "completions/min_length": 32.5, + "epoch": 2.067510548523207, + "grad_norm": 0.007785800349068557, + "kl": 0.12628173828125, + "learning_rate": 9.057657417250572e-07, + "loss": 0.00012620570487342775, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1040, + "train_speed(iter/s)": 0.022511 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.0, + "completions/mean_length": 86.55208683013916, + "completions/min_length": 34.75, + "epoch": 2.0694961528915363, + "grad_norm": 0.0074693102772766294, + "kl": 0.11376953125, + "learning_rate": 9.05581320745914e-07, + "loss": 0.00011375600297469646, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1041, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 81.98958539962769, + "completions/min_length": 37.0, + "epoch": 2.071481757259866, + "grad_norm": 0.0065098106774316095, + "kl": 0.111083984375, + "learning_rate": 9.053967382961838e-07, + "loss": 0.00011106421879958361, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1042, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.875, + "completions/mean_length": 84.94791793823242, + "completions/min_length": 34.5, + "epoch": 2.073467361628196, + "grad_norm": 0.007556840446950588, + "kl": 0.112548828125, + "learning_rate": 9.052119944493531e-07, + "loss": 0.00011260159953963012, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1043, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 84.10416841506958, + "completions/min_length": 43.0, + "epoch": 2.0754529659965253, + "grad_norm": 1.7653653031823724, + "kl": 0.12640380859375, + "learning_rate": 9.050270892789724e-07, + "loss": -0.0049756355583667755, + "memory(GiB)": 94.21, + "reward": 1.7604166865348816, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.7604166697710752, + "rewards/CineAccuracyORM/std": 0.1783013790845871, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1044, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 87.04166889190674, + "completions/min_length": 46.0, + "epoch": 2.077438570364855, + "grad_norm": 0.9740486323335305, + "kl": 0.1143798828125, + "learning_rate": 9.048420228586562e-07, + "loss": -0.005567749496549368, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1045, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.5, + "completions/mean_length": 89.86458683013916, + "completions/min_length": 28.75, + "epoch": 2.0794241747331843, + "grad_norm": 0.0063396094559172375, + "kl": 0.1116943359375, + "learning_rate": 9.046567952620834e-07, + "loss": 0.000111713758087717, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1046, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.75, + "completions/mean_length": 92.41666889190674, + "completions/min_length": 39.25, + "epoch": 2.081409779101514, + "grad_norm": 0.8248505321288238, + "kl": 0.10968017578125, + "learning_rate": 9.044714065629973e-07, + "loss": -0.009053878486156464, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166669771075, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1047, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.0, + "completions/mean_length": 87.42708587646484, + "completions/min_length": 34.875, + "epoch": 2.0833953834698438, + "grad_norm": 1.248438982243334, + "kl": 0.13739013671875, + "learning_rate": 9.042858568352048e-07, + "loss": 0.0103627173230052, + "memory(GiB)": 94.21, + "reward": 1.4791666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.4791666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1048, + "train_speed(iter/s)": 0.022501 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.0, + "completions/mean_length": 90.11458587646484, + "completions/min_length": 37.875, + "epoch": 2.0853809878381733, + "grad_norm": 1.2050207981394843, + "kl": 0.11712646484375, + "learning_rate": 9.041001461525773e-07, + "loss": 0.00585666298866272, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1049, + "train_speed(iter/s)": 0.022501 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.25, + "completions/mean_length": 87.25000286102295, + "completions/min_length": 40.0, + "epoch": 2.087366592206503, + "grad_norm": 0.0072672567395674105, + "kl": 0.1226806640625, + "learning_rate": 9.039142745890504e-07, + "loss": 0.00012268760474398732, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1050, + "train_speed(iter/s)": 0.022498 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 90.84375286102295, + "completions/min_length": 40.0, + "epoch": 2.0893521965748323, + "grad_norm": 1.0383994865817225, + "kl": 0.112548828125, + "learning_rate": 9.037282422186232e-07, + "loss": -0.0059965322725474834, + "memory(GiB)": 94.21, + "reward": 1.8750000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8750000074505806, + "rewards/CineAccuracyORM/std": 0.17548104748129845, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1051, + "train_speed(iter/s)": 0.0225 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 89.78125190734863, + "completions/min_length": 34.125, + "epoch": 2.0913378009431622, + "grad_norm": 0.007294551673038089, + "kl": 0.12835693359375, + "learning_rate": 9.035420491153595e-07, + "loss": 0.00012832277570851147, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1052, + "train_speed(iter/s)": 0.022498 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 80.35416841506958, + "completions/min_length": 34.875, + "epoch": 2.0933234053114917, + "grad_norm": 0.007842351601243245, + "kl": 0.11505126953125, + "learning_rate": 9.033556953533865e-07, + "loss": 0.00011521436681505293, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1053, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 203.875, + "completions/mean_length": 97.59375190734863, + "completions/min_length": 44.25, + "epoch": 2.0953090096798213, + "grad_norm": 1.067991023922792, + "kl": 0.12652587890625, + "learning_rate": 9.031691810068958e-07, + "loss": 0.0001265754399355501, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393530294299126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1054, + "train_speed(iter/s)": 0.022497 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 80.47916793823242, + "completions/min_length": 31.5, + "epoch": 2.0972946140481508, + "grad_norm": 0.009867823516612157, + "kl": 0.1175537109375, + "learning_rate": 9.029825061501423e-07, + "loss": 0.00011762286158045754, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1055, + "train_speed(iter/s)": 0.022497 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 196.625, + "completions/mean_length": 94.07291984558105, + "completions/min_length": 37.125, + "epoch": 2.0992802184164807, + "grad_norm": 0.007818309239812216, + "kl": 0.11761474609375, + "learning_rate": 9.02795670857446e-07, + "loss": 0.00011771730351028964, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1056, + "train_speed(iter/s)": 0.022494 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 77.2291693687439, + "completions/min_length": 40.25, + "epoch": 2.1012658227848102, + "grad_norm": 0.008170669737652658, + "kl": 0.12646484375, + "learning_rate": 9.026086752031895e-07, + "loss": 0.00012647028779610991, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1057, + "train_speed(iter/s)": 0.022496 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.5, + "completions/mean_length": 87.06250190734863, + "completions/min_length": 31.5, + "epoch": 2.1032514271531397, + "grad_norm": 0.00757295019159829, + "kl": 0.128662109375, + "learning_rate": 9.024215192618199e-07, + "loss": 0.000128539526485838, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1058, + "train_speed(iter/s)": 0.022492 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.5, + "completions/mean_length": 80.56250143051147, + "completions/min_length": 30.75, + "epoch": 2.1052370315214692, + "grad_norm": 0.006256571396549088, + "kl": 0.14801025390625, + "learning_rate": 9.022342031078478e-07, + "loss": 0.0001482020306866616, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1059, + "train_speed(iter/s)": 0.022493 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 76.38541841506958, + "completions/min_length": 28.0, + "epoch": 2.1072226358897987, + "grad_norm": 0.00860863893703562, + "kl": 0.12225341796875, + "learning_rate": 9.020467268158481e-07, + "loss": 0.00012234578025527298, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1060, + "train_speed(iter/s)": 0.022496 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.75, + "completions/mean_length": 77.58333587646484, + "completions/min_length": 29.75, + "epoch": 2.1092082402581287, + "grad_norm": 0.009146683724391846, + "kl": 0.135986328125, + "learning_rate": 9.018590904604588e-07, + "loss": 0.00013591634342446923, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1061, + "train_speed(iter/s)": 0.022496 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 78.21875333786011, + "completions/min_length": 31.375, + "epoch": 2.111193844626458, + "grad_norm": 0.008593579235551981, + "kl": 0.1422119140625, + "learning_rate": 9.016712941163823e-07, + "loss": 0.00014218749129213393, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1062, + "train_speed(iter/s)": 0.022497 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 83.73958492279053, + "completions/min_length": 35.625, + "epoch": 2.1131794489947877, + "grad_norm": 0.007734910252291212, + "kl": 0.1141357421875, + "learning_rate": 9.014833378583839e-07, + "loss": 0.00011415178596507758, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1063, + "train_speed(iter/s)": 0.022494 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.375, + "completions/mean_length": 85.76041889190674, + "completions/min_length": 35.875, + "epoch": 2.115165053363117, + "grad_norm": 0.007636118890987433, + "kl": 0.10968017578125, + "learning_rate": 9.012952217612933e-07, + "loss": 0.00010967616981361061, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1064, + "train_speed(iter/s)": 0.022489 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 208.0, + "completions/mean_length": 88.33333492279053, + "completions/min_length": 32.5, + "epoch": 2.117150657731447, + "grad_norm": 0.007225957331834589, + "kl": 0.11090087890625, + "learning_rate": 9.011069459000034e-07, + "loss": 0.00011098825780209154, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1065, + "train_speed(iter/s)": 0.022485 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.75, + "completions/mean_length": 79.92708587646484, + "completions/min_length": 32.625, + "epoch": 2.1191362620997767, + "grad_norm": 0.7410801038965984, + "kl": 0.10748291015625, + "learning_rate": 9.00918510349471e-07, + "loss": 0.01757071539759636, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1066, + "train_speed(iter/s)": 0.022485 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.25, + "completions/mean_length": 83.03125238418579, + "completions/min_length": 27.5, + "epoch": 2.121121866468106, + "grad_norm": 0.007781396418530573, + "kl": 0.11456298828125, + "learning_rate": 9.007299151847161e-07, + "loss": 0.00011462807015050203, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1067, + "train_speed(iter/s)": 0.022484 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.25, + "completions/mean_length": 71.04166793823242, + "completions/min_length": 28.625, + "epoch": 2.1231074708364357, + "grad_norm": 0.007355446293214044, + "kl": 0.10284423828125, + "learning_rate": 9.005411604808226e-07, + "loss": 0.00010296957771060988, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1068, + "train_speed(iter/s)": 0.022487 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.25, + "completions/mean_length": 78.66666841506958, + "completions/min_length": 35.5, + "epoch": 2.1250930752047656, + "grad_norm": 0.17747489489888124, + "kl": 0.228271484375, + "learning_rate": 9.003522463129377e-07, + "loss": 0.00022821266611572355, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1069, + "train_speed(iter/s)": 0.022487 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.625, + "completions/mean_length": 79.28125143051147, + "completions/min_length": 29.625, + "epoch": 2.127078679573095, + "grad_norm": 0.0060307482758721765, + "kl": 0.13299560546875, + "learning_rate": 9.001631727562723e-07, + "loss": 0.0001330701052211225, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1070, + "train_speed(iter/s)": 0.022487 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 75.63541841506958, + "completions/min_length": 33.125, + "epoch": 2.1290642839414247, + "grad_norm": 0.007732253928501234, + "kl": 0.10595703125, + "learning_rate": 8.999739398861005e-07, + "loss": 0.00010610777826514095, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1071, + "train_speed(iter/s)": 0.022488 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.25, + "completions/mean_length": 64.06250143051147, + "completions/min_length": 31.75, + "epoch": 2.131049888309754, + "grad_norm": 0.006459854447195919, + "kl": 0.11016845703125, + "learning_rate": 8.9978454777776e-07, + "loss": 0.00011024588457075879, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1072, + "train_speed(iter/s)": 0.022487 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 70.14583587646484, + "completions/min_length": 25.75, + "epoch": 2.1330354926780837, + "grad_norm": 0.016317735798137844, + "kl": 0.1470947265625, + "learning_rate": 8.995949965066518e-07, + "loss": 0.0001472176518291235, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1073, + "train_speed(iter/s)": 0.022485 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 66.00000238418579, + "completions/min_length": 28.25, + "epoch": 2.1350210970464136, + "grad_norm": 0.0065803074565239875, + "kl": 0.108642578125, + "learning_rate": 8.994052861482404e-07, + "loss": 0.00010851970000658184, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1074, + "train_speed(iter/s)": 0.022487 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 76.14583444595337, + "completions/min_length": 38.25, + "epoch": 2.137006701414743, + "grad_norm": 0.007288344398015291, + "kl": 0.10345458984375, + "learning_rate": 8.992154167780535e-07, + "loss": 0.00010349667718401179, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1075, + "train_speed(iter/s)": 0.022487 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 75.72916889190674, + "completions/min_length": 32.125, + "epoch": 2.1389923057830726, + "grad_norm": 0.010876767868097825, + "kl": 0.110595703125, + "learning_rate": 8.99025388471682e-07, + "loss": 0.00011058989184675738, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1076, + "train_speed(iter/s)": 0.022489 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.125, + "completions/mean_length": 73.35416841506958, + "completions/min_length": 28.75, + "epoch": 2.140977910151402, + "grad_norm": 0.006220464356667581, + "kl": 0.097412109375, + "learning_rate": 8.988352013047804e-07, + "loss": 9.744061389937997e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1077, + "train_speed(iter/s)": 0.022486 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 68.70833492279053, + "completions/min_length": 26.5, + "epoch": 2.142963514519732, + "grad_norm": 0.005311529691512592, + "kl": 0.0982666015625, + "learning_rate": 8.986448553530663e-07, + "loss": 9.82498750090599e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1078, + "train_speed(iter/s)": 0.022486 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.625, + "completions/mean_length": 88.31250333786011, + "completions/min_length": 35.375, + "epoch": 2.1449491188880616, + "grad_norm": 0.0056511354602318455, + "kl": 0.104736328125, + "learning_rate": 8.984543506923204e-07, + "loss": 0.0001046436809701845, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1079, + "train_speed(iter/s)": 0.022484 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 76.46875143051147, + "completions/min_length": 35.875, + "epoch": 2.146934723256391, + "grad_norm": 0.005674849337966061, + "kl": 0.121826171875, + "learning_rate": 8.982636873983866e-07, + "loss": 0.00012185898231109604, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1080, + "train_speed(iter/s)": 0.022486 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 72.55208587646484, + "completions/min_length": 34.875, + "epoch": 2.1489203276247206, + "grad_norm": 1.0265947895890666, + "kl": 0.10137939453125, + "learning_rate": 8.980728655471723e-07, + "loss": 0.008604519069194794, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1081, + "train_speed(iter/s)": 0.022486 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.25, + "completions/mean_length": 70.87500095367432, + "completions/min_length": 19.5, + "epoch": 2.1509059319930506, + "grad_norm": 0.005386526977581848, + "kl": 0.103363037109375, + "learning_rate": 8.978818852146476e-07, + "loss": 0.00010324212780687958, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1082, + "train_speed(iter/s)": 0.022485 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.625, + "completions/mean_length": 83.82291984558105, + "completions/min_length": 39.25, + "epoch": 2.15289153636138, + "grad_norm": 0.006918107163843888, + "kl": 0.11614990234375, + "learning_rate": 8.976907464768458e-07, + "loss": 0.00011630808876361698, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1083, + "train_speed(iter/s)": 0.022482 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.5, + "completions/mean_length": 61.73958444595337, + "completions/min_length": 26.25, + "epoch": 2.1548771407297096, + "grad_norm": 0.00863867048723788, + "kl": 0.108795166015625, + "learning_rate": 8.974994494098634e-07, + "loss": 0.00010880948684643954, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1084, + "train_speed(iter/s)": 0.022482 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 77.50000381469727, + "completions/min_length": 38.0, + "epoch": 2.156862745098039, + "grad_norm": 0.018856742454225597, + "kl": 0.124755859375, + "learning_rate": 8.973079940898596e-07, + "loss": 0.0001246890751644969, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1085, + "train_speed(iter/s)": 0.022483 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.25, + "completions/mean_length": 67.0104193687439, + "completions/min_length": 27.5, + "epoch": 2.1588483494663686, + "grad_norm": 0.02102008715921952, + "kl": 0.1031494140625, + "learning_rate": 8.971163805930572e-07, + "loss": 0.00010311186633771285, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1086, + "train_speed(iter/s)": 0.022486 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.5, + "completions/mean_length": 66.42708539962769, + "completions/min_length": 31.375, + "epoch": 2.1608339538346986, + "grad_norm": 0.00669141793201854, + "kl": 0.0999755859375, + "learning_rate": 8.969246089957414e-07, + "loss": 9.997960296459496e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1087, + "train_speed(iter/s)": 0.022489 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.0, + "completions/mean_length": 68.00000238418579, + "completions/min_length": 28.75, + "epoch": 2.162819558203028, + "grad_norm": 0.01247525483515014, + "kl": 0.08660888671875, + "learning_rate": 8.967326793742606e-07, + "loss": 8.656363934278488e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1088, + "train_speed(iter/s)": 0.022493 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.125, + "completions/mean_length": 68.50000238418579, + "completions/min_length": 29.75, + "epoch": 2.1648051625713576, + "grad_norm": 0.00547189786184364, + "kl": 0.10394287109375, + "learning_rate": 8.965405918050263e-07, + "loss": 0.00010401105100754648, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1089, + "train_speed(iter/s)": 0.022494 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 72.7604193687439, + "completions/min_length": 25.5, + "epoch": 2.166790766939687, + "grad_norm": 1.0410738737807892, + "kl": 0.1177978515625, + "learning_rate": 8.963483463645124e-07, + "loss": 0.012273518368601799, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1090, + "train_speed(iter/s)": 0.022492 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.75, + "completions/mean_length": 65.38541841506958, + "completions/min_length": 21.625, + "epoch": 2.168776371308017, + "grad_norm": 0.005649499770030795, + "kl": 0.0968017578125, + "learning_rate": 8.96155943129256e-07, + "loss": 9.678161586634815e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1091, + "train_speed(iter/s)": 0.022493 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 70.20833492279053, + "completions/min_length": 28.375, + "epoch": 2.1707619756763465, + "grad_norm": 0.006201369966540897, + "kl": 0.0999755859375, + "learning_rate": 8.95963382175857e-07, + "loss": 9.990476246457547e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1092, + "train_speed(iter/s)": 0.022495 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 66.96875143051147, + "completions/min_length": 26.0, + "epoch": 2.172747580044676, + "grad_norm": 0.005604728673251856, + "kl": 0.08880615234375, + "learning_rate": 8.957706635809779e-07, + "loss": 8.882778638508171e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1093, + "train_speed(iter/s)": 0.022497 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 59.57291793823242, + "completions/min_length": 25.75, + "epoch": 2.1747331844130056, + "grad_norm": 1.473368609186764, + "kl": 0.09686279296875, + "learning_rate": 8.955777874213443e-07, + "loss": 0.0037911073304712772, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1094, + "train_speed(iter/s)": 0.022498 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.625, + "completions/mean_length": 61.625000953674316, + "completions/min_length": 30.75, + "epoch": 2.1767187887813355, + "grad_norm": 0.007552661565256119, + "kl": 0.09954833984375, + "learning_rate": 8.953847537737441e-07, + "loss": 9.963195043383166e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1095, + "train_speed(iter/s)": 0.022501 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 66.14583587646484, + "completions/min_length": 22.875, + "epoch": 2.178704393149665, + "grad_norm": 0.005680964975348198, + "kl": 0.097900390625, + "learning_rate": 8.951915627150282e-07, + "loss": 9.784003486856818e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1096, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 76.58333587646484, + "completions/min_length": 29.625, + "epoch": 2.1806899975179945, + "grad_norm": 0.0752183079850825, + "kl": 0.2479248046875, + "learning_rate": 8.9499821432211e-07, + "loss": 0.00024750837474130094, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1097, + "train_speed(iter/s)": 0.022506 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.375, + "completions/mean_length": 63.8854193687439, + "completions/min_length": 21.375, + "epoch": 2.182675601886324, + "grad_norm": 0.005957281964740377, + "kl": 0.09930419921875, + "learning_rate": 8.948047086719658e-07, + "loss": 9.928665531333536e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1098, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 62.17708492279053, + "completions/min_length": 24.375, + "epoch": 2.1846612062546535, + "grad_norm": 0.006526659169941049, + "kl": 0.1005859375, + "learning_rate": 8.946110458416343e-07, + "loss": 0.00010057906911242753, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1099, + "train_speed(iter/s)": 0.022511 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.625, + "completions/mean_length": 65.38541889190674, + "completions/min_length": 32.0, + "epoch": 2.1866468106229835, + "grad_norm": 0.004639691676157582, + "kl": 0.09539794921875, + "learning_rate": 8.944172259082165e-07, + "loss": 9.535030403640121e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1100, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 64.28125143051147, + "completions/min_length": 25.125, + "epoch": 2.188632414991313, + "grad_norm": 0.005595707415697775, + "kl": 0.107666015625, + "learning_rate": 8.942232489488768e-07, + "loss": 0.0001077161286957562, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1101, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 72.50000190734863, + "completions/min_length": 27.75, + "epoch": 2.1906180193596425, + "grad_norm": 0.0057670649819886515, + "kl": 0.10455322265625, + "learning_rate": 8.940291150408412e-07, + "loss": 0.00010457433381816372, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1102, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.125, + "completions/mean_length": 66.34375238418579, + "completions/min_length": 27.875, + "epoch": 2.192603623727972, + "grad_norm": 0.005926890020697525, + "kl": 0.10186767578125, + "learning_rate": 8.938348242613985e-07, + "loss": 0.00010185446444666013, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1103, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 68.21875143051147, + "completions/min_length": 26.25, + "epoch": 2.194589228096302, + "grad_norm": 1.2148774553492736, + "kl": 0.106689453125, + "learning_rate": 8.936403766879003e-07, + "loss": 0.0001067506818799302, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1104, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.5, + "completions/mean_length": 62.895835876464844, + "completions/min_length": 21.0, + "epoch": 2.1965748324646315, + "grad_norm": 0.007109448105471694, + "kl": 0.11492919921875, + "learning_rate": 8.934457723977601e-07, + "loss": 0.0001150758471339941, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1105, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.625, + "completions/mean_length": 63.63541889190674, + "completions/min_length": 22.5, + "epoch": 2.198560436832961, + "grad_norm": 0.007050399758887717, + "kl": 0.09521484375, + "learning_rate": 8.932510114684542e-07, + "loss": 9.520860476186499e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1106, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 73.57291793823242, + "completions/min_length": 22.625, + "epoch": 2.2005460412012905, + "grad_norm": 0.006297160309753514, + "kl": 0.0894775390625, + "learning_rate": 8.930560939775207e-07, + "loss": 8.949158655013889e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1107, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 68.83333587646484, + "completions/min_length": 31.25, + "epoch": 2.2025316455696204, + "grad_norm": 0.006087930528336131, + "kl": 0.08905029296875, + "learning_rate": 8.92861020002561e-07, + "loss": 8.9189488789998e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1108, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.75, + "completions/mean_length": 69.37500238418579, + "completions/min_length": 29.375, + "epoch": 2.20451724993795, + "grad_norm": 0.00517770403285071, + "kl": 0.11773681640625, + "learning_rate": 8.926657896212379e-07, + "loss": 0.00011782505316659808, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1109, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 65.28125238418579, + "completions/min_length": 22.75, + "epoch": 2.2065028543062795, + "grad_norm": 0.005232882423403717, + "kl": 0.10479736328125, + "learning_rate": 8.924704029112767e-07, + "loss": 0.00010488222324056551, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1110, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 73.22916889190674, + "completions/min_length": 27.75, + "epoch": 2.208488458674609, + "grad_norm": 1.1187840684918657, + "kl": 0.110595703125, + "learning_rate": 8.922748599504653e-07, + "loss": 0.007763413246721029, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1111, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 76.35416889190674, + "completions/min_length": 31.375, + "epoch": 2.2104740630429385, + "grad_norm": 0.005699094021220918, + "kl": 0.11663818359375, + "learning_rate": 8.920791608166532e-07, + "loss": 0.0001166003494290635, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1112, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 64.26041793823242, + "completions/min_length": 17.0, + "epoch": 2.2124596674112684, + "grad_norm": 1.4140946198929851, + "kl": 0.0966796875, + "learning_rate": 8.918833055877526e-07, + "loss": -0.010779529809951782, + "memory(GiB)": 94.21, + "reward": 1.895833358168602, + "reward_std": 0.08330589719116688, + "rewards/CineAccuracyORM/mean": 0.8958333432674408, + "rewards/CineAccuracyORM/std": 0.16199621930718422, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1113, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 64.21875238418579, + "completions/min_length": 22.5, + "epoch": 2.214445271779598, + "grad_norm": 0.007454915580742328, + "kl": 0.08990478515625, + "learning_rate": 8.916872943417375e-07, + "loss": 8.991503273136914e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1114, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 59.437501430511475, + "completions/min_length": 17.75, + "epoch": 2.2164308761479274, + "grad_norm": 0.006675639053861326, + "kl": 0.10076904296875, + "learning_rate": 8.914911271566444e-07, + "loss": 0.0001005775629892014, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1115, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 72.71875190734863, + "completions/min_length": 26.875, + "epoch": 2.218416480516257, + "grad_norm": 0.007704553634753637, + "kl": 0.10906982421875, + "learning_rate": 8.912948041105714e-07, + "loss": 0.00010915064194705337, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1116, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 72.52083587646484, + "completions/min_length": 25.75, + "epoch": 2.220402084884587, + "grad_norm": 1.5760444915876597, + "kl": 0.108367919921875, + "learning_rate": 8.910983252816793e-07, + "loss": 0.022869249805808067, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1117, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.5, + "completions/mean_length": 68.47916984558105, + "completions/min_length": 23.5, + "epoch": 2.2223876892529164, + "grad_norm": 0.006970770514677127, + "kl": 0.1058349609375, + "learning_rate": 8.909016907481899e-07, + "loss": 0.00010587855649646372, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1118, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 73.65625286102295, + "completions/min_length": 28.0, + "epoch": 2.224373293621246, + "grad_norm": 0.007271300911421996, + "kl": 0.09588623046875, + "learning_rate": 8.907049005883882e-07, + "loss": 9.595022129360586e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1119, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 70.06250190734863, + "completions/min_length": 21.25, + "epoch": 2.2263588979895754, + "grad_norm": 0.008990944019646025, + "kl": 0.1044921875, + "learning_rate": 8.905079548806203e-07, + "loss": 0.0001044741366058588, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1120, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.125, + "completions/mean_length": 73.56250143051147, + "completions/min_length": 20.0, + "epoch": 2.2283445023579054, + "grad_norm": 0.009539653340183514, + "kl": 0.12603759765625, + "learning_rate": 8.903108537032943e-07, + "loss": 0.00012604176299646497, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1121, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 72.21875333786011, + "completions/min_length": 17.75, + "epoch": 2.230330106726235, + "grad_norm": 0.5214478080123781, + "kl": 0.1253662109375, + "learning_rate": 8.901135971348807e-07, + "loss": 0.021506179124116898, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1122, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 70.44791889190674, + "completions/min_length": 20.125, + "epoch": 2.2323157110945644, + "grad_norm": 1.5761155413424663, + "kl": 0.1195068359375, + "learning_rate": 8.899161852539115e-07, + "loss": 0.021353479474782944, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.05974817834794521, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.25145769491791725, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1123, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 65.62500190734863, + "completions/min_length": 15.25, + "epoch": 2.234301315462894, + "grad_norm": 1.179133881102878, + "kl": 0.1259765625, + "learning_rate": 8.897186181389804e-07, + "loss": 0.0004408457316458225, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1124, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 63.76041841506958, + "completions/min_length": 16.375, + "epoch": 2.2362869198312234, + "grad_norm": 0.011007733049022849, + "kl": 0.1217041015625, + "learning_rate": 8.895208958687435e-07, + "loss": 0.00012160721234977245, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1125, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 64.18750190734863, + "completions/min_length": 16.875, + "epoch": 2.2382725241995534, + "grad_norm": 0.714532313149669, + "kl": 0.16357421875, + "learning_rate": 8.893230185219176e-07, + "loss": -0.013636423274874687, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1126, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 76.60416793823242, + "completions/min_length": 18.375, + "epoch": 2.240258128567883, + "grad_norm": 1.320655274004454, + "kl": 0.13323974609375, + "learning_rate": 8.891249861772826e-07, + "loss": 0.007070520427078009, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.31391648203134537, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1127, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.0, + "completions/mean_length": 71.16666889190674, + "completions/min_length": 23.0, + "epoch": 2.2422437329362124, + "grad_norm": 0.00492348795733696, + "kl": 0.0859375, + "learning_rate": 8.88926798913679e-07, + "loss": 8.598122803959996e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1128, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 59.25000190734863, + "completions/min_length": 23.375, + "epoch": 2.244229337304542, + "grad_norm": 0.005775537796384591, + "kl": 0.11737060546875, + "learning_rate": 8.887284568100094e-07, + "loss": 0.00011733293649740517, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1129, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.125, + "completions/mean_length": 78.80208683013916, + "completions/min_length": 21.25, + "epoch": 2.246214941672872, + "grad_norm": 0.020279422360607837, + "kl": 0.1282958984375, + "learning_rate": 8.88529959945238e-07, + "loss": 0.00012847153993789107, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1130, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 68.73958444595337, + "completions/min_length": 19.0, + "epoch": 2.2482005460412013, + "grad_norm": 0.04876869427415476, + "kl": 0.168212890625, + "learning_rate": 8.88331308398391e-07, + "loss": 0.00016822278848849237, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1131, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.25, + "completions/mean_length": 71.41666889190674, + "completions/min_length": 16.0, + "epoch": 2.250186150409531, + "grad_norm": 2.2953937110179563, + "kl": 0.15020751953125, + "learning_rate": 8.881325022485554e-07, + "loss": 0.020766139030456543, + "memory(GiB)": 94.21, + "reward": 1.6875000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.6875000074505806, + "rewards/CineAccuracyORM/std": 0.2407601661980152, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1132, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.375, + "completions/mean_length": 84.04166984558105, + "completions/min_length": 28.375, + "epoch": 2.2521717547778604, + "grad_norm": 0.048150000507869155, + "kl": 0.1700439453125, + "learning_rate": 8.879335415748803e-07, + "loss": 0.00017000603838823736, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1133, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 65.95833492279053, + "completions/min_length": 24.375, + "epoch": 2.2541573591461903, + "grad_norm": 0.030746473142806686, + "kl": 0.143310546875, + "learning_rate": 8.877344264565764e-07, + "loss": 0.00014336864114739, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1134, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.75, + "completions/mean_length": 79.82291984558105, + "completions/min_length": 21.875, + "epoch": 2.25614296351452, + "grad_norm": 0.035033529873740685, + "kl": 0.14453125, + "learning_rate": 8.875351569729155e-07, + "loss": 0.00014458972145803273, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1135, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 75.25000190734863, + "completions/min_length": 21.75, + "epoch": 2.2581285678828493, + "grad_norm": 0.5105725162771391, + "kl": 0.1651611328125, + "learning_rate": 8.873357332032308e-07, + "loss": 0.023152565583586693, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1136, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 73.51041889190674, + "completions/min_length": 29.625, + "epoch": 2.260114172251179, + "grad_norm": 1.0343112659956455, + "kl": 0.15692138671875, + "learning_rate": 8.871361552269176e-07, + "loss": -0.0024548075161874294, + "memory(GiB)": 94.21, + "reward": 1.6145833432674408, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.6145833386108279, + "rewards/CineAccuracyORM/std": 0.21529880911111832, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1137, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.0, + "completions/mean_length": 71.2604193687439, + "completions/min_length": 17.625, + "epoch": 2.2620997766195083, + "grad_norm": 0.014767302642669088, + "kl": 0.09637451171875, + "learning_rate": 8.86936423123432e-07, + "loss": 9.631804277887568e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1138, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 74.3229193687439, + "completions/min_length": 26.75, + "epoch": 2.2640853809878383, + "grad_norm": 0.6742620185526368, + "kl": 0.15142822265625, + "learning_rate": 8.867365369722914e-07, + "loss": 0.004045408219099045, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1139, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 68.37500143051147, + "completions/min_length": 24.375, + "epoch": 2.266070985356168, + "grad_norm": 0.005572392431531506, + "kl": 0.10986328125, + "learning_rate": 8.865364968530751e-07, + "loss": 0.0001099392757168971, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1140, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 69.83333539962769, + "completions/min_length": 25.25, + "epoch": 2.2680565897244973, + "grad_norm": 0.004426089086485255, + "kl": 0.09466552734375, + "learning_rate": 8.863363028454231e-07, + "loss": 9.467983909416944e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1141, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.5, + "completions/mean_length": 92.48958492279053, + "completions/min_length": 39.375, + "epoch": 2.270042194092827, + "grad_norm": 1.2811652679665897, + "kl": 0.11492919921875, + "learning_rate": 8.861359550290371e-07, + "loss": 0.006492177955806255, + "memory(GiB)": 94.21, + "reward": 1.6354166865348816, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.2934674955904484, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1142, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 204.25, + "completions/mean_length": 83.82291984558105, + "completions/min_length": 31.375, + "epoch": 2.2720277984611568, + "grad_norm": 0.9211205091348875, + "kl": 0.1025390625, + "learning_rate": 8.859354534836796e-07, + "loss": 0.006440839730203152, + "memory(GiB)": 94.21, + "reward": 1.96875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.96875, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1143, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.625, + "completions/mean_length": 82.26041793823242, + "completions/min_length": 23.625, + "epoch": 2.2740134028294863, + "grad_norm": 1.3242005158688273, + "kl": 0.110107421875, + "learning_rate": 8.857347982891748e-07, + "loss": -0.003253697184845805, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1144, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 79.12500238418579, + "completions/min_length": 32.625, + "epoch": 2.275999007197816, + "grad_norm": 0.007456759149966841, + "kl": 0.10943603515625, + "learning_rate": 8.855339895254076e-07, + "loss": 0.00010936538456007838, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1145, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.625, + "completions/mean_length": 68.48958444595337, + "completions/min_length": 21.25, + "epoch": 2.2779846115661453, + "grad_norm": 0.0076247539582459155, + "kl": 0.11993408203125, + "learning_rate": 8.853330272723242e-07, + "loss": 0.00011989235645160079, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1146, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 67.7916693687439, + "completions/min_length": 16.5, + "epoch": 2.2799702159344752, + "grad_norm": 0.005242432177648585, + "kl": 0.09405517578125, + "learning_rate": 8.851319116099325e-07, + "loss": 9.39883611863479e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1147, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 68.90625190734863, + "completions/min_length": 22.25, + "epoch": 2.2819558203028047, + "grad_norm": 0.8355247066094097, + "kl": 0.1044921875, + "learning_rate": 8.849306426183004e-07, + "loss": -0.005248870700597763, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1148, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 73.65625143051147, + "completions/min_length": 23.125, + "epoch": 2.2839414246711343, + "grad_norm": 0.9478221971336297, + "kl": 0.096221923828125, + "learning_rate": 8.847292203775574e-07, + "loss": -0.011721128597855568, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1149, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.75, + "completions/mean_length": 77.36458683013916, + "completions/min_length": 24.75, + "epoch": 2.2859270290394638, + "grad_norm": 0.005528089719292132, + "kl": 0.09344482421875, + "learning_rate": 8.845276449678942e-07, + "loss": 9.348720777779818e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1150, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.0, + "completions/mean_length": 79.21875238418579, + "completions/min_length": 24.875, + "epoch": 2.2879126334077933, + "grad_norm": 0.7386188458974698, + "kl": 0.09613037109375, + "learning_rate": 8.843259164695624e-07, + "loss": 0.00979701615869999, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1151, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 81.05208587646484, + "completions/min_length": 29.125, + "epoch": 2.2898982377761232, + "grad_norm": 1.348597986088773, + "kl": 0.11956787109375, + "learning_rate": 8.84124034962874e-07, + "loss": -0.0016678448300808668, + "memory(GiB)": 94.21, + "reward": 1.927083358168602, + "reward_std": 0.07654655165970325, + "rewards/CineAccuracyORM/mean": 0.9270833432674408, + "rewards/CineAccuracyORM/std": 0.13653486222028732, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1152, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 73.23958587646484, + "completions/min_length": 26.625, + "epoch": 2.2918838421444527, + "grad_norm": 0.6987493894869504, + "kl": 0.1328125, + "learning_rate": 8.839220005282026e-07, + "loss": 0.021390598267316818, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1153, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 68.93750190734863, + "completions/min_length": 24.625, + "epoch": 2.2938694465127822, + "grad_norm": 0.005944869094262386, + "kl": 0.10888671875, + "learning_rate": 8.837198132459827e-07, + "loss": 0.00010898825712502003, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1154, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 72.36458587646484, + "completions/min_length": 29.375, + "epoch": 2.2958550508811117, + "grad_norm": 0.007026317985682431, + "kl": 0.097442626953125, + "learning_rate": 8.835174731967087e-07, + "loss": 9.741432586451992e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1155, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 70.96875238418579, + "completions/min_length": 23.875, + "epoch": 2.2978406552494417, + "grad_norm": 0.00559194582259412, + "kl": 0.1007080078125, + "learning_rate": 8.833149804609371e-07, + "loss": 0.00010077822662424296, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1156, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.0, + "completions/mean_length": 80.87500286102295, + "completions/min_length": 27.625, + "epoch": 2.299826259617771, + "grad_norm": 0.0056649427093886946, + "kl": 0.1041259765625, + "learning_rate": 8.831123351192844e-07, + "loss": 0.00010407360969111323, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1157, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 75.36458539962769, + "completions/min_length": 24.75, + "epoch": 2.3018118639861007, + "grad_norm": 0.7607588873475549, + "kl": 0.11187744140625, + "learning_rate": 8.829095372524278e-07, + "loss": -0.0063299760222435, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1158, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.75, + "completions/mean_length": 75.61458539962769, + "completions/min_length": 18.5, + "epoch": 2.3037974683544302, + "grad_norm": 0.6261683644274237, + "kl": 0.1658935546875, + "learning_rate": 8.827065869411059e-07, + "loss": 0.0054514347575604916, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1159, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 74.12500190734863, + "completions/min_length": 18.0, + "epoch": 2.30578307272276, + "grad_norm": 0.013372830515073058, + "kl": 0.111083984375, + "learning_rate": 8.825034842661171e-07, + "loss": 0.00011106727470178157, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1160, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.5, + "completions/mean_length": 80.91666984558105, + "completions/min_length": 28.875, + "epoch": 2.3077686770910897, + "grad_norm": 0.011937438269505276, + "kl": 0.10406494140625, + "learning_rate": 8.823002293083213e-07, + "loss": 0.00010405677312519401, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1161, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 78.98958539962769, + "completions/min_length": 27.0, + "epoch": 2.309754281459419, + "grad_norm": 0.8855845150495578, + "kl": 0.13714599609375, + "learning_rate": 8.820968221486382e-07, + "loss": 0.005876713898032904, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1162, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 75.35416841506958, + "completions/min_length": 25.875, + "epoch": 2.3117398858277487, + "grad_norm": 0.031529671487283195, + "kl": 0.12542724609375, + "learning_rate": 8.818932628680491e-07, + "loss": 0.0001254369708476588, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1163, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 68.04166889190674, + "completions/min_length": 24.25, + "epoch": 2.313725490196078, + "grad_norm": 0.06521873477939312, + "kl": 0.20062255859375, + "learning_rate": 8.816895515475948e-07, + "loss": 0.00020067018340341747, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1164, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.625, + "completions/mean_length": 78.07291984558105, + "completions/min_length": 28.25, + "epoch": 2.315711094564408, + "grad_norm": 0.016632657710600417, + "kl": 0.1331787109375, + "learning_rate": 8.814856882683774e-07, + "loss": 0.0001331377134192735, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1165, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.25, + "completions/mean_length": 87.92708683013916, + "completions/min_length": 26.0, + "epoch": 2.3176966989327377, + "grad_norm": 0.012337044537867275, + "kl": 0.1123046875, + "learning_rate": 8.812816731115594e-07, + "loss": 0.00011230561358388513, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1166, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.0, + "completions/mean_length": 82.27083683013916, + "completions/min_length": 31.0, + "epoch": 2.319682303301067, + "grad_norm": 0.028205607552615104, + "kl": 0.138916015625, + "learning_rate": 8.81077506158363e-07, + "loss": 0.0001389819517498836, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1167, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.25, + "completions/mean_length": 77.87500190734863, + "completions/min_length": 28.875, + "epoch": 2.3216679076693967, + "grad_norm": 0.018319302237655877, + "kl": 0.10888671875, + "learning_rate": 8.808731874900719e-07, + "loss": 0.00010872550774365664, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1168, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.875, + "completions/mean_length": 81.60416793823242, + "completions/min_length": 31.25, + "epoch": 2.3236535120377266, + "grad_norm": 0.9703407456113551, + "kl": 0.12335205078125, + "learning_rate": 8.806687171880296e-07, + "loss": 0.0016537992050871253, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1169, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.25, + "completions/mean_length": 85.02083683013916, + "completions/min_length": 28.875, + "epoch": 2.325639116406056, + "grad_norm": 1.1244762493953908, + "kl": 0.1112060546875, + "learning_rate": 8.8046409533364e-07, + "loss": 0.005542024038732052, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1170, + "train_speed(iter/s)": 0.022516 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 76.22916889190674, + "completions/min_length": 21.75, + "epoch": 2.3276247207743856, + "grad_norm": 0.009141530987340777, + "kl": 0.11712646484375, + "learning_rate": 8.802593220083676e-07, + "loss": 0.00011731675476767123, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1171, + "train_speed(iter/s)": 0.022514 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 81.21875286102295, + "completions/min_length": 29.5, + "epoch": 2.329610325142715, + "grad_norm": 0.008837238644923327, + "kl": 0.11541748046875, + "learning_rate": 8.80054397293737e-07, + "loss": 0.00011541452840901911, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1172, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.875, + "completions/mean_length": 80.59375190734863, + "completions/min_length": 30.875, + "epoch": 2.331595929511045, + "grad_norm": 0.009101768879644433, + "kl": 0.112060546875, + "learning_rate": 8.79849321271333e-07, + "loss": 0.00011213422840228304, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1173, + "train_speed(iter/s)": 0.022516 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.25, + "completions/mean_length": 86.65625095367432, + "completions/min_length": 36.875, + "epoch": 2.3335815338793746, + "grad_norm": 0.007434057245448074, + "kl": 0.11505126953125, + "learning_rate": 8.796440940228009e-07, + "loss": 0.00011487871961435303, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1174, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 215.875, + "completions/mean_length": 92.020836353302, + "completions/min_length": 34.0, + "epoch": 2.335567138247704, + "grad_norm": 1.023240112078257, + "kl": 0.124755859375, + "learning_rate": 8.794387156298458e-07, + "loss": 0.001069599180482328, + "memory(GiB)": 94.21, + "reward": 1.5520833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.5520833358168602, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1175, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 77.36458539962769, + "completions/min_length": 31.0, + "epoch": 2.3375527426160336, + "grad_norm": 0.007349093758855563, + "kl": 0.11334228515625, + "learning_rate": 8.792331861742335e-07, + "loss": 0.00011330414417898282, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1176, + "train_speed(iter/s)": 0.02251 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 73.68750143051147, + "completions/min_length": 22.0, + "epoch": 2.339538346984363, + "grad_norm": 0.007021138723531153, + "kl": 0.094482421875, + "learning_rate": 8.790275057377896e-07, + "loss": 9.447755292057991e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1177, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.0, + "completions/mean_length": 80.36458683013916, + "completions/min_length": 34.875, + "epoch": 2.341523951352693, + "grad_norm": 0.009904770515817793, + "kl": 0.10528564453125, + "learning_rate": 8.788216744023997e-07, + "loss": 0.00010507451952435076, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1178, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.25, + "completions/mean_length": 72.85416889190674, + "completions/min_length": 18.75, + "epoch": 2.3435095557210226, + "grad_norm": 0.012841128010172167, + "kl": 0.10296630859375, + "learning_rate": 8.786156922500098e-07, + "loss": 0.00010292684601154178, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1179, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.375, + "completions/mean_length": 75.50000286102295, + "completions/min_length": 27.125, + "epoch": 2.345495160089352, + "grad_norm": 0.020190310360136656, + "kl": 0.135009765625, + "learning_rate": 8.784095593626258e-07, + "loss": 0.00013489092816598713, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1180, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.125, + "completions/mean_length": 84.47916889190674, + "completions/min_length": 29.25, + "epoch": 2.347480764457682, + "grad_norm": 0.8718207861699887, + "kl": 0.12261962890625, + "learning_rate": 8.782032758223137e-07, + "loss": 0.00225023808889091, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1181, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 84.302086353302, + "completions/min_length": 30.875, + "epoch": 2.3494663688260116, + "grad_norm": 0.026752438334008994, + "kl": 0.143310546875, + "learning_rate": 8.77996841711199e-07, + "loss": 0.0001431004930054769, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1182, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 72.72917032241821, + "completions/min_length": 26.25, + "epoch": 2.351451973194341, + "grad_norm": 1.1784184054246503, + "kl": 0.11566162109375, + "learning_rate": 8.77790257111468e-07, + "loss": -0.0012859385460615158, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1183, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 77.04166889190674, + "completions/min_length": 33.5, + "epoch": 2.3534375775626706, + "grad_norm": 0.01927069965275057, + "kl": 0.13763427734375, + "learning_rate": 8.775835221053662e-07, + "loss": 0.00013751001097261906, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1184, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.625, + "completions/mean_length": 77.02083539962769, + "completions/min_length": 34.625, + "epoch": 2.355423181931, + "grad_norm": 0.02217662582705634, + "kl": 0.13751220703125, + "learning_rate": 8.773766367751992e-07, + "loss": 0.00013747252523899078, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1185, + "train_speed(iter/s)": 0.022506 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 75.31250190734863, + "completions/min_length": 25.375, + "epoch": 2.35740878629933, + "grad_norm": 0.020428484098549613, + "kl": 0.10638427734375, + "learning_rate": 8.771696012033325e-07, + "loss": 0.00010635077342158183, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1186, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 76.13541841506958, + "completions/min_length": 34.875, + "epoch": 2.3593943906676595, + "grad_norm": 0.0428335817202583, + "kl": 0.17236328125, + "learning_rate": 8.769624154721915e-07, + "loss": 0.00017243428737856448, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1187, + "train_speed(iter/s)": 0.022502 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 77.06250190734863, + "completions/min_length": 31.875, + "epoch": 2.361379995035989, + "grad_norm": 0.02224551925648367, + "kl": 0.1319580078125, + "learning_rate": 8.767550796642611e-07, + "loss": 0.00013200273679103702, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1188, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 74.95833444595337, + "completions/min_length": 30.625, + "epoch": 2.3633655994043186, + "grad_norm": 0.017314477823991956, + "kl": 0.1260986328125, + "learning_rate": 8.76547593862086e-07, + "loss": 0.00012607741518877447, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1189, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 80.09375190734863, + "completions/min_length": 36.5, + "epoch": 2.365351203772648, + "grad_norm": 0.8632068783026354, + "kl": 0.10430908203125, + "learning_rate": 8.763399581482712e-07, + "loss": 0.009489011950790882, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1190, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 69.94791793823242, + "completions/min_length": 27.75, + "epoch": 2.367336808140978, + "grad_norm": 1.0407693807454945, + "kl": 0.1737060546875, + "learning_rate": 8.761321726054805e-07, + "loss": 0.007885940372943878, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1191, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 80.95833587646484, + "completions/min_length": 31.5, + "epoch": 2.3693224125093075, + "grad_norm": 0.9518744178247593, + "kl": 0.12188720703125, + "learning_rate": 8.759242373164379e-07, + "loss": 0.0028071056585758924, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1192, + "train_speed(iter/s)": 0.022507 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.375, + "completions/mean_length": 81.28125190734863, + "completions/min_length": 36.375, + "epoch": 2.371308016877637, + "grad_norm": 0.008708198488745992, + "kl": 0.11004638671875, + "learning_rate": 8.757161523639269e-07, + "loss": 0.00011010012531187385, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1193, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 76.22916889190674, + "completions/min_length": 36.0, + "epoch": 2.373293621245967, + "grad_norm": 0.9432423355965983, + "kl": 0.10760498046875, + "learning_rate": 8.755079178307906e-07, + "loss": -0.00908761378377676, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1194, + "train_speed(iter/s)": 0.022506 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.0, + "completions/mean_length": 69.89583492279053, + "completions/min_length": 28.125, + "epoch": 2.3752792256142965, + "grad_norm": 0.008827989042322936, + "kl": 0.10565185546875, + "learning_rate": 8.752995337999315e-07, + "loss": 0.00010562210809439421, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1195, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 79.79166984558105, + "completions/min_length": 39.25, + "epoch": 2.377264829982626, + "grad_norm": 0.0059477336151238, + "kl": 0.1082763671875, + "learning_rate": 8.750910003543117e-07, + "loss": 0.00010813317203428596, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1196, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.625, + "completions/mean_length": 81.83333587646484, + "completions/min_length": 28.25, + "epoch": 2.3792504343509555, + "grad_norm": 0.005592197007666392, + "kl": 0.11285400390625, + "learning_rate": 8.74882317576953e-07, + "loss": 0.00011294342402834445, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1197, + "train_speed(iter/s)": 0.022502 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.25, + "completions/mean_length": 70.083336353302, + "completions/min_length": 28.125, + "epoch": 2.381236038719285, + "grad_norm": 0.007282324312840882, + "kl": 0.10418701171875, + "learning_rate": 8.746734855509363e-07, + "loss": 0.00010426364315208048, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1198, + "train_speed(iter/s)": 0.0225 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.25, + "completions/mean_length": 74.09375286102295, + "completions/min_length": 26.625, + "epoch": 2.383221643087615, + "grad_norm": 0.007868494682890831, + "kl": 0.1396484375, + "learning_rate": 8.744645043594023e-07, + "loss": 0.0001394712453475222, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1199, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 73.73958492279053, + "completions/min_length": 32.0, + "epoch": 2.3852072474559445, + "grad_norm": 0.008215865347069914, + "kl": 0.149169921875, + "learning_rate": 8.742553740855505e-07, + "loss": 0.000149146027979441, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1200, + "train_speed(iter/s)": 0.022502 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 71.47916889190674, + "completions/min_length": 31.875, + "epoch": 2.387192851824274, + "grad_norm": 0.0069754904572254086, + "kl": 0.11346435546875, + "learning_rate": 8.740460948126405e-07, + "loss": 0.00011325508239679039, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1201, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.875, + "completions/mean_length": 66.84375238418579, + "completions/min_length": 32.625, + "epoch": 2.3891784561926035, + "grad_norm": 0.0094413946466077, + "kl": 0.14093017578125, + "learning_rate": 8.738366666239907e-07, + "loss": 0.00014111213386058807, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1202, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 74.54166889190674, + "completions/min_length": 38.25, + "epoch": 2.391164060560933, + "grad_norm": 0.006099917995374272, + "kl": 0.10516357421875, + "learning_rate": 8.736270896029789e-07, + "loss": 0.0001052111474564299, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1203, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 68.58333492279053, + "completions/min_length": 32.75, + "epoch": 2.393149664929263, + "grad_norm": 0.00804770697128756, + "kl": 0.11505126953125, + "learning_rate": 8.734173638330425e-07, + "loss": 0.00011503610585350543, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1204, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 77.03125286102295, + "completions/min_length": 34.625, + "epoch": 2.3951352692975925, + "grad_norm": 1.0915854298598817, + "kl": 0.63726806640625, + "learning_rate": 8.732074893976773e-07, + "loss": 0.005252781789749861, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1205, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 78.34375286102295, + "completions/min_length": 34.125, + "epoch": 2.397120873665922, + "grad_norm": 0.007907001958771044, + "kl": 0.1240234375, + "learning_rate": 8.72997466380439e-07, + "loss": 0.000123914098367095, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1206, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.75, + "completions/mean_length": 67.15625286102295, + "completions/min_length": 25.375, + "epoch": 2.399106478034252, + "grad_norm": 0.00932980057288349, + "kl": 0.11688232421875, + "learning_rate": 8.727872948649424e-07, + "loss": 0.00011681941396091133, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1207, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 72.35416889190674, + "completions/min_length": 33.25, + "epoch": 2.4010920824025814, + "grad_norm": 1.020334589866774, + "kl": 0.13568115234375, + "learning_rate": 8.725769749348612e-07, + "loss": -0.009047575294971466, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1208, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 69.09375190734863, + "completions/min_length": 34.25, + "epoch": 2.403077686770911, + "grad_norm": 1.0897846341075441, + "kl": 0.1197509765625, + "learning_rate": 8.723665066739281e-07, + "loss": -0.005535339470952749, + "memory(GiB)": 94.21, + "reward": 1.7291666865348816, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.18837061524391174, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1209, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 64.70833539962769, + "completions/min_length": 26.625, + "epoch": 2.4050632911392404, + "grad_norm": 1.1656621547897874, + "kl": 0.1461181640625, + "learning_rate": 8.721558901659352e-07, + "loss": 0.02129194885492325, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1210, + "train_speed(iter/s)": 0.022506 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.625, + "completions/mean_length": 68.46875286102295, + "completions/min_length": 28.125, + "epoch": 2.40704889550757, + "grad_norm": 0.008905996911485681, + "kl": 0.13385009765625, + "learning_rate": 8.719451254947333e-07, + "loss": 0.00013398613373283297, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1211, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.25, + "completions/mean_length": 63.083335876464844, + "completions/min_length": 30.75, + "epoch": 2.4090344998759, + "grad_norm": 0.9979978206550499, + "kl": 0.11883544921875, + "learning_rate": 8.717342127442324e-07, + "loss": 0.00011886656284332275, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1212, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.375, + "completions/mean_length": 74.68750286102295, + "completions/min_length": 28.75, + "epoch": 2.4110201042442294, + "grad_norm": 1.4113740934437338, + "kl": 0.16485595703125, + "learning_rate": 8.715231519984014e-07, + "loss": -0.0006730010500177741, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.7395833432674408, + "rewards/CineAccuracyORM/std": 0.30885961651802063, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1213, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 68.80208492279053, + "completions/min_length": 32.0, + "epoch": 2.413005708612559, + "grad_norm": 0.009435189902738072, + "kl": 0.1397705078125, + "learning_rate": 8.713119433412681e-07, + "loss": 0.0001397547748638317, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1214, + "train_speed(iter/s)": 0.02251 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 72.9166693687439, + "completions/min_length": 33.125, + "epoch": 2.4149913129808884, + "grad_norm": 0.008912927790464232, + "kl": 0.130126953125, + "learning_rate": 8.71100586856919e-07, + "loss": 0.00013003393542021513, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1215, + "train_speed(iter/s)": 0.022507 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.75, + "completions/mean_length": 67.37500238418579, + "completions/min_length": 31.375, + "epoch": 2.416976917349218, + "grad_norm": 0.007215197280285389, + "kl": 0.09991455078125, + "learning_rate": 8.708890826294997e-07, + "loss": 0.00010004551586462185, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1216, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.125, + "completions/mean_length": 74.36458683013916, + "completions/min_length": 33.625, + "epoch": 2.418962521717548, + "grad_norm": 0.00732358925408056, + "kl": 0.12322998046875, + "learning_rate": 8.706774307432147e-07, + "loss": 0.00012320814130362123, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1217, + "train_speed(iter/s)": 0.02251 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 65.77083539962769, + "completions/min_length": 31.5, + "epoch": 2.4209481260858774, + "grad_norm": 0.008629798265974205, + "kl": 0.134765625, + "learning_rate": 8.704656312823271e-07, + "loss": 0.00013469200348481536, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1218, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 70.4166693687439, + "completions/min_length": 28.75, + "epoch": 2.422933730454207, + "grad_norm": 0.007550538981952436, + "kl": 0.12774658203125, + "learning_rate": 8.702536843311585e-07, + "loss": 0.0001277501869481057, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1219, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.5, + "completions/mean_length": 71.92708444595337, + "completions/min_length": 28.75, + "epoch": 2.424919334822537, + "grad_norm": 0.007348547344810667, + "kl": 0.105010986328125, + "learning_rate": 8.7004158997409e-07, + "loss": 0.00010493632726138458, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1220, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 68.14583492279053, + "completions/min_length": 26.875, + "epoch": 2.4269049391908664, + "grad_norm": 0.007929975208294991, + "kl": 0.12506103515625, + "learning_rate": 8.698293482955605e-07, + "loss": 0.00012495600094553083, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1221, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 70.95833539962769, + "completions/min_length": 27.25, + "epoch": 2.428890543559196, + "grad_norm": 0.008878034930621323, + "kl": 0.12506103515625, + "learning_rate": 8.69616959380068e-07, + "loss": 0.00012495438568294048, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1222, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 66.65625143051147, + "completions/min_length": 27.375, + "epoch": 2.4308761479275254, + "grad_norm": 0.00811625898495114, + "kl": 0.1143798828125, + "learning_rate": 8.694044233121693e-07, + "loss": 0.00011435621127020568, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1223, + "train_speed(iter/s)": 0.02251 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 65.88541841506958, + "completions/min_length": 27.125, + "epoch": 2.432861752295855, + "grad_norm": 0.007651631684715641, + "kl": 0.10736083984375, + "learning_rate": 8.691917401764792e-07, + "loss": 0.00010723310697358102, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1224, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 69.45833492279053, + "completions/min_length": 26.0, + "epoch": 2.434847356664185, + "grad_norm": 0.007440471633059999, + "kl": 0.1180419921875, + "learning_rate": 8.689789100576716e-07, + "loss": 0.00011803848610725254, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1225, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 78.33333539962769, + "completions/min_length": 31.125, + "epoch": 2.4368329610325143, + "grad_norm": 0.0076252891489103514, + "kl": 0.1207275390625, + "learning_rate": 8.687659330404789e-07, + "loss": 0.00012083293404430151, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1226, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 67.48958587646484, + "completions/min_length": 31.25, + "epoch": 2.438818565400844, + "grad_norm": 0.7727236106868278, + "kl": 0.110595703125, + "learning_rate": 8.685528092096914e-07, + "loss": -0.0126656424254179, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1227, + "train_speed(iter/s)": 0.022511 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 70.75000190734863, + "completions/min_length": 28.75, + "epoch": 2.4408041697691734, + "grad_norm": 0.006787463818572936, + "kl": 0.10882568359375, + "learning_rate": 8.683395386501585e-07, + "loss": 0.00010873173596337438, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1228, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 71.61458587646484, + "completions/min_length": 23.375, + "epoch": 2.442789774137503, + "grad_norm": 0.8975252343855715, + "kl": 0.126220703125, + "learning_rate": 8.681261214467877e-07, + "loss": 0.012133456766605377, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1229, + "train_speed(iter/s)": 0.022514 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 63.177086353302, + "completions/min_length": 29.125, + "epoch": 2.444775378505833, + "grad_norm": 1.1452085521087776, + "kl": 0.15301513671875, + "learning_rate": 8.67912557684545e-07, + "loss": 0.016196615993976593, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1230, + "train_speed(iter/s)": 0.022514 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.375, + "completions/mean_length": 61.12500238418579, + "completions/min_length": 28.375, + "epoch": 2.4467609828741623, + "grad_norm": 0.005251437163449944, + "kl": 0.1148681640625, + "learning_rate": 8.676988474484547e-07, + "loss": 0.00011488603195175529, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1231, + "train_speed(iter/s)": 0.022515 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.125, + "completions/mean_length": 57.88541889190674, + "completions/min_length": 27.25, + "epoch": 2.448746587242492, + "grad_norm": 0.007546645666077838, + "kl": 0.1295166015625, + "learning_rate": 8.674849908235993e-07, + "loss": 0.00012949170195497572, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1232, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.75, + "completions/mean_length": 62.01041793823242, + "completions/min_length": 30.25, + "epoch": 2.450732191610822, + "grad_norm": 0.005556366281487293, + "kl": 0.12762451171875, + "learning_rate": 8.672709878951198e-07, + "loss": 0.00012759763922076672, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1233, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 63.00000238418579, + "completions/min_length": 32.0, + "epoch": 2.4527177959791513, + "grad_norm": 0.014333723316308819, + "kl": 0.123809814453125, + "learning_rate": 8.670568387482152e-07, + "loss": 0.00012387729657348245, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1234, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 66.57291841506958, + "completions/min_length": 28.375, + "epoch": 2.454703400347481, + "grad_norm": 0.9524225547703068, + "kl": 0.0809326171875, + "learning_rate": 8.66842543468143e-07, + "loss": 0.0014120943378657103, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1235, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.5, + "completions/mean_length": 60.833335399627686, + "completions/min_length": 24.0, + "epoch": 2.4566890047158103, + "grad_norm": 0.005431258183396817, + "kl": 0.097686767578125, + "learning_rate": 8.666281021402187e-07, + "loss": 9.764005517354235e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1236, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 70.31250238418579, + "completions/min_length": 30.75, + "epoch": 2.45867460908414, + "grad_norm": 1.2921519935369679, + "kl": 0.11627197265625, + "learning_rate": 8.66413514849816e-07, + "loss": -0.009187010116875172, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1237, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 68.55208587646484, + "completions/min_length": 30.625, + "epoch": 2.4606602134524698, + "grad_norm": 0.00583919048751687, + "kl": 0.1014404296875, + "learning_rate": 8.661987816823663e-07, + "loss": 0.00010138032666873187, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1238, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 66.23958539962769, + "completions/min_length": 29.875, + "epoch": 2.4626458178207993, + "grad_norm": 0.011416457497404964, + "kl": 0.12127685546875, + "learning_rate": 8.659839027233602e-07, + "loss": 0.00012118097947677597, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1239, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 67.44791841506958, + "completions/min_length": 29.75, + "epoch": 2.464631422189129, + "grad_norm": 0.8326084365128613, + "kl": 0.110595703125, + "learning_rate": 8.65768878058345e-07, + "loss": -0.0035335025750100613, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1240, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.125, + "completions/mean_length": 60.79166841506958, + "completions/min_length": 27.625, + "epoch": 2.4666170265574583, + "grad_norm": 0.006716772724864029, + "kl": 0.1087646484375, + "learning_rate": 8.655537077729268e-07, + "loss": 0.00010880798799917102, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1241, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.25, + "completions/mean_length": 63.75000238418579, + "completions/min_length": 34.75, + "epoch": 2.468602630925788, + "grad_norm": 0.0052529777711340575, + "kl": 0.09698486328125, + "learning_rate": 8.653383919527695e-07, + "loss": 9.685917757451534e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1242, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.25, + "completions/mean_length": 58.062500953674316, + "completions/min_length": 25.75, + "epoch": 2.4705882352941178, + "grad_norm": 2.2772604945789503, + "kl": 0.11431884765625, + "learning_rate": 8.65122930683595e-07, + "loss": -0.007184515707194805, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1243, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 62.04166793823242, + "completions/min_length": 22.125, + "epoch": 2.4725738396624473, + "grad_norm": 0.01782565112665753, + "kl": 0.120697021484375, + "learning_rate": 8.649073240511829e-07, + "loss": 0.00012075152335455641, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1244, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.75, + "completions/mean_length": 62.67708492279053, + "completions/min_length": 27.375, + "epoch": 2.4745594440307768, + "grad_norm": 0.005790817326568715, + "kl": 0.1043701171875, + "learning_rate": 8.646915721413707e-07, + "loss": 0.0001043564043357037, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1245, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.0, + "completions/mean_length": 64.78125238418579, + "completions/min_length": 25.5, + "epoch": 2.4765450483991067, + "grad_norm": 0.005827619480205973, + "kl": 0.1199951171875, + "learning_rate": 8.644756750400542e-07, + "loss": 0.00011991198698524386, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1246, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.5, + "completions/mean_length": 64.76041889190674, + "completions/min_length": 29.25, + "epoch": 2.4785306527674362, + "grad_norm": 0.9690639469618161, + "kl": 0.120849609375, + "learning_rate": 8.642596328331864e-07, + "loss": -0.0034683975391089916, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.3829289712011814, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1247, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 68.79166793823242, + "completions/min_length": 32.5, + "epoch": 2.4805162571357657, + "grad_norm": 1.3023367059067472, + "kl": 0.100341796875, + "learning_rate": 8.640434456067784e-07, + "loss": -0.0025977070908993483, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.05103103630244732, + "rewards/CineAccuracyORM/mean": 0.7083333432674408, + "rewards/CineAccuracyORM/std": 0.3245695158839226, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1248, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 69.8854193687439, + "completions/min_length": 27.75, + "epoch": 2.4825018615040952, + "grad_norm": 0.007727919870984804, + "kl": 0.110107421875, + "learning_rate": 8.638271134468987e-07, + "loss": 0.00011016281496267766, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1249, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.875, + "completions/mean_length": 72.58333683013916, + "completions/min_length": 32.625, + "epoch": 2.4844874658724247, + "grad_norm": 0.004938654901134946, + "kl": 0.08905029296875, + "learning_rate": 8.636106364396743e-07, + "loss": 8.90390801941976e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1250, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 65.95833539962769, + "completions/min_length": 30.75, + "epoch": 2.4864730702407547, + "grad_norm": 0.8056809430110817, + "kl": 0.1300048828125, + "learning_rate": 8.633940146712887e-07, + "loss": -0.005963850766420364, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1251, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 69.34375238418579, + "completions/min_length": 30.25, + "epoch": 2.488458674609084, + "grad_norm": 0.6215788283193011, + "kl": 0.11474609375, + "learning_rate": 8.63177248227984e-07, + "loss": 0.00595555966719985, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1252, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 67.27083539962769, + "completions/min_length": 31.25, + "epoch": 2.4904442789774137, + "grad_norm": 0.9085952655668573, + "kl": 0.1231689453125, + "learning_rate": 8.629603371960597e-07, + "loss": 0.00015979260206222534, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1253, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.625, + "completions/mean_length": 68.18750286102295, + "completions/min_length": 31.625, + "epoch": 2.4924298833457432, + "grad_norm": 1.3074962345170322, + "kl": 0.127197265625, + "learning_rate": 8.627432816618723e-07, + "loss": -0.0011720409383997321, + "memory(GiB)": 94.21, + "reward": 1.7500000149011612, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.7500000074505806, + "rewards/CineAccuracyORM/std": 0.1657295897603035, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1254, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.125, + "completions/mean_length": 70.05208492279053, + "completions/min_length": 35.5, + "epoch": 2.4944154877140727, + "grad_norm": 0.004037832876464606, + "kl": 0.10247802734375, + "learning_rate": 8.625260817118365e-07, + "loss": 0.00010241218842566013, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1255, + "train_speed(iter/s)": 0.022531 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 70.20833539962769, + "completions/min_length": 26.75, + "epoch": 2.4964010920824027, + "grad_norm": 0.005016021323085701, + "kl": 0.10919189453125, + "learning_rate": 8.623087374324243e-07, + "loss": 0.00010927939729299396, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1256, + "train_speed(iter/s)": 0.022533 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 66.56250190734863, + "completions/min_length": 27.0, + "epoch": 2.498386696450732, + "grad_norm": 1.164837192050835, + "kl": 0.1170654296875, + "learning_rate": 8.620912489101648e-07, + "loss": 0.0018511468078941107, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.3829289712011814, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1257, + "train_speed(iter/s)": 0.022533 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 67.79166841506958, + "completions/min_length": 29.875, + "epoch": 2.5003723008190617, + "grad_norm": 0.007688784232444334, + "kl": 0.10491943359375, + "learning_rate": 8.618736162316452e-07, + "loss": 0.00010482000652700663, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1258, + "train_speed(iter/s)": 0.022535 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 69.72916841506958, + "completions/min_length": 27.0, + "epoch": 2.5023579051873917, + "grad_norm": 0.004010491728274601, + "kl": 0.09417724609375, + "learning_rate": 8.616558394835094e-07, + "loss": 9.424250310985371e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1259, + "train_speed(iter/s)": 0.022537 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 67.3229193687439, + "completions/min_length": 28.5, + "epoch": 2.504343509555721, + "grad_norm": 0.00876373767103568, + "kl": 0.109619140625, + "learning_rate": 8.614379187524592e-07, + "loss": 0.00010951112199109048, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1260, + "train_speed(iter/s)": 0.022538 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.0, + "completions/mean_length": 65.14583539962769, + "completions/min_length": 27.125, + "epoch": 2.5063291139240507, + "grad_norm": 0.01383530453205163, + "kl": 0.093170166015625, + "learning_rate": 8.612198541252533e-07, + "loss": 9.319156379206106e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1261, + "train_speed(iter/s)": 0.022536 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.25, + "completions/mean_length": 71.40625190734863, + "completions/min_length": 31.0, + "epoch": 2.50831471829238, + "grad_norm": 0.004555176220282828, + "kl": 0.087493896484375, + "learning_rate": 8.610016456887081e-07, + "loss": 8.742274803807959e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1262, + "train_speed(iter/s)": 0.022537 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.25, + "completions/mean_length": 79.64583587646484, + "completions/min_length": 27.0, + "epoch": 2.5103003226607097, + "grad_norm": 0.7435270165886403, + "kl": 0.12066650390625, + "learning_rate": 8.60783293529697e-07, + "loss": 0.004668924957513809, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1263, + "train_speed(iter/s)": 0.022536 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.25, + "completions/mean_length": 67.28125333786011, + "completions/min_length": 35.5, + "epoch": 2.5122859270290396, + "grad_norm": 0.6936743294366742, + "kl": 0.09375, + "learning_rate": 8.605647977351504e-07, + "loss": 0.011062691919505596, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1264, + "train_speed(iter/s)": 0.022537 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 77.12500143051147, + "completions/min_length": 36.0, + "epoch": 2.514271531397369, + "grad_norm": 1.3547422065193302, + "kl": 0.12725830078125, + "learning_rate": 8.603461583920565e-07, + "loss": -0.007573738694190979, + "memory(GiB)": 94.21, + "reward": 1.6979166865348816, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3217491842806339, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1265, + "train_speed(iter/s)": 0.022539 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.875, + "completions/mean_length": 80.9791693687439, + "completions/min_length": 34.375, + "epoch": 2.5162571357656986, + "grad_norm": 0.999756075272307, + "kl": 0.11419677734375, + "learning_rate": 8.6012737558746e-07, + "loss": 0.012018587440252304, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1266, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 73.57291984558105, + "completions/min_length": 32.5, + "epoch": 2.518242740134028, + "grad_norm": 0.01325856595419211, + "kl": 0.11181640625, + "learning_rate": 8.599084494084632e-07, + "loss": 0.000111827437649481, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1267, + "train_speed(iter/s)": 0.022543 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 69.15625190734863, + "completions/min_length": 32.25, + "epoch": 2.5202283445023577, + "grad_norm": 0.019721392582024988, + "kl": 0.12646484375, + "learning_rate": 8.596893799422254e-07, + "loss": 0.0001263372105313465, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1268, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.625, + "completions/mean_length": 84.01041984558105, + "completions/min_length": 33.375, + "epoch": 2.5222139488706876, + "grad_norm": 0.02195426559168214, + "kl": 0.14727783203125, + "learning_rate": 8.594701672759624e-07, + "loss": 0.0001472388976253569, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1269, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 225.875, + "completions/mean_length": 102.91666889190674, + "completions/min_length": 37.25, + "epoch": 2.524199553239017, + "grad_norm": 0.5849893807496976, + "kl": 0.1099853515625, + "learning_rate": 8.592508114969478e-07, + "loss": -0.013651542365550995, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1270, + "train_speed(iter/s)": 0.022539 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.625, + "completions/mean_length": 80.43750286102295, + "completions/min_length": 34.375, + "epoch": 2.5261851576073466, + "grad_norm": 0.014336507152132232, + "kl": 0.1121826171875, + "learning_rate": 8.590313126925117e-07, + "loss": 0.0001122748653870076, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1271, + "train_speed(iter/s)": 0.022539 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.0, + "completions/mean_length": 77.53125286102295, + "completions/min_length": 32.0, + "epoch": 2.5281707619756766, + "grad_norm": 0.882991996349277, + "kl": 0.12274169921875, + "learning_rate": 8.588116709500413e-07, + "loss": 0.0012082557659596205, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1272, + "train_speed(iter/s)": 0.022539 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 84.11458587646484, + "completions/min_length": 24.5, + "epoch": 2.530156366344006, + "grad_norm": 0.011855462864836377, + "kl": 0.11419677734375, + "learning_rate": 8.585918863569806e-07, + "loss": 0.00011433433974161744, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1273, + "train_speed(iter/s)": 0.022538 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.5, + "completions/mean_length": 78.77083587646484, + "completions/min_length": 33.625, + "epoch": 2.5321419707123356, + "grad_norm": 0.010006827491617286, + "kl": 0.1190185546875, + "learning_rate": 8.583719590008307e-07, + "loss": 0.00011909760360140353, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1274, + "train_speed(iter/s)": 0.022538 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.375, + "completions/mean_length": 85.02083492279053, + "completions/min_length": 31.625, + "epoch": 2.534127575080665, + "grad_norm": 0.8140748755303191, + "kl": 0.110107421875, + "learning_rate": 8.581518889691492e-07, + "loss": 0.008183024823665619, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1275, + "train_speed(iter/s)": 0.022537 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.875, + "completions/mean_length": 80.47916841506958, + "completions/min_length": 31.5, + "epoch": 2.5361131794489946, + "grad_norm": 0.6115519651841012, + "kl": 0.1204833984375, + "learning_rate": 8.579316763495508e-07, + "loss": -5.078440153738484e-05, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1276, + "train_speed(iter/s)": 0.022537 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 270.375, + "completions/mean_length": 93.114586353302, + "completions/min_length": 33.75, + "epoch": 2.5380987838173246, + "grad_norm": 0.530750422750695, + "kl": 0.1123046875, + "learning_rate": 8.577113212297067e-07, + "loss": 0.023251429200172424, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.05103103816509247, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 1277, + "train_speed(iter/s)": 0.022532 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 82.00000190734863, + "completions/min_length": 35.625, + "epoch": 2.540084388185654, + "grad_norm": 0.9372763006868186, + "kl": 0.120849609375, + "learning_rate": 8.574908236973453e-07, + "loss": -0.002739655552431941, + "memory(GiB)": 94.21, + "reward": 1.96875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.96875, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1278, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 85.67708492279053, + "completions/min_length": 38.125, + "epoch": 2.5420699925539836, + "grad_norm": 1.031068644172506, + "kl": 0.1212158203125, + "learning_rate": 8.572701838402509e-07, + "loss": 0.0028132374864071608, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.7395833432674408, + "rewards/CineAccuracyORM/std": 0.30885961651802063, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1279, + "train_speed(iter/s)": 0.022531 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 82.20833539962769, + "completions/min_length": 29.25, + "epoch": 2.544055596922313, + "grad_norm": 0.6829913359240926, + "kl": 0.1195068359375, + "learning_rate": 8.570494017462654e-07, + "loss": -0.012835456989705563, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1280, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.75, + "completions/mean_length": 70.9166693687439, + "completions/min_length": 32.125, + "epoch": 2.5460412012906426, + "grad_norm": 0.007156827865647327, + "kl": 0.11328125, + "learning_rate": 8.568284775032866e-07, + "loss": 0.00011326756066409871, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1281, + "train_speed(iter/s)": 0.022532 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 210.625, + "completions/mean_length": 86.48958587646484, + "completions/min_length": 32.25, + "epoch": 2.5480268056589725, + "grad_norm": 0.891820701300227, + "kl": 0.14813232421875, + "learning_rate": 8.566074111992691e-07, + "loss": 0.007472541183233261, + "memory(GiB)": 94.21, + "reward": 1.5729166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.5729166669771075, + "rewards/CineAccuracyORM/std": 0.3624799847602844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1282, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 80.51041793823242, + "completions/min_length": 27.25, + "epoch": 2.550012410027302, + "grad_norm": 1.2947927361767624, + "kl": 0.1209716796875, + "learning_rate": 8.563862029222244e-07, + "loss": -0.00583060784265399, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.17046867683529854, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1283, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 78.23958587646484, + "completions/min_length": 25.875, + "epoch": 2.5519980143956316, + "grad_norm": 0.009804443375801785, + "kl": 0.1531982421875, + "learning_rate": 8.561648527602202e-07, + "loss": 0.0001531544839963317, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1284, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.375, + "completions/mean_length": 78.87500143051147, + "completions/min_length": 27.125, + "epoch": 2.5539836187639615, + "grad_norm": 0.008180539122622494, + "kl": 0.1741943359375, + "learning_rate": 8.559433608013803e-07, + "loss": 0.00017420492076780647, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1285, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 72.27083492279053, + "completions/min_length": 33.625, + "epoch": 2.555969223132291, + "grad_norm": 0.008897538748529256, + "kl": 0.122314453125, + "learning_rate": 8.557217271338859e-07, + "loss": 0.00012234285532031208, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1286, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 80.17708587646484, + "completions/min_length": 36.75, + "epoch": 2.5579548275006205, + "grad_norm": 0.008184283066285509, + "kl": 0.1473388671875, + "learning_rate": 8.554999518459738e-07, + "loss": 0.0001473360462114215, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1287, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.0, + "completions/mean_length": 82.25000190734863, + "completions/min_length": 38.0, + "epoch": 2.55994043186895, + "grad_norm": 1.35495675413901, + "kl": 0.1729736328125, + "learning_rate": 8.552780350259377e-07, + "loss": 0.004792386200278997, + "memory(GiB)": 94.21, + "reward": 1.53125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.53125, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1288, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 72.72916984558105, + "completions/min_length": 29.875, + "epoch": 2.5619260362372795, + "grad_norm": 0.009232165518929135, + "kl": 0.15264892578125, + "learning_rate": 8.55055976762127e-07, + "loss": 0.00015288709255401045, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1289, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 87.40625286102295, + "completions/min_length": 34.0, + "epoch": 2.5639116406056095, + "grad_norm": 0.008074296664395918, + "kl": 0.14410400390625, + "learning_rate": 8.548337771429483e-07, + "loss": 0.00014415831537917256, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1290, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 77.17708587646484, + "completions/min_length": 30.875, + "epoch": 2.565897244973939, + "grad_norm": 0.7284026919730846, + "kl": 0.1558837890625, + "learning_rate": 8.546114362568639e-07, + "loss": -0.006634535267949104, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1291, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.125, + "completions/mean_length": 65.59375143051147, + "completions/min_length": 31.875, + "epoch": 2.5678828493422685, + "grad_norm": 0.009507296785289696, + "kl": 0.1470947265625, + "learning_rate": 8.543889541923924e-07, + "loss": 0.00014710548566654325, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1292, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 74.54166793823242, + "completions/min_length": 31.25, + "epoch": 2.569868453710598, + "grad_norm": 1.3247329599925473, + "kl": 0.14898681640625, + "learning_rate": 8.541663310381086e-07, + "loss": 0.0009728459408506751, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1293, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 70.927086353302, + "completions/min_length": 26.625, + "epoch": 2.5718540580789275, + "grad_norm": 0.009207493064240839, + "kl": 0.15557861328125, + "learning_rate": 8.539435668826436e-07, + "loss": 0.00015577912563458085, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1294, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 75.45833492279053, + "completions/min_length": 32.625, + "epoch": 2.5738396624472575, + "grad_norm": 0.009062648059217483, + "kl": 0.1727294921875, + "learning_rate": 8.537206618146846e-07, + "loss": 0.0001725937909213826, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1295, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 76.66666793823242, + "completions/min_length": 27.75, + "epoch": 2.575825266815587, + "grad_norm": 1.066817069442844, + "kl": 0.15185546875, + "learning_rate": 8.534976159229748e-07, + "loss": 0.009802289307117462, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1296, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 82.37500095367432, + "completions/min_length": 34.875, + "epoch": 2.5778108711839165, + "grad_norm": 0.8241548990271572, + "kl": 0.15191650390625, + "learning_rate": 8.532744292963137e-07, + "loss": -0.005012545734643936, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1297, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 76.00000190734863, + "completions/min_length": 29.625, + "epoch": 2.5797964755522464, + "grad_norm": 0.7848137503011022, + "kl": 0.15087890625, + "learning_rate": 8.530511020235564e-07, + "loss": -0.0070297615602612495, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1298, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 77.09375238418579, + "completions/min_length": 38.375, + "epoch": 2.581782079920576, + "grad_norm": 0.95367213580266, + "kl": 0.13055419921875, + "learning_rate": 8.528276341936145e-07, + "loss": 0.009501341730356216, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1299, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 78.43750190734863, + "completions/min_length": 28.125, + "epoch": 2.5837676842889055, + "grad_norm": 0.9737161878568172, + "kl": 0.1533203125, + "learning_rate": 8.52604025895455e-07, + "loss": -0.00982861127704382, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1300, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 77.64583587646484, + "completions/min_length": 32.625, + "epoch": 2.585753288657235, + "grad_norm": 1.0194936374565085, + "kl": 0.14801025390625, + "learning_rate": 8.523802772181015e-07, + "loss": 0.0013373075053095818, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1301, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 71.88541793823242, + "completions/min_length": 24.75, + "epoch": 2.5877388930255645, + "grad_norm": 0.008911836301347677, + "kl": 0.15283203125, + "learning_rate": 8.52156388250633e-07, + "loss": 0.00015283057291526347, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1302, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.5, + "completions/mean_length": 89.21875286102295, + "completions/min_length": 34.0, + "epoch": 2.5897244973938944, + "grad_norm": 0.007574798749012008, + "kl": 0.14727783203125, + "learning_rate": 8.519323590821843e-07, + "loss": 0.00014703706256113946, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1303, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.75, + "completions/mean_length": 77.47916746139526, + "completions/min_length": 31.625, + "epoch": 2.591710101762224, + "grad_norm": 0.00863429389958237, + "kl": 0.15069580078125, + "learning_rate": 8.517081898019464e-07, + "loss": 0.00015076817362569273, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1304, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 82.48958587646484, + "completions/min_length": 31.375, + "epoch": 2.5936957061305534, + "grad_norm": 1.0320039224761541, + "kl": 0.17291259765625, + "learning_rate": 8.514838804991659e-07, + "loss": -0.00018205369997303933, + "memory(GiB)": 94.21, + "reward": 1.6458333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6458333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1305, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 73.37500190734863, + "completions/min_length": 33.25, + "epoch": 2.595681310498883, + "grad_norm": 1.0379692642717284, + "kl": 0.1519775390625, + "learning_rate": 8.51259431263145e-07, + "loss": 0.00015195335436146706, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1306, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 79.50000286102295, + "completions/min_length": 38.25, + "epoch": 2.5976669148672125, + "grad_norm": 0.008103738078906093, + "kl": 0.1436767578125, + "learning_rate": 8.510348421832419e-07, + "loss": 0.00014363221998792142, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1307, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 79.95833492279053, + "completions/min_length": 34.375, + "epoch": 2.5996525192355424, + "grad_norm": 1.0338789266755384, + "kl": 0.13372802734375, + "learning_rate": 8.508101133488701e-07, + "loss": 0.009784862399101257, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1308, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 76.25000286102295, + "completions/min_length": 35.0, + "epoch": 2.601638123603872, + "grad_norm": 1.119029014093334, + "kl": 0.158935546875, + "learning_rate": 8.50585244849499e-07, + "loss": 0.0003059332666452974, + "memory(GiB)": 94.21, + "reward": 1.9687500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.9687500074505806, + "rewards/CineAccuracyORM/std": 0.08474057167768478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1309, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 70.47916889190674, + "completions/min_length": 28.0, + "epoch": 2.6036237279722014, + "grad_norm": 0.5193173980277985, + "kl": 0.1607666015625, + "learning_rate": 8.503602367746537e-07, + "loss": 0.015041163191199303, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1310, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.375, + "completions/mean_length": 79.65625238418579, + "completions/min_length": 28.0, + "epoch": 2.6056093323405314, + "grad_norm": 0.9321107861342776, + "kl": 0.15869140625, + "learning_rate": 8.501350892139144e-07, + "loss": 0.00015891841030679643, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1311, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 70.04166841506958, + "completions/min_length": 22.625, + "epoch": 2.607594936708861, + "grad_norm": 0.9320382310567638, + "kl": 0.1514892578125, + "learning_rate": 8.499098022569176e-07, + "loss": 0.0013471394777297974, + "memory(GiB)": 94.21, + "reward": 1.5520833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.5520833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1312, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 80.06250190734863, + "completions/min_length": 35.125, + "epoch": 2.6095805410771904, + "grad_norm": 0.00856575391455546, + "kl": 0.144775390625, + "learning_rate": 8.496843759933546e-07, + "loss": 0.0001444444787921384, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1313, + "train_speed(iter/s)": 0.022524 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 85.11458683013916, + "completions/min_length": 36.625, + "epoch": 2.61156614544552, + "grad_norm": 0.0076121562291528635, + "kl": 0.1314697265625, + "learning_rate": 8.494588105129723e-07, + "loss": 0.0001313299871981144, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1314, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.375, + "completions/mean_length": 82.31250190734863, + "completions/min_length": 35.875, + "epoch": 2.6135517498138494, + "grad_norm": 0.007664222850183941, + "kl": 0.125732421875, + "learning_rate": 8.492331059055733e-07, + "loss": 0.0001255877286894247, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1315, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 80.61458587646484, + "completions/min_length": 34.0, + "epoch": 2.6155373541821794, + "grad_norm": 1.0865390814622746, + "kl": 0.12030029296875, + "learning_rate": 8.490072622610155e-07, + "loss": 0.0038716073613613844, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1316, + "train_speed(iter/s)": 0.022523 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 80.10416841506958, + "completions/min_length": 25.25, + "epoch": 2.617522958550509, + "grad_norm": 0.008505398848228332, + "kl": 0.1298828125, + "learning_rate": 8.487812796692119e-07, + "loss": 0.00012996768055018038, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1317, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 84.3854193687439, + "completions/min_length": 36.5, + "epoch": 2.6195085629188384, + "grad_norm": 0.00893929579214178, + "kl": 0.11041259765625, + "learning_rate": 8.485551582201311e-07, + "loss": 0.0001104215916711837, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1318, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 76.70833492279053, + "completions/min_length": 22.75, + "epoch": 2.621494167287168, + "grad_norm": 0.008245143619473348, + "kl": 0.126220703125, + "learning_rate": 8.483288980037968e-07, + "loss": 0.00012604551739059389, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1319, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.375, + "completions/mean_length": 80.18750190734863, + "completions/min_length": 28.625, + "epoch": 2.6234797716554974, + "grad_norm": 0.008405254164002814, + "kl": 0.13531494140625, + "learning_rate": 8.481024991102881e-07, + "loss": 0.00013513855810742825, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1320, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.75, + "completions/mean_length": 80.37500429153442, + "completions/min_length": 29.25, + "epoch": 2.6254653760238273, + "grad_norm": 0.016090463890943034, + "kl": 0.134521484375, + "learning_rate": 8.478759616297391e-07, + "loss": 0.00013450313417706639, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1321, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.75, + "completions/mean_length": 76.92708396911621, + "completions/min_length": 27.625, + "epoch": 2.627450980392157, + "grad_norm": 0.007823547707000309, + "kl": 0.1368408203125, + "learning_rate": 8.476492856523395e-07, + "loss": 0.0001368635566905141, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1322, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 73.27083587646484, + "completions/min_length": 26.25, + "epoch": 2.6294365847604864, + "grad_norm": 0.008968148464996984, + "kl": 0.13140869140625, + "learning_rate": 8.474224712683336e-07, + "loss": 0.00013138602662365884, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1323, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 72.65625190734863, + "completions/min_length": 32.75, + "epoch": 2.6314221891288163, + "grad_norm": 0.7785698642165746, + "kl": 0.1202392578125, + "learning_rate": 8.471955185680211e-07, + "loss": -0.006980721838772297, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1324, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.75, + "completions/mean_length": 85.31250238418579, + "completions/min_length": 42.25, + "epoch": 2.633407793497146, + "grad_norm": 0.9217489896409802, + "kl": 0.12884521484375, + "learning_rate": 8.469684276417568e-07, + "loss": -0.00790985394269228, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1325, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 83.41666984558105, + "completions/min_length": 34.5, + "epoch": 2.6353933978654753, + "grad_norm": 0.7917415869369991, + "kl": 0.146484375, + "learning_rate": 8.467411985799501e-07, + "loss": 0.010213149711489677, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1326, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 90.020836353302, + "completions/min_length": 37.625, + "epoch": 2.637379002233805, + "grad_norm": 0.005110986229138953, + "kl": 0.12506103515625, + "learning_rate": 8.465138314730665e-07, + "loss": 0.00012517454160843045, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1327, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 239.5, + "completions/mean_length": 84.54167032241821, + "completions/min_length": 29.125, + "epoch": 2.6393646066021343, + "grad_norm": 1.0625792399177012, + "kl": 0.1378173828125, + "learning_rate": 8.462863264116249e-07, + "loss": 0.012299126014113426, + "memory(GiB)": 94.21, + "reward": 1.6562500149011612, + "reward_std": 0.0765465535223484, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.23100870847702026, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 1328, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.625, + "completions/mean_length": 89.43750238418579, + "completions/min_length": 29.375, + "epoch": 2.6413502109704643, + "grad_norm": 0.006112201117864251, + "kl": 0.107666015625, + "learning_rate": 8.460586834862003e-07, + "loss": 0.00010770794324344024, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1329, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.25, + "completions/mean_length": 94.5416693687439, + "completions/min_length": 34.125, + "epoch": 2.643335815338794, + "grad_norm": 1.5566139667025687, + "kl": 0.1217041015625, + "learning_rate": 8.458309027874221e-07, + "loss": 0.016946181654930115, + "memory(GiB)": 94.21, + "reward": 1.7812500149011612, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.7812500037252903, + "rewards/CineAccuracyORM/std": 0.2281883768737316, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1330, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 89.97916889190674, + "completions/min_length": 29.75, + "epoch": 2.6453214197071233, + "grad_norm": 0.771765800031731, + "kl": 0.15472412109375, + "learning_rate": 8.456029844059749e-07, + "loss": 0.0006598147447220981, + "memory(GiB)": 94.21, + "reward": 1.5520833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.5520833358168602, + "rewards/CineAccuracyORM/std": 0.3624799847602844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1331, + "train_speed(iter/s)": 0.02251 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 77.05208539962769, + "completions/min_length": 35.25, + "epoch": 2.647307024075453, + "grad_norm": 1.5266480852141266, + "kl": 0.14337158203125, + "learning_rate": 8.453749284325975e-07, + "loss": 0.00491324020549655, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.05974817834794521, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.15789688751101494, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1332, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.0, + "completions/mean_length": 82.52083492279053, + "completions/min_length": 35.0, + "epoch": 2.6492926284437823, + "grad_norm": 0.006688470649768745, + "kl": 0.12078857421875, + "learning_rate": 8.451467349580843e-07, + "loss": 0.00012085602793376893, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1333, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 77.22916984558105, + "completions/min_length": 29.875, + "epoch": 2.6512782328121123, + "grad_norm": 0.01877478238714203, + "kl": 0.12115478515625, + "learning_rate": 8.449184040732835e-07, + "loss": 0.00012115540448576212, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1334, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 77.44791841506958, + "completions/min_length": 23.375, + "epoch": 2.653263837180442, + "grad_norm": 0.008819124035832295, + "kl": 0.11065673828125, + "learning_rate": 8.446899358690988e-07, + "loss": 0.00011072001507272944, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1335, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.25, + "completions/mean_length": 75.08333587646484, + "completions/min_length": 24.125, + "epoch": 2.6552494415487713, + "grad_norm": 0.013503528681694781, + "kl": 0.111328125, + "learning_rate": 8.444613304364884e-07, + "loss": 0.00011119978444185108, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1336, + "train_speed(iter/s)": 0.022506 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.375, + "completions/mean_length": 98.88541889190674, + "completions/min_length": 44.0, + "epoch": 2.6572350459171012, + "grad_norm": 0.7448557592854417, + "kl": 0.1416015625, + "learning_rate": 8.442325878664647e-07, + "loss": 0.0029394521843641996, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1337, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 234.0, + "completions/mean_length": 80.23958539962769, + "completions/min_length": 21.75, + "epoch": 2.6592206502854308, + "grad_norm": 0.43129873660356244, + "kl": 0.1107177734375, + "learning_rate": 8.440037082500952e-07, + "loss": 0.023522820323705673, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.05103103816509247, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 1338, + "train_speed(iter/s)": 0.022502 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.625, + "completions/mean_length": 89.90625190734863, + "completions/min_length": 36.125, + "epoch": 2.6612062546537603, + "grad_norm": 0.8433296350736127, + "kl": 0.1353759765625, + "learning_rate": 8.437746916785016e-07, + "loss": 0.0008302840287797153, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1339, + "train_speed(iter/s)": 0.022502 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.125, + "completions/mean_length": 86.94791984558105, + "completions/min_length": 32.125, + "epoch": 2.6631918590220898, + "grad_norm": 0.9609011623011292, + "kl": 0.144287109375, + "learning_rate": 8.435455382428604e-07, + "loss": -0.0032931778114289045, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1340, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 77.04166889190674, + "completions/min_length": 25.875, + "epoch": 2.6651774633904193, + "grad_norm": 1.1867880233310772, + "kl": 0.12060546875, + "learning_rate": 8.433162480344025e-07, + "loss": -0.0025646924041211605, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1341, + "train_speed(iter/s)": 0.022502 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 74.37500095367432, + "completions/min_length": 32.0, + "epoch": 2.6671630677587492, + "grad_norm": 0.8745896428759916, + "kl": 0.1317138671875, + "learning_rate": 8.430868211444132e-07, + "loss": 0.003729822114109993, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1342, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 73.39583444595337, + "completions/min_length": 26.5, + "epoch": 2.6691486721270787, + "grad_norm": 0.9216401697430731, + "kl": 0.12255859375, + "learning_rate": 8.428572576642323e-07, + "loss": 0.0001722034066915512, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.708333333954215, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1343, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.25, + "completions/mean_length": 81.20833492279053, + "completions/min_length": 26.25, + "epoch": 2.6711342764954082, + "grad_norm": 0.04291237396572143, + "kl": 0.180908203125, + "learning_rate": 8.426275576852537e-07, + "loss": 0.0001807756198104471, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1344, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 85.08333492279053, + "completions/min_length": 38.875, + "epoch": 2.6731198808637378, + "grad_norm": 0.006270111741976449, + "kl": 0.113525390625, + "learning_rate": 8.423977212989262e-07, + "loss": 0.00011347224790370092, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1345, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 79.73958492279053, + "completions/min_length": 38.375, + "epoch": 2.6751054852320673, + "grad_norm": 0.0067943967459486165, + "kl": 0.116455078125, + "learning_rate": 8.421677485967522e-07, + "loss": 0.0001163799170171842, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1346, + "train_speed(iter/s)": 0.022501 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.75, + "completions/mean_length": 85.79166793823242, + "completions/min_length": 37.75, + "epoch": 2.677091089600397, + "grad_norm": 2.0153539815768373, + "kl": 0.1279296875, + "learning_rate": 8.419376396702891e-07, + "loss": -0.0022262874990701675, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1347, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 71.18750143051147, + "completions/min_length": 24.375, + "epoch": 2.6790766939687267, + "grad_norm": 0.7440739006692163, + "kl": 0.120849609375, + "learning_rate": 8.41707394611148e-07, + "loss": 0.000641676306258887, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1348, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 77.95833492279053, + "completions/min_length": 33.5, + "epoch": 2.6810622983370562, + "grad_norm": 1.4485607817041966, + "kl": 0.1287841796875, + "learning_rate": 8.414770135109944e-07, + "loss": 0.025922708213329315, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1349, + "train_speed(iter/s)": 0.0225 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 68.17708539962769, + "completions/min_length": 20.625, + "epoch": 2.683047902705386, + "grad_norm": 0.007698389524162958, + "kl": 0.11895751953125, + "learning_rate": 8.41246496461548e-07, + "loss": 0.00011914019705727696, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1350, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 71.97916793823242, + "completions/min_length": 23.75, + "epoch": 2.6850335070737157, + "grad_norm": 0.00861903647237541, + "kl": 0.11956787109375, + "learning_rate": 8.410158435545824e-07, + "loss": 0.0001197346136905253, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1351, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 74.1041693687439, + "completions/min_length": 26.875, + "epoch": 2.687019111442045, + "grad_norm": 0.006559222700376209, + "kl": 0.111083984375, + "learning_rate": 8.407850548819256e-07, + "loss": 0.00011097540846094489, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1352, + "train_speed(iter/s)": 0.022501 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 79.20833539962769, + "completions/min_length": 35.5, + "epoch": 2.6890047158103747, + "grad_norm": 0.007154168608604736, + "kl": 0.11968994140625, + "learning_rate": 8.405541305354595e-07, + "loss": 0.00011971910134889185, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1353, + "train_speed(iter/s)": 0.022501 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.5, + "completions/mean_length": 86.81250190734863, + "completions/min_length": 29.625, + "epoch": 2.690990320178704, + "grad_norm": 1.1629349061949692, + "kl": 0.12652587890625, + "learning_rate": 8.403230706071199e-07, + "loss": -0.004309754353016615, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1354, + "train_speed(iter/s)": 0.022499 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 72.15625238418579, + "completions/min_length": 30.5, + "epoch": 2.692975924547034, + "grad_norm": 0.007050255414054574, + "kl": 0.135009765625, + "learning_rate": 8.400918751888968e-07, + "loss": 0.0001351383834844455, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1355, + "train_speed(iter/s)": 0.022497 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 78.18750143051147, + "completions/min_length": 32.0, + "epoch": 2.6949615289153637, + "grad_norm": 0.006916079687601611, + "kl": 0.12237548828125, + "learning_rate": 8.39860544372834e-07, + "loss": 0.00012238250928930938, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1356, + "train_speed(iter/s)": 0.022497 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 71.93750238418579, + "completions/min_length": 35.375, + "epoch": 2.696947133283693, + "grad_norm": 1.3044029231440082, + "kl": 0.116943359375, + "learning_rate": 8.396290782510291e-07, + "loss": -0.0025276965461671352, + "memory(GiB)": 94.21, + "reward": 1.8854166865348816, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.16290925815701485, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1357, + "train_speed(iter/s)": 0.022497 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.625, + "completions/mean_length": 83.58333492279053, + "completions/min_length": 29.75, + "epoch": 2.6989327376520227, + "grad_norm": 0.92937170599385, + "kl": 0.16668701171875, + "learning_rate": 8.393974769156341e-07, + "loss": 0.01582413725554943, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1358, + "train_speed(iter/s)": 0.022497 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 70.26041889190674, + "completions/min_length": 31.875, + "epoch": 2.700918342020352, + "grad_norm": 1.086936993087712, + "kl": 0.1446533203125, + "learning_rate": 8.391657404588539e-07, + "loss": 0.0037132850848138332, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1359, + "train_speed(iter/s)": 0.0225 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.625, + "completions/mean_length": 85.17708587646484, + "completions/min_length": 37.0, + "epoch": 2.702903946388682, + "grad_norm": 0.007170555789073165, + "kl": 0.1513671875, + "learning_rate": 8.389338689729482e-07, + "loss": 0.00015143706696107984, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.45695383101701736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1360, + "train_speed(iter/s)": 0.022501 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 79.07291889190674, + "completions/min_length": 37.5, + "epoch": 2.7048895507570117, + "grad_norm": 0.006994287834309996, + "kl": 0.14501953125, + "learning_rate": 8.387018625502296e-07, + "loss": 0.00014508981257677078, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1361, + "train_speed(iter/s)": 0.022503 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 71.71875238418579, + "completions/min_length": 30.625, + "epoch": 2.706875155125341, + "grad_norm": 1.062936932162798, + "kl": 0.123779296875, + "learning_rate": 8.384697212830651e-07, + "loss": 0.0001237119286088273, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1362, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 71.79166841506958, + "completions/min_length": 28.0, + "epoch": 2.708860759493671, + "grad_norm": 0.006636822809046916, + "kl": 0.12030029296875, + "learning_rate": 8.382374452638752e-07, + "loss": 0.0001202690982609056, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1363, + "train_speed(iter/s)": 0.022504 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 74.47916984558105, + "completions/min_length": 29.625, + "epoch": 2.7108463638620006, + "grad_norm": 0.007775758167551887, + "kl": 0.135498046875, + "learning_rate": 8.380050345851337e-07, + "loss": 0.0001355545682599768, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1364, + "train_speed(iter/s)": 0.022505 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 71.90625095367432, + "completions/min_length": 29.5, + "epoch": 2.71283196823033, + "grad_norm": 0.008164639734205177, + "kl": 0.1463623046875, + "learning_rate": 8.377724893393681e-07, + "loss": 0.00014629887300543487, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1365, + "train_speed(iter/s)": 0.022506 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 74.96875143051147, + "completions/min_length": 29.5, + "epoch": 2.7148175725986596, + "grad_norm": 0.842683376715099, + "kl": 0.15594482421875, + "learning_rate": 8.375398096191599e-07, + "loss": 0.0029901652596890926, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1366, + "train_speed(iter/s)": 0.022506 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 69.19791889190674, + "completions/min_length": 24.0, + "epoch": 2.716803176966989, + "grad_norm": 0.006092739181903076, + "kl": 0.14398193359375, + "learning_rate": 8.373069955171439e-07, + "loss": 0.00014385800750460476, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1367, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 70.80208683013916, + "completions/min_length": 31.25, + "epoch": 2.718788781335319, + "grad_norm": 0.9550216108791946, + "kl": 0.17108154296875, + "learning_rate": 8.370740471260083e-07, + "loss": -0.012372470460832119, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1368, + "train_speed(iter/s)": 0.022507 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 67.25000190734863, + "completions/min_length": 31.75, + "epoch": 2.7207743857036486, + "grad_norm": 1.500281898064778, + "kl": 0.125, + "learning_rate": 8.368409645384948e-07, + "loss": 0.010152310132980347, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.2486373633146286, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1369, + "train_speed(iter/s)": 0.022506 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 74.27083587646484, + "completions/min_length": 27.625, + "epoch": 2.722759990071978, + "grad_norm": 0.008196801744404895, + "kl": 0.17449951171875, + "learning_rate": 8.366077478473986e-07, + "loss": 0.00017460837261751294, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1370, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 68.84375238418579, + "completions/min_length": 24.125, + "epoch": 2.7247455944403076, + "grad_norm": 0.007700150006095348, + "kl": 0.1231689453125, + "learning_rate": 8.36374397145568e-07, + "loss": 0.00012310505553614348, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1371, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 66.86458492279053, + "completions/min_length": 25.875, + "epoch": 2.726731198808637, + "grad_norm": 0.0058778998107458765, + "kl": 0.12078857421875, + "learning_rate": 8.361409125259052e-07, + "loss": 0.00012079622683813795, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1372, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 61.79166793823242, + "completions/min_length": 22.625, + "epoch": 2.728716803176967, + "grad_norm": 0.009208210840519383, + "kl": 0.1243896484375, + "learning_rate": 8.359072940813654e-07, + "loss": 0.00012440350838005543, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1373, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 71.1666693687439, + "completions/min_length": 27.0, + "epoch": 2.7307024075452966, + "grad_norm": 1.030550533546693, + "kl": 0.12689208984375, + "learning_rate": 8.35673541904957e-07, + "loss": 0.0001268411724595353, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393530294299126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1374, + "train_speed(iter/s)": 0.022507 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 68.92708444595337, + "completions/min_length": 28.375, + "epoch": 2.732688011913626, + "grad_norm": 0.007006007264362935, + "kl": 0.12786865234375, + "learning_rate": 8.354396560897417e-07, + "loss": 0.00012788604362867773, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1375, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 71.10416793823242, + "completions/min_length": 32.375, + "epoch": 2.734673616281956, + "grad_norm": 1.4262557443690458, + "kl": 0.13214111328125, + "learning_rate": 8.352056367288343e-07, + "loss": 0.006337085738778114, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1376, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.625, + "completions/mean_length": 69.39583492279053, + "completions/min_length": 31.875, + "epoch": 2.7366592206502856, + "grad_norm": 0.8361035810946986, + "kl": 0.19073486328125, + "learning_rate": 8.349714839154034e-07, + "loss": 0.004298907704651356, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1377, + "train_speed(iter/s)": 0.022511 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 73.91666889190674, + "completions/min_length": 24.5, + "epoch": 2.738644825018615, + "grad_norm": 0.005415316153608817, + "kl": 0.142578125, + "learning_rate": 8.347371977426698e-07, + "loss": 0.00014272108091972768, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1378, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 65.11458683013916, + "completions/min_length": 27.875, + "epoch": 2.7406304293869446, + "grad_norm": 0.006202513753180842, + "kl": 0.1195068359375, + "learning_rate": 8.34502778303908e-07, + "loss": 0.00011957129754591733, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1379, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 80.42708587646484, + "completions/min_length": 31.875, + "epoch": 2.742616033755274, + "grad_norm": 0.0053600651643142695, + "kl": 0.136474609375, + "learning_rate": 8.342682256924452e-07, + "loss": 0.00013654532085638493, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1380, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 74.59375286102295, + "completions/min_length": 29.625, + "epoch": 2.744601638123604, + "grad_norm": 0.0054725932049495525, + "kl": 0.12518310546875, + "learning_rate": 8.340335400016622e-07, + "loss": 0.00012522964971140027, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1381, + "train_speed(iter/s)": 0.02251 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 67.85416746139526, + "completions/min_length": 27.5, + "epoch": 2.7465872424919335, + "grad_norm": 1.3390968288556966, + "kl": 0.12908935546875, + "learning_rate": 8.337987213249919e-07, + "loss": -0.003195242490619421, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1382, + "train_speed(iter/s)": 0.022508 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 62.8229193687439, + "completions/min_length": 23.0, + "epoch": 2.748572846860263, + "grad_norm": 0.02902153182439986, + "kl": 0.1741943359375, + "learning_rate": 8.33563769755921e-07, + "loss": 0.0001743856118991971, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1383, + "train_speed(iter/s)": 0.022509 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 73.41666984558105, + "completions/min_length": 27.875, + "epoch": 2.7505584512285925, + "grad_norm": 0.00896930133056037, + "kl": 0.13958740234375, + "learning_rate": 8.333286853879886e-07, + "loss": 0.0001395608705934137, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1384, + "train_speed(iter/s)": 0.02251 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 67.54166841506958, + "completions/min_length": 25.875, + "epoch": 2.752544055596922, + "grad_norm": 0.007423741642248186, + "kl": 0.11932373046875, + "learning_rate": 8.330934683147868e-07, + "loss": 0.00011926879960810766, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1385, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 68.35416889190674, + "completions/min_length": 26.5, + "epoch": 2.754529659965252, + "grad_norm": 1.5686116768359737, + "kl": 0.115478515625, + "learning_rate": 8.328581186299603e-07, + "loss": -0.0058824447914958, + "memory(GiB)": 94.21, + "reward": 1.7812500149011612, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.7812500074505806, + "rewards/CineAccuracyORM/std": 0.2805779278278351, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1386, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.125, + "completions/mean_length": 63.989585399627686, + "completions/min_length": 25.75, + "epoch": 2.7565152643335815, + "grad_norm": 0.9989886668778429, + "kl": 0.11767578125, + "learning_rate": 8.326226364272076e-07, + "loss": -0.016778334975242615, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1387, + "train_speed(iter/s)": 0.022513 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.25, + "completions/mean_length": 67.802086353302, + "completions/min_length": 28.375, + "epoch": 2.758500868701911, + "grad_norm": 1.0287269146830489, + "kl": 0.11572265625, + "learning_rate": 8.323870218002782e-07, + "loss": 0.0013600036036223173, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166669771075, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1388, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.5, + "completions/mean_length": 73.14583444595337, + "completions/min_length": 26.875, + "epoch": 2.760486473070241, + "grad_norm": 0.006112785978903015, + "kl": 0.12384033203125, + "learning_rate": 8.32151274842976e-07, + "loss": 0.00012375880032777786, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1389, + "train_speed(iter/s)": 0.022512 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 73.51041889190674, + "completions/min_length": 28.75, + "epoch": 2.7624720774385705, + "grad_norm": 1.15875894898111, + "kl": 0.14385986328125, + "learning_rate": 8.319153956491567e-07, + "loss": -0.012648469768464565, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.05974818021059036, + "rewards/CineAccuracyORM/mean": 0.8541666679084301, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1390, + "train_speed(iter/s)": 0.022514 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 74.98958587646484, + "completions/min_length": 34.0, + "epoch": 2.7644576818069, + "grad_norm": 0.689123238107339, + "kl": 0.116455078125, + "learning_rate": 8.31679384312729e-07, + "loss": -0.011478595435619354, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1391, + "train_speed(iter/s)": 0.022515 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 69.37500286102295, + "completions/min_length": 27.75, + "epoch": 2.7664432861752295, + "grad_norm": 0.8361020109355061, + "kl": 0.12188720703125, + "learning_rate": 8.314432409276537e-07, + "loss": 0.012938725762069225, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1392, + "train_speed(iter/s)": 0.022515 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 71.97916984558105, + "completions/min_length": 24.0, + "epoch": 2.768428890543559, + "grad_norm": 0.006949124885682619, + "kl": 0.13018798828125, + "learning_rate": 8.312069655879447e-07, + "loss": 0.00013012596173211932, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1393, + "train_speed(iter/s)": 0.022516 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.625, + "completions/mean_length": 79.42708539962769, + "completions/min_length": 33.75, + "epoch": 2.770414494911889, + "grad_norm": 0.889382183754225, + "kl": 0.155517578125, + "learning_rate": 8.309705583876682e-07, + "loss": -0.011101406067609787, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1394, + "train_speed(iter/s)": 0.022517 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 70.35416841506958, + "completions/min_length": 31.125, + "epoch": 2.7724000992802185, + "grad_norm": 0.006303712871865013, + "kl": 0.15771484375, + "learning_rate": 8.307340194209434e-07, + "loss": 0.00015773381164763123, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1395, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 77.8541693687439, + "completions/min_length": 26.5, + "epoch": 2.774385703648548, + "grad_norm": 0.007990167338355657, + "kl": 0.12969970703125, + "learning_rate": 8.304973487819408e-07, + "loss": 0.00012961360334884375, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1396, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 67.48958539962769, + "completions/min_length": 28.875, + "epoch": 2.7763713080168775, + "grad_norm": 0.008385126680202701, + "kl": 0.1513671875, + "learning_rate": 8.302605465648846e-07, + "loss": 0.00015145835641305894, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1397, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 73.32291889190674, + "completions/min_length": 29.0, + "epoch": 2.778356912385207, + "grad_norm": 0.005962248547681376, + "kl": 0.15325927734375, + "learning_rate": 8.300236128640506e-07, + "loss": 0.00015337191871367395, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1398, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 76.54166793823242, + "completions/min_length": 32.375, + "epoch": 2.780342516753537, + "grad_norm": 0.0073952386973508534, + "kl": 0.13641357421875, + "learning_rate": 8.297865477737671e-07, + "loss": 0.0001365236093988642, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1399, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 71.52083539962769, + "completions/min_length": 34.25, + "epoch": 2.7823281211218664, + "grad_norm": 0.008862816460142224, + "kl": 0.15911865234375, + "learning_rate": 8.295493513884147e-07, + "loss": 0.00015923750470392406, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1400, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 76.9791693687439, + "completions/min_length": 25.75, + "epoch": 2.784313725490196, + "grad_norm": 0.008993882924231113, + "kl": 0.12677001953125, + "learning_rate": 8.293120238024267e-07, + "loss": 0.00012676059850491583, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1401, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 72.8854193687439, + "completions/min_length": 23.125, + "epoch": 2.786299329858526, + "grad_norm": 0.9148067337260869, + "kl": 0.1453857421875, + "learning_rate": 8.290745651102881e-07, + "loss": -0.005603249184787273, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1402, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 73.33333492279053, + "completions/min_length": 27.125, + "epoch": 2.7882849342268554, + "grad_norm": 0.008487777371436116, + "kl": 0.13037109375, + "learning_rate": 8.288369754065362e-07, + "loss": 0.00013030279660597444, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1403, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.0, + "completions/mean_length": 68.69791841506958, + "completions/min_length": 25.5, + "epoch": 2.790270538595185, + "grad_norm": 0.008718197550629272, + "kl": 0.12799072265625, + "learning_rate": 8.285992547857606e-07, + "loss": 0.00012790513574145734, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1404, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.875, + "completions/mean_length": 63.645835399627686, + "completions/min_length": 30.25, + "epoch": 2.7922561429635144, + "grad_norm": 0.00936139026550106, + "kl": 0.14501953125, + "learning_rate": 8.28361403342603e-07, + "loss": 0.0001449327974114567, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1405, + "train_speed(iter/s)": 0.022519 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.75, + "completions/mean_length": 67.81250190734863, + "completions/min_length": 25.375, + "epoch": 2.794241747331844, + "grad_norm": 1.2365704567101985, + "kl": 0.131591796875, + "learning_rate": 8.281234211717571e-07, + "loss": -0.019905827939510345, + "memory(GiB)": 94.21, + "reward": 1.9166666865348816, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 1406, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 78.71875238418579, + "completions/min_length": 32.625, + "epoch": 2.796227351700174, + "grad_norm": 0.5859171366524915, + "kl": 0.142822265625, + "learning_rate": 8.278853083679686e-07, + "loss": -0.011180834844708443, + "memory(GiB)": 94.21, + "reward": 1.6145833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6145833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1407, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 69.17708539962769, + "completions/min_length": 22.875, + "epoch": 2.7982129560685034, + "grad_norm": 1.3298291526335313, + "kl": 0.45050048828125, + "learning_rate": 8.276470650260354e-07, + "loss": 0.0004501368384808302, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1408, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 75.79166793823242, + "completions/min_length": 33.625, + "epoch": 2.800198560436833, + "grad_norm": 0.007243277595671766, + "kl": 0.1629638671875, + "learning_rate": 8.274086912408072e-07, + "loss": 0.00016277949907816947, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1409, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.25, + "completions/mean_length": 76.77083539962769, + "completions/min_length": 31.125, + "epoch": 2.8021841648051624, + "grad_norm": 0.8237896117751228, + "kl": 0.1285400390625, + "learning_rate": 8.271701871071856e-07, + "loss": -0.003687500488013029, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1410, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 73.00000238418579, + "completions/min_length": 31.375, + "epoch": 2.804169769173492, + "grad_norm": 0.007847470511905078, + "kl": 0.14788818359375, + "learning_rate": 8.269315527201246e-07, + "loss": 0.00014789986016694456, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1411, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 71.11458539962769, + "completions/min_length": 29.75, + "epoch": 2.806155373541822, + "grad_norm": 0.9627034644907219, + "kl": 0.16937255859375, + "learning_rate": 8.266927881746292e-07, + "loss": -0.009243253618478775, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1412, + "train_speed(iter/s)": 0.022518 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.875, + "completions/mean_length": 70.36458492279053, + "completions/min_length": 28.375, + "epoch": 2.8081409779101514, + "grad_norm": 0.008300632538199536, + "kl": 0.12542724609375, + "learning_rate": 8.26453893565757e-07, + "loss": 0.00012547594087664038, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1413, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.375, + "completions/mean_length": 74.37500190734863, + "completions/min_length": 29.75, + "epoch": 2.810126582278481, + "grad_norm": 0.007618235772152284, + "kl": 0.11700439453125, + "learning_rate": 8.262148689886168e-07, + "loss": 0.00011700506729539484, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1414, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.625, + "completions/mean_length": 67.53125190734863, + "completions/min_length": 28.0, + "epoch": 2.812112186646811, + "grad_norm": 0.007688761501630678, + "kl": 0.11102294921875, + "learning_rate": 8.259757145383695e-07, + "loss": 0.00011099201947217807, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1415, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 69.8854193687439, + "completions/min_length": 32.125, + "epoch": 2.8140977910151403, + "grad_norm": 1.343903973579174, + "kl": 0.14813232421875, + "learning_rate": 8.257364303102274e-07, + "loss": 0.005972947925329208, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1416, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 74.29166889190674, + "completions/min_length": 30.875, + "epoch": 2.81608339538347, + "grad_norm": 0.029783994439841928, + "kl": 0.15594482421875, + "learning_rate": 8.254970163994548e-07, + "loss": 0.00015598084428347647, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1417, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.5, + "completions/mean_length": 78.52083587646484, + "completions/min_length": 24.875, + "epoch": 2.8180689997517994, + "grad_norm": 0.7988913801820672, + "kl": 0.13018798828125, + "learning_rate": 8.252574729013677e-07, + "loss": -0.003261163830757141, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1418, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 75.44791889190674, + "completions/min_length": 29.375, + "epoch": 2.820054604120129, + "grad_norm": 0.8552109222444627, + "kl": 0.12353515625, + "learning_rate": 8.250177999113333e-07, + "loss": -0.0049566589295864105, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1419, + "train_speed(iter/s)": 0.02252 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 73.62500190734863, + "completions/min_length": 31.375, + "epoch": 2.822040208488459, + "grad_norm": 0.9785746544204191, + "kl": 0.13995361328125, + "learning_rate": 8.247779975247704e-07, + "loss": 0.015292221680283546, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1420, + "train_speed(iter/s)": 0.022521 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 69.30208539962769, + "completions/min_length": 29.125, + "epoch": 2.8240258128567883, + "grad_norm": 0.011251864331453034, + "kl": 0.134521484375, + "learning_rate": 8.245380658371497e-07, + "loss": 0.00013448242680169642, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1421, + "train_speed(iter/s)": 0.022522 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 75.32292032241821, + "completions/min_length": 29.125, + "epoch": 2.826011417225118, + "grad_norm": 0.007631644570041815, + "kl": 0.130615234375, + "learning_rate": 8.24298004943993e-07, + "loss": 0.00013047009997535497, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1422, + "train_speed(iter/s)": 0.022525 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 74.35416793823242, + "completions/min_length": 34.625, + "epoch": 2.8279970215934473, + "grad_norm": 0.8628131323138515, + "kl": 0.13714599609375, + "learning_rate": 8.240578149408736e-07, + "loss": 0.013426492922008038, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1423, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 69.36458492279053, + "completions/min_length": 31.375, + "epoch": 2.829982625961777, + "grad_norm": 0.9888953028542412, + "kl": 0.10894775390625, + "learning_rate": 8.238174959234164e-07, + "loss": 0.005210637580603361, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1424, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.125, + "completions/mean_length": 68.92708444595337, + "completions/min_length": 26.125, + "epoch": 2.831968230330107, + "grad_norm": 0.0048824230579252725, + "kl": 0.1336669921875, + "learning_rate": 8.235770479872975e-07, + "loss": 0.00013350117660593241, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1425, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 77.50000190734863, + "completions/min_length": 28.0, + "epoch": 2.8339538346984363, + "grad_norm": 0.006484761067630234, + "kl": 0.13372802734375, + "learning_rate": 8.233364712282444e-07, + "loss": 0.0001336161803919822, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1426, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 73.35416889190674, + "completions/min_length": 35.25, + "epoch": 2.835939439066766, + "grad_norm": 0.004804114223585765, + "kl": 0.103271484375, + "learning_rate": 8.230957657420357e-07, + "loss": 0.00010324568575015292, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1427, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 78.89583587646484, + "completions/min_length": 33.0, + "epoch": 2.8379250434350958, + "grad_norm": 0.006104541315067581, + "kl": 0.1317138671875, + "learning_rate": 8.228549316245015e-07, + "loss": 0.0001317253481829539, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1428, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 69.802086353302, + "completions/min_length": 28.875, + "epoch": 2.8399106478034253, + "grad_norm": 0.005271093076398512, + "kl": 0.10577392578125, + "learning_rate": 8.226139689715231e-07, + "loss": 0.00010573517647571862, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1429, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 67.51041793823242, + "completions/min_length": 23.625, + "epoch": 2.841896252171755, + "grad_norm": 0.004990119311447365, + "kl": 0.11224365234375, + "learning_rate": 8.223728778790327e-07, + "loss": 0.00011221389286220074, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1430, + "train_speed(iter/s)": 0.022526 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 72.083336353302, + "completions/min_length": 29.625, + "epoch": 2.8438818565400843, + "grad_norm": 0.005855503043211252, + "kl": 0.1468505859375, + "learning_rate": 8.221316584430139e-07, + "loss": 0.0001468592236051336, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1431, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 76.84375286102295, + "completions/min_length": 30.75, + "epoch": 2.845867460908414, + "grad_norm": 0.005628915528885774, + "kl": 0.1168212890625, + "learning_rate": 8.218903107595013e-07, + "loss": 0.00011697282752720639, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1432, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 75.43750047683716, + "completions/min_length": 28.375, + "epoch": 2.8478530652767438, + "grad_norm": 0.731331720790925, + "kl": 0.113037109375, + "learning_rate": 8.216488349245807e-07, + "loss": 0.0035803269129246473, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1433, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 79.52083683013916, + "completions/min_length": 35.375, + "epoch": 2.8498386696450733, + "grad_norm": 1.3085785172556932, + "kl": 0.144775390625, + "learning_rate": 8.214072310343884e-07, + "loss": -0.010267219506204128, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1434, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.25, + "completions/mean_length": 66.71875190734863, + "completions/min_length": 22.0, + "epoch": 2.8518242740134028, + "grad_norm": 0.0076299383308353645, + "kl": 0.11676025390625, + "learning_rate": 8.211654991851126e-07, + "loss": 0.00011669890955090523, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1435, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 78.30208587646484, + "completions/min_length": 31.375, + "epoch": 2.8538098783817323, + "grad_norm": 0.005879776018045658, + "kl": 0.1221923828125, + "learning_rate": 8.209236394729915e-07, + "loss": 0.00012216811592224985, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1436, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 74.09375286102295, + "completions/min_length": 32.25, + "epoch": 2.855795482750062, + "grad_norm": 0.9273476449528576, + "kl": 0.10113525390625, + "learning_rate": 8.206816519943147e-07, + "loss": 0.01098918728530407, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1437, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.125, + "completions/mean_length": 70.26042079925537, + "completions/min_length": 25.25, + "epoch": 2.8577810871183917, + "grad_norm": 0.004866233949659845, + "kl": 0.12310791015625, + "learning_rate": 8.204395368454227e-07, + "loss": 0.00012310323654673994, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1438, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.625, + "completions/mean_length": 63.562501430511475, + "completions/min_length": 24.0, + "epoch": 2.8597666914867212, + "grad_norm": 0.7256411017452613, + "kl": 0.11138916015625, + "learning_rate": 8.201972941227066e-07, + "loss": 0.004826472606509924, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1439, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 65.05208587646484, + "completions/min_length": 24.5, + "epoch": 2.8617522958550508, + "grad_norm": 1.0358364028269755, + "kl": 0.1162109375, + "learning_rate": 8.199549239226087e-07, + "loss": -0.0063631231896579266, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1440, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.75, + "completions/mean_length": 73.89583587646484, + "completions/min_length": 35.75, + "epoch": 2.8637379002233807, + "grad_norm": 0.20889823968628643, + "kl": 0.19183349609375, + "learning_rate": 8.197124263416212e-07, + "loss": 0.0001918570778798312, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1441, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.5, + "completions/mean_length": 77.42708492279053, + "completions/min_length": 32.375, + "epoch": 2.86572350459171, + "grad_norm": 0.005072774086277166, + "kl": 0.1138916015625, + "learning_rate": 8.19469801476288e-07, + "loss": 0.0001139915402745828, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1442, + "train_speed(iter/s)": 0.022527 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 68.31250190734863, + "completions/min_length": 26.5, + "epoch": 2.8677091089600397, + "grad_norm": 1.1566943000938172, + "kl": 0.1531982421875, + "learning_rate": 8.192270494232031e-07, + "loss": 0.00015316407370846719, + "memory(GiB)": 94.21, + "reward": 1.5833333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.5833333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1443, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.75, + "completions/mean_length": 66.42708539962769, + "completions/min_length": 24.375, + "epoch": 2.8696947133283692, + "grad_norm": 1.1246203247076416, + "kl": 0.1102294921875, + "learning_rate": 8.189841702790113e-07, + "loss": -0.0027241259813308716, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1444, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.25, + "completions/mean_length": 62.708335399627686, + "completions/min_length": 28.25, + "epoch": 2.8716803176966987, + "grad_norm": 0.004868078520723859, + "kl": 0.1181640625, + "learning_rate": 8.187411641404079e-07, + "loss": 0.00011820593499578536, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1445, + "train_speed(iter/s)": 0.02253 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 75.6666693687439, + "completions/min_length": 35.875, + "epoch": 2.8736659220650287, + "grad_norm": 0.00675082123519357, + "kl": 0.12176513671875, + "learning_rate": 8.184980311041389e-07, + "loss": 0.00012161044287495315, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1446, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.625, + "completions/mean_length": 65.70833492279053, + "completions/min_length": 31.25, + "epoch": 2.875651526433358, + "grad_norm": 0.0074126792257329065, + "kl": 0.10748291015625, + "learning_rate": 8.182547712670009e-07, + "loss": 0.00010747680062195286, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1447, + "train_speed(iter/s)": 0.022528 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 72.66666793823242, + "completions/min_length": 29.75, + "epoch": 2.8776371308016877, + "grad_norm": 1.0132898334738905, + "kl": 0.1456298828125, + "learning_rate": 8.180113847258407e-07, + "loss": 0.002355144824832678, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1448, + "train_speed(iter/s)": 0.022529 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.0, + "completions/mean_length": 56.13541841506958, + "completions/min_length": 28.875, + "epoch": 2.879622735170017, + "grad_norm": 0.005886849891316731, + "kl": 0.11798095703125, + "learning_rate": 8.177678715775555e-07, + "loss": 0.00011804667883552611, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1449, + "train_speed(iter/s)": 0.022531 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 66.37500286102295, + "completions/min_length": 27.5, + "epoch": 2.8816083395383467, + "grad_norm": 0.8040701841214775, + "kl": 0.13275146484375, + "learning_rate": 8.175242319190933e-07, + "loss": -0.002023050095885992, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.39076167345046997, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1450, + "train_speed(iter/s)": 0.022532 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 69.72916889190674, + "completions/min_length": 30.375, + "epoch": 2.8835939439066767, + "grad_norm": 0.6531414831931729, + "kl": 0.13861083984375, + "learning_rate": 8.172804658474524e-07, + "loss": -0.017453964799642563, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1451, + "train_speed(iter/s)": 0.022533 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.375, + "completions/mean_length": 66.33333587646484, + "completions/min_length": 28.875, + "epoch": 2.885579548275006, + "grad_norm": 0.006807021043302423, + "kl": 0.11956787109375, + "learning_rate": 8.170365734596809e-07, + "loss": 0.00011951306805713102, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1452, + "train_speed(iter/s)": 0.022532 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 102.75, + "completions/mean_length": 61.88541793823242, + "completions/min_length": 30.125, + "epoch": 2.8875651526433357, + "grad_norm": 1.9871537706263327, + "kl": 0.15252685546875, + "learning_rate": 8.167925548528778e-07, + "loss": 0.017631176859140396, + "memory(GiB)": 94.21, + "reward": 1.6875000149011612, + "reward_std": 0.05103103630244732, + "rewards/CineAccuracyORM/mean": 0.6875000074505806, + "rewards/CineAccuracyORM/std": 0.23100870847702026, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1453, + "train_speed(iter/s)": 0.022534 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 64.91666889190674, + "completions/min_length": 29.75, + "epoch": 2.8895507570116656, + "grad_norm": 0.0061998901518053795, + "kl": 0.17431640625, + "learning_rate": 8.165484101241922e-07, + "loss": 0.00017421328811906278, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1454, + "train_speed(iter/s)": 0.022535 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.75, + "completions/mean_length": 61.98958444595337, + "completions/min_length": 29.875, + "epoch": 2.891536361379995, + "grad_norm": 0.0088234855479808, + "kl": 0.1619873046875, + "learning_rate": 8.16304139370823e-07, + "loss": 0.000161778720212169, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1455, + "train_speed(iter/s)": 0.022536 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 66.97916841506958, + "completions/min_length": 36.0, + "epoch": 2.8935219657483247, + "grad_norm": 0.007207244463214481, + "kl": 0.12506103515625, + "learning_rate": 8.1605974269002e-07, + "loss": 0.00012480223085731268, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1456, + "train_speed(iter/s)": 0.022537 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 65.43750143051147, + "completions/min_length": 27.875, + "epoch": 2.895507570116654, + "grad_norm": 0.009396924191404418, + "kl": 0.18670654296875, + "learning_rate": 8.158152201790825e-07, + "loss": 0.000186647564987652, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1457, + "train_speed(iter/s)": 0.02254 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 64.0416693687439, + "completions/min_length": 27.875, + "epoch": 2.8974931744849837, + "grad_norm": 0.00837550795957956, + "kl": 0.155517578125, + "learning_rate": 8.155705719353603e-07, + "loss": 0.00015590095426887274, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1458, + "train_speed(iter/s)": 0.022541 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.625, + "completions/mean_length": 60.406250953674316, + "completions/min_length": 21.125, + "epoch": 2.8994787788533136, + "grad_norm": 0.009514208531011406, + "kl": 0.1513671875, + "learning_rate": 8.153257980562527e-07, + "loss": 0.0001515389740234241, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1459, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 70.6041693687439, + "completions/min_length": 29.375, + "epoch": 2.901464383221643, + "grad_norm": 1.5024427882299138, + "kl": 0.15057373046875, + "learning_rate": 8.150808986392099e-07, + "loss": -0.011073566973209381, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1460, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 66.53125238418579, + "completions/min_length": 29.0, + "epoch": 2.9034499875899726, + "grad_norm": 0.008846818484931816, + "kl": 0.1688232421875, + "learning_rate": 8.148358737817314e-07, + "loss": 0.000168795813806355, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1461, + "train_speed(iter/s)": 0.022542 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 63.739585876464844, + "completions/min_length": 23.25, + "epoch": 2.905435591958302, + "grad_norm": 0.008808979179739496, + "kl": 0.157470703125, + "learning_rate": 8.145907235813666e-07, + "loss": 0.0001573076588101685, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1462, + "train_speed(iter/s)": 0.022543 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.75, + "completions/mean_length": 61.864585399627686, + "completions/min_length": 30.0, + "epoch": 2.9074211963266317, + "grad_norm": 0.008767488252256643, + "kl": 0.175048828125, + "learning_rate": 8.143454481357154e-07, + "loss": 0.00017518477397970855, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1463, + "train_speed(iter/s)": 0.022544 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.375, + "completions/mean_length": 64.31250238418579, + "completions/min_length": 27.375, + "epoch": 2.9094068006949616, + "grad_norm": 0.008334862853160221, + "kl": 0.1494140625, + "learning_rate": 8.14100047542427e-07, + "loss": 0.00014919511158950627, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1464, + "train_speed(iter/s)": 0.022545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 72.06250238418579, + "completions/min_length": 33.875, + "epoch": 2.911392405063291, + "grad_norm": 0.008241886230823801, + "kl": 0.13592529296875, + "learning_rate": 8.138545218992007e-07, + "loss": 0.00013571848103310913, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1465, + "train_speed(iter/s)": 0.022545 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 105.5, + "completions/mean_length": 59.84375238418579, + "completions/min_length": 27.125, + "epoch": 2.9133780094316206, + "grad_norm": 1.272127426155022, + "kl": 0.1795654296875, + "learning_rate": 8.136088713037854e-07, + "loss": 0.0034899346064776182, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1466, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 68.27083539962769, + "completions/min_length": 24.75, + "epoch": 2.9153636137999506, + "grad_norm": 0.8505587241612501, + "kl": 0.126220703125, + "learning_rate": 8.133630958539799e-07, + "loss": -0.0005638438160531223, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1467, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.375, + "completions/mean_length": 61.250001430511475, + "completions/min_length": 27.75, + "epoch": 2.91734921816828, + "grad_norm": 0.026378017488213848, + "kl": 0.1458740234375, + "learning_rate": 8.131171956476327e-07, + "loss": 0.00014579706476069987, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1468, + "train_speed(iter/s)": 0.022546 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.375, + "completions/mean_length": 60.385417461395264, + "completions/min_length": 24.625, + "epoch": 2.9193348225366096, + "grad_norm": 0.008137518476156475, + "kl": 0.11883544921875, + "learning_rate": 8.128711707826419e-07, + "loss": 0.00011880746751558036, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1469, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 66.30208492279053, + "completions/min_length": 32.625, + "epoch": 2.921320426904939, + "grad_norm": 0.008175863267556689, + "kl": 0.14495849609375, + "learning_rate": 8.126250213569552e-07, + "loss": 0.00014478585217148066, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1470, + "train_speed(iter/s)": 0.022548 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 75.1354193687439, + "completions/min_length": 32.625, + "epoch": 2.9233060312732686, + "grad_norm": 1.5416747528024735, + "kl": 0.24981689453125, + "learning_rate": 8.1237874746857e-07, + "loss": -0.00014942388224881142, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.06846532225608826, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.30890411138534546, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1471, + "train_speed(iter/s)": 0.022549 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 70.79166889190674, + "completions/min_length": 32.375, + "epoch": 2.9252916356415986, + "grad_norm": 0.008609547587590537, + "kl": 0.1717529296875, + "learning_rate": 8.121323492155331e-07, + "loss": 0.0001717947016004473, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1472, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.875, + "completions/mean_length": 60.302085399627686, + "completions/min_length": 24.0, + "epoch": 2.927277240009928, + "grad_norm": 1.0367589834391397, + "kl": 0.52276611328125, + "learning_rate": 8.118858266959411e-07, + "loss": 0.0005225278437137604, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1473, + "train_speed(iter/s)": 0.022552 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.25, + "completions/mean_length": 60.47916841506958, + "completions/min_length": 26.25, + "epoch": 2.9292628443782576, + "grad_norm": 1.0673394001006085, + "kl": 0.156494140625, + "learning_rate": 8.116391800079396e-07, + "loss": -0.003934068139642477, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1474, + "train_speed(iter/s)": 0.022553 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.125, + "completions/mean_length": 65.11458539962769, + "completions/min_length": 28.75, + "epoch": 2.931248448746587, + "grad_norm": 0.006109337117363829, + "kl": 0.1097412109375, + "learning_rate": 8.113924092497243e-07, + "loss": 0.00010968661808874458, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1475, + "train_speed(iter/s)": 0.022556 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.75, + "completions/mean_length": 63.520835399627686, + "completions/min_length": 28.0, + "epoch": 2.9332340531149166, + "grad_norm": 0.005539485434986862, + "kl": 0.1204833984375, + "learning_rate": 8.111455145195395e-07, + "loss": 0.00012051354860886931, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1476, + "train_speed(iter/s)": 0.022558 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.25, + "completions/mean_length": 59.968751430511475, + "completions/min_length": 25.0, + "epoch": 2.9352196574832465, + "grad_norm": 0.005401624956637818, + "kl": 0.1068115234375, + "learning_rate": 8.108984959156794e-07, + "loss": 0.0001067964913090691, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1477, + "train_speed(iter/s)": 0.022558 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 65.08333492279053, + "completions/min_length": 30.25, + "epoch": 2.937205261851576, + "grad_norm": 1.397537299436228, + "kl": 0.1685791015625, + "learning_rate": 8.106513535364879e-07, + "loss": 0.00730905681848526, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1478, + "train_speed(iter/s)": 0.02256 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.5, + "completions/mean_length": 65.18750190734863, + "completions/min_length": 31.125, + "epoch": 2.9391908662199056, + "grad_norm": 1.5686493034540239, + "kl": 0.14581298828125, + "learning_rate": 8.104040874803567e-07, + "loss": -0.0014158705016598105, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1479, + "train_speed(iter/s)": 0.022563 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.375, + "completions/mean_length": 63.22916841506958, + "completions/min_length": 23.0, + "epoch": 2.9411764705882355, + "grad_norm": 0.011239554803719819, + "kl": 0.14276123046875, + "learning_rate": 8.101566978457283e-07, + "loss": 0.00014275385183282197, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1480, + "train_speed(iter/s)": 0.022566 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 61.864585399627686, + "completions/min_length": 26.125, + "epoch": 2.943162074956565, + "grad_norm": 0.012829281598694838, + "kl": 0.132080078125, + "learning_rate": 8.09909184731094e-07, + "loss": 0.0001320371957262978, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1481, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 62.08333396911621, + "completions/min_length": 23.625, + "epoch": 2.9451476793248945, + "grad_norm": 0.01109441611794084, + "kl": 0.15496826171875, + "learning_rate": 8.096615482349934e-07, + "loss": 0.00015477568376809359, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1482, + "train_speed(iter/s)": 0.022567 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.625, + "completions/mean_length": 60.62500190734863, + "completions/min_length": 31.75, + "epoch": 2.947133283693224, + "grad_norm": 1.6843770387249337, + "kl": 0.15081787109375, + "learning_rate": 8.094137884560164e-07, + "loss": -0.0042047323659062386, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1483, + "train_speed(iter/s)": 0.02257 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.375, + "completions/mean_length": 67.65625238418579, + "completions/min_length": 22.25, + "epoch": 2.9491188880615535, + "grad_norm": 0.00495848903304674, + "kl": 0.09881591796875, + "learning_rate": 8.091659054928011e-07, + "loss": 9.89195832516998e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1484, + "train_speed(iter/s)": 0.02257 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 65.15625190734863, + "completions/min_length": 29.25, + "epoch": 2.9511044924298835, + "grad_norm": 0.017875211617790078, + "kl": 0.173095703125, + "learning_rate": 8.089178994440354e-07, + "loss": 0.00017328646208625287, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1485, + "train_speed(iter/s)": 0.022569 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 224.25, + "completions/mean_length": 67.78125190734863, + "completions/min_length": 20.625, + "epoch": 2.953090096798213, + "grad_norm": 0.3371229798900412, + "kl": 0.22271728515625, + "learning_rate": 8.086697704084555e-07, + "loss": 0.022648287937045097, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.05103103816509247, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 1486, + "train_speed(iter/s)": 0.022568 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 69.05208539962769, + "completions/min_length": 35.875, + "epoch": 2.9550757011665425, + "grad_norm": 0.005369353377220404, + "kl": 0.1336669921875, + "learning_rate": 8.08421518484847e-07, + "loss": 0.00013374185073189437, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1487, + "train_speed(iter/s)": 0.02257 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 65.42708539962769, + "completions/min_length": 22.75, + "epoch": 2.9570613055348725, + "grad_norm": 0.007293737262870558, + "kl": 0.13165283203125, + "learning_rate": 8.081731437720443e-07, + "loss": 0.00013149608275853097, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1488, + "train_speed(iter/s)": 0.02257 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 69.37500238418579, + "completions/min_length": 29.875, + "epoch": 2.9590469099032015, + "grad_norm": 1.7411004373327508, + "kl": 0.144287109375, + "learning_rate": 8.079246463689307e-07, + "loss": -0.006723719649016857, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1489, + "train_speed(iter/s)": 0.022569 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 67.53125190734863, + "completions/min_length": 20.625, + "epoch": 2.9610325142715315, + "grad_norm": 0.005379372389635459, + "kl": 0.11724853515625, + "learning_rate": 8.07676026374438e-07, + "loss": 0.00011731521226465702, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1490, + "train_speed(iter/s)": 0.022569 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 66.31250095367432, + "completions/min_length": 30.625, + "epoch": 2.963018118639861, + "grad_norm": 0.00802514287380977, + "kl": 0.16046142578125, + "learning_rate": 8.074272838875476e-07, + "loss": 0.00016023658099584281, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1491, + "train_speed(iter/s)": 0.022571 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.875, + "completions/mean_length": 69.52083539962769, + "completions/min_length": 30.0, + "epoch": 2.9650037230081905, + "grad_norm": 0.0064130846191252176, + "kl": 0.15924072265625, + "learning_rate": 8.07178419007289e-07, + "loss": 0.00015932274982333183, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1492, + "train_speed(iter/s)": 0.022573 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 67.01041841506958, + "completions/min_length": 24.25, + "epoch": 2.9669893273765204, + "grad_norm": 0.007921036904481689, + "kl": 0.14178466796875, + "learning_rate": 8.069294318327404e-07, + "loss": 0.00014174518582876772, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1493, + "train_speed(iter/s)": 0.022574 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.0, + "completions/mean_length": 60.87500190734863, + "completions/min_length": 26.125, + "epoch": 2.96897493174485, + "grad_norm": 0.007579677061138093, + "kl": 0.12335205078125, + "learning_rate": 8.066803224630294e-07, + "loss": 0.0001233743387274444, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1494, + "train_speed(iter/s)": 0.022575 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 66.40625286102295, + "completions/min_length": 29.75, + "epoch": 2.9709605361131795, + "grad_norm": 0.006112900260031259, + "kl": 0.131591796875, + "learning_rate": 8.064310909973314e-07, + "loss": 0.0001316461421083659, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1495, + "train_speed(iter/s)": 0.022574 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 69.81250286102295, + "completions/min_length": 23.0, + "epoch": 2.972946140481509, + "grad_norm": 1.0294497379184306, + "kl": 0.155029296875, + "learning_rate": 8.061817375348707e-07, + "loss": 0.00525694340467453, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1496, + "train_speed(iter/s)": 0.022572 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.875, + "completions/mean_length": 58.864585399627686, + "completions/min_length": 21.625, + "epoch": 2.9749317448498385, + "grad_norm": 0.008121450909917942, + "kl": 0.1361083984375, + "learning_rate": 8.059322621749205e-07, + "loss": 0.00013605313142761588, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1497, + "train_speed(iter/s)": 0.022575 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.125, + "completions/mean_length": 64.72916889190674, + "completions/min_length": 26.75, + "epoch": 2.9769173492181684, + "grad_norm": 0.9191924081952824, + "kl": 0.14227294921875, + "learning_rate": 8.056826650168023e-07, + "loss": -0.010151153430342674, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1498, + "train_speed(iter/s)": 0.022577 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 72.08333587646484, + "completions/min_length": 32.75, + "epoch": 2.978902953586498, + "grad_norm": 0.007612136316154548, + "kl": 0.193359375, + "learning_rate": 8.054329461598858e-07, + "loss": 0.0001932987943291664, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1499, + "train_speed(iter/s)": 0.022579 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 73.35417079925537, + "completions/min_length": 37.875, + "epoch": 2.9808885579548274, + "grad_norm": 0.00918510865586141, + "kl": 0.1856689453125, + "learning_rate": 8.051831057035895e-07, + "loss": 0.00018577344599179924, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1500, + "train_speed(iter/s)": 0.022579 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.125, + "completions/mean_length": 64.47916889190674, + "completions/min_length": 28.5, + "epoch": 2.9828741623231574, + "grad_norm": 0.009015554771147506, + "kl": 0.17510986328125, + "learning_rate": 8.049331437473803e-07, + "loss": 0.0001751594099914655, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1501, + "train_speed(iter/s)": 0.02257 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.625, + "completions/mean_length": 72.90625143051147, + "completions/min_length": 32.0, + "epoch": 2.9848597666914864, + "grad_norm": 1.155739748358851, + "kl": 0.1541748046875, + "learning_rate": 8.046830603907735e-07, + "loss": -0.007544038351625204, + "memory(GiB)": 94.21, + "reward": 1.5729166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.5729166669771075, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1502, + "train_speed(iter/s)": 0.02257 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.0, + "completions/mean_length": 67.958336353302, + "completions/min_length": 22.875, + "epoch": 2.9868453710598164, + "grad_norm": 0.006895625891506662, + "kl": 0.1402587890625, + "learning_rate": 8.044328557333322e-07, + "loss": 0.00014047148579265922, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1503, + "train_speed(iter/s)": 0.022571 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 59.854167461395264, + "completions/min_length": 22.125, + "epoch": 2.988830975428146, + "grad_norm": 0.009594255590401089, + "kl": 0.16192626953125, + "learning_rate": 8.041825298746687e-07, + "loss": 0.00016186795255634934, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1504, + "train_speed(iter/s)": 0.022573 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 64.58333492279053, + "completions/min_length": 23.25, + "epoch": 2.9908165797964754, + "grad_norm": 0.00819396788136529, + "kl": 0.16033935546875, + "learning_rate": 8.039320829144429e-07, + "loss": 0.00016033969586715102, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1505, + "train_speed(iter/s)": 0.022575 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.625, + "completions/mean_length": 62.06250190734863, + "completions/min_length": 28.5, + "epoch": 2.9928021841648054, + "grad_norm": 0.007635576011688738, + "kl": 0.1573486328125, + "learning_rate": 8.036815149523629e-07, + "loss": 0.00015726122364867479, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1506, + "train_speed(iter/s)": 0.022577 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.875, + "completions/mean_length": 61.16666793823242, + "completions/min_length": 26.75, + "epoch": 2.994787788533135, + "grad_norm": 1.7164526760418497, + "kl": 0.1575927734375, + "learning_rate": 8.034308260881853e-07, + "loss": -0.013045396655797958, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.05974817834794521, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.15789688751101494, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1507, + "train_speed(iter/s)": 0.022578 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.875, + "completions/mean_length": 60.36458396911621, + "completions/min_length": 25.25, + "epoch": 2.9967733929014644, + "grad_norm": 0.008682301091845916, + "kl": 0.13787841796875, + "learning_rate": 8.031800164217149e-07, + "loss": 0.00013781135203316808, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1508, + "train_speed(iter/s)": 0.022579 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.125, + "completions/mean_length": 67.10416889190674, + "completions/min_length": 26.375, + "epoch": 2.998758997269794, + "grad_norm": 0.00516058452820252, + "kl": 0.1015625, + "learning_rate": 8.02929086052804e-07, + "loss": 0.00010153975745197386, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1509, + "train_speed(iter/s)": 0.022577 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 66.35416841506958, + "completions/min_length": 27.625, + "epoch": 3.0019856043683295, + "grad_norm": 1.0896806214591497, + "kl": 0.129150390625, + "learning_rate": 8.026780350813536e-07, + "loss": 0.011337703093886375, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1510, + "train_speed(iter/s)": 0.022577 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 72.09375333786011, + "completions/min_length": 24.625, + "epoch": 3.003971208736659, + "grad_norm": 0.007754142705121224, + "kl": 0.18450927734375, + "learning_rate": 8.024268636073124e-07, + "loss": 0.00018431153148412704, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1511, + "train_speed(iter/s)": 0.022578 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 68.97916793823242, + "completions/min_length": 27.75, + "epoch": 3.005956813104989, + "grad_norm": 0.007182274155993902, + "kl": 0.15289306640625, + "learning_rate": 8.021755717306771e-07, + "loss": 0.0001527169079054147, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1512, + "train_speed(iter/s)": 0.022578 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 64.61458444595337, + "completions/min_length": 25.125, + "epoch": 3.0079424174733185, + "grad_norm": 0.006520480244141717, + "kl": 0.1590576171875, + "learning_rate": 8.019241595514923e-07, + "loss": 0.00015905409236438572, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1513, + "train_speed(iter/s)": 0.022581 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 67.40625190734863, + "completions/min_length": 25.875, + "epoch": 3.009928021841648, + "grad_norm": 0.00647501823146812, + "kl": 0.14263916015625, + "learning_rate": 8.016726271698507e-07, + "loss": 0.00014270719839259982, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1514, + "train_speed(iter/s)": 0.022582 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.875, + "completions/mean_length": 67.44791889190674, + "completions/min_length": 23.5, + "epoch": 3.0119136262099775, + "grad_norm": 0.010008172723007629, + "kl": 0.17230224609375, + "learning_rate": 8.014209746858927e-07, + "loss": 0.0001721722073853016, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1515, + "train_speed(iter/s)": 0.022582 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 64.13541841506958, + "completions/min_length": 26.625, + "epoch": 3.0138992305783074, + "grad_norm": 0.004923106032262659, + "kl": 0.13189697265625, + "learning_rate": 8.011692021998063e-07, + "loss": 0.00013180731912143528, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1516, + "train_speed(iter/s)": 0.022584 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.5, + "completions/mean_length": 70.93750286102295, + "completions/min_length": 24.75, + "epoch": 3.015884834946637, + "grad_norm": 1.1770413939614202, + "kl": 0.127197265625, + "learning_rate": 8.009173098118278e-07, + "loss": 0.005528264679014683, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1517, + "train_speed(iter/s)": 0.022584 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 69.46875143051147, + "completions/min_length": 25.125, + "epoch": 3.0178704393149665, + "grad_norm": 0.004896991507392525, + "kl": 0.1171875, + "learning_rate": 8.006652976222408e-07, + "loss": 0.00011717713641701266, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1518, + "train_speed(iter/s)": 0.022584 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 73.22916889190674, + "completions/min_length": 26.875, + "epoch": 3.019856043683296, + "grad_norm": 0.0053673222848831265, + "kl": 0.1318359375, + "learning_rate": 8.004131657313767e-07, + "loss": 0.00013186127762310207, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1519, + "train_speed(iter/s)": 0.022583 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.125, + "completions/mean_length": 68.35416793823242, + "completions/min_length": 25.75, + "epoch": 3.021841648051626, + "grad_norm": 0.006364017956235598, + "kl": 0.1448974609375, + "learning_rate": 8.001609142396149e-07, + "loss": 0.00014463487605098635, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1520, + "train_speed(iter/s)": 0.022583 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.75, + "completions/mean_length": 66.48958396911621, + "completions/min_length": 30.375, + "epoch": 3.0238272524199554, + "grad_norm": 0.0060466522403471255, + "kl": 0.15087890625, + "learning_rate": 7.999085432473815e-07, + "loss": 0.0001509106659796089, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1521, + "train_speed(iter/s)": 0.022585 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 69.17708492279053, + "completions/min_length": 33.0, + "epoch": 3.025812856788285, + "grad_norm": 0.005313967523752258, + "kl": 0.13458251953125, + "learning_rate": 7.996560528551512e-07, + "loss": 0.00013458832108881325, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1522, + "train_speed(iter/s)": 0.022586 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 71.84375333786011, + "completions/min_length": 27.25, + "epoch": 3.0277984611566144, + "grad_norm": 0.006082233498838991, + "kl": 0.14105224609375, + "learning_rate": 7.99403443163446e-07, + "loss": 0.00014113588258624077, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1523, + "train_speed(iter/s)": 0.022589 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 65.55208587646484, + "completions/min_length": 25.25, + "epoch": 3.029784065524944, + "grad_norm": 0.006015814920365411, + "kl": 0.1278076171875, + "learning_rate": 7.991507142728348e-07, + "loss": 0.00012768225860781968, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1524, + "train_speed(iter/s)": 0.022589 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 68.55208587646484, + "completions/min_length": 20.25, + "epoch": 3.031769669893274, + "grad_norm": 0.0065464985752072555, + "kl": 0.15277099609375, + "learning_rate": 7.988978662839345e-07, + "loss": 0.00015271530719473958, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1525, + "train_speed(iter/s)": 0.022591 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 72.90625238418579, + "completions/min_length": 29.75, + "epoch": 3.0337552742616034, + "grad_norm": 0.0054327752949591065, + "kl": 0.14862060546875, + "learning_rate": 7.986448992974095e-07, + "loss": 0.00014869638835079968, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1526, + "train_speed(iter/s)": 0.022591 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 68.73958587646484, + "completions/min_length": 25.625, + "epoch": 3.035740878629933, + "grad_norm": 0.004604420692129586, + "kl": 0.1121826171875, + "learning_rate": 7.983918134139709e-07, + "loss": 0.00011213271500309929, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1527, + "train_speed(iter/s)": 0.022591 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 66.50000286102295, + "completions/min_length": 20.75, + "epoch": 3.0377264829982624, + "grad_norm": 0.004298526752156533, + "kl": 0.106689453125, + "learning_rate": 7.98138608734378e-07, + "loss": 0.00010664231376722455, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1528, + "train_speed(iter/s)": 0.022592 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 70.13541889190674, + "completions/min_length": 30.25, + "epoch": 3.0397120873665924, + "grad_norm": 2.163072702180223, + "kl": 0.31298828125, + "learning_rate": 7.978852853594368e-07, + "loss": -0.01896550878882408, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1529, + "train_speed(iter/s)": 0.022593 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.375, + "completions/mean_length": 58.42708492279053, + "completions/min_length": 19.375, + "epoch": 3.041697691734922, + "grad_norm": 0.007024291819665847, + "kl": 0.1513671875, + "learning_rate": 7.976318433900011e-07, + "loss": 0.00015135837020352483, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1530, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 68.13541746139526, + "completions/min_length": 31.625, + "epoch": 3.0436832961032514, + "grad_norm": 0.006125760556760419, + "kl": 0.13555908203125, + "learning_rate": 7.97378282926971e-07, + "loss": 0.00013557649799622595, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1531, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.875, + "completions/mean_length": 68.35416889190674, + "completions/min_length": 29.625, + "epoch": 3.045668900471581, + "grad_norm": 0.006626752656693549, + "kl": 0.13238525390625, + "learning_rate": 7.971246040712949e-07, + "loss": 0.00013254185614641756, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1532, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 103.125, + "completions/mean_length": 61.30208492279053, + "completions/min_length": 25.125, + "epoch": 3.047654504839911, + "grad_norm": 0.00742816743800988, + "kl": 0.1488037109375, + "learning_rate": 7.968708069239672e-07, + "loss": 0.00014870602171868086, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1533, + "train_speed(iter/s)": 0.022596 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.375, + "completions/mean_length": 61.843750953674316, + "completions/min_length": 25.875, + "epoch": 3.0496401092082404, + "grad_norm": 1.9699282701268699, + "kl": 0.16021728515625, + "learning_rate": 7.966168915860303e-07, + "loss": 0.004624407738447189, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1534, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 64.4166693687439, + "completions/min_length": 26.875, + "epoch": 3.05162571357657, + "grad_norm": 0.0074266227728457, + "kl": 0.14752197265625, + "learning_rate": 7.963628581585733e-07, + "loss": 0.00014756032032892108, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1535, + "train_speed(iter/s)": 0.022598 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 64.52083444595337, + "completions/min_length": 22.5, + "epoch": 3.0536113179448994, + "grad_norm": 0.0057018590434599874, + "kl": 0.1346435546875, + "learning_rate": 7.961087067427323e-07, + "loss": 0.00013436665176413953, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1536, + "train_speed(iter/s)": 0.022598 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 62.84375190734863, + "completions/min_length": 29.25, + "epoch": 3.055596922313229, + "grad_norm": 1.8346653634143228, + "kl": 0.14892578125, + "learning_rate": 7.958544374396905e-07, + "loss": 0.01360081322491169, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1537, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.625, + "completions/mean_length": 67.47916793823242, + "completions/min_length": 25.125, + "epoch": 3.057582526681559, + "grad_norm": 0.897447843873728, + "kl": 0.15631103515625, + "learning_rate": 7.956000503506778e-07, + "loss": 0.01820121705532074, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1538, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 65.25000190734863, + "completions/min_length": 29.0, + "epoch": 3.0595681310498883, + "grad_norm": 0.9788267283128639, + "kl": 0.15155029296875, + "learning_rate": 7.953455455769711e-07, + "loss": -0.00905262678861618, + "memory(GiB)": 94.21, + "reward": 1.5729166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.5729166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1539, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 69.78125143051147, + "completions/min_length": 27.0, + "epoch": 3.061553735418218, + "grad_norm": 0.005789859360527402, + "kl": 0.17724609375, + "learning_rate": 7.950909232198943e-07, + "loss": 0.00017713612760417163, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1540, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.0, + "completions/mean_length": 69.46875286102295, + "completions/min_length": 27.125, + "epoch": 3.0635393397865474, + "grad_norm": 0.00676607615441637, + "kl": 0.18475341796875, + "learning_rate": 7.94836183380818e-07, + "loss": 0.00018466528854332864, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1541, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 70.91666889190674, + "completions/min_length": 27.25, + "epoch": 3.0655249441548773, + "grad_norm": 1.797685149306257, + "kl": 0.172607421875, + "learning_rate": 7.945813261611596e-07, + "loss": -0.004025423899292946, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1542, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.375, + "completions/mean_length": 65.23958539962769, + "completions/min_length": 26.375, + "epoch": 3.067510548523207, + "grad_norm": 0.004778188011642266, + "kl": 0.1251220703125, + "learning_rate": 7.943263516623832e-07, + "loss": 0.00012511663953773677, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1543, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 69.81250286102295, + "completions/min_length": 28.75, + "epoch": 3.0694961528915363, + "grad_norm": 0.009174524159952393, + "kl": 0.144287109375, + "learning_rate": 7.940712599859994e-07, + "loss": 0.00014421633386518806, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1544, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 75.1041693687439, + "completions/min_length": 27.5, + "epoch": 3.071481757259866, + "grad_norm": 1.2381296100232917, + "kl": 0.17138671875, + "learning_rate": 7.938160512335658e-07, + "loss": -0.011360193602740765, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1545, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 69.65625190734863, + "completions/min_length": 25.5, + "epoch": 3.073467361628196, + "grad_norm": 1.4010088060761308, + "kl": 0.160888671875, + "learning_rate": 7.935607255066865e-07, + "loss": 0.0196949765086174, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1546, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 70.89583492279053, + "completions/min_length": 24.5, + "epoch": 3.0754529659965253, + "grad_norm": 0.004868994031868303, + "kl": 0.12860107421875, + "learning_rate": 7.93305282907012e-07, + "loss": 0.00012856232933700085, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1547, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.75, + "completions/mean_length": 62.156251430511475, + "completions/min_length": 26.125, + "epoch": 3.077438570364855, + "grad_norm": 0.010414335426126249, + "kl": 0.12957763671875, + "learning_rate": 7.930497235362394e-07, + "loss": 0.00012960430467501283, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1548, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.25, + "completions/mean_length": 73.78125238418579, + "completions/min_length": 25.25, + "epoch": 3.0794241747331843, + "grad_norm": 0.0042866194595961565, + "kl": 0.131103515625, + "learning_rate": 7.927940474961127e-07, + "loss": 0.00013111383304931223, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1549, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.25, + "completions/mean_length": 68.71875286102295, + "completions/min_length": 25.875, + "epoch": 3.081409779101514, + "grad_norm": 0.00534739297847197, + "kl": 0.11309814453125, + "learning_rate": 7.925382548884216e-07, + "loss": 0.00011304750660201535, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1550, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.0, + "completions/mean_length": 75.62500143051147, + "completions/min_length": 28.5, + "epoch": 3.0833953834698438, + "grad_norm": 0.9133512132839091, + "kl": 0.13824462890625, + "learning_rate": 7.922823458150029e-07, + "loss": 0.00013828588998876512, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1551, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 76.62500286102295, + "completions/min_length": 33.25, + "epoch": 3.0853809878381733, + "grad_norm": 1.5811772141221476, + "kl": 0.17047119140625, + "learning_rate": 7.920263203777391e-07, + "loss": 0.0076699345372617245, + "memory(GiB)": 94.21, + "reward": 1.6458333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6458333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1552, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 70.63541793823242, + "completions/min_length": 30.75, + "epoch": 3.087366592206503, + "grad_norm": 0.007425176945217928, + "kl": 0.1614990234375, + "learning_rate": 7.917701786785598e-07, + "loss": 0.00016143161337822676, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1553, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 80.958336353302, + "completions/min_length": 27.125, + "epoch": 3.0893521965748323, + "grad_norm": 0.9908209038784358, + "kl": 0.1368408203125, + "learning_rate": 7.915139208194404e-07, + "loss": -0.003357556415721774, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1554, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 77.60416984558105, + "completions/min_length": 27.125, + "epoch": 3.0913378009431622, + "grad_norm": 0.0049730104096962725, + "kl": 0.1412353515625, + "learning_rate": 7.912575469024022e-07, + "loss": 0.00014133579679764807, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1555, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 68.58333539962769, + "completions/min_length": 23.75, + "epoch": 3.0933234053114917, + "grad_norm": 0.004789400367678571, + "kl": 0.148681640625, + "learning_rate": 7.910010570295136e-07, + "loss": 0.00014869125152472407, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1556, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.75, + "completions/mean_length": 67.92708492279053, + "completions/min_length": 32.875, + "epoch": 3.0953090096798213, + "grad_norm": 0.02444720777992413, + "kl": 0.15936279296875, + "learning_rate": 7.907444513028887e-07, + "loss": 0.00015949964290484786, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1557, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 69.1041693687439, + "completions/min_length": 29.75, + "epoch": 3.0972946140481508, + "grad_norm": 0.004765885175389616, + "kl": 0.1185302734375, + "learning_rate": 7.904877298246874e-07, + "loss": 0.00011849829752463847, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1558, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 70.8541693687439, + "completions/min_length": 25.125, + "epoch": 3.0992802184164807, + "grad_norm": 0.007165502492879713, + "kl": 0.13323974609375, + "learning_rate": 7.902308926971164e-07, + "loss": 0.00013328055501915514, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1559, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 88.37500381469727, + "completions/min_length": 34.75, + "epoch": 3.1012658227848102, + "grad_norm": 0.006680680389758271, + "kl": 0.1689453125, + "learning_rate": 7.899739400224277e-07, + "loss": 0.00016898289322853088, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1560, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 78.302086353302, + "completions/min_length": 33.375, + "epoch": 3.1032514271531397, + "grad_norm": 0.0051663776460708, + "kl": 0.13824462890625, + "learning_rate": 7.897168719029197e-07, + "loss": 0.00013828356168232858, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1561, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.5, + "completions/mean_length": 92.56250286102295, + "completions/min_length": 42.125, + "epoch": 3.1052370315214692, + "grad_norm": 0.004840308813397641, + "kl": 0.14678955078125, + "learning_rate": 7.894596884409368e-07, + "loss": 0.00014671373355668038, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1562, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 69.36458587646484, + "completions/min_length": 22.25, + "epoch": 3.1072226358897987, + "grad_norm": 0.004287770768888566, + "kl": 0.12872314453125, + "learning_rate": 7.892023897388694e-07, + "loss": 0.00012859402340836823, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1563, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.125, + "completions/mean_length": 75.82291889190674, + "completions/min_length": 23.375, + "epoch": 3.1092082402581287, + "grad_norm": 1.1276802479244754, + "kl": 0.166015625, + "learning_rate": 7.889449758991533e-07, + "loss": 0.0007246571476571262, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1564, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 77.86458587646484, + "completions/min_length": 27.375, + "epoch": 3.111193844626458, + "grad_norm": 0.004584033784166918, + "kl": 0.14996337890625, + "learning_rate": 7.886874470242706e-07, + "loss": 0.00014988250040914863, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1565, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 69.51041889190674, + "completions/min_length": 23.0, + "epoch": 3.1131794489947877, + "grad_norm": 0.05598645754923216, + "kl": 0.1702880859375, + "learning_rate": 7.884298032167489e-07, + "loss": 0.000170221523148939, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1566, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 77.26041793823242, + "completions/min_length": 33.25, + "epoch": 3.115165053363117, + "grad_norm": 0.004858184001340611, + "kl": 0.15155029296875, + "learning_rate": 7.88172044579162e-07, + "loss": 0.00015155557775869966, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1567, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 67.52083539962769, + "completions/min_length": 17.375, + "epoch": 3.117150657731447, + "grad_norm": 0.004925319584826496, + "kl": 0.12481689453125, + "learning_rate": 7.879141712141288e-07, + "loss": 0.00012457444972824305, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1568, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 74.46875190734863, + "completions/min_length": 27.75, + "epoch": 3.1191362620997767, + "grad_norm": 0.004628264780416145, + "kl": 0.12054443359375, + "learning_rate": 7.876561832243143e-07, + "loss": 0.00012040646834066138, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1569, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 70.37500238418579, + "completions/min_length": 24.625, + "epoch": 3.121121866468106, + "grad_norm": 0.0059331130645311324, + "kl": 0.11474609375, + "learning_rate": 7.873980807124292e-07, + "loss": 0.00011482219997560605, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1570, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.25, + "completions/mean_length": 82.93750190734863, + "completions/min_length": 36.0, + "epoch": 3.1231074708364357, + "grad_norm": 0.14866760214561833, + "kl": 0.25732421875, + "learning_rate": 7.871398637812294e-07, + "loss": 0.00025705574080348015, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1571, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 74.42708587646484, + "completions/min_length": 31.375, + "epoch": 3.1250930752047656, + "grad_norm": 0.007044968112130971, + "kl": 0.1356201171875, + "learning_rate": 7.868815325335168e-07, + "loss": 0.0001355307176709175, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1572, + "train_speed(iter/s)": 0.022599 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 79.46875286102295, + "completions/min_length": 26.5, + "epoch": 3.127078679573095, + "grad_norm": 0.9901700131135319, + "kl": 0.1767578125, + "learning_rate": 7.866230870721383e-07, + "loss": 0.003998658154159784, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1573, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 63.32291841506958, + "completions/min_length": 18.125, + "epoch": 3.1290642839414247, + "grad_norm": 0.005600105945306169, + "kl": 0.118896484375, + "learning_rate": 7.863645274999868e-07, + "loss": 0.0001190089387819171, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1574, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 74.50000190734863, + "completions/min_length": 20.75, + "epoch": 3.131049888309754, + "grad_norm": 0.005303313553400478, + "kl": 0.13140869140625, + "learning_rate": 7.861058539200003e-07, + "loss": 0.00013143512478563935, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1575, + "train_speed(iter/s)": 0.022599 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 69.71875333786011, + "completions/min_length": 19.375, + "epoch": 3.1330354926780837, + "grad_norm": 0.005189209603158734, + "kl": 0.1573486328125, + "learning_rate": 7.858470664351622e-07, + "loss": 0.00015709316357970238, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1576, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 75.22916889190674, + "completions/min_length": 28.5, + "epoch": 3.1350210970464136, + "grad_norm": 0.9197710343234746, + "kl": 0.14508056640625, + "learning_rate": 7.855881651485015e-07, + "loss": 0.003865651786327362, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1577, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.25, + "completions/mean_length": 78.63541984558105, + "completions/min_length": 27.375, + "epoch": 3.137006701414743, + "grad_norm": 0.00627293701984189, + "kl": 0.12786865234375, + "learning_rate": 7.853291501630921e-07, + "loss": 0.0001278404815820977, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1578, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 73.0729193687439, + "completions/min_length": 26.0, + "epoch": 3.1389923057830726, + "grad_norm": 0.0064907021217699016, + "kl": 0.15643310546875, + "learning_rate": 7.850700215820536e-07, + "loss": 0.00015644051018171012, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1579, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 196.625, + "completions/mean_length": 80.37500286102295, + "completions/min_length": 22.375, + "epoch": 3.140977910151402, + "grad_norm": 0.005542807300237455, + "kl": 0.1475830078125, + "learning_rate": 7.848107795085506e-07, + "loss": 0.0001476538018323481, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1580, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.0, + "completions/mean_length": 64.08333492279053, + "completions/min_length": 25.25, + "epoch": 3.142963514519732, + "grad_norm": 1.2039381621943268, + "kl": 0.11163330078125, + "learning_rate": 7.845514240457928e-07, + "loss": -0.0027569918893277645, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1581, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.25, + "completions/mean_length": 68.22916889190674, + "completions/min_length": 21.75, + "epoch": 3.1449491188880616, + "grad_norm": 0.0069590060510106655, + "kl": 0.16790771484375, + "learning_rate": 7.842919552970353e-07, + "loss": 0.00016799391596578062, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1582, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 66.29166841506958, + "completions/min_length": 19.25, + "epoch": 3.146934723256391, + "grad_norm": 0.006224080690219878, + "kl": 0.13818359375, + "learning_rate": 7.840323733655778e-07, + "loss": 0.0001380950416205451, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1583, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 56.01041841506958, + "completions/min_length": 16.25, + "epoch": 3.1489203276247206, + "grad_norm": 0.009383479954545693, + "kl": 0.14349365234375, + "learning_rate": 7.83772678354766e-07, + "loss": 0.0001433076395187527, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1584, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 77.71875190734863, + "completions/min_length": 25.25, + "epoch": 3.1509059319930506, + "grad_norm": 0.006408260173547487, + "kl": 0.1298828125, + "learning_rate": 7.835128703679895e-07, + "loss": 0.00012998521560803056, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1585, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 79.94791984558105, + "completions/min_length": 28.0, + "epoch": 3.15289153636138, + "grad_norm": 1.3341875464657413, + "kl": 0.141845703125, + "learning_rate": 7.832529495086837e-07, + "loss": 0.015978978946805, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1586, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 68.17708492279053, + "completions/min_length": 22.375, + "epoch": 3.1548771407297096, + "grad_norm": 0.9855642828401813, + "kl": 0.125, + "learning_rate": 7.829929158803285e-07, + "loss": 0.0016770760994404554, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1587, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 68.34375238418579, + "completions/min_length": 24.5, + "epoch": 3.156862745098039, + "grad_norm": 0.006972472354643738, + "kl": 0.1658935546875, + "learning_rate": 7.82732769586449e-07, + "loss": 0.00016592885367572308, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1588, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.625, + "completions/mean_length": 65.15625190734863, + "completions/min_length": 18.75, + "epoch": 3.1588483494663686, + "grad_norm": 0.007042093284151057, + "kl": 0.12786865234375, + "learning_rate": 7.824725107306148e-07, + "loss": 0.00012775440700352192, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1589, + "train_speed(iter/s)": 0.022611 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 66.01041889190674, + "completions/min_length": 21.75, + "epoch": 3.1608339538346986, + "grad_norm": 0.006367888551967647, + "kl": 0.12127685546875, + "learning_rate": 7.822121394164406e-07, + "loss": 0.0001213420182466507, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1590, + "train_speed(iter/s)": 0.022611 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 72.98958587646484, + "completions/min_length": 31.125, + "epoch": 3.162819558203028, + "grad_norm": 0.0059194410346197, + "kl": 0.13812255859375, + "learning_rate": 7.819516557475858e-07, + "loss": 0.00013797497376799583, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1591, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.0, + "completions/mean_length": 66.64583587646484, + "completions/min_length": 29.5, + "epoch": 3.1648051625713576, + "grad_norm": 0.004671946892964499, + "kl": 0.1226806640625, + "learning_rate": 7.816910598277545e-07, + "loss": 0.0001227743923664093, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1592, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 63.90625047683716, + "completions/min_length": 18.5, + "epoch": 3.166790766939687, + "grad_norm": 0.00857730893593178, + "kl": 0.1259765625, + "learning_rate": 7.814303517606955e-07, + "loss": 0.00012605068332049996, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1593, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 61.40625190734863, + "completions/min_length": 26.5, + "epoch": 3.168776371308017, + "grad_norm": 0.00711534361282793, + "kl": 0.13616943359375, + "learning_rate": 7.811695316502022e-07, + "loss": 0.00013601114915218204, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1594, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 66.09375143051147, + "completions/min_length": 29.25, + "epoch": 3.1707619756763465, + "grad_norm": 1.1142694759945453, + "kl": 0.171875, + "learning_rate": 7.809085996001129e-07, + "loss": -0.006257231347262859, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1595, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.625, + "completions/mean_length": 75.21875238418579, + "completions/min_length": 21.75, + "epoch": 3.172747580044676, + "grad_norm": 0.00723908059239392, + "kl": 0.16400146484375, + "learning_rate": 7.8064755571431e-07, + "loss": 0.00016430024697910994, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1596, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 61.718751430511475, + "completions/min_length": 17.25, + "epoch": 3.1747331844130056, + "grad_norm": 0.008161048778028125, + "kl": 0.12396240234375, + "learning_rate": 7.803864000967204e-07, + "loss": 0.00012396546662785113, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.45695383101701736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1597, + "train_speed(iter/s)": 0.022617 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 74.11458539962769, + "completions/min_length": 25.875, + "epoch": 3.1767187887813355, + "grad_norm": 0.00808004752736522, + "kl": 0.16107177734375, + "learning_rate": 7.801251328513163e-07, + "loss": 0.0001611294865142554, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1598, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.75, + "completions/mean_length": 63.31250238418579, + "completions/min_length": 21.875, + "epoch": 3.178704393149665, + "grad_norm": 0.005077917861440402, + "kl": 0.14080810546875, + "learning_rate": 7.798637540821133e-07, + "loss": 0.00014077738160267472, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1599, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.0, + "completions/mean_length": 59.31250286102295, + "completions/min_length": 25.375, + "epoch": 3.1806899975179945, + "grad_norm": 0.004822578514240816, + "kl": 0.13470458984375, + "learning_rate": 7.796022638931721e-07, + "loss": 0.0001347649667877704, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1600, + "train_speed(iter/s)": 0.022621 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.125, + "completions/mean_length": 68.46875286102295, + "completions/min_length": 20.75, + "epoch": 3.182675601886324, + "grad_norm": 1.0845245994786377, + "kl": 0.1588134765625, + "learning_rate": 7.793406623885975e-07, + "loss": -0.006992870010435581, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1601, + "train_speed(iter/s)": 0.02262 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 70.57291889190674, + "completions/min_length": 31.0, + "epoch": 3.1846612062546535, + "grad_norm": 1.1340500403690734, + "kl": 0.16326904296875, + "learning_rate": 7.790789496725387e-07, + "loss": -0.0015656118048354983, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1602, + "train_speed(iter/s)": 0.02262 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 78.55208539962769, + "completions/min_length": 30.5, + "epoch": 3.1866468106229835, + "grad_norm": 0.004941984622011276, + "kl": 0.1529541015625, + "learning_rate": 7.78817125849189e-07, + "loss": 0.00015300113591365516, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1603, + "train_speed(iter/s)": 0.02262 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.125, + "completions/mean_length": 70.11458587646484, + "completions/min_length": 17.5, + "epoch": 3.188632414991313, + "grad_norm": 0.011448552207716163, + "kl": 0.14312744140625, + "learning_rate": 7.78555191022786e-07, + "loss": 0.00014319675392471254, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1604, + "train_speed(iter/s)": 0.022619 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 66.45833492279053, + "completions/min_length": 22.125, + "epoch": 3.1906180193596425, + "grad_norm": 0.8050589697827149, + "kl": 0.13763427734375, + "learning_rate": 7.782931452976118e-07, + "loss": -0.002308471826836467, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1605, + "train_speed(iter/s)": 0.02262 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 69.25000143051147, + "completions/min_length": 26.75, + "epoch": 3.192603623727972, + "grad_norm": 0.03288996300852182, + "kl": 0.1339111328125, + "learning_rate": 7.780309887779921e-07, + "loss": 0.00013382203178480268, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1606, + "train_speed(iter/s)": 0.02262 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 61.52083492279053, + "completions/min_length": 25.625, + "epoch": 3.194589228096302, + "grad_norm": 0.006532461641287733, + "kl": 0.14849853515625, + "learning_rate": 7.777687215682972e-07, + "loss": 0.0001485254178987816, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1607, + "train_speed(iter/s)": 0.022621 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 70.11458492279053, + "completions/min_length": 25.0, + "epoch": 3.1965748324646315, + "grad_norm": 0.005846996579979615, + "kl": 0.13287353515625, + "learning_rate": 7.775063437729413e-07, + "loss": 0.00013302621664479375, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1608, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 64.44791793823242, + "completions/min_length": 24.0, + "epoch": 3.198560436832961, + "grad_norm": 0.030037338411353125, + "kl": 0.14862060546875, + "learning_rate": 7.772438554963826e-07, + "loss": 0.00014869551523588598, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1609, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 69.30208444595337, + "completions/min_length": 24.25, + "epoch": 3.2005460412012905, + "grad_norm": 0.012863827599513405, + "kl": 0.12738037109375, + "learning_rate": 7.769812568431231e-07, + "loss": 0.0001272565859835595, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1610, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.25, + "completions/mean_length": 67.23958587646484, + "completions/min_length": 24.25, + "epoch": 3.2025316455696204, + "grad_norm": 0.9533867712540602, + "kl": 0.11541748046875, + "learning_rate": 7.767185479177092e-07, + "loss": -0.004328140988945961, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1611, + "train_speed(iter/s)": 0.022624 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 74.2916693687439, + "completions/min_length": 28.125, + "epoch": 3.20451724993795, + "grad_norm": 0.008510642382629566, + "kl": 0.1422119140625, + "learning_rate": 7.764557288247307e-07, + "loss": 0.00014209389337338507, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1612, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 62.083335399627686, + "completions/min_length": 23.125, + "epoch": 3.2065028543062795, + "grad_norm": 0.004026934549716962, + "kl": 0.10986328125, + "learning_rate": 7.761927996688217e-07, + "loss": 0.00010998039215337485, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1613, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.375, + "completions/mean_length": 65.22916889190674, + "completions/min_length": 14.625, + "epoch": 3.208488458674609, + "grad_norm": 0.02000490640244023, + "kl": 0.1162109375, + "learning_rate": 7.759297605546596e-07, + "loss": 0.00011624234321061522, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1614, + "train_speed(iter/s)": 0.022624 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 65.00000333786011, + "completions/min_length": 20.5, + "epoch": 3.2104740630429385, + "grad_norm": 0.005323828133178569, + "kl": 0.1217041015625, + "learning_rate": 7.756666115869664e-07, + "loss": 0.00012178064207546413, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1615, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 73.08333492279053, + "completions/min_length": 22.875, + "epoch": 3.2124596674112684, + "grad_norm": 0.9789591487573561, + "kl": 0.15283203125, + "learning_rate": 7.754033528705069e-07, + "loss": -0.009583230130374432, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1616, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 72.00000143051147, + "completions/min_length": 15.625, + "epoch": 3.214445271779598, + "grad_norm": 1.206109593050698, + "kl": 0.1192626953125, + "learning_rate": 7.751399845100899e-07, + "loss": -0.009789164178073406, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1617, + "train_speed(iter/s)": 0.022624 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.625, + "completions/mean_length": 78.8229193687439, + "completions/min_length": 28.375, + "epoch": 3.2164308761479274, + "grad_norm": 1.3171703713872511, + "kl": 0.14453125, + "learning_rate": 7.748765066105684e-07, + "loss": 0.013453269377350807, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.05974818021059036, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.12089945748448372, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1618, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 68.83333539962769, + "completions/min_length": 26.375, + "epoch": 3.218416480516257, + "grad_norm": 0.004575279815882035, + "kl": 0.11480712890625, + "learning_rate": 7.746129192768385e-07, + "loss": 0.0001146999275078997, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1619, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 71.58333539962769, + "completions/min_length": 29.625, + "epoch": 3.220402084884587, + "grad_norm": 0.004567171997031353, + "kl": 0.10455322265625, + "learning_rate": 7.743492226138397e-07, + "loss": 0.00010440638288855553, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1620, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 74.48958444595337, + "completions/min_length": 25.625, + "epoch": 3.2223876892529164, + "grad_norm": 0.8820630926357819, + "kl": 0.15264892578125, + "learning_rate": 7.740854167265556e-07, + "loss": -0.016015177592635155, + "memory(GiB)": 94.21, + "reward": 1.6145833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6145833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1621, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 67.95833539962769, + "completions/min_length": 18.375, + "epoch": 3.224373293621246, + "grad_norm": 0.0052832388436746735, + "kl": 0.14251708984375, + "learning_rate": 7.738215017200126e-07, + "loss": 0.00014245050260797143, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1622, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 76.21875286102295, + "completions/min_length": 27.125, + "epoch": 3.2263588979895754, + "grad_norm": 0.00569019222712594, + "kl": 0.122802734375, + "learning_rate": 7.735574776992812e-07, + "loss": 0.00012273839092813432, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1623, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 82.76042079925537, + "completions/min_length": 28.875, + "epoch": 3.2283445023579054, + "grad_norm": 0.89556832340245, + "kl": 0.141357421875, + "learning_rate": 7.732933447694748e-07, + "loss": -0.004790339153259993, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1624, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 75.28125190734863, + "completions/min_length": 28.75, + "epoch": 3.230330106726235, + "grad_norm": 0.0040374499945258924, + "kl": 0.11529541015625, + "learning_rate": 7.730291030357504e-07, + "loss": 0.00011524982255650684, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1625, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 68.5104193687439, + "completions/min_length": 23.875, + "epoch": 3.2323157110945644, + "grad_norm": 0.018725914039625435, + "kl": 0.16650390625, + "learning_rate": 7.727647526033083e-07, + "loss": 0.0001662827271502465, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1626, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 83.29166984558105, + "completions/min_length": 36.625, + "epoch": 3.234301315462894, + "grad_norm": 0.7779134959574959, + "kl": 0.1260986328125, + "learning_rate": 7.725002935773921e-07, + "loss": 0.0023582628928124905, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1627, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 69.64583683013916, + "completions/min_length": 27.5, + "epoch": 3.2362869198312234, + "grad_norm": 0.00375191172626384, + "kl": 0.11846923828125, + "learning_rate": 7.722357260632886e-07, + "loss": 0.00011851306771859527, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1628, + "train_speed(iter/s)": 0.022626 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 76.75000190734863, + "completions/min_length": 29.125, + "epoch": 3.2382725241995534, + "grad_norm": 0.9177772428418636, + "kl": 0.16412353515625, + "learning_rate": 7.719710501663277e-07, + "loss": 0.010430104099214077, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1629, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.0, + "completions/mean_length": 82.70833539962769, + "completions/min_length": 31.125, + "epoch": 3.240258128567883, + "grad_norm": 0.00582702891715206, + "kl": 0.14862060546875, + "learning_rate": 7.717062659918825e-07, + "loss": 0.00014866201672703028, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1630, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 75.29166841506958, + "completions/min_length": 28.25, + "epoch": 3.2422437329362124, + "grad_norm": 0.010167066215420423, + "kl": 0.13433837890625, + "learning_rate": 7.714413736453693e-07, + "loss": 0.00013425570796243846, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1631, + "train_speed(iter/s)": 0.022626 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.625, + "completions/mean_length": 87.26041841506958, + "completions/min_length": 28.625, + "epoch": 3.244229337304542, + "grad_norm": 0.003856639380792482, + "kl": 0.12005615234375, + "learning_rate": 7.711763732322475e-07, + "loss": 0.00011997627734672278, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1632, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.25, + "completions/mean_length": 70.29166889190674, + "completions/min_length": 23.625, + "epoch": 3.246214941672872, + "grad_norm": 0.0047362573819903645, + "kl": 0.10357666015625, + "learning_rate": 7.709112648580194e-07, + "loss": 0.00010354580444982275, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1633, + "train_speed(iter/s)": 0.022624 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 74.01041889190674, + "completions/min_length": 26.0, + "epoch": 3.2482005460412013, + "grad_norm": 0.13616774127687276, + "kl": 0.1837158203125, + "learning_rate": 7.7064604862823e-07, + "loss": 0.0001835908042266965, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1634, + "train_speed(iter/s)": 0.022626 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 77.78125190734863, + "completions/min_length": 35.25, + "epoch": 3.250186150409531, + "grad_norm": 0.004290667829682497, + "kl": 0.1402587890625, + "learning_rate": 7.703807246484679e-07, + "loss": 0.0001403296337230131, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1635, + "train_speed(iter/s)": 0.022626 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 79.3854193687439, + "completions/min_length": 27.75, + "epoch": 3.2521717547778604, + "grad_norm": 0.007670113679000537, + "kl": 0.12652587890625, + "learning_rate": 7.701152930243641e-07, + "loss": 0.00012663830420933664, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1636, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 77.61458587646484, + "completions/min_length": 31.5, + "epoch": 3.2541573591461903, + "grad_norm": 1.205690192312357, + "kl": 0.1190185546875, + "learning_rate": 7.698497538615927e-07, + "loss": 0.014132445678114891, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1637, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 64.57291793823242, + "completions/min_length": 25.5, + "epoch": 3.25614296351452, + "grad_norm": 0.003923149670308882, + "kl": 0.1015625, + "learning_rate": 7.695841072658702e-07, + "loss": 0.00010168996959691867, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1638, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 83.75000286102295, + "completions/min_length": 40.625, + "epoch": 3.2581285678828493, + "grad_norm": 0.004258045179516714, + "kl": 0.1298828125, + "learning_rate": 7.693183533429566e-07, + "loss": 0.00012992811389267445, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1639, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 82.63541984558105, + "completions/min_length": 33.5, + "epoch": 3.260114172251179, + "grad_norm": 0.003951889895466826, + "kl": 0.10870361328125, + "learning_rate": 7.690524921986541e-07, + "loss": 0.00010872267739614472, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1640, + "train_speed(iter/s)": 0.022624 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.125, + "completions/mean_length": 63.781251430511475, + "completions/min_length": 25.375, + "epoch": 3.2620997766195083, + "grad_norm": 1.0159283661393874, + "kl": 0.561767578125, + "learning_rate": 7.687865239388074e-07, + "loss": -0.010935746133327484, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1641, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 72.52083587646484, + "completions/min_length": 31.125, + "epoch": 3.2640853809878383, + "grad_norm": 0.004971517003292599, + "kl": 0.114013671875, + "learning_rate": 7.685204486693046e-07, + "loss": 0.00011402781819924712, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1642, + "train_speed(iter/s)": 0.022626 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.5, + "completions/mean_length": 69.55208539962769, + "completions/min_length": 28.0, + "epoch": 3.266070985356168, + "grad_norm": 0.014124415914013335, + "kl": 0.1290283203125, + "learning_rate": 7.682542664960756e-07, + "loss": 0.00012876864639110863, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1643, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 82.364586353302, + "completions/min_length": 36.375, + "epoch": 3.2680565897244973, + "grad_norm": 0.9671038369069495, + "kl": 0.145263671875, + "learning_rate": 7.679879775250933e-07, + "loss": 0.00014527887105941772, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1644, + "train_speed(iter/s)": 0.022626 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.375, + "completions/mean_length": 79.09375238418579, + "completions/min_length": 31.125, + "epoch": 3.270042194092827, + "grad_norm": 1.215322346308037, + "kl": 0.12432861328125, + "learning_rate": 7.67721581862373e-07, + "loss": -0.0004590476746670902, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1645, + "train_speed(iter/s)": 0.022626 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 72.18750095367432, + "completions/min_length": 24.625, + "epoch": 3.2720277984611568, + "grad_norm": 0.004351051363441635, + "kl": 0.1134033203125, + "learning_rate": 7.674550796139728e-07, + "loss": 0.00011344471568008885, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1646, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 79.81250286102295, + "completions/min_length": 33.625, + "epoch": 3.2740134028294863, + "grad_norm": 1.4283401699234863, + "kl": 0.12200927734375, + "learning_rate": 7.671884708859926e-07, + "loss": -0.00461210822686553, + "memory(GiB)": 94.21, + "reward": 1.7604166865348816, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.2934674955904484, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1647, + "train_speed(iter/s)": 0.022624 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 77.61458539962769, + "completions/min_length": 23.875, + "epoch": 3.275999007197816, + "grad_norm": 0.004063448915065144, + "kl": 0.1136474609375, + "learning_rate": 7.66921755784575e-07, + "loss": 0.00011347376130288467, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1648, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 78.81250238418579, + "completions/min_length": 32.25, + "epoch": 3.2779846115661453, + "grad_norm": 1.2102170022127292, + "kl": 0.16961669921875, + "learning_rate": 7.666549344159053e-07, + "loss": 0.0011187988566234708, + "memory(GiB)": 94.21, + "reward": 1.7812500149011612, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.7812500074505806, + "rewards/CineAccuracyORM/std": 0.15001969039440155, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1649, + "train_speed(iter/s)": 0.022625 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.375, + "completions/mean_length": 82.43750286102295, + "completions/min_length": 31.125, + "epoch": 3.2799702159344752, + "grad_norm": 0.004321021860241708, + "kl": 0.13226318359375, + "learning_rate": 7.663880068862105e-07, + "loss": 0.00013228798343334347, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1650, + "train_speed(iter/s)": 0.022626 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 70.55208539962769, + "completions/min_length": 32.75, + "epoch": 3.2819558203028047, + "grad_norm": 0.006157701966564095, + "kl": 0.13311767578125, + "learning_rate": 7.661209733017602e-07, + "loss": 0.00013323294115252793, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1651, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 68.8229193687439, + "completions/min_length": 28.625, + "epoch": 3.2839414246711343, + "grad_norm": 0.006003061476838553, + "kl": 0.1304931640625, + "learning_rate": 7.658538337688662e-07, + "loss": 0.00013045336527284235, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1652, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 75.87500190734863, + "completions/min_length": 32.0, + "epoch": 3.2859270290394638, + "grad_norm": 0.005259941679482467, + "kl": 0.1142578125, + "learning_rate": 7.655865883938825e-07, + "loss": 0.00011412893945816904, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1653, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 77.15625190734863, + "completions/min_length": 33.0, + "epoch": 3.2879126334077933, + "grad_norm": 0.005645225135338039, + "kl": 0.12353515625, + "learning_rate": 7.653192372832053e-07, + "loss": 0.00012346345465630293, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1654, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 76.19791889190674, + "completions/min_length": 36.875, + "epoch": 3.2898982377761232, + "grad_norm": 0.004232910251998929, + "kl": 0.1103515625, + "learning_rate": 7.650517805432723e-07, + "loss": 0.00011038359662052244, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1655, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 75.91666889190674, + "completions/min_length": 32.0, + "epoch": 3.2918838421444527, + "grad_norm": 0.005116356027105769, + "kl": 0.1380615234375, + "learning_rate": 7.647842182805644e-07, + "loss": 0.00013812229735776782, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1656, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 73.55208539962769, + "completions/min_length": 31.125, + "epoch": 3.2938694465127822, + "grad_norm": 0.004553823528121366, + "kl": 0.12689208984375, + "learning_rate": 7.645165506016034e-07, + "loss": 0.00012686976697295904, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1657, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 77.16667032241821, + "completions/min_length": 33.25, + "epoch": 3.2958550508811117, + "grad_norm": 0.005727086790194925, + "kl": 0.15771484375, + "learning_rate": 7.642487776129538e-07, + "loss": 0.00015769053425174206, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1658, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 76.989586353302, + "completions/min_length": 28.125, + "epoch": 3.2978406552494417, + "grad_norm": 0.55710548896836, + "kl": 0.128814697265625, + "learning_rate": 7.639808994212216e-07, + "loss": -0.02060895785689354, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1659, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 75.63541793823242, + "completions/min_length": 30.875, + "epoch": 3.299826259617771, + "grad_norm": 1.0852818349107676, + "kl": 0.1715087890625, + "learning_rate": 7.637129161330546e-07, + "loss": -0.006823991425335407, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1660, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.75, + "completions/mean_length": 74.04166889190674, + "completions/min_length": 31.875, + "epoch": 3.3018118639861007, + "grad_norm": 0.7534462709329907, + "kl": 0.121826171875, + "learning_rate": 7.634448278551431e-07, + "loss": 0.01573183760046959, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.39076167345046997, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1661, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 67.78125238418579, + "completions/min_length": 27.0, + "epoch": 3.3037974683544302, + "grad_norm": 0.00765202887471509, + "kl": 0.1141357421875, + "learning_rate": 7.631766346942186e-07, + "loss": 0.00011410520528443158, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1662, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 73.55208587646484, + "completions/min_length": 24.5, + "epoch": 3.30578307272276, + "grad_norm": 0.005772935990358881, + "kl": 0.129638671875, + "learning_rate": 7.629083367570545e-07, + "loss": 0.00012965813220944256, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1663, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 72.80208492279053, + "completions/min_length": 34.625, + "epoch": 3.3077686770910897, + "grad_norm": 0.8868981699109365, + "kl": 0.14312744140625, + "learning_rate": 7.626399341504659e-07, + "loss": -0.0033722962252795696, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1664, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.5, + "completions/mean_length": 62.03125238418579, + "completions/min_length": 29.625, + "epoch": 3.309754281459419, + "grad_norm": 0.004636103307137744, + "kl": 0.10882568359375, + "learning_rate": 7.623714269813097e-07, + "loss": 0.00010876665328396484, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1665, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 78.18750286102295, + "completions/min_length": 36.375, + "epoch": 3.3117398858277487, + "grad_norm": 0.004663018321275086, + "kl": 0.12786865234375, + "learning_rate": 7.621028153564842e-07, + "loss": 0.0001278170821024105, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1666, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 77.05208444595337, + "completions/min_length": 27.75, + "epoch": 3.313725490196078, + "grad_norm": 1.0943482974223269, + "kl": 0.13677978515625, + "learning_rate": 7.618340993829296e-07, + "loss": -0.014341501519083977, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1667, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 67.51041841506958, + "completions/min_length": 29.375, + "epoch": 3.315711094564408, + "grad_norm": 0.02768554531048945, + "kl": 0.132568359375, + "learning_rate": 7.615652791676275e-07, + "loss": 0.00013254139048513025, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1668, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.375, + "completions/mean_length": 63.06250190734863, + "completions/min_length": 26.5, + "epoch": 3.3176966989327377, + "grad_norm": 0.005198287396105113, + "kl": 0.11370849609375, + "learning_rate": 7.612963548176006e-07, + "loss": 0.00011383212404325604, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1669, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 79.40625190734863, + "completions/min_length": 32.75, + "epoch": 3.319682303301067, + "grad_norm": 0.7140729524804544, + "kl": 0.11859130859375, + "learning_rate": 7.610273264399139e-07, + "loss": -0.013541224412620068, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.3624799847602844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1670, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 72.802086353302, + "completions/min_length": 25.875, + "epoch": 3.3216679076693967, + "grad_norm": 0.0054114449703056115, + "kl": 0.12188720703125, + "learning_rate": 7.60758194141673e-07, + "loss": 0.00012187685206299648, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1671, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 72.68750238418579, + "completions/min_length": 32.25, + "epoch": 3.3236535120377266, + "grad_norm": 0.005123265492942159, + "kl": 0.12640380859375, + "learning_rate": 7.604889580300253e-07, + "loss": 0.00012642424553632736, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1672, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.0, + "completions/mean_length": 61.69791793823242, + "completions/min_length": 32.125, + "epoch": 3.325639116406056, + "grad_norm": 1.4226433850996956, + "kl": 0.09539794921875, + "learning_rate": 7.602196182121597e-07, + "loss": 0.005553328897804022, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1673, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 67.583336353302, + "completions/min_length": 32.875, + "epoch": 3.3276247207743856, + "grad_norm": 0.008505849012252384, + "kl": 0.15631103515625, + "learning_rate": 7.599501747953058e-07, + "loss": 0.00015634068404324353, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1674, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.375, + "completions/mean_length": 63.7916693687439, + "completions/min_length": 28.75, + "epoch": 3.329610325142715, + "grad_norm": 0.014416546446628905, + "kl": 0.14996337890625, + "learning_rate": 7.596806278867349e-07, + "loss": 0.00014982081484049559, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1675, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 70.05208492279053, + "completions/min_length": 27.375, + "epoch": 3.331595929511045, + "grad_norm": 0.022812507937014836, + "kl": 0.1673583984375, + "learning_rate": 7.594109775937594e-07, + "loss": 0.00016750273061916232, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1676, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.375, + "completions/mean_length": 63.302085399627686, + "completions/min_length": 31.375, + "epoch": 3.3335815338793746, + "grad_norm": 0.009978699406551094, + "kl": 0.14642333984375, + "learning_rate": 7.591412240237328e-07, + "loss": 0.00014648112119175494, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1677, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.0, + "completions/mean_length": 76.85416984558105, + "completions/min_length": 34.75, + "epoch": 3.335567138247704, + "grad_norm": 0.01161245548076017, + "kl": 0.15521240234375, + "learning_rate": 7.588713672840499e-07, + "loss": 0.0001550914894323796, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1678, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 75.87500286102295, + "completions/min_length": 27.0, + "epoch": 3.3375527426160336, + "grad_norm": 0.006369140127020487, + "kl": 0.148681640625, + "learning_rate": 7.586014074821463e-07, + "loss": 0.00014846259728074074, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1679, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.625, + "completions/mean_length": 74.67708492279053, + "completions/min_length": 32.125, + "epoch": 3.339538346984363, + "grad_norm": 0.01972339892496228, + "kl": 0.1590576171875, + "learning_rate": 7.583313447254986e-07, + "loss": 0.00015887395420577377, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1680, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 67.86458539962769, + "completions/min_length": 28.875, + "epoch": 3.341523951352693, + "grad_norm": 1.6950877150456949, + "kl": 0.14044189453125, + "learning_rate": 7.580611791216249e-07, + "loss": 0.006854488514363766, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1681, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.5, + "completions/mean_length": 72.06250238418579, + "completions/min_length": 32.75, + "epoch": 3.3435095557210226, + "grad_norm": 0.01185761618753757, + "kl": 0.145263671875, + "learning_rate": 7.577909107780836e-07, + "loss": 0.00014527476741932333, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1682, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.0, + "completions/mean_length": 70.91666793823242, + "completions/min_length": 36.125, + "epoch": 3.345495160089352, + "grad_norm": 0.006486787342394677, + "kl": 0.12518310546875, + "learning_rate": 7.575205398024747e-07, + "loss": 0.00012520681775640696, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1683, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 73.54166984558105, + "completions/min_length": 31.625, + "epoch": 3.347480764457682, + "grad_norm": 0.006158420987590548, + "kl": 0.14495849609375, + "learning_rate": 7.572500663024382e-07, + "loss": 0.00014497833035420626, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1684, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.375, + "completions/mean_length": 81.91666793823242, + "completions/min_length": 33.5, + "epoch": 3.3494663688260116, + "grad_norm": 1.1276844325681221, + "kl": 0.1416015625, + "learning_rate": 7.569794903856554e-07, + "loss": 0.006271720863878727, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1685, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.5, + "completions/mean_length": 74.71875286102295, + "completions/min_length": 34.375, + "epoch": 3.351451973194341, + "grad_norm": 0.008847755641461988, + "kl": 0.16290283203125, + "learning_rate": 7.567088121598489e-07, + "loss": 0.0001630194892641157, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1686, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 65.23958492279053, + "completions/min_length": 30.375, + "epoch": 3.3534375775626706, + "grad_norm": 0.009989974382605888, + "kl": 0.14715576171875, + "learning_rate": 7.564380317327809e-07, + "loss": 0.00014691035903524607, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1687, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.625, + "completions/mean_length": 67.927086353302, + "completions/min_length": 29.5, + "epoch": 3.355423181931, + "grad_norm": 0.006753273323245216, + "kl": 0.12701416015625, + "learning_rate": 7.561671492122551e-07, + "loss": 0.00012727115245070308, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1688, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 62.07291889190674, + "completions/min_length": 29.125, + "epoch": 3.35740878629933, + "grad_norm": 0.007011487067309397, + "kl": 0.11737060546875, + "learning_rate": 7.558961647061155e-07, + "loss": 0.000117397794383578, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1689, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 65.3541693687439, + "completions/min_length": 28.875, + "epoch": 3.3593943906676595, + "grad_norm": 0.006668422601808058, + "kl": 0.14642333984375, + "learning_rate": 7.55625078322247e-07, + "loss": 0.0001464033266529441, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1690, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 74.96875238418579, + "completions/min_length": 25.75, + "epoch": 3.361379995035989, + "grad_norm": 0.9052303731246248, + "kl": 0.14935302734375, + "learning_rate": 7.553538901685749e-07, + "loss": -0.011964105069637299, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1691, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 65.71875238418579, + "completions/min_length": 27.5, + "epoch": 3.3633655994043186, + "grad_norm": 0.07706817762589453, + "kl": 0.2091064453125, + "learning_rate": 7.550826003530648e-07, + "loss": 0.00020899111405014992, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1692, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 76.58333587646484, + "completions/min_length": 35.375, + "epoch": 3.365351203772648, + "grad_norm": 0.00727086030957464, + "kl": 0.15716552734375, + "learning_rate": 7.54811208983723e-07, + "loss": 0.0001571819157106802, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1693, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 72.19791841506958, + "completions/min_length": 28.5, + "epoch": 3.367336808140978, + "grad_norm": 0.7756703641063131, + "kl": 0.140625, + "learning_rate": 7.545397161685965e-07, + "loss": -0.003865304170176387, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1694, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 68.79166889190674, + "completions/min_length": 27.25, + "epoch": 3.3693224125093075, + "grad_norm": 0.007018035201710085, + "kl": 0.1446533203125, + "learning_rate": 7.542681220157719e-07, + "loss": 0.0001447978720534593, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1695, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 76.21875190734863, + "completions/min_length": 32.375, + "epoch": 3.371308016877637, + "grad_norm": 0.007976663372422239, + "kl": 0.14532470703125, + "learning_rate": 7.539964266333769e-07, + "loss": 0.00014518079115077853, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1696, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.25, + "completions/mean_length": 65.63541889190674, + "completions/min_length": 31.75, + "epoch": 3.373293621245967, + "grad_norm": 0.009945207154231856, + "kl": 0.16845703125, + "learning_rate": 7.537246301295792e-07, + "loss": 0.00016843291814439, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1697, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 73.67708587646484, + "completions/min_length": 36.625, + "epoch": 3.3752792256142965, + "grad_norm": 0.7434967715816888, + "kl": 0.1787109375, + "learning_rate": 7.534527326125871e-07, + "loss": 0.007135696243494749, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6979166669771075, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1698, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 69.08333587646484, + "completions/min_length": 29.0, + "epoch": 3.377264829982626, + "grad_norm": 0.010068434496510612, + "kl": 0.17083740234375, + "learning_rate": 7.531807341906482e-07, + "loss": 0.00017061618564184755, + "memory(GiB)": 94.21, + "reward": 1.5, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1699, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.125, + "completions/mean_length": 63.46875238418579, + "completions/min_length": 31.625, + "epoch": 3.3792504343509555, + "grad_norm": 0.00976635779637887, + "kl": 0.1580810546875, + "learning_rate": 7.529086349720513e-07, + "loss": 0.0001582516561029479, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1700, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 65.12500238418579, + "completions/min_length": 29.0, + "epoch": 3.381236038719285, + "grad_norm": 0.9611885138389086, + "kl": 0.168701171875, + "learning_rate": 7.526364350651248e-07, + "loss": -0.003184668719768524, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1701, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 69.35416841506958, + "completions/min_length": 30.375, + "epoch": 3.383221643087615, + "grad_norm": 0.009766277868600749, + "kl": 0.15380859375, + "learning_rate": 7.523641345782373e-07, + "loss": 0.00015370766050182283, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1702, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 76.97916984558105, + "completions/min_length": 35.5, + "epoch": 3.3852072474559445, + "grad_norm": 0.7288127808365923, + "kl": 0.184814453125, + "learning_rate": 7.520917336197976e-07, + "loss": -0.0007909226114861667, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1703, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 67.35416841506958, + "completions/min_length": 28.0, + "epoch": 3.387192851824274, + "grad_norm": 0.010006117930618948, + "kl": 0.160400390625, + "learning_rate": 7.51819232298254e-07, + "loss": 0.00016042486822698265, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1704, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 72.68750238418579, + "completions/min_length": 24.875, + "epoch": 3.3891784561926035, + "grad_norm": 1.3397785533930207, + "kl": 0.1732177734375, + "learning_rate": 7.515466307220954e-07, + "loss": 0.003318143542855978, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1705, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 69.50000190734863, + "completions/min_length": 21.625, + "epoch": 3.391164060560933, + "grad_norm": 0.01128887071312138, + "kl": 0.20703125, + "learning_rate": 7.512739289998502e-07, + "loss": 0.00020709529053419828, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1706, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 70.69791889190674, + "completions/min_length": 26.25, + "epoch": 3.393149664929263, + "grad_norm": 0.009535280218155568, + "kl": 0.16607666015625, + "learning_rate": 7.510011272400867e-07, + "loss": 0.00016635702922940254, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1707, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.125, + "completions/mean_length": 67.37500143051147, + "completions/min_length": 27.875, + "epoch": 3.3951352692975925, + "grad_norm": 0.009928533263206538, + "kl": 0.1678466796875, + "learning_rate": 7.507282255514132e-07, + "loss": 0.00016785907791927457, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1708, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 65.41666889190674, + "completions/min_length": 25.125, + "epoch": 3.397120873665922, + "grad_norm": 0.01129909746147632, + "kl": 0.16998291015625, + "learning_rate": 7.504552240424777e-07, + "loss": 0.00016990506264846772, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1709, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 64.21875095367432, + "completions/min_length": 29.0, + "epoch": 3.399106478034252, + "grad_norm": 0.010326836098592045, + "kl": 0.1895751953125, + "learning_rate": 7.501821228219681e-07, + "loss": 0.00018965858907904476, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1710, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.625, + "completions/mean_length": 78.89583587646484, + "completions/min_length": 32.75, + "epoch": 3.4010920824025814, + "grad_norm": 0.007930106955842066, + "kl": 0.156982421875, + "learning_rate": 7.499089219986114e-07, + "loss": 0.00015706241538282484, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1711, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 74.04167032241821, + "completions/min_length": 21.0, + "epoch": 3.403077686770911, + "grad_norm": 0.00881103987580228, + "kl": 0.16668701171875, + "learning_rate": 7.496356216811749e-07, + "loss": 0.00016673810023348778, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1712, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 69.06250095367432, + "completions/min_length": 33.625, + "epoch": 3.4050632911392404, + "grad_norm": 0.5746561055634027, + "kl": 0.14215087890625, + "learning_rate": 7.493622219784654e-07, + "loss": -0.008945518173277378, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1713, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 71.34375238418579, + "completions/min_length": 37.875, + "epoch": 3.40704889550757, + "grad_norm": 0.008620110561251806, + "kl": 0.179443359375, + "learning_rate": 7.490887229993291e-07, + "loss": 0.00017924243002198637, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1714, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 71.85416841506958, + "completions/min_length": 31.125, + "epoch": 3.4090344998759, + "grad_norm": 0.008395071812018153, + "kl": 0.13922119140625, + "learning_rate": 7.488151248526518e-07, + "loss": 0.00013900024350732565, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1715, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 75.56250143051147, + "completions/min_length": 32.625, + "epoch": 3.4110201042442294, + "grad_norm": 0.008315892709862402, + "kl": 0.1593017578125, + "learning_rate": 7.485414276473586e-07, + "loss": 0.0001594174245838076, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1716, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.625, + "completions/mean_length": 70.0729193687439, + "completions/min_length": 28.5, + "epoch": 3.413005708612559, + "grad_norm": 0.00783300529868059, + "kl": 0.1651611328125, + "learning_rate": 7.482676314924143e-07, + "loss": 0.00016503839287906885, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1717, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 61.135419845581055, + "completions/min_length": 24.625, + "epoch": 3.4149913129808884, + "grad_norm": 1.9546241395304556, + "kl": 0.18292236328125, + "learning_rate": 7.479937364968232e-07, + "loss": -0.00027079074061475694, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1718, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 78.18750190734863, + "completions/min_length": 35.5, + "epoch": 3.416976917349218, + "grad_norm": 1.7623421600901559, + "kl": 2.26300048828125, + "learning_rate": 7.477197427696284e-07, + "loss": -0.0010346894850954413, + "memory(GiB)": 94.21, + "reward": 1.7291666865348816, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.18837061524391174, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1719, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 78.32291889190674, + "completions/min_length": 28.625, + "epoch": 3.418962521717548, + "grad_norm": 0.006645385845243952, + "kl": 0.16180419921875, + "learning_rate": 7.47445650419913e-07, + "loss": 0.0001617235830053687, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1720, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 73.79166793823242, + "completions/min_length": 33.5, + "epoch": 3.4209481260858774, + "grad_norm": 0.007464780670110095, + "kl": 0.169921875, + "learning_rate": 7.471714595567987e-07, + "loss": 0.00016992166638374329, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1721, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.25, + "completions/mean_length": 78.35416793823242, + "completions/min_length": 30.375, + "epoch": 3.422933730454207, + "grad_norm": 0.0065839816936756465, + "kl": 0.15899658203125, + "learning_rate": 7.468971702894469e-07, + "loss": 0.00015909734065644443, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1722, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.75, + "completions/mean_length": 74.26041889190674, + "completions/min_length": 28.75, + "epoch": 3.424919334822537, + "grad_norm": 1.6644447947001004, + "kl": 0.1729736328125, + "learning_rate": 7.466227827270583e-07, + "loss": 0.013186678290367126, + "memory(GiB)": 94.21, + "reward": 1.8229166865348816, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.2281883768737316, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1723, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 69.17708492279053, + "completions/min_length": 27.125, + "epoch": 3.4269049391908664, + "grad_norm": 0.006002825201215312, + "kl": 0.14239501953125, + "learning_rate": 7.463482969788718e-07, + "loss": 0.00014237160212360322, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1724, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 68.88541889190674, + "completions/min_length": 28.5, + "epoch": 3.428890543559196, + "grad_norm": 1.8490101057166934, + "kl": 0.22674560546875, + "learning_rate": 7.460737131541665e-07, + "loss": -0.001330991624854505, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.06846532225608826, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.30890411138534546, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1725, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 67.45833539962769, + "completions/min_length": 22.0, + "epoch": 3.4308761479275254, + "grad_norm": 0.006188197772008088, + "kl": 0.135498046875, + "learning_rate": 7.457990313622601e-07, + "loss": 0.00013564078835770488, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1726, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 75.18750286102295, + "completions/min_length": 27.25, + "epoch": 3.432861752295855, + "grad_norm": 0.007752230806755084, + "kl": 0.1744384765625, + "learning_rate": 7.455242517125091e-07, + "loss": 0.00017422600649297237, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1727, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.75, + "completions/mean_length": 63.75000190734863, + "completions/min_length": 27.0, + "epoch": 3.434847356664185, + "grad_norm": 0.009166380740336441, + "kl": 0.157470703125, + "learning_rate": 7.452493743143091e-07, + "loss": 0.0001575587666593492, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1728, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.625, + "completions/mean_length": 66.64583396911621, + "completions/min_length": 30.25, + "epoch": 3.4368329610325143, + "grad_norm": 0.00628188816198669, + "kl": 0.12420654296875, + "learning_rate": 7.449743992770949e-07, + "loss": 0.0001242095313500613, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1729, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 74.05208539962769, + "completions/min_length": 29.375, + "epoch": 3.438818565400844, + "grad_norm": 0.006433618028306444, + "kl": 0.13214111328125, + "learning_rate": 7.446993267103399e-07, + "loss": 0.00013204696006141603, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1730, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 78.08333587646484, + "completions/min_length": 34.25, + "epoch": 3.4408041697691734, + "grad_norm": 0.006373883099692789, + "kl": 0.1527099609375, + "learning_rate": 7.444241567235561e-07, + "loss": 0.00015269606956280768, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1731, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 74.70833587646484, + "completions/min_length": 29.125, + "epoch": 3.442789774137503, + "grad_norm": 1.138698122894617, + "kl": 0.13763427734375, + "learning_rate": 7.441488894262948e-07, + "loss": -0.006746276281774044, + "memory(GiB)": 94.21, + "reward": 1.6979166865348816, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.35874661430716515, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1732, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 82.48958587646484, + "completions/min_length": 31.875, + "epoch": 3.444775378505833, + "grad_norm": 0.008164486759855389, + "kl": 0.13238525390625, + "learning_rate": 7.438735249281459e-07, + "loss": 0.0001324699114775285, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1733, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 69.66666793823242, + "completions/min_length": 24.625, + "epoch": 3.4467609828741623, + "grad_norm": 0.01014397796764063, + "kl": 0.1566162109375, + "learning_rate": 7.435980633387374e-07, + "loss": 0.0001566542632644996, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1734, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 75.95833492279053, + "completions/min_length": 29.5, + "epoch": 3.448746587242492, + "grad_norm": 0.007700556726749011, + "kl": 0.171142578125, + "learning_rate": 7.433225047677368e-07, + "loss": 0.00017101904086302966, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1735, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 75.16666984558105, + "completions/min_length": 35.0, + "epoch": 3.450732191610822, + "grad_norm": 0.0078091523468408, + "kl": 0.1402587890625, + "learning_rate": 7.4304684932485e-07, + "loss": 0.00014021956303622574, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1736, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 73.11458444595337, + "completions/min_length": 21.75, + "epoch": 3.4527177959791513, + "grad_norm": 0.008060159842349679, + "kl": 0.1611328125, + "learning_rate": 7.42771097119821e-07, + "loss": 0.0001611356856301427, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1737, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.625, + "completions/mean_length": 71.78125190734863, + "completions/min_length": 30.75, + "epoch": 3.454703400347481, + "grad_norm": 0.005258868446796736, + "kl": 0.1329345703125, + "learning_rate": 7.424952482624327e-07, + "loss": 0.00013275850506033748, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1738, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 70.76041889190674, + "completions/min_length": 28.5, + "epoch": 3.4566890047158103, + "grad_norm": 0.019238751690534312, + "kl": 0.16473388671875, + "learning_rate": 7.422193028625065e-07, + "loss": 0.00016477250028401613, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1739, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 79.45833683013916, + "completions/min_length": 29.25, + "epoch": 3.45867460908414, + "grad_norm": 0.019995114685998377, + "kl": 0.1566162109375, + "learning_rate": 7.419432610299023e-07, + "loss": 0.0001565864949952811, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1740, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 78.71875333786011, + "completions/min_length": 32.25, + "epoch": 3.4606602134524698, + "grad_norm": 0.004951524678397244, + "kl": 0.12481689453125, + "learning_rate": 7.416671228745181e-07, + "loss": 0.00012488094216678292, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1741, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 74.26041793823242, + "completions/min_length": 36.0, + "epoch": 3.4626458178207993, + "grad_norm": 0.005712938335980011, + "kl": 0.15191650390625, + "learning_rate": 7.413908885062902e-07, + "loss": 0.00015190729754976928, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1742, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 76.083336353302, + "completions/min_length": 25.875, + "epoch": 3.464631422189129, + "grad_norm": 0.004767199588825017, + "kl": 0.11920166015625, + "learning_rate": 7.411145580351938e-07, + "loss": 0.0001191339033539407, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1743, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 79.87500190734863, + "completions/min_length": 38.0, + "epoch": 3.4666170265574583, + "grad_norm": 0.005086541043455125, + "kl": 0.16180419921875, + "learning_rate": 7.408381315712416e-07, + "loss": 0.00016174567281268537, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1744, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.25, + "completions/mean_length": 74.55208587646484, + "completions/min_length": 30.125, + "epoch": 3.468602630925788, + "grad_norm": 0.010422260363298103, + "kl": 0.13922119140625, + "learning_rate": 7.405616092244849e-07, + "loss": 0.00013916482566855848, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1745, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 70.4166693687439, + "completions/min_length": 29.625, + "epoch": 3.4705882352941178, + "grad_norm": 0.005632430520019546, + "kl": 0.1383056640625, + "learning_rate": 7.402849911050136e-07, + "loss": 0.00013838404265698045, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1746, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 78.7291693687439, + "completions/min_length": 32.0, + "epoch": 3.4725738396624473, + "grad_norm": 1.195313390646512, + "kl": 0.15374755859375, + "learning_rate": 7.400082773229549e-07, + "loss": 0.00626950990408659, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1747, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 75.95833683013916, + "completions/min_length": 29.625, + "epoch": 3.4745594440307768, + "grad_norm": 1.471272619877359, + "kl": 0.15460205078125, + "learning_rate": 7.397314679884745e-07, + "loss": -0.018831651657819748, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.18335824459791183, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1748, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 72.63541889190674, + "completions/min_length": 28.75, + "epoch": 3.4765450483991067, + "grad_norm": 0.006510104769034702, + "kl": 0.13385009765625, + "learning_rate": 7.394545632117761e-07, + "loss": 0.00013389332161750644, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1749, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 72.09375190734863, + "completions/min_length": 31.875, + "epoch": 3.4785306527674362, + "grad_norm": 0.9269615218887043, + "kl": 0.15655517578125, + "learning_rate": 7.391775631031015e-07, + "loss": 0.01420507114380598, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1750, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 73.11458587646484, + "completions/min_length": 27.375, + "epoch": 3.4805162571357657, + "grad_norm": 1.2281621569462045, + "kl": 0.16961669921875, + "learning_rate": 7.389004677727304e-07, + "loss": -0.00881937239319086, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166669771075, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1751, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 68.4479193687439, + "completions/min_length": 26.5, + "epoch": 3.4825018615040952, + "grad_norm": 0.004998159829211148, + "kl": 0.1248779296875, + "learning_rate": 7.386232773309801e-07, + "loss": 0.00012489521759562194, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1752, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 76.05208492279053, + "completions/min_length": 32.25, + "epoch": 3.4844874658724247, + "grad_norm": 0.0053036963477892714, + "kl": 0.14691162109375, + "learning_rate": 7.383459918882063e-07, + "loss": 0.0001469059643568471, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1753, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.75, + "completions/mean_length": 83.45833492279053, + "completions/min_length": 37.5, + "epoch": 3.4864730702407547, + "grad_norm": 1.052878812999578, + "kl": 0.14801025390625, + "learning_rate": 7.380686115548023e-07, + "loss": 0.002062710002064705, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1754, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.75, + "completions/mean_length": 76.20833539962769, + "completions/min_length": 30.375, + "epoch": 3.488458674609084, + "grad_norm": 0.004662863880316448, + "kl": 0.1090087890625, + "learning_rate": 7.377911364411988e-07, + "loss": 0.00010899835615418851, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1755, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 73.04166889190674, + "completions/min_length": 27.5, + "epoch": 3.4904442789774137, + "grad_norm": 0.008554424656485449, + "kl": 0.1456298828125, + "learning_rate": 7.375135666578649e-07, + "loss": 0.00014553712389897555, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1756, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 69.98958587646484, + "completions/min_length": 28.375, + "epoch": 3.4924298833457432, + "grad_norm": 0.7139281385286269, + "kl": 0.13531494140625, + "learning_rate": 7.37235902315307e-07, + "loss": 0.005020014476031065, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1757, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 75.62500190734863, + "completions/min_length": 28.75, + "epoch": 3.4944154877140727, + "grad_norm": 0.004439238411343683, + "kl": 0.11932373046875, + "learning_rate": 7.36958143524069e-07, + "loss": 0.00011935785732930526, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1758, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 81.85416841506958, + "completions/min_length": 25.875, + "epoch": 3.4964010920824027, + "grad_norm": 0.006904535457039458, + "kl": 0.163818359375, + "learning_rate": 7.366802903947329e-07, + "loss": 0.00016380642773583531, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1759, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 78.34375238418579, + "completions/min_length": 23.5, + "epoch": 3.498386696450732, + "grad_norm": 0.007442303936144033, + "kl": 0.16259765625, + "learning_rate": 7.364023430379177e-07, + "loss": 0.0001626228477107361, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1760, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 74.927086353302, + "completions/min_length": 32.625, + "epoch": 3.5003723008190617, + "grad_norm": 0.006725138916354058, + "kl": 0.14306640625, + "learning_rate": 7.361243015642804e-07, + "loss": 0.0001429548137821257, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1761, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 83.68750286102295, + "completions/min_length": 30.75, + "epoch": 3.5023579051873917, + "grad_norm": 0.006402114686299546, + "kl": 0.141845703125, + "learning_rate": 7.35846166084515e-07, + "loss": 0.00014180198195390403, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1762, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 74.11458587646484, + "completions/min_length": 32.0, + "epoch": 3.504343509555721, + "grad_norm": 1.5242345691940025, + "kl": 0.15777587890625, + "learning_rate": 7.355679367093535e-07, + "loss": 0.005624070763587952, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1763, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 82.16666984558105, + "completions/min_length": 24.25, + "epoch": 3.5063291139240507, + "grad_norm": 0.005008337493259989, + "kl": 0.139892578125, + "learning_rate": 7.352896135495648e-07, + "loss": 0.00013970279542263597, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1764, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 79.03125095367432, + "completions/min_length": 34.125, + "epoch": 3.50831471829238, + "grad_norm": 0.0063231165661246464, + "kl": 0.128662109375, + "learning_rate": 7.350111967159551e-07, + "loss": 0.00012871438229922205, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1765, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 78.36458492279053, + "completions/min_length": 33.125, + "epoch": 3.5103003226607097, + "grad_norm": 0.9796185565579775, + "kl": 0.1348876953125, + "learning_rate": 7.347326863193683e-07, + "loss": 0.01389553677290678, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1766, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.125, + "completions/mean_length": 74.03125333786011, + "completions/min_length": 27.0, + "epoch": 3.5122859270290396, + "grad_norm": 0.012969665645352702, + "kl": 0.1329345703125, + "learning_rate": 7.344540824706854e-07, + "loss": 0.00013288123591337353, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1767, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 74.65625238418579, + "completions/min_length": 31.375, + "epoch": 3.514271531397369, + "grad_norm": 0.7792473241226281, + "kl": 0.13555908203125, + "learning_rate": 7.341753852808243e-07, + "loss": -0.008246229030191898, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1768, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 71.57291793823242, + "completions/min_length": 23.125, + "epoch": 3.5162571357656986, + "grad_norm": 0.006008880970299535, + "kl": 0.1143798828125, + "learning_rate": 7.338965948607405e-07, + "loss": 0.00011432982137193903, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1769, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.0, + "completions/mean_length": 73.43750286102295, + "completions/min_length": 24.5, + "epoch": 3.518242740134028, + "grad_norm": 0.0045265401935526105, + "kl": 0.14056396484375, + "learning_rate": 7.336177113214264e-07, + "loss": 0.00014058224041946232, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1770, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 72.92708587646484, + "completions/min_length": 26.875, + "epoch": 3.5202283445023577, + "grad_norm": 0.008165285444302825, + "kl": 0.12371826171875, + "learning_rate": 7.333387347739116e-07, + "loss": 0.00012376814265735447, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1771, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 69.07291793823242, + "completions/min_length": 26.25, + "epoch": 3.5222139488706876, + "grad_norm": 0.004496528961753019, + "kl": 0.1346435546875, + "learning_rate": 7.330596653292624e-07, + "loss": 0.00013465905794873834, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1772, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 70.33333587646484, + "completions/min_length": 26.75, + "epoch": 3.524199553239017, + "grad_norm": 0.005157170136741095, + "kl": 0.123809814453125, + "learning_rate": 7.327805030985821e-07, + "loss": 0.00012389587936922908, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1773, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 75.28125190734863, + "completions/min_length": 33.25, + "epoch": 3.5261851576073466, + "grad_norm": 1.0780627597589751, + "kl": 0.132080078125, + "learning_rate": 7.325012481930119e-07, + "loss": -0.006494271568953991, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1774, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 73.0729193687439, + "completions/min_length": 31.5, + "epoch": 3.5281707619756766, + "grad_norm": 0.005637240376047731, + "kl": 0.1092529296875, + "learning_rate": 7.322219007237284e-07, + "loss": 0.00010925737296929583, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1775, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 70.18750190734863, + "completions/min_length": 32.625, + "epoch": 3.530156366344006, + "grad_norm": 0.0054442983551818164, + "kl": 0.14739990234375, + "learning_rate": 7.319424608019462e-07, + "loss": 0.0001474183809477836, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1776, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.875, + "completions/mean_length": 80.94791984558105, + "completions/min_length": 32.625, + "epoch": 3.5321419707123356, + "grad_norm": 0.006323462722378896, + "kl": 0.13037109375, + "learning_rate": 7.31662928538916e-07, + "loss": 0.0001303914177697152, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1777, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 73.26041841506958, + "completions/min_length": 28.125, + "epoch": 3.534127575080665, + "grad_norm": 2.063083501267316, + "kl": 0.14923095703125, + "learning_rate": 7.31383304045926e-07, + "loss": -0.005754535552114248, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.05974817834794521, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.2231760062277317, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1778, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 70.98958539962769, + "completions/min_length": 30.75, + "epoch": 3.5361131794489946, + "grad_norm": 0.00569979668839109, + "kl": 0.13128662109375, + "learning_rate": 7.311035874343003e-07, + "loss": 0.00013118793140165508, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1779, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 68.1354193687439, + "completions/min_length": 18.5, + "epoch": 3.5380987838173246, + "grad_norm": 0.005362944183722852, + "kl": 0.10382080078125, + "learning_rate": 7.308237788154001e-07, + "loss": 0.00010373725672252476, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1780, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 78.64583492279053, + "completions/min_length": 37.75, + "epoch": 3.540084388185654, + "grad_norm": 0.005507703822651866, + "kl": 0.13519287109375, + "learning_rate": 7.305438783006235e-07, + "loss": 0.00013502707588486373, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1781, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 69.90625238418579, + "completions/min_length": 28.75, + "epoch": 3.5420699925539836, + "grad_norm": 0.005272995335092888, + "kl": 0.12017822265625, + "learning_rate": 7.302638860014045e-07, + "loss": 0.00012019756104564294, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1782, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 67.47916889190674, + "completions/min_length": 26.25, + "epoch": 3.544055596922313, + "grad_norm": 0.005618939014267104, + "kl": 0.1375732421875, + "learning_rate": 7.299838020292142e-07, + "loss": 0.00013759899593424052, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1783, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 78.7916693687439, + "completions/min_length": 30.75, + "epoch": 3.5460412012906426, + "grad_norm": 0.006450190090770658, + "kl": 0.15087890625, + "learning_rate": 7.297036264955598e-07, + "loss": 0.00015095654816832393, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1784, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.25, + "completions/mean_length": 79.89583683013916, + "completions/min_length": 28.625, + "epoch": 3.5480268056589725, + "grad_norm": 0.006078465170606672, + "kl": 0.12969970703125, + "learning_rate": 7.294233595119856e-07, + "loss": 0.00012971264368388802, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1785, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.5, + "completions/mean_length": 68.27083444595337, + "completions/min_length": 25.625, + "epoch": 3.550012410027302, + "grad_norm": 1.060742909163463, + "kl": 0.103515625, + "learning_rate": 7.291430011900714e-07, + "loss": 0.002888138871639967, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1786, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 74.19791984558105, + "completions/min_length": 30.5, + "epoch": 3.5519980143956316, + "grad_norm": 0.005159957355823121, + "kl": 0.122314453125, + "learning_rate": 7.288625516414341e-07, + "loss": 0.00012228739797137678, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1787, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.5, + "completions/mean_length": 67.57291841506958, + "completions/min_length": 26.625, + "epoch": 3.5539836187639615, + "grad_norm": 0.005048837208848599, + "kl": 0.09710693359375, + "learning_rate": 7.285820109777267e-07, + "loss": 9.700824739411473e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1788, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 81.55208492279053, + "completions/min_length": 35.625, + "epoch": 3.555969223132291, + "grad_norm": 2.802655587411937, + "kl": 0.13677978515625, + "learning_rate": 7.283013793106383e-07, + "loss": -0.0034123463556170464, + "memory(GiB)": 94.21, + "reward": 1.8437500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.8437500074505806, + "rewards/CineAccuracyORM/std": 0.1783013790845871, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1789, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 63.66666841506958, + "completions/min_length": 22.0, + "epoch": 3.5579548275006205, + "grad_norm": 0.005373974337805658, + "kl": 0.11224365234375, + "learning_rate": 7.280206567518942e-07, + "loss": 0.00011214089317945763, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1790, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 77.76041984558105, + "completions/min_length": 35.0, + "epoch": 3.55994043186895, + "grad_norm": 0.7314390909669484, + "kl": 0.1492919921875, + "learning_rate": 7.277398434132564e-07, + "loss": 0.010314702056348324, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166669771075, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1791, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 76.07291889190674, + "completions/min_length": 24.0, + "epoch": 3.5619260362372795, + "grad_norm": 0.005081806749917839, + "kl": 0.1201171875, + "learning_rate": 7.274589394065227e-07, + "loss": 0.00012014494132017717, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1792, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 70.18750190734863, + "completions/min_length": 32.375, + "epoch": 3.5639116406056095, + "grad_norm": 2.4177105628840043, + "kl": 0.1680908203125, + "learning_rate": 7.271779448435265e-07, + "loss": 0.007997551001608372, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1793, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 73.53125286102295, + "completions/min_length": 29.875, + "epoch": 3.565897244973939, + "grad_norm": 0.8596422892425496, + "kl": 0.170166015625, + "learning_rate": 7.268968598361381e-07, + "loss": 0.0001700421271380037, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1794, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.125, + "completions/mean_length": 67.48958492279053, + "completions/min_length": 25.875, + "epoch": 3.5678828493422685, + "grad_norm": 0.004895542140499346, + "kl": 0.09979248046875, + "learning_rate": 7.266156844962637e-07, + "loss": 9.970397513825446e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1795, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 75.10416984558105, + "completions/min_length": 35.0, + "epoch": 3.569868453710598, + "grad_norm": 0.005409499908882387, + "kl": 0.12469482421875, + "learning_rate": 7.263344189358446e-07, + "loss": 0.00012467967462725937, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1796, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.25, + "completions/mean_length": 76.42708539962769, + "completions/min_length": 30.5, + "epoch": 3.5718540580789275, + "grad_norm": 0.9104884220848838, + "kl": 0.1405029296875, + "learning_rate": 7.260530632668589e-07, + "loss": 0.01337971817702055, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1797, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 72.15625190734863, + "completions/min_length": 29.25, + "epoch": 3.5738396624472575, + "grad_norm": 0.004686041944902814, + "kl": 0.112548828125, + "learning_rate": 7.257716176013204e-07, + "loss": 0.00011262830230407417, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1798, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 72.72916889190674, + "completions/min_length": 27.5, + "epoch": 3.575825266815587, + "grad_norm": 0.005643537075254303, + "kl": 0.14208984375, + "learning_rate": 7.254900820512788e-07, + "loss": 0.00014193866809364408, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1799, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 66.52083444595337, + "completions/min_length": 28.0, + "epoch": 3.5778108711839165, + "grad_norm": 0.0048415769370699245, + "kl": 0.09820556640625, + "learning_rate": 7.252084567288188e-07, + "loss": 9.824969311011955e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1800, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 63.302085876464844, + "completions/min_length": 23.0, + "epoch": 3.5797964755522464, + "grad_norm": 1.7110392222200153, + "kl": 0.89947509765625, + "learning_rate": 7.24926741746062e-07, + "loss": -0.0016936659812927246, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1801, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.125, + "completions/mean_length": 61.156251430511475, + "completions/min_length": 15.375, + "epoch": 3.581782079920576, + "grad_norm": 0.1543262279104331, + "kl": 0.21392822265625, + "learning_rate": 7.246449372151651e-07, + "loss": 0.00021442461002152413, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1802, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 61.343750953674316, + "completions/min_length": 25.75, + "epoch": 3.5837676842889055, + "grad_norm": 0.006875509190911133, + "kl": 0.1259765625, + "learning_rate": 7.243630432483203e-07, + "loss": 0.0001259248674614355, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1803, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 72.00000286102295, + "completions/min_length": 37.25, + "epoch": 3.585753288657235, + "grad_norm": 0.0072108871217124145, + "kl": 0.1336669921875, + "learning_rate": 7.240810599577557e-07, + "loss": 0.00013358003343455493, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1804, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.25, + "completions/mean_length": 69.25000286102295, + "completions/min_length": 32.625, + "epoch": 3.5877388930255645, + "grad_norm": 0.007652425373720988, + "kl": 0.123046875, + "learning_rate": 7.237989874557348e-07, + "loss": 0.00012306266580708325, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1805, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 68.67708444595337, + "completions/min_length": 29.375, + "epoch": 3.5897244973938944, + "grad_norm": 0.005980489894497306, + "kl": 0.1192626953125, + "learning_rate": 7.235168258545569e-07, + "loss": 0.00011916876246687025, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1806, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.125, + "completions/mean_length": 68.59375333786011, + "completions/min_length": 26.375, + "epoch": 3.591710101762224, + "grad_norm": 0.007014690623918719, + "kl": 0.1312255859375, + "learning_rate": 7.232345752665563e-07, + "loss": 0.0001311766100116074, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1807, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.75, + "completions/mean_length": 62.38541793823242, + "completions/min_length": 25.625, + "epoch": 3.5936957061305534, + "grad_norm": 0.006197969225767213, + "kl": 0.1141357421875, + "learning_rate": 7.229522358041032e-07, + "loss": 0.00011410910519771278, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1808, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.125, + "completions/mean_length": 65.30208539962769, + "completions/min_length": 29.75, + "epoch": 3.595681310498883, + "grad_norm": 0.00851083362483851, + "kl": 0.13275146484375, + "learning_rate": 7.226698075796028e-07, + "loss": 0.00013260229025036097, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1809, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 69.68750190734863, + "completions/min_length": 34.625, + "epoch": 3.5976669148672125, + "grad_norm": 0.005047180379247043, + "kl": 0.12200927734375, + "learning_rate": 7.223872907054959e-07, + "loss": 0.00012217086623422801, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1810, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.25, + "completions/mean_length": 73.14583539962769, + "completions/min_length": 27.5, + "epoch": 3.5996525192355424, + "grad_norm": 0.006522959598551174, + "kl": 0.13055419921875, + "learning_rate": 7.221046852942585e-07, + "loss": 0.00013057971955277026, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1811, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 67.66666841506958, + "completions/min_length": 29.0, + "epoch": 3.601638123603872, + "grad_norm": 1.3865765564894799, + "kl": 0.12506103515625, + "learning_rate": 7.218219914584018e-07, + "loss": -0.005225293338298798, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1812, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 65.77083587646484, + "completions/min_length": 29.875, + "epoch": 3.6036237279722014, + "grad_norm": 0.006628537842286538, + "kl": 0.12939453125, + "learning_rate": 7.215392093104724e-07, + "loss": 0.00012932605750393122, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1813, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.25, + "completions/mean_length": 63.03125190734863, + "completions/min_length": 27.875, + "epoch": 3.6056093323405314, + "grad_norm": 0.7043513720869784, + "kl": 0.12054443359375, + "learning_rate": 7.212563389630516e-07, + "loss": 0.0026327527593821287, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1814, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.75, + "completions/mean_length": 64.69791984558105, + "completions/min_length": 30.375, + "epoch": 3.607594936708861, + "grad_norm": 1.879648578374954, + "kl": 0.1417236328125, + "learning_rate": 7.209733805287566e-07, + "loss": 0.018731407821178436, + "memory(GiB)": 94.21, + "reward": 1.8854166865348816, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.12591182813048363, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1815, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 74.06250143051147, + "completions/min_length": 32.375, + "epoch": 3.6095805410771904, + "grad_norm": 0.007056660868362295, + "kl": 0.14337158203125, + "learning_rate": 7.206903341202388e-07, + "loss": 0.00014341103087645024, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1816, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 67.75000095367432, + "completions/min_length": 32.625, + "epoch": 3.61156614544552, + "grad_norm": 0.006371641351019976, + "kl": 0.13885498046875, + "learning_rate": 7.204071998501851e-07, + "loss": 0.00013888889225199819, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1817, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 68.11458492279053, + "completions/min_length": 29.125, + "epoch": 3.6135517498138494, + "grad_norm": 0.008241844511920065, + "kl": 0.12786865234375, + "learning_rate": 7.201239778313172e-07, + "loss": 0.00012802897254005075, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1818, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.625, + "completions/mean_length": 64.80208492279053, + "completions/min_length": 29.0, + "epoch": 3.6155373541821794, + "grad_norm": 0.006215019744812914, + "kl": 0.13055419921875, + "learning_rate": 7.198406681763924e-07, + "loss": 0.0001305764017160982, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1819, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.75, + "completions/mean_length": 55.61458492279053, + "completions/min_length": 23.25, + "epoch": 3.617522958550509, + "grad_norm": 0.00827985827536905, + "kl": 0.11395263671875, + "learning_rate": 7.195572709982017e-07, + "loss": 0.00011397053458495066, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1820, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.5, + "completions/mean_length": 64.2604193687439, + "completions/min_length": 24.75, + "epoch": 3.6195085629188384, + "grad_norm": 0.006110202754089011, + "kl": 0.14373779296875, + "learning_rate": 7.192737864095717e-07, + "loss": 0.00014358545013237745, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1821, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.875, + "completions/mean_length": 57.56250190734863, + "completions/min_length": 24.125, + "epoch": 3.621494167287168, + "grad_norm": 0.009407831162796688, + "kl": 0.11248779296875, + "learning_rate": 7.189902145233639e-07, + "loss": 0.0001125200287788175, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1822, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 71.43750190734863, + "completions/min_length": 29.125, + "epoch": 3.6234797716554974, + "grad_norm": 0.0065155727812167, + "kl": 0.14495849609375, + "learning_rate": 7.18706555452474e-07, + "loss": 0.00014499214012175798, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1823, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 60.83333492279053, + "completions/min_length": 19.375, + "epoch": 3.6254653760238273, + "grad_norm": 2.731760216377746, + "kl": 0.1307373046875, + "learning_rate": 7.184228093098331e-07, + "loss": 0.020896030589938164, + "memory(GiB)": 94.21, + "reward": 1.8437500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.8437500074505806, + "rewards/CineAccuracyORM/std": 0.08474057167768478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1824, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.375, + "completions/mean_length": 63.562500953674316, + "completions/min_length": 27.75, + "epoch": 3.627450980392157, + "grad_norm": 0.007092418579656643, + "kl": 0.1331787109375, + "learning_rate": 7.181389762084062e-07, + "loss": 0.00013313154340721667, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1825, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 62.55208444595337, + "completions/min_length": 27.5, + "epoch": 3.6294365847604864, + "grad_norm": 0.006724095345345822, + "kl": 0.09979248046875, + "learning_rate": 7.178550562611937e-07, + "loss": 9.976244473364204e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1826, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 56.54166841506958, + "completions/min_length": 28.125, + "epoch": 3.6314221891288163, + "grad_norm": 0.008591325526163072, + "kl": 0.13714599609375, + "learning_rate": 7.175710495812299e-07, + "loss": 0.00013697068789042532, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1827, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.875, + "completions/mean_length": 58.02083396911621, + "completions/min_length": 27.875, + "epoch": 3.633407793497146, + "grad_norm": 2.0772600373004755, + "kl": 0.12646484375, + "learning_rate": 7.172869562815841e-07, + "loss": 0.013000253587961197, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1828, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.5, + "completions/mean_length": 66.60416889190674, + "completions/min_length": 23.5, + "epoch": 3.6353933978654753, + "grad_norm": 0.006807599656466557, + "kl": 0.13201904296875, + "learning_rate": 7.1700277647536e-07, + "loss": 0.0001318681170232594, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1829, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.75, + "completions/mean_length": 66.94791793823242, + "completions/min_length": 28.125, + "epoch": 3.637379002233805, + "grad_norm": 0.006777744219711361, + "kl": 0.10772705078125, + "learning_rate": 7.167185102756954e-07, + "loss": 0.00010777149873320013, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1830, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 98.75, + "completions/mean_length": 55.35416841506958, + "completions/min_length": 27.625, + "epoch": 3.6393646066021343, + "grad_norm": 0.006369809038275092, + "kl": 0.1226806640625, + "learning_rate": 7.164341577957628e-07, + "loss": 0.0001226781605510041, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1831, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.125, + "completions/mean_length": 57.145835399627686, + "completions/min_length": 24.625, + "epoch": 3.6413502109704643, + "grad_norm": 0.004898125680672852, + "kl": 0.09814453125, + "learning_rate": 7.161497191487692e-07, + "loss": 9.809241601033136e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1832, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 62.14583492279053, + "completions/min_length": 25.25, + "epoch": 3.643335815338794, + "grad_norm": 0.004841641715958726, + "kl": 0.099365234375, + "learning_rate": 7.158651944479554e-07, + "loss": 9.944696648744866e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1833, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 67.0104193687439, + "completions/min_length": 29.25, + "epoch": 3.6453214197071233, + "grad_norm": 0.005631832695056003, + "kl": 0.109375, + "learning_rate": 7.155805838065971e-07, + "loss": 0.00010937352635664865, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1834, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 65.58333492279053, + "completions/min_length": 27.25, + "epoch": 3.647307024075453, + "grad_norm": 2.0516988789726516, + "kl": 0.121826171875, + "learning_rate": 7.152958873380036e-07, + "loss": 0.0021626483649015427, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1835, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 59.156251430511475, + "completions/min_length": 28.5, + "epoch": 3.6492926284437823, + "grad_norm": 0.00792809365393854, + "kl": 0.104736328125, + "learning_rate": 7.150111051555187e-07, + "loss": 0.00010478984040673822, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1836, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.0, + "completions/mean_length": 65.06250143051147, + "completions/min_length": 27.125, + "epoch": 3.6512782328121123, + "grad_norm": 0.006272151654520285, + "kl": 0.12457275390625, + "learning_rate": 7.147262373725203e-07, + "loss": 0.00012455848627723753, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1837, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.375, + "completions/mean_length": 64.45833587646484, + "completions/min_length": 29.25, + "epoch": 3.653263837180442, + "grad_norm": 0.010138752430041276, + "kl": 0.11981201171875, + "learning_rate": 7.144412841024203e-07, + "loss": 0.0001199308899231255, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1838, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.625, + "completions/mean_length": 63.64583492279053, + "completions/min_length": 30.0, + "epoch": 3.6552494415487713, + "grad_norm": 1.959911313887933, + "kl": 0.13482666015625, + "learning_rate": 7.141562454586649e-07, + "loss": 0.0025736771058291197, + "memory(GiB)": 94.21, + "reward": 1.739583358168602, + "reward_std": 0.0765465535223484, + "rewards/CineAccuracyORM/mean": 0.7395833432674408, + "rewards/CineAccuracyORM/std": 0.3323722183704376, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1839, + "train_speed(iter/s)": 0.022671 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.875, + "completions/mean_length": 61.312500953674316, + "completions/min_length": 28.125, + "epoch": 3.6572350459171012, + "grad_norm": 0.005725444141515463, + "kl": 0.11456298828125, + "learning_rate": 7.13871121554734e-07, + "loss": 0.0001146845388575457, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1840, + "train_speed(iter/s)": 0.022672 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 69.6666693687439, + "completions/min_length": 29.5, + "epoch": 3.6592206502854308, + "grad_norm": 1.1932159932838553, + "kl": 0.15057373046875, + "learning_rate": 7.135859125041413e-07, + "loss": -0.008493129163980484, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1841, + "train_speed(iter/s)": 0.022673 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.125, + "completions/mean_length": 58.406251430511475, + "completions/min_length": 22.125, + "epoch": 3.6612062546537603, + "grad_norm": 2.7533264143821095, + "kl": 0.140380859375, + "learning_rate": 7.133006184204346e-07, + "loss": 0.009647785685956478, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1842, + "train_speed(iter/s)": 0.022675 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 68.00000190734863, + "completions/min_length": 26.875, + "epoch": 3.6631918590220898, + "grad_norm": 0.008914227966030367, + "kl": 0.1353759765625, + "learning_rate": 7.130152394171958e-07, + "loss": 0.00013552504242397845, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1843, + "train_speed(iter/s)": 0.022676 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 70.57291984558105, + "completions/min_length": 30.125, + "epoch": 3.6651774633904193, + "grad_norm": 0.004903080778798898, + "kl": 0.116058349609375, + "learning_rate": 7.127297756080404e-07, + "loss": 0.0001160261090262793, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1844, + "train_speed(iter/s)": 0.022676 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.375, + "completions/mean_length": 64.48958492279053, + "completions/min_length": 24.75, + "epoch": 3.6671630677587492, + "grad_norm": 0.009379265161374049, + "kl": 0.10693359375, + "learning_rate": 7.124442271066174e-07, + "loss": 0.00010711261711549014, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1845, + "train_speed(iter/s)": 0.022675 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 65.09375190734863, + "completions/min_length": 24.0, + "epoch": 3.6691486721270787, + "grad_norm": 0.013110222449817103, + "kl": 0.11053466796875, + "learning_rate": 7.121585940266098e-07, + "loss": 0.00011059310054406524, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1846, + "train_speed(iter/s)": 0.022676 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 68.28125143051147, + "completions/min_length": 29.25, + "epoch": 3.6711342764954082, + "grad_norm": 0.7796965033919522, + "kl": 0.16357421875, + "learning_rate": 7.118728764817344e-07, + "loss": 0.0032601915299892426, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1847, + "train_speed(iter/s)": 0.022676 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 58.86458444595337, + "completions/min_length": 24.875, + "epoch": 3.6731198808637378, + "grad_norm": 0.005150863181530681, + "kl": 0.1390380859375, + "learning_rate": 7.115870745857415e-07, + "loss": 0.0001388014352414757, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1848, + "train_speed(iter/s)": 0.022677 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.875, + "completions/mean_length": 59.76041841506958, + "completions/min_length": 24.375, + "epoch": 3.6751054852320673, + "grad_norm": 1.407358173851238, + "kl": 0.12493896484375, + "learning_rate": 7.113011884524147e-07, + "loss": -0.01851654425263405, + "memory(GiB)": 94.21, + "reward": 1.8229166865348816, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.2281883768737316, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1849, + "train_speed(iter/s)": 0.022675 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 105.375, + "completions/mean_length": 55.69791793823242, + "completions/min_length": 22.375, + "epoch": 3.677091089600397, + "grad_norm": 0.007230555246015449, + "kl": 0.11529541015625, + "learning_rate": 7.110152181955715e-07, + "loss": 0.00011520447878865525, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1850, + "train_speed(iter/s)": 0.022677 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 66.18750190734863, + "completions/min_length": 26.5, + "epoch": 3.6790766939687267, + "grad_norm": 0.005970788714428602, + "kl": 0.1524658203125, + "learning_rate": 7.107291639290626e-07, + "loss": 0.0001524357357993722, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1851, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 66.76041793823242, + "completions/min_length": 21.875, + "epoch": 3.6810622983370562, + "grad_norm": 0.0058751415618637035, + "kl": 0.092041015625, + "learning_rate": 7.104430257667729e-07, + "loss": 9.202898945659399e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1852, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 67.36458444595337, + "completions/min_length": 28.25, + "epoch": 3.683047902705386, + "grad_norm": 0.005798619096735513, + "kl": 0.12322998046875, + "learning_rate": 7.101568038226193e-07, + "loss": 0.00012326159048825502, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1853, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.375, + "completions/mean_length": 66.61458539962769, + "completions/min_length": 29.0, + "epoch": 3.6850335070737157, + "grad_norm": 0.005238016725894942, + "kl": 0.10540771484375, + "learning_rate": 7.098704982105533e-07, + "loss": 0.00010534387547522783, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1854, + "train_speed(iter/s)": 0.022677 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.75, + "completions/mean_length": 66.72916841506958, + "completions/min_length": 27.25, + "epoch": 3.687019111442045, + "grad_norm": 0.00717185441201321, + "kl": 0.10296630859375, + "learning_rate": 7.095841090445593e-07, + "loss": 0.00010290753562003374, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1855, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.625, + "completions/mean_length": 62.51041793823242, + "completions/min_length": 25.75, + "epoch": 3.6890047158103747, + "grad_norm": 0.007378353955862464, + "kl": 0.11468505859375, + "learning_rate": 7.092976364386549e-07, + "loss": 0.00011478550732135773, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1856, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.5, + "completions/mean_length": 65.7604193687439, + "completions/min_length": 27.625, + "epoch": 3.690990320178704, + "grad_norm": 1.588375474639574, + "kl": 0.13885498046875, + "learning_rate": 7.09011080506891e-07, + "loss": -0.0022888171952217817, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1857, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.375, + "completions/mean_length": 57.500000953674316, + "completions/min_length": 23.125, + "epoch": 3.692975924547034, + "grad_norm": 0.004777811878242808, + "kl": 0.0960693359375, + "learning_rate": 7.087244413633515e-07, + "loss": 9.60480174398981e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1858, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 69.85416889190674, + "completions/min_length": 28.25, + "epoch": 3.6949615289153637, + "grad_norm": 1.323252843270289, + "kl": 0.2032470703125, + "learning_rate": 7.084377191221537e-07, + "loss": 0.005240024998784065, + "memory(GiB)": 94.21, + "reward": 1.8125000149011612, + "reward_std": 0.05103103816509247, + "rewards/CineAccuracyORM/mean": 0.8125000074505806, + "rewards/CineAccuracyORM/std": 0.23100870847702026, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1859, + "train_speed(iter/s)": 0.02268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 68.81250238418579, + "completions/min_length": 28.75, + "epoch": 3.696947133283693, + "grad_norm": 0.006342188120975697, + "kl": 0.12506103515625, + "learning_rate": 7.081509138974476e-07, + "loss": 0.00012491666711866856, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1860, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.375, + "completions/mean_length": 64.53125190734863, + "completions/min_length": 20.625, + "epoch": 3.6989327376520227, + "grad_norm": 0.007387791806852369, + "kl": 0.12518310546875, + "learning_rate": 7.078640258034169e-07, + "loss": 0.0001251039357157424, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1861, + "train_speed(iter/s)": 0.022679 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 61.34375286102295, + "completions/min_length": 28.875, + "epoch": 3.700918342020352, + "grad_norm": 0.009776880280304325, + "kl": 0.10504150390625, + "learning_rate": 7.075770549542776e-07, + "loss": 0.00010499759810045362, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1862, + "train_speed(iter/s)": 0.022679 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 73.62500190734863, + "completions/min_length": 31.5, + "epoch": 3.702903946388682, + "grad_norm": 0.006774700881786028, + "kl": 0.1256103515625, + "learning_rate": 7.072900014642788e-07, + "loss": 0.00012560535105876625, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1863, + "train_speed(iter/s)": 0.022681 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 77.3229193687439, + "completions/min_length": 30.5, + "epoch": 3.7048895507570117, + "grad_norm": 0.6222512603968614, + "kl": 0.13238525390625, + "learning_rate": 7.070028654477031e-07, + "loss": -0.00737034622579813, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1864, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 64.88541746139526, + "completions/min_length": 21.875, + "epoch": 3.706875155125341, + "grad_norm": 0.006040225226120234, + "kl": 0.09716796875, + "learning_rate": 7.06715647018865e-07, + "loss": 9.721890091896057e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1865, + "train_speed(iter/s)": 0.022679 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 69.19791889190674, + "completions/min_length": 25.625, + "epoch": 3.708860759493671, + "grad_norm": 0.005582538170644881, + "kl": 0.105712890625, + "learning_rate": 7.064283462921124e-07, + "loss": 0.00010561906674411148, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1866, + "train_speed(iter/s)": 0.022679 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.875, + "completions/mean_length": 60.51041793823242, + "completions/min_length": 24.375, + "epoch": 3.7108463638620006, + "grad_norm": 0.006752502014591171, + "kl": 0.1104736328125, + "learning_rate": 7.061409633818261e-07, + "loss": 0.00011047690350096673, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1867, + "train_speed(iter/s)": 0.022682 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.875, + "completions/mean_length": 68.12500333786011, + "completions/min_length": 36.375, + "epoch": 3.71283196823033, + "grad_norm": 0.007721242730341458, + "kl": 0.1253662109375, + "learning_rate": 7.058534984024192e-07, + "loss": 0.00012530006642919034, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1868, + "train_speed(iter/s)": 0.022682 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 60.875000953674316, + "completions/min_length": 21.5, + "epoch": 3.7148175725986596, + "grad_norm": 0.006035502647825041, + "kl": 0.0963134765625, + "learning_rate": 7.055659514683376e-07, + "loss": 9.623346704756841e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1869, + "train_speed(iter/s)": 0.022682 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.875, + "completions/mean_length": 59.989585876464844, + "completions/min_length": 23.625, + "epoch": 3.716803176966989, + "grad_norm": 0.011465434708352068, + "kl": 0.142578125, + "learning_rate": 7.052783226940598e-07, + "loss": 0.00014271473628468812, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1870, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 68.02083539962769, + "completions/min_length": 28.375, + "epoch": 3.718788781335319, + "grad_norm": 0.006816685458152101, + "kl": 0.129638671875, + "learning_rate": 7.049906121940972e-07, + "loss": 0.00012967245129402727, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1871, + "train_speed(iter/s)": 0.022682 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 65.37500095367432, + "completions/min_length": 25.625, + "epoch": 3.7207743857036486, + "grad_norm": 1.99842936831698, + "kl": 0.1126708984375, + "learning_rate": 7.047028200829936e-07, + "loss": 0.0154643002897501, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1872, + "train_speed(iter/s)": 0.022682 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.75, + "completions/mean_length": 67.80208492279053, + "completions/min_length": 30.25, + "epoch": 3.722759990071978, + "grad_norm": 0.00850229765740659, + "kl": 0.14208984375, + "learning_rate": 7.044149464753247e-07, + "loss": 0.00014212813402991742, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1873, + "train_speed(iter/s)": 0.022685 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 71.73958539962769, + "completions/min_length": 31.5, + "epoch": 3.7247455944403076, + "grad_norm": 1.4640433437717335, + "kl": 0.16326904296875, + "learning_rate": 7.041269914856995e-07, + "loss": 0.0028760037384927273, + "memory(GiB)": 94.21, + "reward": 1.7604166865348816, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.16290925815701485, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1874, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 71.2916669845581, + "completions/min_length": 32.5, + "epoch": 3.726731198808637, + "grad_norm": 0.006477994837750161, + "kl": 0.159027099609375, + "learning_rate": 7.038389552287589e-07, + "loss": 0.0001590569590916857, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1875, + "train_speed(iter/s)": 0.022685 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 66.87500190734863, + "completions/min_length": 21.875, + "epoch": 3.728716803176967, + "grad_norm": 0.008271216754882607, + "kl": 0.1041259765625, + "learning_rate": 7.035508378191765e-07, + "loss": 0.00010413752170279622, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1876, + "train_speed(iter/s)": 0.022686 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 66.78125095367432, + "completions/min_length": 23.375, + "epoch": 3.7307024075452966, + "grad_norm": 0.00664322864823127, + "kl": 0.116455078125, + "learning_rate": 7.032626393716576e-07, + "loss": 0.0001165727007901296, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1877, + "train_speed(iter/s)": 0.022686 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 103.0, + "completions/mean_length": 56.052085876464844, + "completions/min_length": 28.25, + "epoch": 3.732688011913626, + "grad_norm": 0.007829866383206988, + "kl": 0.1219482421875, + "learning_rate": 7.029743600009405e-07, + "loss": 0.00012191152200102806, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1878, + "train_speed(iter/s)": 0.022688 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 71.43750095367432, + "completions/min_length": 32.875, + "epoch": 3.734673616281956, + "grad_norm": 0.006424328093703674, + "kl": 0.107666015625, + "learning_rate": 7.026859998217952e-07, + "loss": 0.00010763494356069714, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1879, + "train_speed(iter/s)": 0.02269 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.375, + "completions/mean_length": 77.89583587646484, + "completions/min_length": 33.75, + "epoch": 3.7366592206502856, + "grad_norm": 0.006521821053455773, + "kl": 0.13043212890625, + "learning_rate": 7.02397558949024e-07, + "loss": 0.00013040761405136436, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1880, + "train_speed(iter/s)": 0.022689 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 74.87500286102295, + "completions/min_length": 23.25, + "epoch": 3.738644825018615, + "grad_norm": 0.006884919195593551, + "kl": 0.13214111328125, + "learning_rate": 7.021090374974617e-07, + "loss": 0.0001320820301771164, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1881, + "train_speed(iter/s)": 0.022689 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 71.82291841506958, + "completions/min_length": 26.375, + "epoch": 3.7406304293869446, + "grad_norm": 0.18718118936398945, + "kl": 0.20416259765625, + "learning_rate": 7.018204355819745e-07, + "loss": 0.00020377016335260123, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1882, + "train_speed(iter/s)": 0.022689 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 68.23958539962769, + "completions/min_length": 24.625, + "epoch": 3.742616033755274, + "grad_norm": 1.173162333055251, + "kl": 0.1240234375, + "learning_rate": 7.015317533174611e-07, + "loss": 0.00340383592993021, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1883, + "train_speed(iter/s)": 0.022689 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 71.33333539962769, + "completions/min_length": 26.25, + "epoch": 3.744601638123604, + "grad_norm": 0.9205295205375594, + "kl": 0.12945556640625, + "learning_rate": 7.012429908188522e-07, + "loss": -0.02063288539648056, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1884, + "train_speed(iter/s)": 0.022689 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 66.30208539962769, + "completions/min_length": 26.25, + "epoch": 3.7465872424919335, + "grad_norm": 0.005034871181005451, + "kl": 0.12896728515625, + "learning_rate": 7.009541482011101e-07, + "loss": 0.00012909471115563065, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1885, + "train_speed(iter/s)": 0.02269 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 63.09375190734863, + "completions/min_length": 26.0, + "epoch": 3.748572846860263, + "grad_norm": 0.011845975495309176, + "kl": 0.13043212890625, + "learning_rate": 7.006652255792293e-07, + "loss": 0.00013028444664087147, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1886, + "train_speed(iter/s)": 0.022691 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.875, + "completions/mean_length": 65.88541841506958, + "completions/min_length": 29.125, + "epoch": 3.7505584512285925, + "grad_norm": 0.007137531001676096, + "kl": 0.11895751953125, + "learning_rate": 7.00376223068236e-07, + "loss": 0.00011895185161847621, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1887, + "train_speed(iter/s)": 0.022692 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 62.72916889190674, + "completions/min_length": 26.625, + "epoch": 3.752544055596922, + "grad_norm": 0.0067629483915526405, + "kl": 0.11480712890625, + "learning_rate": 7.000871407831885e-07, + "loss": 0.00011476640065666288, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1888, + "train_speed(iter/s)": 0.022693 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 61.59375190734863, + "completions/min_length": 26.5, + "epoch": 3.754529659965252, + "grad_norm": 0.007762670190126869, + "kl": 0.131103515625, + "learning_rate": 6.997979788391765e-07, + "loss": 0.00013126680278219283, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1889, + "train_speed(iter/s)": 0.022691 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 68.61458539962769, + "completions/min_length": 28.375, + "epoch": 3.7565152643335815, + "grad_norm": 0.005160033397521778, + "kl": 0.10675048828125, + "learning_rate": 6.995087373513214e-07, + "loss": 0.00010677635145839304, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1890, + "train_speed(iter/s)": 0.022691 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 74.87500190734863, + "completions/min_length": 26.75, + "epoch": 3.758500868701911, + "grad_norm": 0.005537344400095234, + "kl": 0.11224365234375, + "learning_rate": 6.992194164347766e-07, + "loss": 0.00011223134060855955, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1891, + "train_speed(iter/s)": 0.022691 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.5, + "completions/mean_length": 61.28125238418579, + "completions/min_length": 28.0, + "epoch": 3.760486473070241, + "grad_norm": 0.005390244078857738, + "kl": 0.1016845703125, + "learning_rate": 6.989300162047272e-07, + "loss": 0.0001016532041830942, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1892, + "train_speed(iter/s)": 0.022694 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 66.68750143051147, + "completions/min_length": 27.625, + "epoch": 3.7624720774385705, + "grad_norm": 0.005636414187853037, + "kl": 0.107177734375, + "learning_rate": 6.98640536776389e-07, + "loss": 0.00010705619206419215, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1893, + "train_speed(iter/s)": 0.022694 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.5, + "completions/mean_length": 76.05208587646484, + "completions/min_length": 35.375, + "epoch": 3.7644576818069, + "grad_norm": 0.005081606145340272, + "kl": 0.12762451171875, + "learning_rate": 6.983509782650102e-07, + "loss": 0.00012753944611176848, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1894, + "train_speed(iter/s)": 0.022695 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 71.42708587646484, + "completions/min_length": 27.75, + "epoch": 3.7664432861752295, + "grad_norm": 0.005999624547732231, + "kl": 0.12530517578125, + "learning_rate": 6.980613407858703e-07, + "loss": 0.00012525395140983164, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1895, + "train_speed(iter/s)": 0.022695 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.375, + "completions/mean_length": 78.71875286102295, + "completions/min_length": 33.0, + "epoch": 3.768428890543559, + "grad_norm": 0.005217533850369434, + "kl": 0.11328125, + "learning_rate": 6.977716244542804e-07, + "loss": 0.00011322525097057223, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1896, + "train_speed(iter/s)": 0.022696 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.875, + "completions/mean_length": 62.750001430511475, + "completions/min_length": 26.5, + "epoch": 3.770414494911889, + "grad_norm": 0.005345821615258043, + "kl": 0.10003662109375, + "learning_rate": 6.974818293855822e-07, + "loss": 9.994323772843927e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1897, + "train_speed(iter/s)": 0.022694 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 67.48958539962769, + "completions/min_length": 27.625, + "epoch": 3.7724000992802185, + "grad_norm": 0.004984296940265329, + "kl": 0.12548828125, + "learning_rate": 6.971919556951497e-07, + "loss": 0.00012557375885080546, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1898, + "train_speed(iter/s)": 0.022695 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 69.72916889190674, + "completions/min_length": 28.625, + "epoch": 3.774385703648548, + "grad_norm": 0.007322977817807963, + "kl": 0.1207275390625, + "learning_rate": 6.969020034983876e-07, + "loss": 0.00012055723345838487, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1899, + "train_speed(iter/s)": 0.022695 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 69.31250047683716, + "completions/min_length": 27.0, + "epoch": 3.7763713080168775, + "grad_norm": 0.004901615243586582, + "kl": 0.13311767578125, + "learning_rate": 6.966119729107325e-07, + "loss": 0.00013311261136550456, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1900, + "train_speed(iter/s)": 0.022695 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 66.39583539962769, + "completions/min_length": 22.875, + "epoch": 3.778356912385207, + "grad_norm": 0.004334965617467214, + "kl": 0.11871337890625, + "learning_rate": 6.963218640476511e-07, + "loss": 0.00011863360123243183, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1901, + "train_speed(iter/s)": 0.022696 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 70.208336353302, + "completions/min_length": 29.0, + "epoch": 3.780342516753537, + "grad_norm": 0.004725200400548933, + "kl": 0.10064697265625, + "learning_rate": 6.960316770246426e-07, + "loss": 0.00010059717897092924, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1902, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 71.52083587646484, + "completions/min_length": 30.125, + "epoch": 3.7823281211218664, + "grad_norm": 0.0041194173903015534, + "kl": 0.1268310546875, + "learning_rate": 6.957414119572361e-07, + "loss": 0.00012679636711254716, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1903, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 69.06250190734863, + "completions/min_length": 27.625, + "epoch": 3.784313725490196, + "grad_norm": 3.176268279264717, + "kl": 0.1370849609375, + "learning_rate": 6.954510689609927e-07, + "loss": -0.009499862790107727, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1904, + "train_speed(iter/s)": 0.022696 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 76.770836353302, + "completions/min_length": 33.75, + "epoch": 3.786299329858526, + "grad_norm": 0.007278422250295857, + "kl": 0.1461181640625, + "learning_rate": 6.95160648151504e-07, + "loss": 0.00014601660950575024, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1905, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.375, + "completions/mean_length": 60.2291693687439, + "completions/min_length": 26.25, + "epoch": 3.7882849342268554, + "grad_norm": 0.014600334124028485, + "kl": 0.1004638671875, + "learning_rate": 6.948701496443926e-07, + "loss": 0.000100525445304811, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1906, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 64.84375190734863, + "completions/min_length": 25.875, + "epoch": 3.790270538595185, + "grad_norm": 2.228812484942028, + "kl": 0.16302490234375, + "learning_rate": 6.945795735553123e-07, + "loss": -0.002496750559657812, + "memory(GiB)": 94.21, + "reward": 1.8750000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8750000037252903, + "rewards/CineAccuracyORM/std": 0.12309149652719498, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1907, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 65.84375095367432, + "completions/min_length": 28.25, + "epoch": 3.7922561429635144, + "grad_norm": 0.024051866287186398, + "kl": 0.1282958984375, + "learning_rate": 6.942889199999479e-07, + "loss": 0.00012829070328734815, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1908, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 70.72916841506958, + "completions/min_length": 29.625, + "epoch": 3.794241747331844, + "grad_norm": 0.10977170695701262, + "kl": 0.20635986328125, + "learning_rate": 6.939981890940143e-07, + "loss": 0.00020652561215683818, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1909, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 63.84375238418579, + "completions/min_length": 31.0, + "epoch": 3.796227351700174, + "grad_norm": 1.544374184989343, + "kl": 0.11077880859375, + "learning_rate": 6.93707380953258e-07, + "loss": -0.00868302769958973, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1910, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.625, + "completions/mean_length": 66.50000095367432, + "completions/min_length": 30.625, + "epoch": 3.7982129560685034, + "grad_norm": 0.014868270103500735, + "kl": 0.1168212890625, + "learning_rate": 6.934164956934557e-07, + "loss": 0.0001170191535493359, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1911, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 68.61458492279053, + "completions/min_length": 30.75, + "epoch": 3.800198560436833, + "grad_norm": 0.004727159406425762, + "kl": 0.09637451171875, + "learning_rate": 6.931255334304154e-07, + "loss": 9.636702452553436e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1912, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.5, + "completions/mean_length": 63.46875333786011, + "completions/min_length": 28.375, + "epoch": 3.8021841648051624, + "grad_norm": 0.00885914029598628, + "kl": 0.10003662109375, + "learning_rate": 6.928344942799751e-07, + "loss": 0.00010010460391640663, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1913, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 71.36458539962769, + "completions/min_length": 29.0, + "epoch": 3.804169769173492, + "grad_norm": 0.006270580432592501, + "kl": 0.14453125, + "learning_rate": 6.925433783580037e-07, + "loss": 0.0001445884263375774, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1914, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 60.48958492279053, + "completions/min_length": 21.375, + "epoch": 3.806155373541822, + "grad_norm": 0.007046441434647712, + "kl": 0.105224609375, + "learning_rate": 6.922521857804008e-07, + "loss": 0.00010524334356887266, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1915, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.875, + "completions/mean_length": 78.45833683013916, + "completions/min_length": 28.625, + "epoch": 3.8081409779101514, + "grad_norm": 0.0052942223035268915, + "kl": 0.1007080078125, + "learning_rate": 6.919609166630965e-07, + "loss": 0.00010063032095786184, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1916, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 74.47916793823242, + "completions/min_length": 25.25, + "epoch": 3.810126582278481, + "grad_norm": 0.854848926341564, + "kl": 0.111572265625, + "learning_rate": 6.916695711220508e-07, + "loss": -0.0038129142485558987, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1917, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 76.61458683013916, + "completions/min_length": 29.75, + "epoch": 3.812112186646811, + "grad_norm": 0.005031210589315135, + "kl": 0.10882568359375, + "learning_rate": 6.913781492732548e-07, + "loss": 0.00010881990601774305, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1918, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 74.54166889190674, + "completions/min_length": 29.125, + "epoch": 3.8140977910151403, + "grad_norm": 0.006844058088828535, + "kl": 0.13311767578125, + "learning_rate": 6.910866512327301e-07, + "loss": 0.00013300779392011464, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1919, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 69.78125238418579, + "completions/min_length": 24.5, + "epoch": 3.81608339538347, + "grad_norm": 0.005166042245422494, + "kl": 0.11859130859375, + "learning_rate": 6.907950771165281e-07, + "loss": 0.00011861752136610448, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1920, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.625, + "completions/mean_length": 61.15625190734863, + "completions/min_length": 21.5, + "epoch": 3.8180689997517994, + "grad_norm": 0.00485129154851239, + "kl": 0.11279296875, + "learning_rate": 6.905034270407304e-07, + "loss": 0.00011265276407357305, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1921, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 66.14583539962769, + "completions/min_length": 26.125, + "epoch": 3.820054604120129, + "grad_norm": 0.004765973189027413, + "kl": 0.12005615234375, + "learning_rate": 6.902117011214495e-07, + "loss": 0.00011994189117103815, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1922, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 75.35416746139526, + "completions/min_length": 30.0, + "epoch": 3.822040208488459, + "grad_norm": 0.004603408952741002, + "kl": 0.1141357421875, + "learning_rate": 6.899198994748273e-07, + "loss": 0.00011391112639103085, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1923, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 65.43750143051147, + "completions/min_length": 28.625, + "epoch": 3.8240258128567883, + "grad_norm": 0.006335350848244709, + "kl": 0.12322998046875, + "learning_rate": 6.896280222170368e-07, + "loss": 0.00012319302186369896, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1924, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 73.75000190734863, + "completions/min_length": 30.375, + "epoch": 3.826011417225118, + "grad_norm": 0.004939332662941249, + "kl": 0.10321044921875, + "learning_rate": 6.8933606946428e-07, + "loss": 0.00010315005056327209, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1925, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 79.44792079925537, + "completions/min_length": 33.625, + "epoch": 3.8279970215934473, + "grad_norm": 0.004856906748949679, + "kl": 0.11199951171875, + "learning_rate": 6.8904404133279e-07, + "loss": 0.00011200741573702544, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1926, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 68.70833539962769, + "completions/min_length": 24.875, + "epoch": 3.829982625961777, + "grad_norm": 0.006834150376968235, + "kl": 0.13092041015625, + "learning_rate": 6.887519379388293e-07, + "loss": 0.00013078290794510394, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1927, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 72.75000095367432, + "completions/min_length": 27.0, + "epoch": 3.831968230330107, + "grad_norm": 1.1439780393790333, + "kl": 0.1322021484375, + "learning_rate": 6.884597593986905e-07, + "loss": -0.00607278710231185, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1928, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.0, + "completions/mean_length": 67.8541693687439, + "completions/min_length": 25.0, + "epoch": 3.8339538346984363, + "grad_norm": 0.0055525868749895695, + "kl": 0.1134033203125, + "learning_rate": 6.88167505828696e-07, + "loss": 0.00011340210767230019, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1929, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.0, + "completions/mean_length": 80.38541984558105, + "completions/min_length": 33.375, + "epoch": 3.835939439066766, + "grad_norm": 0.004803210972103647, + "kl": 0.1268310546875, + "learning_rate": 6.878751773451982e-07, + "loss": 0.00012681195221375674, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1930, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.25, + "completions/mean_length": 67.31250143051147, + "completions/min_length": 24.625, + "epoch": 3.8379250434350958, + "grad_norm": 0.004959017452902894, + "kl": 0.13775634765625, + "learning_rate": 6.875827740645795e-07, + "loss": 0.00013765256153419614, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1931, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 74.75000190734863, + "completions/min_length": 32.5, + "epoch": 3.8399106478034253, + "grad_norm": 0.004585640330844273, + "kl": 0.11090087890625, + "learning_rate": 6.872902961032516e-07, + "loss": 0.00011086107406299561, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1932, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 68.94791841506958, + "completions/min_length": 29.25, + "epoch": 3.841896252171755, + "grad_norm": 1.4734246620443077, + "kl": 0.114990234375, + "learning_rate": 6.869977435776565e-07, + "loss": -0.016336556524038315, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1933, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 67.35416889190674, + "completions/min_length": 29.0, + "epoch": 3.8438818565400843, + "grad_norm": 0.00621383867890989, + "kl": 0.10723876953125, + "learning_rate": 6.867051166042655e-07, + "loss": 0.00010706111061153933, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1934, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 64.63541889190674, + "completions/min_length": 25.75, + "epoch": 3.845867460908414, + "grad_norm": 0.12179852725245788, + "kl": 0.19085693359375, + "learning_rate": 6.864124152995796e-07, + "loss": 0.00019111763685941696, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1935, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 72.552086353302, + "completions/min_length": 30.25, + "epoch": 3.8478530652767438, + "grad_norm": 0.0059436806618933845, + "kl": 0.11016845703125, + "learning_rate": 6.861196397801297e-07, + "loss": 0.0001100352019420825, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1936, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 63.31250190734863, + "completions/min_length": 26.375, + "epoch": 3.8498386696450733, + "grad_norm": 0.8467452472158886, + "kl": 0.10107421875, + "learning_rate": 6.858267901624756e-07, + "loss": -0.01629771664738655, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 1937, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 73.34375238418579, + "completions/min_length": 28.0, + "epoch": 3.8518242740134028, + "grad_norm": 0.004944954355679515, + "kl": 0.117767333984375, + "learning_rate": 6.85533866563207e-07, + "loss": 0.00011779210035456344, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1938, + "train_speed(iter/s)": 0.022705 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.25, + "completions/mean_length": 63.67708492279053, + "completions/min_length": 32.5, + "epoch": 3.8538098783817323, + "grad_norm": 0.3669326533371451, + "kl": 0.4107666015625, + "learning_rate": 6.852408690989434e-07, + "loss": 0.00040961726335808635, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1939, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 68.73958587646484, + "completions/min_length": 26.0, + "epoch": 3.855795482750062, + "grad_norm": 1.5836457630688137, + "kl": 0.10760498046875, + "learning_rate": 6.849477978863333e-07, + "loss": -0.010588235221803188, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1940, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 63.82291889190674, + "completions/min_length": 28.625, + "epoch": 3.8577810871183917, + "grad_norm": 0.025299780110206237, + "kl": 0.1402587890625, + "learning_rate": 6.846546530420543e-07, + "loss": 0.00014026850112713873, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1941, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.75, + "completions/mean_length": 61.562501430511475, + "completions/min_length": 21.25, + "epoch": 3.8597666914867212, + "grad_norm": 1.4719966550579282, + "kl": 0.12982177734375, + "learning_rate": 6.843614346828137e-07, + "loss": -0.00681114848703146, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.0765465535223484, + "rewards/CineAccuracyORM/mean": 0.6875000074505806, + "rewards/CineAccuracyORM/std": 0.3615669459104538, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 1942, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 79.36458587646484, + "completions/min_length": 29.125, + "epoch": 3.8617522958550508, + "grad_norm": 0.014381200129125837, + "kl": 0.1251220703125, + "learning_rate": 6.840681429253482e-07, + "loss": 0.00012504885671660304, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1943, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 68.02083539962769, + "completions/min_length": 29.0, + "epoch": 3.8637379002233807, + "grad_norm": 0.9467279928595111, + "kl": 0.14398193359375, + "learning_rate": 6.837747778864235e-07, + "loss": 0.003352126805111766, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1944, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 64.18750143051147, + "completions/min_length": 26.625, + "epoch": 3.86572350459171, + "grad_norm": 0.028181675532151108, + "kl": 0.134033203125, + "learning_rate": 6.834813396828343e-07, + "loss": 0.00013379205483943224, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1945, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.375, + "completions/mean_length": 66.19791889190674, + "completions/min_length": 30.375, + "epoch": 3.8677091089600397, + "grad_norm": 0.86803973594992, + "kl": 0.15765380859375, + "learning_rate": 6.831878284314045e-07, + "loss": 0.005411760415881872, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1946, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.25, + "completions/mean_length": 66.53125143051147, + "completions/min_length": 30.5, + "epoch": 3.8696947133283692, + "grad_norm": 0.031029352449010936, + "kl": 0.13739013671875, + "learning_rate": 6.828942442489877e-07, + "loss": 0.00013740621216129512, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1947, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.125, + "completions/mean_length": 60.8229193687439, + "completions/min_length": 30.75, + "epoch": 3.8716803176966987, + "grad_norm": 0.029363139256240227, + "kl": 0.1807861328125, + "learning_rate": 6.826005872524656e-07, + "loss": 0.00018048289348371327, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1948, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.625, + "completions/mean_length": 69.70833492279053, + "completions/min_length": 30.75, + "epoch": 3.8736659220650287, + "grad_norm": 1.494218997773182, + "kl": 0.1422119140625, + "learning_rate": 6.823068575587495e-07, + "loss": 0.0057297926396131516, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1949, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 60.08333492279053, + "completions/min_length": 23.875, + "epoch": 3.875651526433358, + "grad_norm": 1.247487800196632, + "kl": 0.19287109375, + "learning_rate": 6.820130552847794e-07, + "loss": -0.012802074663341045, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1950, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.125, + "completions/mean_length": 61.083335399627686, + "completions/min_length": 30.875, + "epoch": 3.8776371308016877, + "grad_norm": 1.229905762425562, + "kl": 0.1690673828125, + "learning_rate": 6.817191805475243e-07, + "loss": 0.007171073462814093, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1951, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 69.7291693687439, + "completions/min_length": 30.375, + "epoch": 3.879622735170017, + "grad_norm": 0.019876752549019244, + "kl": 0.13299560546875, + "learning_rate": 6.81425233463982e-07, + "loss": 0.00013292356743477285, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1952, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 194.5, + "completions/mean_length": 73.61458539962769, + "completions/min_length": 31.875, + "epoch": 3.8816083395383467, + "grad_norm": 2.073612769823052, + "kl": 0.14691162109375, + "learning_rate": 6.81131214151179e-07, + "loss": 0.00530334934592247, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1953, + "train_speed(iter/s)": 0.022705 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 67.71875095367432, + "completions/min_length": 35.75, + "epoch": 3.8835939439066767, + "grad_norm": 0.02050286212793071, + "kl": 0.1431884765625, + "learning_rate": 6.808371227261709e-07, + "loss": 0.00014325630036182702, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1954, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 71.34375238418579, + "completions/min_length": 29.75, + "epoch": 3.885579548275006, + "grad_norm": 0.7280848519261982, + "kl": 0.13128662109375, + "learning_rate": 6.805429593060415e-07, + "loss": 0.00013115754700265825, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1955, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.375, + "completions/mean_length": 69.89583492279053, + "completions/min_length": 33.875, + "epoch": 3.8875651526433357, + "grad_norm": 0.9744350584480629, + "kl": 0.13262939453125, + "learning_rate": 6.802487240079039e-07, + "loss": 0.006188714876770973, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 1956, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.125, + "completions/mean_length": 68.43750143051147, + "completions/min_length": 33.25, + "epoch": 3.8895507570116656, + "grad_norm": 0.03647582824718364, + "kl": 0.181396484375, + "learning_rate": 6.799544169488991e-07, + "loss": 0.00018141789769288152, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1957, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 65.38541889190674, + "completions/min_length": 31.5, + "epoch": 3.891536361379995, + "grad_norm": 0.02551893699699209, + "kl": 0.14971923828125, + "learning_rate": 6.796600382461972e-07, + "loss": 0.0001495483156759292, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1958, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.875, + "completions/mean_length": 74.12500238418579, + "completions/min_length": 33.375, + "epoch": 3.8935219657483247, + "grad_norm": 1.568937123012861, + "kl": 0.16259765625, + "learning_rate": 6.793655880169966e-07, + "loss": -0.008005126379430294, + "memory(GiB)": 94.21, + "reward": 1.5833333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.583333333954215, + "rewards/CineAccuracyORM/std": 0.375051774084568, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1959, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 72.50000333786011, + "completions/min_length": 34.0, + "epoch": 3.895507570116654, + "grad_norm": 0.012139293898265117, + "kl": 0.13787841796875, + "learning_rate": 6.790710663785244e-07, + "loss": 0.0001378914894303307, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1960, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.75, + "completions/mean_length": 61.55208492279053, + "completions/min_length": 30.5, + "epoch": 3.8974931744849837, + "grad_norm": 0.022091588572302666, + "kl": 0.13909912109375, + "learning_rate": 6.787764734480357e-07, + "loss": 0.00013918429613113403, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1961, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.0, + "completions/mean_length": 66.02083539962769, + "completions/min_length": 31.875, + "epoch": 3.8994787788533136, + "grad_norm": 1.2804247007575393, + "kl": 0.13671875, + "learning_rate": 6.784818093428143e-07, + "loss": 0.015222916379570961, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1962, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.625, + "completions/mean_length": 68.39583587646484, + "completions/min_length": 32.875, + "epoch": 3.901464383221643, + "grad_norm": 0.006602535393418251, + "kl": 0.134033203125, + "learning_rate": 6.781870741801723e-07, + "loss": 0.00013385264901444316, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1963, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 66.87500238418579, + "completions/min_length": 32.625, + "epoch": 3.9034499875899726, + "grad_norm": 0.006767917896116481, + "kl": 0.12548828125, + "learning_rate": 6.778922680774502e-07, + "loss": 0.00012554106069728732, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1964, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.25, + "completions/mean_length": 59.96875286102295, + "completions/min_length": 28.5, + "epoch": 3.905435591958302, + "grad_norm": 0.006813314583956773, + "kl": 0.11474609375, + "learning_rate": 6.775973911520164e-07, + "loss": 0.00011462499242043123, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1965, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.375, + "completions/mean_length": 60.250001430511475, + "completions/min_length": 30.0, + "epoch": 3.9074211963266317, + "grad_norm": 0.0718876500951966, + "kl": 0.2137451171875, + "learning_rate": 6.773024435212677e-07, + "loss": 0.00021401792764663696, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1966, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 60.73958492279053, + "completions/min_length": 30.75, + "epoch": 3.9094068006949616, + "grad_norm": 0.007183778965311937, + "kl": 0.1380615234375, + "learning_rate": 6.770074253026293e-07, + "loss": 0.0001381241308990866, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1967, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 75.19791793823242, + "completions/min_length": 33.125, + "epoch": 3.911392405063291, + "grad_norm": 0.006426563821911658, + "kl": 0.11773681640625, + "learning_rate": 6.767123366135541e-07, + "loss": 0.00011759914923459291, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1968, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.625, + "completions/mean_length": 61.7291693687439, + "completions/min_length": 27.875, + "epoch": 3.9133780094316206, + "grad_norm": 0.005667167382687648, + "kl": 0.1005859375, + "learning_rate": 6.764171775715232e-07, + "loss": 0.00010057847248390317, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1969, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 67.01041889190674, + "completions/min_length": 31.625, + "epoch": 3.9153636137999506, + "grad_norm": 0.0066141996552372675, + "kl": 0.13897705078125, + "learning_rate": 6.761219482940457e-07, + "loss": 0.00013895424490328878, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1970, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 67.52083587646484, + "completions/min_length": 31.75, + "epoch": 3.91734921816828, + "grad_norm": 1.6495596582335152, + "kl": 0.11236572265625, + "learning_rate": 6.758266488986586e-07, + "loss": -0.006600073538720608, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1971, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.375, + "completions/mean_length": 70.41666984558105, + "completions/min_length": 32.5, + "epoch": 3.9193348225366096, + "grad_norm": 0.007284866109490994, + "kl": 0.10821533203125, + "learning_rate": 6.755312795029271e-07, + "loss": 0.00010831169493030757, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1972, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.125, + "completions/mean_length": 62.54166889190674, + "completions/min_length": 33.625, + "epoch": 3.921320426904939, + "grad_norm": 0.032575234447123964, + "kl": 0.14178466796875, + "learning_rate": 6.75235840224444e-07, + "loss": 0.00014187510532792658, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1973, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.0, + "completions/mean_length": 66.32291984558105, + "completions/min_length": 37.0, + "epoch": 3.9233060312732686, + "grad_norm": 0.008293509430724343, + "kl": 0.14508056640625, + "learning_rate": 6.7494033118083e-07, + "loss": 0.0001450619602110237, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1974, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.875, + "completions/mean_length": 62.156250953674316, + "completions/min_length": 30.625, + "epoch": 3.9252916356415986, + "grad_norm": 0.006937031637228478, + "kl": 0.1177978515625, + "learning_rate": 6.746447524897334e-07, + "loss": 0.00011770258424803615, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1975, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.125, + "completions/mean_length": 63.34375333786011, + "completions/min_length": 30.875, + "epoch": 3.927277240009928, + "grad_norm": 0.008577611593958562, + "kl": 0.1484375, + "learning_rate": 6.743491042688306e-07, + "loss": 0.00014840041694696993, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1976, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 72.98958492279053, + "completions/min_length": 28.875, + "epoch": 3.9292628443782576, + "grad_norm": 0.0063579125709845065, + "kl": 0.11407470703125, + "learning_rate": 6.740533866358252e-07, + "loss": 0.00011416269990149885, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1977, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 71.30208492279053, + "completions/min_length": 30.75, + "epoch": 3.931248448746587, + "grad_norm": 1.4761917726996892, + "kl": 0.15081787109375, + "learning_rate": 6.737575997084491e-07, + "loss": -0.013859925791621208, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.11807912588119507, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1978, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.0, + "completions/mean_length": 63.60416841506958, + "completions/min_length": 31.5, + "epoch": 3.9332340531149166, + "grad_norm": 0.007616244468245575, + "kl": 0.1258544921875, + "learning_rate": 6.73461743604461e-07, + "loss": 0.00012578511086758226, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1979, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 70.8854193687439, + "completions/min_length": 35.375, + "epoch": 3.9352196574832465, + "grad_norm": 0.006625010156448254, + "kl": 0.113037109375, + "learning_rate": 6.731658184416479e-07, + "loss": 0.00011308002285659313, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1980, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.5, + "completions/mean_length": 74.54166793823242, + "completions/min_length": 35.125, + "epoch": 3.937205261851576, + "grad_norm": 0.006120438705529417, + "kl": 0.1190185546875, + "learning_rate": 6.728698243378236e-07, + "loss": 0.0001191550400108099, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1981, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.875, + "completions/mean_length": 60.45833444595337, + "completions/min_length": 31.25, + "epoch": 3.9391908662199056, + "grad_norm": 0.0071934876881588556, + "kl": 0.125244140625, + "learning_rate": 6.725737614108299e-07, + "loss": 0.00012520799646154046, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1982, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 71.23958587646484, + "completions/min_length": 29.625, + "epoch": 3.9411764705882355, + "grad_norm": 0.00590422020723048, + "kl": 0.135498046875, + "learning_rate": 6.722776297785356e-07, + "loss": 0.00013533519813790917, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1983, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.25, + "completions/mean_length": 62.531251430511475, + "completions/min_length": 32.625, + "epoch": 3.943162074956565, + "grad_norm": 0.007514490586814119, + "kl": 0.1483154296875, + "learning_rate": 6.719814295588371e-07, + "loss": 0.00014824900426901877, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1984, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 64.15625095367432, + "completions/min_length": 30.0, + "epoch": 3.9451476793248945, + "grad_norm": 0.008563349020987557, + "kl": 0.12451171875, + "learning_rate": 6.716851608696582e-07, + "loss": 0.00012440930004231632, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1985, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 71.3541693687439, + "completions/min_length": 32.875, + "epoch": 3.947133283693224, + "grad_norm": 0.00844346407017783, + "kl": 0.13494873046875, + "learning_rate": 6.713888238289496e-07, + "loss": 0.00013479527842719108, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1986, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 66.62500143051147, + "completions/min_length": 33.0, + "epoch": 3.9491188880615535, + "grad_norm": 2.6213789328551678, + "kl": 0.142333984375, + "learning_rate": 6.710924185546893e-07, + "loss": 0.003287344006821513, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1987, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.625, + "completions/mean_length": 64.16666889190674, + "completions/min_length": 34.5, + "epoch": 3.9511044924298835, + "grad_norm": 0.9934585794107743, + "kl": 0.124267578125, + "learning_rate": 6.707959451648829e-07, + "loss": -0.003844011574983597, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166669771075, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1988, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 67.927086353302, + "completions/min_length": 33.0, + "epoch": 3.953090096798213, + "grad_norm": 0.006736003654127351, + "kl": 0.11407470703125, + "learning_rate": 6.704994037775626e-07, + "loss": 0.00011398106289561838, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1989, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.75, + "completions/mean_length": 62.843750953674316, + "completions/min_length": 31.375, + "epoch": 3.9550757011665425, + "grad_norm": 0.008397633567474767, + "kl": 0.11236572265625, + "learning_rate": 6.702027945107879e-07, + "loss": 0.00011219953739782795, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1990, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 69.09375190734863, + "completions/min_length": 35.25, + "epoch": 3.9570613055348725, + "grad_norm": 0.0062549754019340245, + "kl": 0.12615966796875, + "learning_rate": 6.699061174826452e-07, + "loss": 0.00012613250873982906, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1991, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.875, + "completions/mean_length": 70.72916841506958, + "completions/min_length": 32.875, + "epoch": 3.9590469099032015, + "grad_norm": 0.8474983537110822, + "kl": 0.111968994140625, + "learning_rate": 6.696093728112479e-07, + "loss": -0.007378804497420788, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1992, + "train_speed(iter/s)": 0.022725 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 72.25000190734863, + "completions/min_length": 34.25, + "epoch": 3.9610325142715315, + "grad_norm": 0.006842777116988327, + "kl": 0.12701416015625, + "learning_rate": 6.693125606147368e-07, + "loss": 0.00012694911856669933, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1993, + "train_speed(iter/s)": 0.022726 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 70.01041793823242, + "completions/min_length": 30.25, + "epoch": 3.963018118639861, + "grad_norm": 0.007910718969016838, + "kl": 0.1258544921875, + "learning_rate": 6.690156810112786e-07, + "loss": 0.0001257880503544584, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1994, + "train_speed(iter/s)": 0.022726 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.5, + "completions/mean_length": 66.88541889190674, + "completions/min_length": 35.25, + "epoch": 3.9650037230081905, + "grad_norm": 0.007175878029710901, + "kl": 0.10247802734375, + "learning_rate": 6.687187341190679e-07, + "loss": 0.00010259408736601472, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1995, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.25, + "completions/mean_length": 66.40625190734863, + "completions/min_length": 30.5, + "epoch": 3.9669893273765204, + "grad_norm": 0.0067688850015800465, + "kl": 0.11151123046875, + "learning_rate": 6.684217200563252e-07, + "loss": 0.00011153890955029055, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1996, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 104.375, + "completions/mean_length": 68.03125190734863, + "completions/min_length": 39.5, + "epoch": 3.96897493174485, + "grad_norm": 0.007878138096834917, + "kl": 0.1180419921875, + "learning_rate": 6.681246389412985e-07, + "loss": 0.0001179118626168929, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1997, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 72.67708587646484, + "completions/min_length": 30.375, + "epoch": 3.9709605361131795, + "grad_norm": 0.00870718565431842, + "kl": 0.14044189453125, + "learning_rate": 6.678274908922619e-07, + "loss": 0.00014022283721715212, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1998, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.5, + "completions/mean_length": 65.70833587646484, + "completions/min_length": 35.625, + "epoch": 3.972946140481509, + "grad_norm": 0.007485116102384131, + "kl": 0.127197265625, + "learning_rate": 6.675302760275166e-07, + "loss": 0.0001272835215786472, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 1999, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 75.1979193687439, + "completions/min_length": 35.25, + "epoch": 3.9749317448498385, + "grad_norm": 0.006362508917186569, + "kl": 0.11773681640625, + "learning_rate": 6.6723299446539e-07, + "loss": 0.00011758983600884676, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2000, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 67.9479193687439, + "completions/min_length": 30.5, + "epoch": 3.9769173492181684, + "grad_norm": 0.9363975663111594, + "kl": 0.11956787109375, + "learning_rate": 6.669356463242361e-07, + "loss": 0.013932823203504086, + "memory(GiB)": 94.21, + "reward": 1.6458333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6458333358168602, + "rewards/CineAccuracyORM/std": 0.3879413418471813, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2001, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.375, + "completions/mean_length": 70.65625333786011, + "completions/min_length": 32.625, + "epoch": 3.978902953586498, + "grad_norm": 0.007441439324369319, + "kl": 0.13214111328125, + "learning_rate": 6.66638231722436e-07, + "loss": 0.00013221651897765696, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2002, + "train_speed(iter/s)": 0.022723 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 71.22916793823242, + "completions/min_length": 36.75, + "epoch": 3.9808885579548274, + "grad_norm": 0.008255645582687036, + "kl": 0.11822509765625, + "learning_rate": 6.663407507783964e-07, + "loss": 0.00011813984019681811, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2003, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 61.812501430511475, + "completions/min_length": 28.25, + "epoch": 3.9828741623231574, + "grad_norm": 0.00657424478774642, + "kl": 0.10955810546875, + "learning_rate": 6.66043203610551e-07, + "loss": 0.00010963801469188184, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2004, + "train_speed(iter/s)": 0.022723 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 75.08333587646484, + "completions/min_length": 35.125, + "epoch": 3.9848597666914864, + "grad_norm": 0.009475988794735851, + "kl": 0.13995361328125, + "learning_rate": 6.657455903373596e-07, + "loss": 0.00013996614143252373, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2005, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.75, + "completions/mean_length": 67.92708539962769, + "completions/min_length": 30.75, + "epoch": 3.9868453710598164, + "grad_norm": 0.005353517132734897, + "kl": 0.10235595703125, + "learning_rate": 6.654479110773083e-07, + "loss": 0.00010232740896753967, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2006, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.25, + "completions/mean_length": 64.23958683013916, + "completions/min_length": 32.75, + "epoch": 3.988830975428146, + "grad_norm": 0.005695187441115863, + "kl": 0.1026611328125, + "learning_rate": 6.6515016594891e-07, + "loss": 0.00010261538409395143, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2007, + "train_speed(iter/s)": 0.022725 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.5, + "completions/mean_length": 69.270836353302, + "completions/min_length": 35.125, + "epoch": 3.9908165797964754, + "grad_norm": 1.0797091209633252, + "kl": 0.13232421875, + "learning_rate": 6.648523550707028e-07, + "loss": -0.0008881315588951111, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 2008, + "train_speed(iter/s)": 0.022725 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.5, + "completions/mean_length": 64.26041889190674, + "completions/min_length": 36.125, + "epoch": 3.9928021841648054, + "grad_norm": 0.0058442717427213235, + "kl": 0.1116943359375, + "learning_rate": 6.645544785612523e-07, + "loss": 0.00011167748016305268, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2009, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 69.07291841506958, + "completions/min_length": 33.375, + "epoch": 3.994787788533135, + "grad_norm": 0.005048500283070655, + "kl": 0.10980224609375, + "learning_rate": 6.642565365391488e-07, + "loss": 0.00010970650328090414, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2010, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.875, + "completions/mean_length": 60.59375190734863, + "completions/min_length": 28.625, + "epoch": 3.9967733929014644, + "grad_norm": 0.010755256469881605, + "kl": 0.12109375, + "learning_rate": 6.639585291230097e-07, + "loss": 0.00012112577678635716, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2011, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 68.14583539962769, + "completions/min_length": 31.125, + "epoch": 3.998758997269794, + "grad_norm": 0.005064750581841125, + "kl": 0.1336669921875, + "learning_rate": 6.636604564314781e-07, + "loss": 0.0001337287249043584, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2012, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 71.18750381469727, + "completions/min_length": 32.375, + "epoch": 4.00198560436833, + "grad_norm": 0.018869894006653228, + "kl": 0.11322021484375, + "learning_rate": 6.633623185832231e-07, + "loss": 0.00011309284309390932, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2013, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.5, + "completions/mean_length": 68.38541889190674, + "completions/min_length": 31.875, + "epoch": 4.003971208736659, + "grad_norm": 0.00565829619644366, + "kl": 0.12139892578125, + "learning_rate": 6.630641156969397e-07, + "loss": 0.000121168268378824, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2014, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 68.88541984558105, + "completions/min_length": 32.375, + "epoch": 4.005956813104989, + "grad_norm": 0.010764540924807033, + "kl": 0.11993408203125, + "learning_rate": 6.627658478913488e-07, + "loss": 0.00011988793266937137, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2015, + "train_speed(iter/s)": 0.022725 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.75, + "completions/mean_length": 68.16666841506958, + "completions/min_length": 31.625, + "epoch": 4.007942417473318, + "grad_norm": 0.005544413440212136, + "kl": 0.1365966796875, + "learning_rate": 6.624675152851974e-07, + "loss": 0.00013645910075865686, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2016, + "train_speed(iter/s)": 0.022726 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.125, + "completions/mean_length": 59.10416793823242, + "completions/min_length": 29.0, + "epoch": 4.009928021841648, + "grad_norm": 0.005310373150290691, + "kl": 0.10260009765625, + "learning_rate": 6.621691179972579e-07, + "loss": 0.00010257892427034676, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2017, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.25, + "completions/mean_length": 62.489585399627686, + "completions/min_length": 31.5, + "epoch": 4.011913626209978, + "grad_norm": 0.005474587717592868, + "kl": 0.11834716796875, + "learning_rate": 6.618706561463287e-07, + "loss": 0.00011841466039186344, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2018, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 68.68750190734863, + "completions/min_length": 31.375, + "epoch": 4.013899230578307, + "grad_norm": 0.005991308310092773, + "kl": 0.11474609375, + "learning_rate": 6.615721298512337e-07, + "loss": 0.00011478038504719734, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2019, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.5, + "completions/mean_length": 75.13541793823242, + "completions/min_length": 36.0, + "epoch": 4.015884834946637, + "grad_norm": 0.005616389886586344, + "kl": 0.13177490234375, + "learning_rate": 6.612735392308227e-07, + "loss": 0.00013186628348194063, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2020, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.5, + "completions/mean_length": 64.56250238418579, + "completions/min_length": 31.375, + "epoch": 4.017870439314967, + "grad_norm": 0.006213665224953286, + "kl": 0.095947265625, + "learning_rate": 6.609748844039711e-07, + "loss": 9.60233955993317e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2021, + "train_speed(iter/s)": 0.022729 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.0, + "completions/mean_length": 65.84375190734863, + "completions/min_length": 33.125, + "epoch": 4.019856043683296, + "grad_norm": 0.005088416557353746, + "kl": 0.1044921875, + "learning_rate": 6.606761654895797e-07, + "loss": 0.00010443732026033103, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2022, + "train_speed(iter/s)": 0.02273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 73.50000238418579, + "completions/min_length": 31.75, + "epoch": 4.021841648051626, + "grad_norm": 0.005350222865723179, + "kl": 0.10540771484375, + "learning_rate": 6.603773826065749e-07, + "loss": 0.0001054336316883564, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2023, + "train_speed(iter/s)": 0.022731 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 70.64583444595337, + "completions/min_length": 34.25, + "epoch": 4.023827252419955, + "grad_norm": 0.00909442194662836, + "kl": 0.1087646484375, + "learning_rate": 6.600785358739083e-07, + "loss": 0.00010878332977881655, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2024, + "train_speed(iter/s)": 0.022731 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 68.114586353302, + "completions/min_length": 35.625, + "epoch": 4.025812856788285, + "grad_norm": 0.008400199158934446, + "kl": 0.1041259765625, + "learning_rate": 6.597796254105575e-07, + "loss": 0.00010408522211946547, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2025, + "train_speed(iter/s)": 0.022733 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 68.82292032241821, + "completions/min_length": 30.875, + "epoch": 4.027798461156615, + "grad_norm": 0.6613634201844689, + "kl": 0.1153564453125, + "learning_rate": 6.594806513355251e-07, + "loss": -0.008995014242827892, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2026, + "train_speed(iter/s)": 0.022733 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.125, + "completions/mean_length": 66.64583683013916, + "completions/min_length": 34.25, + "epoch": 4.029784065524944, + "grad_norm": 1.6642902751946476, + "kl": 0.13616943359375, + "learning_rate": 6.591816137678387e-07, + "loss": -0.005860649049282074, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2027, + "train_speed(iter/s)": 0.022735 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.75, + "completions/mean_length": 64.02083539962769, + "completions/min_length": 31.0, + "epoch": 4.031769669893274, + "grad_norm": 2.373622370428783, + "kl": 0.1221923828125, + "learning_rate": 6.58882512826552e-07, + "loss": -0.0016249145846813917, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2028, + "train_speed(iter/s)": 0.022735 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.125, + "completions/mean_length": 63.989585399627686, + "completions/min_length": 32.375, + "epoch": 4.033755274261603, + "grad_norm": 0.006024388389464885, + "kl": 0.09661865234375, + "learning_rate": 6.585833486307434e-07, + "loss": 9.664696699474007e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2029, + "train_speed(iter/s)": 0.022736 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.375, + "completions/mean_length": 63.656251430511475, + "completions/min_length": 31.0, + "epoch": 4.035740878629933, + "grad_norm": 0.006330535347817947, + "kl": 0.10064697265625, + "learning_rate": 6.582841212995164e-07, + "loss": 0.0001006251186481677, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2030, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.375, + "completions/mean_length": 68.06250238418579, + "completions/min_length": 32.625, + "epoch": 4.037726482998263, + "grad_norm": 0.007502050417841412, + "kl": 0.12774658203125, + "learning_rate": 6.579848309519997e-07, + "loss": 0.0001276147668249905, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2031, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 64.35416841506958, + "completions/min_length": 32.375, + "epoch": 4.039712087366592, + "grad_norm": 1.6598181666727851, + "kl": 0.10150146484375, + "learning_rate": 6.576854777073473e-07, + "loss": 0.007703796029090881, + "memory(GiB)": 94.21, + "reward": 1.6979166865348816, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.2281883768737316, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2032, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 67.15625238418579, + "completions/min_length": 34.25, + "epoch": 4.041697691734922, + "grad_norm": 1.8902506396159635, + "kl": 0.10443115234375, + "learning_rate": 6.573860616847385e-07, + "loss": -0.006393782794475555, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2033, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 67.63541841506958, + "completions/min_length": 34.875, + "epoch": 4.043683296103252, + "grad_norm": 0.007403135043457375, + "kl": 0.12353515625, + "learning_rate": 6.570865830033764e-07, + "loss": 0.00012345501454547048, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2034, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 68.55208492279053, + "completions/min_length": 30.25, + "epoch": 4.045668900471581, + "grad_norm": 0.007562275871408161, + "kl": 0.12005615234375, + "learning_rate": 6.567870417824904e-07, + "loss": 0.00012012640218017623, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2035, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.75, + "completions/mean_length": 61.50000190734863, + "completions/min_length": 33.625, + "epoch": 4.047654504839911, + "grad_norm": 0.006990191592929426, + "kl": 0.11993408203125, + "learning_rate": 6.564874381413344e-07, + "loss": 0.00011990381608484313, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2036, + "train_speed(iter/s)": 0.022742 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.0, + "completions/mean_length": 66.00000190734863, + "completions/min_length": 35.125, + "epoch": 4.04964010920824, + "grad_norm": 0.005657248848228836, + "kl": 0.10577392578125, + "learning_rate": 6.561877721991866e-07, + "loss": 0.0001058382767951116, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2037, + "train_speed(iter/s)": 0.022742 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 67.98958539962769, + "completions/min_length": 34.25, + "epoch": 4.05162571357657, + "grad_norm": 0.005489670958569793, + "kl": 0.095947265625, + "learning_rate": 6.558880440753507e-07, + "loss": 9.60080506047234e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2038, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.875, + "completions/mean_length": 65.50000190734863, + "completions/min_length": 31.75, + "epoch": 4.0536113179449, + "grad_norm": 0.007025838097572775, + "kl": 0.10809326171875, + "learning_rate": 6.555882538891546e-07, + "loss": 0.00010811498941620812, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2039, + "train_speed(iter/s)": 0.022744 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.875, + "completions/mean_length": 67.302086353302, + "completions/min_length": 34.25, + "epoch": 4.055596922313229, + "grad_norm": 0.007774702212682246, + "kl": 0.121826171875, + "learning_rate": 6.552884017599516e-07, + "loss": 0.00012183383660158142, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2040, + "train_speed(iter/s)": 0.022745 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 69.41666841506958, + "completions/min_length": 34.5, + "epoch": 4.057582526681559, + "grad_norm": 0.005903042534519346, + "kl": 0.1195068359375, + "learning_rate": 6.549884878071189e-07, + "loss": 0.00011955788795603439, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2041, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.875, + "completions/mean_length": 66.7604193687439, + "completions/min_length": 32.25, + "epoch": 4.059568131049888, + "grad_norm": 0.006313828209238338, + "kl": 0.09527587890625, + "learning_rate": 6.546885121500584e-07, + "loss": 9.523648623144254e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2042, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 69.76041889190674, + "completions/min_length": 31.625, + "epoch": 4.061553735418218, + "grad_norm": 0.007556741937032334, + "kl": 0.12371826171875, + "learning_rate": 6.543884749081975e-07, + "loss": 0.00012373802019283175, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2043, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 67.15625190734863, + "completions/min_length": 32.25, + "epoch": 4.063539339786548, + "grad_norm": 0.0053709199638572805, + "kl": 0.1041259765625, + "learning_rate": 6.54088376200987e-07, + "loss": 0.00010402753105154261, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2044, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.625, + "completions/mean_length": 63.59375238418579, + "completions/min_length": 36.25, + "epoch": 4.065524944154877, + "grad_norm": 0.006130519045175084, + "kl": 0.0970458984375, + "learning_rate": 6.537882161479027e-07, + "loss": 9.713557665236294e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2045, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 71.57291889190674, + "completions/min_length": 35.625, + "epoch": 4.067510548523207, + "grad_norm": 0.005291047146873017, + "kl": 0.10546875, + "learning_rate": 6.534879948684446e-07, + "loss": 0.00010560001828707755, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2046, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 74.302086353302, + "completions/min_length": 33.625, + "epoch": 4.069496152891537, + "grad_norm": 0.00536066426363881, + "kl": 0.11505126953125, + "learning_rate": 6.531877124821375e-07, + "loss": 0.00011505494330776855, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2047, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.5, + "completions/mean_length": 60.20833444595337, + "completions/min_length": 31.25, + "epoch": 4.071481757259866, + "grad_norm": 0.006665023104218962, + "kl": 0.1234130859375, + "learning_rate": 6.5288736910853e-07, + "loss": 0.0001234076771652326, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2048, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 71.73958444595337, + "completions/min_length": 35.5, + "epoch": 4.073467361628196, + "grad_norm": 0.006252620345059078, + "kl": 0.11041259765625, + "learning_rate": 6.525869648671951e-07, + "loss": 0.00011042873666156083, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2049, + "train_speed(iter/s)": 0.02275 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.75, + "completions/mean_length": 69.13541841506958, + "completions/min_length": 36.125, + "epoch": 4.075452965996525, + "grad_norm": 0.24347129207963739, + "kl": 0.478759765625, + "learning_rate": 6.522864998777304e-07, + "loss": 0.00047699594870209694, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2050, + "train_speed(iter/s)": 0.022751 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.125, + "completions/mean_length": 63.395835876464844, + "completions/min_length": 34.25, + "epoch": 4.077438570364855, + "grad_norm": 0.005737709950479365, + "kl": 0.1356201171875, + "learning_rate": 6.519859742597573e-07, + "loss": 0.00013556258636526763, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2051, + "train_speed(iter/s)": 0.022751 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 63.66666793823242, + "completions/min_length": 33.75, + "epoch": 4.079424174733185, + "grad_norm": 2.258360350546377, + "kl": 0.12432861328125, + "learning_rate": 6.516853881329214e-07, + "loss": 0.001749946502968669, + "memory(GiB)": 94.21, + "reward": 1.59375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.59375, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2052, + "train_speed(iter/s)": 0.022752 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.5, + "completions/mean_length": 64.364586353302, + "completions/min_length": 31.125, + "epoch": 4.081409779101514, + "grad_norm": 0.03400161286007858, + "kl": 0.1368408203125, + "learning_rate": 6.513847416168929e-07, + "loss": 0.00013668896281160414, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2053, + "train_speed(iter/s)": 0.022754 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.0, + "completions/mean_length": 64.13541793823242, + "completions/min_length": 28.625, + "epoch": 4.083395383469844, + "grad_norm": 0.006116490161371725, + "kl": 0.10125732421875, + "learning_rate": 6.51084034831365e-07, + "loss": 0.0001011538552120328, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2054, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 70.57291889190674, + "completions/min_length": 31.875, + "epoch": 4.085380987838173, + "grad_norm": 0.10092509064326098, + "kl": 0.2481689453125, + "learning_rate": 6.507832678960559e-07, + "loss": 0.00024841591948643327, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2055, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.375, + "completions/mean_length": 69.46875143051147, + "completions/min_length": 36.25, + "epoch": 4.087366592206503, + "grad_norm": 0.007406912868890276, + "kl": 0.11578369140625, + "learning_rate": 6.504824409307069e-07, + "loss": 0.00011570375500014052, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2056, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.375, + "completions/mean_length": 68.64583587646484, + "completions/min_length": 34.375, + "epoch": 4.089352196574833, + "grad_norm": 0.02998255989960638, + "kl": 0.14678955078125, + "learning_rate": 6.501815540550843e-07, + "loss": 0.00014662364264950156, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2057, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.5, + "completions/mean_length": 68.78125190734863, + "completions/min_length": 33.625, + "epoch": 4.091337800943162, + "grad_norm": 0.006547836350884739, + "kl": 0.10009765625, + "learning_rate": 6.49880607388977e-07, + "loss": 0.00010011550330091268, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2058, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 63.07291793823242, + "completions/min_length": 30.375, + "epoch": 4.093323405311492, + "grad_norm": 0.00809876703403715, + "kl": 0.1104736328125, + "learning_rate": 6.495796010521985e-07, + "loss": 0.0001104526745621115, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2059, + "train_speed(iter/s)": 0.022758 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.625, + "completions/mean_length": 67.8541693687439, + "completions/min_length": 34.625, + "epoch": 4.095309009679822, + "grad_norm": 0.006693559366330109, + "kl": 0.109619140625, + "learning_rate": 6.492785351645859e-07, + "loss": 0.00010952491720672697, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2060, + "train_speed(iter/s)": 0.022758 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 71.75000143051147, + "completions/min_length": 33.875, + "epoch": 4.097294614048151, + "grad_norm": 0.03956217964159395, + "kl": 0.15301513671875, + "learning_rate": 6.489774098460002e-07, + "loss": 0.00015298080688808113, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2061, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.75, + "completions/mean_length": 72.47916984558105, + "completions/min_length": 37.125, + "epoch": 4.099280218416481, + "grad_norm": 1.0697147886646554, + "kl": 0.10809326171875, + "learning_rate": 6.486762252163254e-07, + "loss": 0.003566889790818095, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2062, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 214.5, + "completions/mean_length": 77.37500190734863, + "completions/min_length": 33.375, + "epoch": 4.10126582278481, + "grad_norm": 0.009292234084509054, + "kl": 0.1224365234375, + "learning_rate": 6.483749813954694e-07, + "loss": 0.00012236254406161606, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2063, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 68.1979193687439, + "completions/min_length": 33.25, + "epoch": 4.10325142715314, + "grad_norm": 0.006989477705086491, + "kl": 0.104248046875, + "learning_rate": 6.480736785033644e-07, + "loss": 0.00010422086052130908, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2064, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 192.875, + "completions/mean_length": 82.42708587646484, + "completions/min_length": 39.5, + "epoch": 4.10523703152147, + "grad_norm": 0.005914921173196594, + "kl": 0.1207275390625, + "learning_rate": 6.477723166599651e-07, + "loss": 0.00012081613385817036, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2065, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.25, + "completions/mean_length": 67.48958539962769, + "completions/min_length": 33.125, + "epoch": 4.107222635889799, + "grad_norm": 0.009639885902223732, + "kl": 0.12384033203125, + "learning_rate": 6.474708959852503e-07, + "loss": 0.00012394244549795985, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2066, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 69.18750143051147, + "completions/min_length": 34.125, + "epoch": 4.109208240258129, + "grad_norm": 0.008215214185244112, + "kl": 0.1318359375, + "learning_rate": 6.471694165992219e-07, + "loss": 0.0001318525173701346, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2067, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.5, + "completions/mean_length": 62.09375190734863, + "completions/min_length": 34.875, + "epoch": 4.111193844626458, + "grad_norm": 0.008315337984825706, + "kl": 0.1231689453125, + "learning_rate": 6.468678786219052e-07, + "loss": 0.0001231917121913284, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2068, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.625, + "completions/mean_length": 68.46875190734863, + "completions/min_length": 31.375, + "epoch": 4.113179448994788, + "grad_norm": 0.007692090798060133, + "kl": 0.11822509765625, + "learning_rate": 6.46566282173349e-07, + "loss": 0.00011806873953901231, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2069, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 81.7291693687439, + "completions/min_length": 39.5, + "epoch": 4.115165053363118, + "grad_norm": 1.790176083852296, + "kl": 0.1051025390625, + "learning_rate": 6.462646273736254e-07, + "loss": -0.000978361233137548, + "memory(GiB)": 94.21, + "reward": 1.8125000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8125000074505806, + "rewards/CineAccuracyORM/std": 0.2407601661980152, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2070, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 75.32291889190674, + "completions/min_length": 37.25, + "epoch": 4.117150657731447, + "grad_norm": 0.007760996761536065, + "kl": 0.12493896484375, + "learning_rate": 6.459629143428294e-07, + "loss": 0.00012478661665227264, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2071, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 69.29166889190674, + "completions/min_length": 35.5, + "epoch": 4.119136262099777, + "grad_norm": 0.007313916390480204, + "kl": 0.12762451171875, + "learning_rate": 6.456611432010795e-07, + "loss": 0.0001277018163818866, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2072, + "train_speed(iter/s)": 0.022759 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 80.48958492279053, + "completions/min_length": 33.75, + "epoch": 4.121121866468107, + "grad_norm": 0.007348825257992149, + "kl": 0.12713623046875, + "learning_rate": 6.453593140685171e-07, + "loss": 0.00012722087558358908, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2073, + "train_speed(iter/s)": 0.02276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.25, + "completions/mean_length": 70.1666693687439, + "completions/min_length": 33.0, + "epoch": 4.123107470836436, + "grad_norm": 0.006566896844079208, + "kl": 0.1209716796875, + "learning_rate": 6.450574270653072e-07, + "loss": 0.00012109326780773699, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2074, + "train_speed(iter/s)": 0.022762 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 71.04166984558105, + "completions/min_length": 31.75, + "epoch": 4.125093075204766, + "grad_norm": 0.7637919243343001, + "kl": 0.10986328125, + "learning_rate": 6.447554823116371e-07, + "loss": 0.006727161817252636, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2075, + "train_speed(iter/s)": 0.022761 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 69.90625238418579, + "completions/min_length": 37.0, + "epoch": 4.127078679573095, + "grad_norm": 0.007444117658055701, + "kl": 0.1278076171875, + "learning_rate": 6.444534799277177e-07, + "loss": 0.0001277729170396924, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2076, + "train_speed(iter/s)": 0.022761 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 242.125, + "completions/mean_length": 73.96875286102295, + "completions/min_length": 33.875, + "epoch": 4.129064283941425, + "grad_norm": 0.0064112611068168545, + "kl": 0.10089111328125, + "learning_rate": 6.441514200337823e-07, + "loss": 0.00010088998533319682, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2077, + "train_speed(iter/s)": 0.022759 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.5, + "completions/mean_length": 65.87500286102295, + "completions/min_length": 34.25, + "epoch": 4.131049888309755, + "grad_norm": 0.007220385721150977, + "kl": 0.1048583984375, + "learning_rate": 6.438493027500878e-07, + "loss": 0.00010480167111381888, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2078, + "train_speed(iter/s)": 0.022759 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.25, + "completions/mean_length": 78.63541889190674, + "completions/min_length": 32.875, + "epoch": 4.133035492678084, + "grad_norm": 0.12356985252536719, + "kl": 0.19171142578125, + "learning_rate": 6.435471281969132e-07, + "loss": 0.0001919107453431934, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2079, + "train_speed(iter/s)": 0.022759 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 81.72916793823242, + "completions/min_length": 39.5, + "epoch": 4.135021097046414, + "grad_norm": 0.0072358693611688845, + "kl": 0.1356201171875, + "learning_rate": 6.432448964945607e-07, + "loss": 0.00013537434278987348, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2080, + "train_speed(iter/s)": 0.02276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 67.88541793823242, + "completions/min_length": 30.875, + "epoch": 4.137006701414743, + "grad_norm": 0.006059138893072708, + "kl": 0.09832763671875, + "learning_rate": 6.429426077633555e-07, + "loss": 9.828020120039582e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2081, + "train_speed(iter/s)": 0.022761 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.375, + "completions/mean_length": 71.81250333786011, + "completions/min_length": 30.125, + "epoch": 4.138992305783073, + "grad_norm": 0.007048251625977571, + "kl": 0.1160888671875, + "learning_rate": 6.426402621236448e-07, + "loss": 0.00011619397264439613, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2082, + "train_speed(iter/s)": 0.02276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 68.93750190734863, + "completions/min_length": 37.375, + "epoch": 4.140977910151403, + "grad_norm": 0.005108235245407083, + "kl": 0.0955810546875, + "learning_rate": 6.423378596957989e-07, + "loss": 9.567459346726537e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2083, + "train_speed(iter/s)": 0.02276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.75, + "completions/mean_length": 70.54166984558105, + "completions/min_length": 32.5, + "epoch": 4.142963514519732, + "grad_norm": 0.006484121392104565, + "kl": 0.1065673828125, + "learning_rate": 6.42035400600211e-07, + "loss": 0.00010657946404535323, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2084, + "train_speed(iter/s)": 0.02276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 71.17708396911621, + "completions/min_length": 27.375, + "epoch": 4.144949118888062, + "grad_norm": 0.007882251499710408, + "kl": 0.12213134765625, + "learning_rate": 6.417328849572963e-07, + "loss": 0.00012198302283650264, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2085, + "train_speed(iter/s)": 0.022761 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.75, + "completions/mean_length": 78.39583587646484, + "completions/min_length": 32.875, + "epoch": 4.146934723256392, + "grad_norm": 0.004703872238594433, + "kl": 0.11083984375, + "learning_rate": 6.414303128874927e-07, + "loss": 0.00011083879508078098, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2086, + "train_speed(iter/s)": 0.02276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.375, + "completions/mean_length": 66.98958587646484, + "completions/min_length": 36.75, + "epoch": 4.148920327624721, + "grad_norm": 0.005130927648626159, + "kl": 0.10040283203125, + "learning_rate": 6.411276845112607e-07, + "loss": 0.00010044153896160424, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2087, + "train_speed(iter/s)": 0.02276 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 224.125, + "completions/mean_length": 79.52083778381348, + "completions/min_length": 32.125, + "epoch": 4.150905931993051, + "grad_norm": 0.8583976607757324, + "kl": 0.11871337890625, + "learning_rate": 6.40824999949083e-07, + "loss": -0.0034922566264867783, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2088, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 84.86458587646484, + "completions/min_length": 40.875, + "epoch": 4.15289153636138, + "grad_norm": 0.09933518555005108, + "kl": 0.1343994140625, + "learning_rate": 6.40522259321465e-07, + "loss": 0.00013415730791166425, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2089, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 81.69791889190674, + "completions/min_length": 41.625, + "epoch": 4.15487714072971, + "grad_norm": 0.01409942750044266, + "kl": 0.15057373046875, + "learning_rate": 6.402194627489339e-07, + "loss": 0.00015057779091876, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2090, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.25, + "completions/mean_length": 72.94791841506958, + "completions/min_length": 34.5, + "epoch": 4.1568627450980395, + "grad_norm": 0.0054982524493101224, + "kl": 0.110595703125, + "learning_rate": 6.399166103520397e-07, + "loss": 0.00011063828424084932, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2091, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 74.13541889190674, + "completions/min_length": 33.375, + "epoch": 4.158848349466369, + "grad_norm": 0.005859043458859241, + "kl": 0.1171875, + "learning_rate": 6.396137022513545e-07, + "loss": 0.00011718708265107125, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2092, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 66.63541889190674, + "completions/min_length": 32.75, + "epoch": 4.160833953834699, + "grad_norm": 0.0057757172806900845, + "kl": 0.096923828125, + "learning_rate": 6.393107385674723e-07, + "loss": 9.697902714833617e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2093, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 79.84375286102295, + "completions/min_length": 39.375, + "epoch": 4.162819558203028, + "grad_norm": 1.339601419508341, + "kl": 0.13958740234375, + "learning_rate": 6.390077194210093e-07, + "loss": 0.0008928714087232947, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666679084301, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2094, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 64.55208396911621, + "completions/min_length": 29.75, + "epoch": 4.164805162571358, + "grad_norm": 1.4715376401178155, + "kl": 0.100830078125, + "learning_rate": 6.387046449326044e-07, + "loss": -0.0012706003617495298, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2095, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 70.32292032241821, + "completions/min_length": 34.5, + "epoch": 4.1667907669396875, + "grad_norm": 0.004903255111950009, + "kl": 0.1004638671875, + "learning_rate": 6.384015152229174e-07, + "loss": 0.00010027983080362901, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2096, + "train_speed(iter/s)": 0.022758 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 73.55208587646484, + "completions/min_length": 32.375, + "epoch": 4.168776371308017, + "grad_norm": 0.004786192227970846, + "kl": 0.106689453125, + "learning_rate": 6.380983304126312e-07, + "loss": 0.00010673022916307673, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2097, + "train_speed(iter/s)": 0.022758 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 68.15625095367432, + "completions/min_length": 33.875, + "epoch": 4.1707619756763465, + "grad_norm": 0.006162062992126851, + "kl": 0.10003662109375, + "learning_rate": 6.377950906224498e-07, + "loss": 9.988941019400954e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2098, + "train_speed(iter/s)": 0.022759 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 71.94791841506958, + "completions/min_length": 36.625, + "epoch": 4.1727475800446765, + "grad_norm": 0.004269110568429055, + "kl": 0.11187744140625, + "learning_rate": 6.374917959730996e-07, + "loss": 0.00011188999633304775, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2099, + "train_speed(iter/s)": 0.022759 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 67.3229169845581, + "completions/min_length": 32.5, + "epoch": 4.174733184413006, + "grad_norm": 1.147202615199326, + "kl": 0.09332275390625, + "learning_rate": 6.371884465853288e-07, + "loss": 0.0021340053062886, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2100, + "train_speed(iter/s)": 0.022759 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.875, + "completions/mean_length": 57.89583492279053, + "completions/min_length": 28.25, + "epoch": 4.1767187887813355, + "grad_norm": 0.004930987189095135, + "kl": 0.09088134765625, + "learning_rate": 6.368850425799071e-07, + "loss": 9.076326387003064e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2101, + "train_speed(iter/s)": 0.022761 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.375, + "completions/mean_length": 69.10416889190674, + "completions/min_length": 34.875, + "epoch": 4.178704393149665, + "grad_norm": 0.0072407518494027765, + "kl": 0.0975341796875, + "learning_rate": 6.36581584077626e-07, + "loss": 9.744841372594237e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2102, + "train_speed(iter/s)": 0.022762 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 70.28125190734863, + "completions/min_length": 32.125, + "epoch": 4.1806899975179945, + "grad_norm": 0.031942284214138535, + "kl": 0.1275634765625, + "learning_rate": 6.36278071199299e-07, + "loss": 0.0001275965478271246, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2103, + "train_speed(iter/s)": 0.022761 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 72.92708444595337, + "completions/min_length": 33.875, + "epoch": 4.1826756018863245, + "grad_norm": 0.017988018169071023, + "kl": 0.11181640625, + "learning_rate": 6.359745040657611e-07, + "loss": 0.00011177727719768882, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2104, + "train_speed(iter/s)": 0.022761 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 73.20833492279053, + "completions/min_length": 34.0, + "epoch": 4.1846612062546535, + "grad_norm": 0.004784269293028821, + "kl": 0.1123046875, + "learning_rate": 6.356708827978688e-07, + "loss": 0.00011230337258893996, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2105, + "train_speed(iter/s)": 0.022762 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 72.92708492279053, + "completions/min_length": 30.875, + "epoch": 4.1866468106229835, + "grad_norm": 1.6088016454590102, + "kl": 0.2540283203125, + "learning_rate": 6.353672075165002e-07, + "loss": -0.0019080055644735694, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2106, + "train_speed(iter/s)": 0.022761 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 239.125, + "completions/mean_length": 79.47916889190674, + "completions/min_length": 31.125, + "epoch": 4.188632414991313, + "grad_norm": 1.910045322763083, + "kl": 0.10528564453125, + "learning_rate": 6.350634783425548e-07, + "loss": 0.030049694702029228, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.0852636992931366, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 2107, + "train_speed(iter/s)": 0.022757 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.75, + "completions/mean_length": 72.37500333786011, + "completions/min_length": 34.875, + "epoch": 4.1906180193596425, + "grad_norm": 0.009727818090587384, + "kl": 0.10888671875, + "learning_rate": 6.347596953969538e-07, + "loss": 0.00010897692118305713, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2108, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 68.63541841506958, + "completions/min_length": 33.375, + "epoch": 4.1926036237279725, + "grad_norm": 0.006830828685938915, + "kl": 0.09857177734375, + "learning_rate": 6.344558588006397e-07, + "loss": 9.849101479630917e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2109, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 71.30208492279053, + "completions/min_length": 34.75, + "epoch": 4.1945892280963015, + "grad_norm": 1.7226840248697695, + "kl": 0.111724853515625, + "learning_rate": 6.341519686745764e-07, + "loss": -0.0008382114465348423, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2110, + "train_speed(iter/s)": 0.022756 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 77.86458396911621, + "completions/min_length": 35.5, + "epoch": 4.1965748324646315, + "grad_norm": 0.005362547596227209, + "kl": 0.0897216796875, + "learning_rate": 6.338480251397488e-07, + "loss": 8.972088107839227e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2111, + "train_speed(iter/s)": 0.022754 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 68.02083539962769, + "completions/min_length": 31.375, + "epoch": 4.198560436832961, + "grad_norm": 1.1520637314147772, + "kl": 0.54217529296875, + "learning_rate": 6.335440283171635e-07, + "loss": 0.0005427386495284736, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2112, + "train_speed(iter/s)": 0.022754 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 75.22916793823242, + "completions/min_length": 33.75, + "epoch": 4.2005460412012905, + "grad_norm": 0.815616580644487, + "kl": 0.104278564453125, + "learning_rate": 6.332399783278481e-07, + "loss": -0.0004986375570297241, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2113, + "train_speed(iter/s)": 0.022754 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 103.625, + "completions/mean_length": 62.04166889190674, + "completions/min_length": 33.875, + "epoch": 4.2025316455696204, + "grad_norm": 0.004005737931186196, + "kl": 0.09130859375, + "learning_rate": 6.329358752928515e-07, + "loss": 9.119157766690478e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2114, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.25, + "completions/mean_length": 73.10416793823242, + "completions/min_length": 35.5, + "epoch": 4.2045172499379495, + "grad_norm": 0.004237827244992204, + "kl": 0.1151123046875, + "learning_rate": 6.326317193332434e-07, + "loss": 0.00011505176371429116, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2115, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 77.17708492279053, + "completions/min_length": 32.5, + "epoch": 4.2065028543062795, + "grad_norm": 0.003736387319663302, + "kl": 0.09588623046875, + "learning_rate": 6.323275105701149e-07, + "loss": 9.586976375430822e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2116, + "train_speed(iter/s)": 0.022755 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 78.302086353302, + "completions/min_length": 36.25, + "epoch": 4.208488458674609, + "grad_norm": 0.004441502566949554, + "kl": 0.09747314453125, + "learning_rate": 6.32023249124578e-07, + "loss": 9.750405297381803e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2117, + "train_speed(iter/s)": 0.022754 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.75, + "completions/mean_length": 74.89583587646484, + "completions/min_length": 33.5, + "epoch": 4.2104740630429385, + "grad_norm": 1.046161051555905, + "kl": 0.1474609375, + "learning_rate": 6.317189351177656e-07, + "loss": -0.012441083788871765, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2118, + "train_speed(iter/s)": 0.022754 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 73.59375143051147, + "completions/min_length": 36.75, + "epoch": 4.212459667411268, + "grad_norm": 1.3710098052319393, + "kl": 0.10467529296875, + "learning_rate": 6.314145686708318e-07, + "loss": -0.005860991310328245, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2119, + "train_speed(iter/s)": 0.022754 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 70.53125190734863, + "completions/min_length": 32.25, + "epoch": 4.2144452717795975, + "grad_norm": 0.7564927188599331, + "kl": 0.08367919921875, + "learning_rate": 6.311101499049511e-07, + "loss": -0.007466999813914299, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2120, + "train_speed(iter/s)": 0.022753 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 358.375, + "completions/mean_length": 98.90625333786011, + "completions/min_length": 39.125, + "epoch": 4.216430876147927, + "grad_norm": 0.4515841407103624, + "kl": 0.1043701171875, + "learning_rate": 6.308056789413194e-07, + "loss": 0.019756514579057693, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 2121, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 70.86458539962769, + "completions/min_length": 34.75, + "epoch": 4.218416480516257, + "grad_norm": 0.005363501536762136, + "kl": 0.095947265625, + "learning_rate": 6.305011559011531e-07, + "loss": 9.590342233423144e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2122, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 72.97916984558105, + "completions/min_length": 33.5, + "epoch": 4.2204020848845865, + "grad_norm": 0.005511976413349534, + "kl": 0.108642578125, + "learning_rate": 6.301965809056889e-07, + "loss": 0.00010873382416320965, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2123, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 81.78125190734863, + "completions/min_length": 43.125, + "epoch": 4.222387689252916, + "grad_norm": 0.0060005434509784, + "kl": 0.11236572265625, + "learning_rate": 6.298919540761851e-07, + "loss": 0.00011240018648095429, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2124, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 87.02083683013916, + "completions/min_length": 41.125, + "epoch": 4.224373293621246, + "grad_norm": 0.00540217399546723, + "kl": 0.1251220703125, + "learning_rate": 6.2958727553392e-07, + "loss": 0.0001250765926670283, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2125, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 198.125, + "completions/mean_length": 85.3854193687439, + "completions/min_length": 41.125, + "epoch": 4.226358897989575, + "grad_norm": 0.004633497194136154, + "kl": 0.1041259765625, + "learning_rate": 6.292825454001924e-07, + "loss": 0.00010407729860162362, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2126, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 71.31250286102295, + "completions/min_length": 37.25, + "epoch": 4.228344502357905, + "grad_norm": 0.019864548172534455, + "kl": 0.11541748046875, + "learning_rate": 6.289777637963222e-07, + "loss": 0.00011548617476364598, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2127, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.875, + "completions/mean_length": 77.45833539962769, + "completions/min_length": 35.125, + "epoch": 4.230330106726234, + "grad_norm": 0.0038901213178737966, + "kl": 0.09002685546875, + "learning_rate": 6.286729308436491e-07, + "loss": 8.997264376375824e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2128, + "train_speed(iter/s)": 0.022745 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 76.08333587646484, + "completions/min_length": 37.0, + "epoch": 4.232315711094564, + "grad_norm": 0.00409681251213564, + "kl": 0.095458984375, + "learning_rate": 6.283680466635342e-07, + "loss": 9.557482553645968e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2129, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 75.16666889190674, + "completions/min_length": 31.375, + "epoch": 4.234301315462894, + "grad_norm": 0.004317427547692185, + "kl": 0.09796142578125, + "learning_rate": 6.280631113773579e-07, + "loss": 9.802542626857758e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2130, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 75.645836353302, + "completions/min_length": 35.875, + "epoch": 4.236286919831223, + "grad_norm": 0.0050924570654843375, + "kl": 0.10748291015625, + "learning_rate": 6.277581251065216e-07, + "loss": 0.00010743318125605583, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2131, + "train_speed(iter/s)": 0.022744 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 80.19792032241821, + "completions/min_length": 33.625, + "epoch": 4.238272524199553, + "grad_norm": 0.005420583518167263, + "kl": 0.093780517578125, + "learning_rate": 6.274530879724467e-07, + "loss": 9.378982940688729e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2132, + "train_speed(iter/s)": 0.022745 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.25, + "completions/mean_length": 81.78125238418579, + "completions/min_length": 36.0, + "epoch": 4.240258128567882, + "grad_norm": 0.02458503872892915, + "kl": 0.14434814453125, + "learning_rate": 6.271480000965753e-07, + "loss": 0.0001443479413865134, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2133, + "train_speed(iter/s)": 0.022745 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.25, + "completions/mean_length": 71.72916841506958, + "completions/min_length": 36.125, + "epoch": 4.242243732936212, + "grad_norm": 0.007250340908846005, + "kl": 0.11737060546875, + "learning_rate": 6.268428616003692e-07, + "loss": 0.00011734977306332439, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2134, + "train_speed(iter/s)": 0.022745 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.625, + "completions/mean_length": 70.00000238418579, + "completions/min_length": 34.875, + "epoch": 4.244229337304542, + "grad_norm": 0.005096121448354268, + "kl": 0.11724853515625, + "learning_rate": 6.265376726053106e-07, + "loss": 0.00011712061677826568, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2135, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 77.54166889190674, + "completions/min_length": 33.375, + "epoch": 4.246214941672871, + "grad_norm": 1.0925840630045296, + "kl": 0.1119384765625, + "learning_rate": 6.262324332329017e-07, + "loss": 0.010600554756820202, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2136, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.625, + "completions/mean_length": 69.0416693687439, + "completions/min_length": 36.875, + "epoch": 4.248200546041201, + "grad_norm": 0.02270641123003356, + "kl": 0.12701416015625, + "learning_rate": 6.25927143604665e-07, + "loss": 0.00012672273442149162, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2137, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.375, + "completions/mean_length": 71.14583587646484, + "completions/min_length": 30.375, + "epoch": 4.250186150409531, + "grad_norm": 0.0073331646430859435, + "kl": 0.12554931640625, + "learning_rate": 6.256218038421427e-07, + "loss": 0.00012549271923489869, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2138, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 73.56250190734863, + "completions/min_length": 32.125, + "epoch": 4.25217175477786, + "grad_norm": 1.214939233500986, + "kl": 0.12042236328125, + "learning_rate": 6.253164140668969e-07, + "loss": -0.010047522373497486, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2139, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 71.86458539962769, + "completions/min_length": 32.625, + "epoch": 4.25415735914619, + "grad_norm": 0.006206648688445081, + "kl": 0.11810302734375, + "learning_rate": 6.250109744005099e-07, + "loss": 0.00011817819904536009, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2140, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 73.63541841506958, + "completions/min_length": 35.625, + "epoch": 4.256142963514519, + "grad_norm": 0.006382881391492027, + "kl": 0.1397705078125, + "learning_rate": 6.247054849645841e-07, + "loss": 0.0001397307205479592, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2141, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 79.93750286102295, + "completions/min_length": 34.75, + "epoch": 4.258128567882849, + "grad_norm": 0.0051710882284947335, + "kl": 0.10797119140625, + "learning_rate": 6.24399945880741e-07, + "loss": 0.00010794639092637226, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2142, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 75.43750095367432, + "completions/min_length": 36.75, + "epoch": 4.260114172251179, + "grad_norm": 0.005328129405991408, + "kl": 0.1298828125, + "learning_rate": 6.240943572706222e-07, + "loss": 0.0001298969582421705, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2143, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 72.34375190734863, + "completions/min_length": 32.125, + "epoch": 4.262099776619508, + "grad_norm": 0.011909797426491574, + "kl": 0.123291015625, + "learning_rate": 6.237887192558893e-07, + "loss": 0.00012328616867307574, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2144, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 81.79166984558105, + "completions/min_length": 37.875, + "epoch": 4.264085380987838, + "grad_norm": 0.8580151172022481, + "kl": 0.11370849609375, + "learning_rate": 6.234830319582232e-07, + "loss": 0.010421425104141235, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2145, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.875, + "completions/mean_length": 79.67708492279053, + "completions/min_length": 40.125, + "epoch": 4.266070985356167, + "grad_norm": 0.0067696176604222, + "kl": 0.1102294921875, + "learning_rate": 6.231772954993244e-07, + "loss": 0.00011016390635631979, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2146, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 77.5416693687439, + "completions/min_length": 39.125, + "epoch": 4.268056589724497, + "grad_norm": 0.007469727195422004, + "kl": 0.1107177734375, + "learning_rate": 6.228715100009134e-07, + "loss": 0.00011065860599046573, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2147, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 76.95833492279053, + "completions/min_length": 37.0, + "epoch": 4.270042194092827, + "grad_norm": 1.4679923823335637, + "kl": 0.113037109375, + "learning_rate": 6.225656755847297e-07, + "loss": 0.00011307001113891602, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2148, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 77.08333587646484, + "completions/min_length": 34.75, + "epoch": 4.272027798461156, + "grad_norm": 0.3427710252940932, + "kl": 0.26995849609375, + "learning_rate": 6.222597923725326e-07, + "loss": 0.0002700219047255814, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2149, + "train_speed(iter/s)": 0.022745 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 86.68750286102295, + "completions/min_length": 38.0, + "epoch": 4.274013402829486, + "grad_norm": 0.006710652229371557, + "kl": 0.1202392578125, + "learning_rate": 6.219538604861008e-07, + "loss": 0.00012040885485475883, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2150, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 80.28125286102295, + "completions/min_length": 35.25, + "epoch": 4.275999007197816, + "grad_norm": 0.006281563695797398, + "kl": 0.09869384765625, + "learning_rate": 6.216478800472323e-07, + "loss": 9.877184493234381e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2151, + "train_speed(iter/s)": 0.022745 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 70.72916889190674, + "completions/min_length": 39.875, + "epoch": 4.277984611566145, + "grad_norm": 0.007604098408230099, + "kl": 0.11285400390625, + "learning_rate": 6.213418511777444e-07, + "loss": 0.00011290128895780072, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2152, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 73.31250095367432, + "completions/min_length": 32.375, + "epoch": 4.279970215934475, + "grad_norm": 0.00611997089867395, + "kl": 0.09368896484375, + "learning_rate": 6.210357739994736e-07, + "loss": 9.357830276712775e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2153, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 83.48958587646484, + "completions/min_length": 37.875, + "epoch": 4.281955820302804, + "grad_norm": 0.005505165867505372, + "kl": 0.1214599609375, + "learning_rate": 6.207296486342762e-07, + "loss": 0.00012154154683230445, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2154, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 80.89583492279053, + "completions/min_length": 33.75, + "epoch": 4.283941424671134, + "grad_norm": 0.005343126484922576, + "kl": 0.11297607421875, + "learning_rate": 6.204234752040267e-07, + "loss": 0.00011293106945231557, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2155, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 70.26041889190674, + "completions/min_length": 35.5, + "epoch": 4.285927029039464, + "grad_norm": 0.005165625719855671, + "kl": 0.10089111328125, + "learning_rate": 6.201172538306197e-07, + "loss": 0.00010086174006573856, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2156, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.375, + "completions/mean_length": 84.13541841506958, + "completions/min_length": 39.625, + "epoch": 4.287912633407793, + "grad_norm": 0.005232846227203835, + "kl": 0.11505126953125, + "learning_rate": 6.198109846359681e-07, + "loss": 0.00011489923053886741, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2157, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.25, + "completions/mean_length": 84.35416984558105, + "completions/min_length": 32.625, + "epoch": 4.289898237776123, + "grad_norm": 1.2383161245242642, + "kl": 0.12127685546875, + "learning_rate": 6.195046677420046e-07, + "loss": 0.009707589633762836, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2158, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 72.54166889190674, + "completions/min_length": 34.625, + "epoch": 4.291883842144452, + "grad_norm": 0.004348617019701289, + "kl": 0.0947265625, + "learning_rate": 6.191983032706802e-07, + "loss": 9.466991468798369e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2159, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 75.84375190734863, + "completions/min_length": 29.875, + "epoch": 4.293869446512782, + "grad_norm": 0.004859036025950963, + "kl": 0.115081787109375, + "learning_rate": 6.188918913439654e-07, + "loss": 0.00011508363240864128, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2160, + "train_speed(iter/s)": 0.022749 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 74.77083492279053, + "completions/min_length": 35.625, + "epoch": 4.295855050881112, + "grad_norm": 0.020921215540191326, + "kl": 0.108001708984375, + "learning_rate": 6.18585432083849e-07, + "loss": 0.00010808964725583792, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2161, + "train_speed(iter/s)": 0.022748 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.25, + "completions/mean_length": 83.4479193687439, + "completions/min_length": 39.125, + "epoch": 4.297840655249441, + "grad_norm": 1.266218071061667, + "kl": 0.102294921875, + "learning_rate": 6.182789256123392e-07, + "loss": -0.00463305227458477, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2162, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.75, + "completions/mean_length": 82.32291889190674, + "completions/min_length": 38.0, + "epoch": 4.299826259617771, + "grad_norm": 0.007097711139553494, + "kl": 0.13116455078125, + "learning_rate": 6.179723720514628e-07, + "loss": 0.00013098123599775136, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2163, + "train_speed(iter/s)": 0.022747 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.125, + "completions/mean_length": 83.63541984558105, + "completions/min_length": 41.5, + "epoch": 4.301811863986101, + "grad_norm": 2.1056242944638575, + "kl": 0.1376953125, + "learning_rate": 6.176657715232653e-07, + "loss": 0.0019861895125359297, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2164, + "train_speed(iter/s)": 0.022746 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 87.53125190734863, + "completions/min_length": 38.75, + "epoch": 4.30379746835443, + "grad_norm": 0.004936754792153554, + "kl": 0.1077880859375, + "learning_rate": 6.173591241498108e-07, + "loss": 0.00010781047603813931, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2165, + "train_speed(iter/s)": 0.022745 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.25, + "completions/mean_length": 81.70833683013916, + "completions/min_length": 36.0, + "epoch": 4.30578307272276, + "grad_norm": 1.9775669128117377, + "kl": 0.10980224609375, + "learning_rate": 6.170524300531822e-07, + "loss": -0.022212138399481773, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.057790376245975494, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2166, + "train_speed(iter/s)": 0.022744 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.375, + "completions/mean_length": 69.27083539962769, + "completions/min_length": 32.0, + "epoch": 4.307768677091089, + "grad_norm": 0.0065041533823454786, + "kl": 0.10552978515625, + "learning_rate": 6.167456893554811e-07, + "loss": 0.0001056869950843975, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2167, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 73.82291793823242, + "completions/min_length": 29.75, + "epoch": 4.309754281459419, + "grad_norm": 0.005296944146499002, + "kl": 0.09259033203125, + "learning_rate": 6.164389021788274e-07, + "loss": 9.261623199563473e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2168, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 76.63541984558105, + "completions/min_length": 35.25, + "epoch": 4.311739885827749, + "grad_norm": 0.8327864291520056, + "kl": 0.13287353515625, + "learning_rate": 6.161320686453597e-07, + "loss": -0.006725304760038853, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2169, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 64.06250095367432, + "completions/min_length": 30.5, + "epoch": 4.313725490196078, + "grad_norm": 1.4972505990412106, + "kl": 0.106475830078125, + "learning_rate": 6.158251888772349e-07, + "loss": 0.0009859844576567411, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2170, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 66.55208587646484, + "completions/min_length": 34.0, + "epoch": 4.315711094564408, + "grad_norm": 0.009045672936514327, + "kl": 0.0880126953125, + "learning_rate": 6.155182629966284e-07, + "loss": 8.795078611001372e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2171, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 71.94791984558105, + "completions/min_length": 34.125, + "epoch": 4.317696698932737, + "grad_norm": 0.19352634896198373, + "kl": 0.29296875, + "learning_rate": 6.152112911257341e-07, + "loss": 0.0002925730077549815, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2172, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.375, + "completions/mean_length": 73.36458539962769, + "completions/min_length": 34.375, + "epoch": 4.319682303301067, + "grad_norm": 0.004952008794087204, + "kl": 0.094696044921875, + "learning_rate": 6.149042733867638e-07, + "loss": 9.47610751609318e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2173, + "train_speed(iter/s)": 0.022742 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 72.16666889190674, + "completions/min_length": 32.375, + "epoch": 4.321667907669397, + "grad_norm": 0.010858264860695637, + "kl": 0.1270751953125, + "learning_rate": 6.145972099019482e-07, + "loss": 0.00012701982632279396, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2174, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 79.95833587646484, + "completions/min_length": 37.25, + "epoch": 4.323653512037726, + "grad_norm": 0.004800592752618598, + "kl": 0.11627197265625, + "learning_rate": 6.142901007935354e-07, + "loss": 0.00011625223851297051, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2175, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 74.98958492279053, + "completions/min_length": 39.875, + "epoch": 4.325639116406056, + "grad_norm": 0.009943999205471144, + "kl": 0.132171630859375, + "learning_rate": 6.139829461837923e-07, + "loss": 0.00013222289271652699, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2176, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 235.875, + "completions/mean_length": 79.98958587646484, + "completions/min_length": 35.75, + "epoch": 4.327624720774386, + "grad_norm": 0.5031584637934956, + "kl": 0.10137939453125, + "learning_rate": 6.136757461950038e-07, + "loss": 0.018761513754725456, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 2177, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.5, + "completions/mean_length": 69.56250238418579, + "completions/min_length": 35.625, + "epoch": 4.329610325142715, + "grad_norm": 0.005095083675958716, + "kl": 0.13433837890625, + "learning_rate": 6.133685009494727e-07, + "loss": 0.00013436871813610196, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2178, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 70.40625190734863, + "completions/min_length": 33.125, + "epoch": 4.331595929511045, + "grad_norm": 0.005527743852791901, + "kl": 0.089111328125, + "learning_rate": 6.130612105695198e-07, + "loss": 8.904706191970035e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2179, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 72.85416793823242, + "completions/min_length": 35.125, + "epoch": 4.333581533879374, + "grad_norm": 0.015451175902975043, + "kl": 0.14697265625, + "learning_rate": 6.127538751774838e-07, + "loss": 0.00014691613614559174, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2180, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 76.89583396911621, + "completions/min_length": 29.5, + "epoch": 4.335567138247704, + "grad_norm": 0.005606252675714691, + "kl": 0.09564208984375, + "learning_rate": 6.12446494895722e-07, + "loss": 9.575676813255996e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2181, + "train_speed(iter/s)": 0.022737 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.5, + "completions/mean_length": 63.67708492279053, + "completions/min_length": 31.75, + "epoch": 4.337552742616034, + "grad_norm": 0.005841399969538675, + "kl": 0.09710693359375, + "learning_rate": 6.12139069846609e-07, + "loss": 9.709967707749456e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2182, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 70.81250143051147, + "completions/min_length": 31.375, + "epoch": 4.339538346984363, + "grad_norm": 0.0052388760696236505, + "kl": 0.10931396484375, + "learning_rate": 6.118316001525367e-07, + "loss": 0.0001094454200938344, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2183, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 73.40625190734863, + "completions/min_length": 35.0, + "epoch": 4.341523951352693, + "grad_norm": 0.007631662816604879, + "kl": 0.12664794921875, + "learning_rate": 6.115240859359158e-07, + "loss": 0.00012666269321925938, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2184, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.5, + "completions/mean_length": 70.22916793823242, + "completions/min_length": 35.25, + "epoch": 4.343509555721022, + "grad_norm": 2.353035239427212, + "kl": 0.1259765625, + "learning_rate": 6.112165273191743e-07, + "loss": 0.01087275892496109, + "memory(GiB)": 94.21, + "reward": 1.8854166865348816, + "reward_std": 0.0900652389973402, + "rewards/CineAccuracyORM/mean": 0.885416679084301, + "rewards/CineAccuracyORM/std": 0.17456800863146782, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2185, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 75.35416841506958, + "completions/min_length": 35.875, + "epoch": 4.345495160089352, + "grad_norm": 0.0068809409178183486, + "kl": 0.12139892578125, + "learning_rate": 6.109089244247576e-07, + "loss": 0.00012130946561228484, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2186, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 74.15625286102295, + "completions/min_length": 35.375, + "epoch": 4.347480764457682, + "grad_norm": 1.5008723837449958, + "kl": 0.13409423828125, + "learning_rate": 6.106012773751292e-07, + "loss": 0.0020405042450875044, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2187, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 67.92708587646484, + "completions/min_length": 32.5, + "epoch": 4.349466368826011, + "grad_norm": 0.006215444967002574, + "kl": 0.10614013671875, + "learning_rate": 6.102935862927699e-07, + "loss": 0.00010628569725668058, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2188, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 63.21875238418579, + "completions/min_length": 28.375, + "epoch": 4.351451973194341, + "grad_norm": 0.005864865024531859, + "kl": 0.08837890625, + "learning_rate": 6.099858513001781e-07, + "loss": 8.834658365231007e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2189, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.375, + "completions/mean_length": 78.22916841506958, + "completions/min_length": 36.625, + "epoch": 4.353437577562671, + "grad_norm": 0.011949924863843985, + "kl": 0.12762451171875, + "learning_rate": 6.096780725198696e-07, + "loss": 0.0001275732065550983, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2190, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 73.64583539962769, + "completions/min_length": 35.5, + "epoch": 4.355423181931, + "grad_norm": 0.7898535902942363, + "kl": 0.11663818359375, + "learning_rate": 6.093702500743777e-07, + "loss": -0.008968975394964218, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2191, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 77.37500238418579, + "completions/min_length": 34.875, + "epoch": 4.35740878629933, + "grad_norm": 0.005929511737035633, + "kl": 0.113525390625, + "learning_rate": 6.090623840862532e-07, + "loss": 0.00011332183930790052, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2192, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 70.40625143051147, + "completions/min_length": 38.75, + "epoch": 4.359394390667659, + "grad_norm": 0.006608199745306608, + "kl": 0.09600830078125, + "learning_rate": 6.087544746780642e-07, + "loss": 9.602106729289517e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2193, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.875, + "completions/mean_length": 64.60416793823242, + "completions/min_length": 33.625, + "epoch": 4.361379995035989, + "grad_norm": 0.0075261420942403555, + "kl": 0.09661865234375, + "learning_rate": 6.084465219723958e-07, + "loss": 9.656776092015207e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2194, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 73.09375286102295, + "completions/min_length": 33.5, + "epoch": 4.363365599404319, + "grad_norm": 0.007139100938433291, + "kl": 0.1239013671875, + "learning_rate": 6.081385260918506e-07, + "loss": 0.00012366512964945287, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2195, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 76.56250286102295, + "completions/min_length": 33.0, + "epoch": 4.365351203772648, + "grad_norm": 0.007515732848141562, + "kl": 0.14251708984375, + "learning_rate": 6.078304871590484e-07, + "loss": 0.00014258341980166733, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2196, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.25, + "completions/mean_length": 63.343751430511475, + "completions/min_length": 28.875, + "epoch": 4.367336808140978, + "grad_norm": 0.012154051616345378, + "kl": 0.1004638671875, + "learning_rate": 6.07522405296626e-07, + "loss": 0.00010032587306341156, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2197, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 78.87500238418579, + "completions/min_length": 36.25, + "epoch": 4.369322412509307, + "grad_norm": 0.006292582384205913, + "kl": 0.1156005859375, + "learning_rate": 6.072142806272375e-07, + "loss": 0.00011555859236977994, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2198, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 74.62500286102295, + "completions/min_length": 34.75, + "epoch": 4.371308016877637, + "grad_norm": 1.0412706249333372, + "kl": 0.11749267578125, + "learning_rate": 6.069061132735539e-07, + "loss": 0.012513134628534317, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2199, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 71.35416889190674, + "completions/min_length": 30.375, + "epoch": 4.373293621245967, + "grad_norm": 2.327396643946038, + "kl": 0.1337890625, + "learning_rate": 6.065979033582631e-07, + "loss": 0.007141719572246075, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166669771075, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2200, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.0, + "completions/mean_length": 74.32291793823242, + "completions/min_length": 34.375, + "epoch": 4.375279225614296, + "grad_norm": 0.057880779564860686, + "kl": 0.21368408203125, + "learning_rate": 6.0628965100407e-07, + "loss": 0.00021348870359361172, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2201, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 70.50000333786011, + "completions/min_length": 34.75, + "epoch": 4.377264829982626, + "grad_norm": 0.008269238724736299, + "kl": 0.10009765625, + "learning_rate": 6.059813563336966e-07, + "loss": 0.0001000729389488697, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2202, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 64.00000047683716, + "completions/min_length": 30.875, + "epoch": 4.379250434350956, + "grad_norm": 0.008295183069219449, + "kl": 0.111572265625, + "learning_rate": 6.056730194698816e-07, + "loss": 0.00011152803199365735, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2203, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 73.77083683013916, + "completions/min_length": 33.125, + "epoch": 4.381236038719285, + "grad_norm": 0.006837591722160979, + "kl": 0.11505126953125, + "learning_rate": 6.053646405353803e-07, + "loss": 0.00011512891796883196, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2204, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 75.31250190734863, + "completions/min_length": 32.375, + "epoch": 4.383221643087615, + "grad_norm": 0.1702190118078794, + "kl": 0.17474365234375, + "learning_rate": 6.050562196529651e-07, + "loss": 0.00017487231525592506, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2205, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.5, + "completions/mean_length": 66.10416793823242, + "completions/min_length": 32.125, + "epoch": 4.385207247455944, + "grad_norm": 3.9327445203665405, + "kl": 0.11376953125, + "learning_rate": 6.047477569454251e-07, + "loss": 0.002327452879399061, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2206, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 62.31250190734863, + "completions/min_length": 30.875, + "epoch": 4.387192851824274, + "grad_norm": 0.0075972750118719476, + "kl": 0.10369873046875, + "learning_rate": 6.044392525355655e-07, + "loss": 0.00010372063843533397, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2207, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.625, + "completions/mean_length": 81.6354193687439, + "completions/min_length": 38.5, + "epoch": 4.389178456192604, + "grad_norm": 0.006659378555092665, + "kl": 0.11602783203125, + "learning_rate": 6.041307065462086e-07, + "loss": 0.00011604859901126474, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2208, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 64.63541889190674, + "completions/min_length": 30.875, + "epoch": 4.391164060560933, + "grad_norm": 0.007547120112482382, + "kl": 0.12982177734375, + "learning_rate": 6.038221191001934e-07, + "loss": 0.000129716529045254, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2209, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 72.02083539962769, + "completions/min_length": 34.0, + "epoch": 4.393149664929263, + "grad_norm": 0.006123245332568914, + "kl": 0.12371826171875, + "learning_rate": 6.03513490320375e-07, + "loss": 0.0001235016097780317, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2210, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.25, + "completions/mean_length": 61.45833444595337, + "completions/min_length": 34.375, + "epoch": 4.395135269297592, + "grad_norm": 0.007131661201739009, + "kl": 0.1190185546875, + "learning_rate": 6.03204820329625e-07, + "loss": 0.00011896588694071397, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2211, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 75.73958587646484, + "completions/min_length": 34.5, + "epoch": 4.397120873665922, + "grad_norm": 0.005671645655020672, + "kl": 0.11016845703125, + "learning_rate": 6.028961092508318e-07, + "loss": 0.00011019622616004199, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2212, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.125, + "completions/mean_length": 61.82291889190674, + "completions/min_length": 32.625, + "epoch": 4.399106478034252, + "grad_norm": 0.0061670904229082875, + "kl": 0.097076416015625, + "learning_rate": 6.025873572068996e-07, + "loss": 9.709945879876614e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2213, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 70.97916984558105, + "completions/min_length": 33.5, + "epoch": 4.401092082402581, + "grad_norm": 0.005323586639017825, + "kl": 0.1224365234375, + "learning_rate": 6.022785643207494e-07, + "loss": 0.0001225903833983466, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2214, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 65.90625190734863, + "completions/min_length": 33.625, + "epoch": 4.403077686770911, + "grad_norm": 0.960801187850006, + "kl": 0.129150390625, + "learning_rate": 6.019697307153179e-07, + "loss": -0.0025950162671506405, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2215, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 68.67708444595337, + "completions/min_length": 34.0, + "epoch": 4.405063291139241, + "grad_norm": 0.005329605907190612, + "kl": 0.10321044921875, + "learning_rate": 6.016608565135587e-07, + "loss": 0.0001033106236718595, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2216, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 68.47916889190674, + "completions/min_length": 33.5, + "epoch": 4.40704889550757, + "grad_norm": 0.007051404309491915, + "kl": 0.102294921875, + "learning_rate": 6.013519418384411e-07, + "loss": 0.00010225444566458464, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2217, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 79.88541889190674, + "completions/min_length": 34.625, + "epoch": 4.4090344998759, + "grad_norm": 0.006658809598769112, + "kl": 0.13885498046875, + "learning_rate": 6.010429868129506e-07, + "loss": 0.0001388160017086193, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2218, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 76.05208587646484, + "completions/min_length": 37.25, + "epoch": 4.411020104244229, + "grad_norm": 0.005511796052147891, + "kl": 0.12298583984375, + "learning_rate": 6.007339915600889e-07, + "loss": 0.00012317602522671223, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2219, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 69.71875095367432, + "completions/min_length": 33.0, + "epoch": 4.413005708612559, + "grad_norm": 1.798769509700025, + "kl": 0.1224365234375, + "learning_rate": 6.004249562028734e-07, + "loss": 0.004593212157487869, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.3624799847602844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2220, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.625, + "completions/mean_length": 78.8541693687439, + "completions/min_length": 34.375, + "epoch": 4.414991312980889, + "grad_norm": 0.00873641520187544, + "kl": 0.124267578125, + "learning_rate": 6.001158808643378e-07, + "loss": 0.0001242886937689036, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2221, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 71.020836353302, + "completions/min_length": 32.25, + "epoch": 4.416976917349218, + "grad_norm": 0.008128993079254675, + "kl": 0.13153076171875, + "learning_rate": 5.998067656675318e-07, + "loss": 0.00013156019849702716, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2222, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 75.81250190734863, + "completions/min_length": 32.5, + "epoch": 4.418962521717548, + "grad_norm": 0.005167928035830761, + "kl": 0.1212158203125, + "learning_rate": 5.994976107355204e-07, + "loss": 0.00012118794256821275, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2223, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 68.00000190734863, + "completions/min_length": 33.375, + "epoch": 4.420948126085877, + "grad_norm": 1.3517077603265466, + "kl": 0.11785888671875, + "learning_rate": 5.991884161913849e-07, + "loss": -0.0055998824536800385, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2224, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.375, + "completions/mean_length": 68.70833492279053, + "completions/min_length": 34.25, + "epoch": 4.422933730454207, + "grad_norm": 0.008277399572450427, + "kl": 0.1094970703125, + "learning_rate": 5.988791821582223e-07, + "loss": 0.00010955524339806288, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2225, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 72.78125143051147, + "completions/min_length": 35.5, + "epoch": 4.424919334822537, + "grad_norm": 2.1676799538672005, + "kl": 0.108642578125, + "learning_rate": 5.985699087591455e-07, + "loss": 0.0005191924865357578, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2226, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 76.11458587646484, + "completions/min_length": 33.375, + "epoch": 4.426904939190866, + "grad_norm": 0.010581434453325027, + "kl": 0.11492919921875, + "learning_rate": 5.982605961172826e-07, + "loss": 0.00011495671787997708, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2227, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 77.28125190734863, + "completions/min_length": 39.5, + "epoch": 4.428890543559196, + "grad_norm": 0.014829442732975697, + "kl": 0.14886474609375, + "learning_rate": 5.979512443557773e-07, + "loss": 0.00014887719589751214, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2228, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 80.87500286102295, + "completions/min_length": 35.875, + "epoch": 4.430876147927526, + "grad_norm": 0.006505129416172592, + "kl": 0.10479736328125, + "learning_rate": 5.976418535977895e-07, + "loss": 0.00010468787513673306, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2229, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 69.59375190734863, + "completions/min_length": 32.625, + "epoch": 4.432861752295855, + "grad_norm": 0.00908326855435322, + "kl": 0.10736083984375, + "learning_rate": 5.973324239664943e-07, + "loss": 0.00010746221232693642, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2230, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 78.5729193687439, + "completions/min_length": 32.875, + "epoch": 4.434847356664185, + "grad_norm": 0.14447759008994532, + "kl": 0.1588134765625, + "learning_rate": 5.970229555850823e-07, + "loss": 0.0001588661689311266, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2231, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.875, + "completions/mean_length": 69.38541841506958, + "completions/min_length": 32.625, + "epoch": 4.436832961032514, + "grad_norm": 0.004345346713983905, + "kl": 0.094024658203125, + "learning_rate": 5.96713448576759e-07, + "loss": 9.405636228621006e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2232, + "train_speed(iter/s)": 0.022742 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.25, + "completions/mean_length": 80.56250286102295, + "completions/min_length": 41.25, + "epoch": 4.438818565400844, + "grad_norm": 0.007195054618359823, + "kl": 0.10919189453125, + "learning_rate": 5.964039030647463e-07, + "loss": 0.00010911936260527, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2233, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 79.96875190734863, + "completions/min_length": 38.0, + "epoch": 4.440804169769174, + "grad_norm": 0.7738683864853091, + "kl": 0.12646484375, + "learning_rate": 5.960943191722806e-07, + "loss": -0.001549186883494258, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2234, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.375, + "completions/mean_length": 85.32291984558105, + "completions/min_length": 41.625, + "epoch": 4.442789774137503, + "grad_norm": 0.005536912862824791, + "kl": 0.119415283203125, + "learning_rate": 5.957846970226139e-07, + "loss": 0.00011937151430174708, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2235, + "train_speed(iter/s)": 0.022744 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 71.31250286102295, + "completions/min_length": 31.625, + "epoch": 4.444775378505833, + "grad_norm": 0.006703990839594633, + "kl": 0.093994140625, + "learning_rate": 5.954750367390133e-07, + "loss": 9.39558885875158e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2236, + "train_speed(iter/s)": 0.022743 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 78.05208492279053, + "completions/min_length": 38.25, + "epoch": 4.446760982874162, + "grad_norm": 1.131962043629531, + "kl": 0.115234375, + "learning_rate": 5.951653384447614e-07, + "loss": -0.002550897654145956, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2237, + "train_speed(iter/s)": 0.022744 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 77.97917079925537, + "completions/min_length": 32.875, + "epoch": 4.448746587242492, + "grad_norm": 0.0051354552749809145, + "kl": 0.0958251953125, + "learning_rate": 5.948556022631556e-07, + "loss": 9.585735824657604e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2238, + "train_speed(iter/s)": 0.022744 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.125, + "completions/mean_length": 86.14583587646484, + "completions/min_length": 39.25, + "epoch": 4.450732191610822, + "grad_norm": 0.004362142424286791, + "kl": 0.103515625, + "learning_rate": 5.945458283175084e-07, + "loss": 0.00010353367542847991, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2239, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 72.92708492279053, + "completions/min_length": 33.625, + "epoch": 4.452717795979151, + "grad_norm": 0.006494140007703258, + "kl": 0.115478515625, + "learning_rate": 5.942360167311476e-07, + "loss": 0.00011557410471141338, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2240, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 75.46875286102295, + "completions/min_length": 32.125, + "epoch": 4.454703400347481, + "grad_norm": 0.005789564642703661, + "kl": 0.108795166015625, + "learning_rate": 5.939261676274155e-07, + "loss": 0.00010869429388549179, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2241, + "train_speed(iter/s)": 0.022742 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.75, + "completions/mean_length": 88.41666793823242, + "completions/min_length": 39.625, + "epoch": 4.456689004715811, + "grad_norm": 1.2740001698366927, + "kl": 0.09259033203125, + "learning_rate": 5.936162811296699e-07, + "loss": 0.007235649041831493, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2242, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.5, + "completions/mean_length": 67.46875143051147, + "completions/min_length": 30.625, + "epoch": 4.45867460908414, + "grad_norm": 0.005501138745148115, + "kl": 0.1058349609375, + "learning_rate": 5.933063573612835e-07, + "loss": 0.00010583333641989157, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2243, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 71.65625333786011, + "completions/min_length": 32.125, + "epoch": 4.46066021345247, + "grad_norm": 0.0052792073570150375, + "kl": 0.09326171875, + "learning_rate": 5.929963964456429e-07, + "loss": 9.329054591944441e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2244, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.25, + "completions/mean_length": 65.1979193687439, + "completions/min_length": 35.375, + "epoch": 4.462645817820799, + "grad_norm": 1.5023405885701933, + "kl": 0.670166015625, + "learning_rate": 5.926863985061506e-07, + "loss": 0.0006756539805792272, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2245, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.25, + "completions/mean_length": 82.18750095367432, + "completions/min_length": 41.875, + "epoch": 4.464631422189129, + "grad_norm": 0.005549182014369176, + "kl": 0.095733642578125, + "learning_rate": 5.923763636662233e-07, + "loss": 9.578568278811872e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2246, + "train_speed(iter/s)": 0.022742 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.25, + "completions/mean_length": 65.51041889190674, + "completions/min_length": 34.0, + "epoch": 4.466617026557459, + "grad_norm": 0.006619573686598099, + "kl": 0.07904052734375, + "learning_rate": 5.920662920492927e-07, + "loss": 7.910047133918852e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2247, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 70.50000190734863, + "completions/min_length": 33.375, + "epoch": 4.468602630925788, + "grad_norm": 0.005926243252638949, + "kl": 0.09478759765625, + "learning_rate": 5.917561837788045e-07, + "loss": 9.47156804613769e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2248, + "train_speed(iter/s)": 0.022741 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.375, + "completions/mean_length": 70.26041889190674, + "completions/min_length": 35.875, + "epoch": 4.470588235294118, + "grad_norm": 0.004213126061681416, + "kl": 0.08453369140625, + "learning_rate": 5.914460389782198e-07, + "loss": 8.450097811874002e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2249, + "train_speed(iter/s)": 0.022742 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.125, + "completions/mean_length": 69.67708492279053, + "completions/min_length": 30.375, + "epoch": 4.472573839662447, + "grad_norm": 0.004192200362034999, + "kl": 0.08502197265625, + "learning_rate": 5.911358577710137e-07, + "loss": 8.49056668812409e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2250, + "train_speed(iter/s)": 0.022742 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 71.58333587646484, + "completions/min_length": 33.0, + "epoch": 4.474559444030777, + "grad_norm": 0.006390449071373999, + "kl": 0.080718994140625, + "learning_rate": 5.908256402806761e-07, + "loss": 8.078113751253113e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2251, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.0, + "completions/mean_length": 81.3854193687439, + "completions/min_length": 37.625, + "epoch": 4.476545048399107, + "grad_norm": 0.005658102287913021, + "kl": 0.1036376953125, + "learning_rate": 5.90515386630711e-07, + "loss": 0.00010348573414376006, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2252, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 65.739586353302, + "completions/min_length": 29.125, + "epoch": 4.478530652767436, + "grad_norm": 0.004735257029860334, + "kl": 0.0887451171875, + "learning_rate": 5.90205096944637e-07, + "loss": 8.882739348337054e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2253, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.375, + "completions/mean_length": 83.23958539962769, + "completions/min_length": 34.5, + "epoch": 4.480516257135766, + "grad_norm": 0.005777942210744048, + "kl": 0.123779296875, + "learning_rate": 5.898947713459874e-07, + "loss": 0.00012394817895255983, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2254, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 74.677086353302, + "completions/min_length": 27.375, + "epoch": 4.482501861504096, + "grad_norm": 0.005132669729563733, + "kl": 0.08868408203125, + "learning_rate": 5.895844099583093e-07, + "loss": 8.870419696904719e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2255, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.625, + "completions/mean_length": 73.89583587646484, + "completions/min_length": 36.75, + "epoch": 4.484487465872425, + "grad_norm": 0.004848555611705649, + "kl": 0.0904541015625, + "learning_rate": 5.892740129051637e-07, + "loss": 9.037971904035658e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2256, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 71.29166841506958, + "completions/min_length": 32.75, + "epoch": 4.486473070240755, + "grad_norm": 0.00417805260128553, + "kl": 0.10064697265625, + "learning_rate": 5.88963580310127e-07, + "loss": 0.00010073679004563019, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2257, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 80.81250190734863, + "completions/min_length": 41.375, + "epoch": 4.488458674609084, + "grad_norm": 0.004026284028419137, + "kl": 0.10394287109375, + "learning_rate": 5.886531122967888e-07, + "loss": 0.00010382196342106909, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2258, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 78.40625238418579, + "completions/min_length": 31.25, + "epoch": 4.490444278977414, + "grad_norm": 0.0033095778525263505, + "kl": 0.0966796875, + "learning_rate": 5.883426089887531e-07, + "loss": 9.674718603491783e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2259, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 77.65625143051147, + "completions/min_length": 31.0, + "epoch": 4.492429883345744, + "grad_norm": 0.005435685847843289, + "kl": 0.11260986328125, + "learning_rate": 5.880320705096376e-07, + "loss": 0.00011271526454947889, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2260, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 76.802086353302, + "completions/min_length": 32.75, + "epoch": 4.494415487714073, + "grad_norm": 0.003927775994213521, + "kl": 0.117889404296875, + "learning_rate": 5.877214969830745e-07, + "loss": 0.00011776182509493083, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2261, + "train_speed(iter/s)": 0.02274 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.125, + "completions/mean_length": 83.16666984558105, + "completions/min_length": 31.75, + "epoch": 4.496401092082403, + "grad_norm": 0.00425138432199738, + "kl": 0.09832763671875, + "learning_rate": 5.874108885327098e-07, + "loss": 9.834981756284833e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2262, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 79.96875190734863, + "completions/min_length": 29.625, + "epoch": 4.498386696450732, + "grad_norm": 1.4847891524536354, + "kl": 0.09027099609375, + "learning_rate": 5.871002452822033e-07, + "loss": -0.010531831532716751, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2263, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 75.64583587646484, + "completions/min_length": 31.25, + "epoch": 4.500372300819062, + "grad_norm": 0.008389382738686307, + "kl": 0.09576416015625, + "learning_rate": 5.867895673552288e-07, + "loss": 9.583967039361596e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2264, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 73.27083587646484, + "completions/min_length": 37.625, + "epoch": 4.502357905187392, + "grad_norm": 0.003872977791608026, + "kl": 0.091400146484375, + "learning_rate": 5.864788548754737e-07, + "loss": 9.143753413809463e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2265, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 72.62500190734863, + "completions/min_length": 34.125, + "epoch": 4.504343509555721, + "grad_norm": 0.005363005900148331, + "kl": 0.098846435546875, + "learning_rate": 5.861681079666394e-07, + "loss": 9.892591333482414e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2266, + "train_speed(iter/s)": 0.022739 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 74.21875286102295, + "completions/min_length": 30.75, + "epoch": 4.506329113924051, + "grad_norm": 0.012140812564650938, + "kl": 0.101165771484375, + "learning_rate": 5.858573267524408e-07, + "loss": 0.00010126647248398513, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2267, + "train_speed(iter/s)": 0.022738 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.875, + "completions/mean_length": 83.62500333786011, + "completions/min_length": 34.0, + "epoch": 4.508314718292381, + "grad_norm": 0.008323405110357944, + "kl": 0.11407470703125, + "learning_rate": 5.855465113566065e-07, + "loss": 0.00011406631529098377, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2268, + "train_speed(iter/s)": 0.022737 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 82.34375190734863, + "completions/min_length": 36.375, + "epoch": 4.51030032266071, + "grad_norm": 0.004265793745991542, + "kl": 0.092498779296875, + "learning_rate": 5.852356619028789e-07, + "loss": 9.246898116543889e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2269, + "train_speed(iter/s)": 0.022735 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.375, + "completions/mean_length": 84.97916793823242, + "completions/min_length": 36.125, + "epoch": 4.51228592702904, + "grad_norm": 0.004924395643969063, + "kl": 0.09814453125, + "learning_rate": 5.849247785150134e-07, + "loss": 9.818821854423732e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2270, + "train_speed(iter/s)": 0.022733 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 75.60416984558105, + "completions/min_length": 33.875, + "epoch": 4.514271531397369, + "grad_norm": 0.009767516751758824, + "kl": 0.10345458984375, + "learning_rate": 5.8461386131678e-07, + "loss": 0.0001034951419569552, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2271, + "train_speed(iter/s)": 0.022732 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 73.52083444595337, + "completions/min_length": 33.0, + "epoch": 4.516257135765699, + "grad_norm": 1.8262905603559427, + "kl": 0.103271484375, + "learning_rate": 5.84302910431961e-07, + "loss": -0.004324705805629492, + "memory(GiB)": 94.21, + "reward": 1.8437500149011612, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.8437500074505806, + "rewards/CineAccuracyORM/std": 0.1911909468472004, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2272, + "train_speed(iter/s)": 0.022731 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 75.31250238418579, + "completions/min_length": 34.875, + "epoch": 4.518242740134029, + "grad_norm": 1.0578366038951796, + "kl": 0.157470703125, + "learning_rate": 5.839919259843525e-07, + "loss": 0.00038225826574489474, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666679084301, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2273, + "train_speed(iter/s)": 0.02273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 78.39583492279053, + "completions/min_length": 27.625, + "epoch": 4.520228344502358, + "grad_norm": 0.008614658097092684, + "kl": 0.10693359375, + "learning_rate": 5.836809080977643e-07, + "loss": 0.00010691669012885541, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2274, + "train_speed(iter/s)": 0.02273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.875, + "completions/mean_length": 78.37500286102295, + "completions/min_length": 34.625, + "epoch": 4.522213948870688, + "grad_norm": 0.003384064155008789, + "kl": 0.08953857421875, + "learning_rate": 5.833698568960194e-07, + "loss": 8.955416706157848e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2275, + "train_speed(iter/s)": 0.022729 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 80.59375286102295, + "completions/min_length": 33.125, + "epoch": 4.524199553239017, + "grad_norm": 0.004664150600999667, + "kl": 0.103271484375, + "learning_rate": 5.830587725029537e-07, + "loss": 0.00010333849786547944, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2276, + "train_speed(iter/s)": 0.022729 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 72.73958587646484, + "completions/min_length": 34.75, + "epoch": 4.526185157607347, + "grad_norm": 0.0033492312690766, + "kl": 0.0872802734375, + "learning_rate": 5.827476550424164e-07, + "loss": 8.730647823540494e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2277, + "train_speed(iter/s)": 0.02273 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.625, + "completions/mean_length": 81.02083587646484, + "completions/min_length": 38.875, + "epoch": 4.528170761975677, + "grad_norm": 0.003882525200665498, + "kl": 0.098480224609375, + "learning_rate": 5.824365046382702e-07, + "loss": 9.85459191724658e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2278, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 82.25000190734863, + "completions/min_length": 34.125, + "epoch": 4.530156366344006, + "grad_norm": 0.004690491013353411, + "kl": 0.10150146484375, + "learning_rate": 5.821253214143908e-07, + "loss": 0.00010153650509892032, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2279, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.5, + "completions/mean_length": 73.54166841506958, + "completions/min_length": 34.125, + "epoch": 4.532141970712336, + "grad_norm": 0.008977938805834819, + "kl": 0.0831298828125, + "learning_rate": 5.818141054946667e-07, + "loss": 8.310612611239776e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2280, + "train_speed(iter/s)": 0.022727 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.25, + "completions/mean_length": 78.51041984558105, + "completions/min_length": 29.875, + "epoch": 4.5341275750806656, + "grad_norm": 0.005656146666333078, + "kl": 0.1134033203125, + "learning_rate": 5.815028570029998e-07, + "loss": 0.00011320726480334997, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2281, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 79.70833730697632, + "completions/min_length": 33.5, + "epoch": 4.536113179448995, + "grad_norm": 0.0034398269403313384, + "kl": 0.08966064453125, + "learning_rate": 5.811915760633046e-07, + "loss": 8.962757419794798e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2282, + "train_speed(iter/s)": 0.022729 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 81.28125238418579, + "completions/min_length": 36.75, + "epoch": 4.538098783817325, + "grad_norm": 1.5436595756455518, + "kl": 0.09344482421875, + "learning_rate": 5.808802627995089e-07, + "loss": -0.01086314208805561, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2283, + "train_speed(iter/s)": 0.022728 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.5, + "completions/mean_length": 93.56250286102295, + "completions/min_length": 39.125, + "epoch": 4.540084388185654, + "grad_norm": 0.0032088076358396497, + "kl": 0.092498779296875, + "learning_rate": 5.805689173355528e-07, + "loss": 9.258277714252472e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2284, + "train_speed(iter/s)": 0.022726 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 66.64583444595337, + "completions/min_length": 30.375, + "epoch": 4.542069992553984, + "grad_norm": 0.005530933365593661, + "kl": 0.11419677734375, + "learning_rate": 5.802575397953899e-07, + "loss": 0.00011411268496885896, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2285, + "train_speed(iter/s)": 0.022726 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 77.19791984558105, + "completions/min_length": 31.5, + "epoch": 4.5440555969223135, + "grad_norm": 0.005525004061213157, + "kl": 0.10479736328125, + "learning_rate": 5.79946130302986e-07, + "loss": 0.00010479833872523159, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2286, + "train_speed(iter/s)": 0.022726 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 75.89583587646484, + "completions/min_length": 39.625, + "epoch": 4.546041201290643, + "grad_norm": 0.7946204126687513, + "kl": 0.0855712890625, + "learning_rate": 5.796346889823202e-07, + "loss": -0.0010799586307257414, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2287, + "train_speed(iter/s)": 0.022725 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.875, + "completions/mean_length": 82.31250286102295, + "completions/min_length": 36.5, + "epoch": 4.5480268056589725, + "grad_norm": 0.8243358410051134, + "kl": 0.102294921875, + "learning_rate": 5.793232159573838e-07, + "loss": 0.00551933329552412, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2288, + "train_speed(iter/s)": 0.022726 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.125, + "completions/mean_length": 88.19791841506958, + "completions/min_length": 38.5, + "epoch": 4.550012410027302, + "grad_norm": 1.476136086373933, + "kl": 0.10107421875, + "learning_rate": 5.790117113521806e-07, + "loss": 0.00010109124559676275, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2289, + "train_speed(iter/s)": 0.022725 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.375, + "completions/mean_length": 82.42708587646484, + "completions/min_length": 37.25, + "epoch": 4.551998014395632, + "grad_norm": 0.005686331306765389, + "kl": 0.08880615234375, + "learning_rate": 5.787001752907276e-07, + "loss": 8.879662345862016e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2290, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 77.19791889190674, + "completions/min_length": 32.875, + "epoch": 4.5539836187639615, + "grad_norm": 1.0548499734212398, + "kl": 0.0855712890625, + "learning_rate": 5.783886078970537e-07, + "loss": 0.0011719726026058197, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2291, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.25, + "completions/mean_length": 81.98958492279053, + "completions/min_length": 31.5, + "epoch": 4.555969223132291, + "grad_norm": 0.005153707535255694, + "kl": 0.0975341796875, + "learning_rate": 5.780770092952009e-07, + "loss": 9.75908333202824e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2292, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.5, + "completions/mean_length": 75.37500143051147, + "completions/min_length": 32.75, + "epoch": 4.5579548275006205, + "grad_norm": 0.005105565838476203, + "kl": 0.085601806640625, + "learning_rate": 5.777653796092229e-07, + "loss": 8.554181840736419e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2293, + "train_speed(iter/s)": 0.022723 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 81.89583683013916, + "completions/min_length": 38.625, + "epoch": 4.5599404318689505, + "grad_norm": 0.004798246380916185, + "kl": 0.09674072265625, + "learning_rate": 5.774537189631861e-07, + "loss": 9.666193363955244e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2294, + "train_speed(iter/s)": 0.022724 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 76.28125095367432, + "completions/min_length": 32.625, + "epoch": 4.5619260362372795, + "grad_norm": 0.00572011525467052, + "kl": 0.103302001953125, + "learning_rate": 5.771420274811696e-07, + "loss": 0.00010323635069653392, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2295, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 81.05208587646484, + "completions/min_length": 36.5, + "epoch": 4.5639116406056095, + "grad_norm": 0.004333919524378042, + "kl": 0.08770751953125, + "learning_rate": 5.768303052872642e-07, + "loss": 8.761404023971409e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2296, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 68.39583539962769, + "completions/min_length": 25.375, + "epoch": 4.565897244973939, + "grad_norm": 0.005163773785245674, + "kl": 0.08453369140625, + "learning_rate": 5.765185525055732e-07, + "loss": 8.445871935691684e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2297, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 83.18750286102295, + "completions/min_length": 36.875, + "epoch": 4.5678828493422685, + "grad_norm": 0.0036358926877679476, + "kl": 0.08697509765625, + "learning_rate": 5.762067692602119e-07, + "loss": 8.697862358530983e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2298, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 72.37500190734863, + "completions/min_length": 31.25, + "epoch": 4.5698684537105985, + "grad_norm": 0.00606025448858362, + "kl": 0.092010498046875, + "learning_rate": 5.758949556753082e-07, + "loss": 9.191343997372314e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2299, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 72.41666889190674, + "completions/min_length": 31.875, + "epoch": 4.5718540580789275, + "grad_norm": 1.9573697120281306, + "kl": 0.09136962890625, + "learning_rate": 5.755831118750015e-07, + "loss": -0.0026668086647987366, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.05974818021059036, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.18617857620120049, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2300, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 76.41666841506958, + "completions/min_length": 39.75, + "epoch": 4.5738396624472575, + "grad_norm": 0.006006257641365506, + "kl": 0.09698486328125, + "learning_rate": 5.752712379834435e-07, + "loss": 9.682127711130306e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2301, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 197.875, + "completions/mean_length": 91.87500286102295, + "completions/min_length": 38.75, + "epoch": 4.5758252668155865, + "grad_norm": 0.003058366902914677, + "kl": 0.08502197265625, + "learning_rate": 5.74959334124798e-07, + "loss": 8.493951463606209e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2302, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.375, + "completions/mean_length": 79.50000286102295, + "completions/min_length": 31.25, + "epoch": 4.5778108711839165, + "grad_norm": 0.003333706345711044, + "kl": 0.09051513671875, + "learning_rate": 5.746474004232405e-07, + "loss": 9.043920726981014e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2303, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 77.27083587646484, + "completions/min_length": 33.875, + "epoch": 4.5797964755522464, + "grad_norm": 0.0055613782787916006, + "kl": 0.09375, + "learning_rate": 5.743354370029583e-07, + "loss": 9.380087431054562e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2304, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 78.33333539962769, + "completions/min_length": 35.875, + "epoch": 4.5817820799205755, + "grad_norm": 0.0037245481595628164, + "kl": 0.089385986328125, + "learning_rate": 5.74023443988151e-07, + "loss": 8.936197264119983e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2305, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.0, + "completions/mean_length": 74.12500190734863, + "completions/min_length": 34.375, + "epoch": 4.5837676842889055, + "grad_norm": 2.230663856149765, + "kl": 0.098907470703125, + "learning_rate": 5.737114215030295e-07, + "loss": -0.0076047456823289394, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8333333395421505, + "rewards/CineAccuracyORM/std": 0.17548105120658875, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2306, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 67.03125190734863, + "completions/min_length": 32.5, + "epoch": 4.585753288657235, + "grad_norm": 0.0037963544544235488, + "kl": 0.07318115234375, + "learning_rate": 5.733993696718168e-07, + "loss": 7.318564894376323e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2307, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 83.31250238418579, + "completions/min_length": 37.75, + "epoch": 4.5877388930255645, + "grad_norm": 0.004103642316760418, + "kl": 0.0963134765625, + "learning_rate": 5.73087288618747e-07, + "loss": 9.642505028750747e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2308, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.25, + "completions/mean_length": 71.28125190734863, + "completions/min_length": 33.0, + "epoch": 4.589724497393894, + "grad_norm": 0.0034598986690599367, + "kl": 0.099853515625, + "learning_rate": 5.727751784680667e-07, + "loss": 9.977837180485949e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2309, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 77.72916841506958, + "completions/min_length": 36.875, + "epoch": 4.5917101017622235, + "grad_norm": 1.9793506553163016, + "kl": 0.08746337890625, + "learning_rate": 5.724630393440333e-07, + "loss": -0.00461022462695837, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2310, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 78.21875238418579, + "completions/min_length": 32.75, + "epoch": 4.5936957061305534, + "grad_norm": 0.03233720365612494, + "kl": 0.09197998046875, + "learning_rate": 5.721508713709162e-07, + "loss": 9.184504597214982e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2311, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 72.88541841506958, + "completions/min_length": 31.75, + "epoch": 4.595681310498883, + "grad_norm": 0.6754820617097291, + "kl": 0.08831787109375, + "learning_rate": 5.718386746729961e-07, + "loss": 0.008238280192017555, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2312, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.0, + "completions/mean_length": 73.59375381469727, + "completions/min_length": 36.375, + "epoch": 4.5976669148672125, + "grad_norm": 0.005317897778769317, + "kl": 0.09112548828125, + "learning_rate": 5.715264493745651e-07, + "loss": 9.116034198086709e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2313, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 72.3854193687439, + "completions/min_length": 30.625, + "epoch": 4.599652519235542, + "grad_norm": 0.0033929874385534953, + "kl": 0.078765869140625, + "learning_rate": 5.71214195599927e-07, + "loss": 7.870925037423149e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2314, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 80.50000286102295, + "completions/min_length": 32.25, + "epoch": 4.6016381236038715, + "grad_norm": 0.0032873752823652275, + "kl": 0.093505859375, + "learning_rate": 5.709019134733964e-07, + "loss": 9.349064202979207e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2315, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 65.71875238418579, + "completions/min_length": 30.0, + "epoch": 4.603623727972201, + "grad_norm": 0.011534392390535811, + "kl": 0.1021728515625, + "learning_rate": 5.705896031192997e-07, + "loss": 0.00010226282029179856, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2316, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.375, + "completions/mean_length": 84.61458587646484, + "completions/min_length": 36.875, + "epoch": 4.605609332340531, + "grad_norm": 0.003852925664040561, + "kl": 0.10693359375, + "learning_rate": 5.702772646619742e-07, + "loss": 0.00010697339894250035, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2317, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 77.06250286102295, + "completions/min_length": 35.375, + "epoch": 4.6075949367088604, + "grad_norm": 0.004558999717422353, + "kl": 0.08642578125, + "learning_rate": 5.699648982257685e-07, + "loss": 8.62802698975429e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2318, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.75, + "completions/mean_length": 83.78125286102295, + "completions/min_length": 33.125, + "epoch": 4.60958054107719, + "grad_norm": 0.0036160464170613936, + "kl": 0.10736083984375, + "learning_rate": 5.696525039350425e-07, + "loss": 0.0001075066247722134, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2319, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 81.53125286102295, + "completions/min_length": 34.5, + "epoch": 4.61156614544552, + "grad_norm": 0.003760841205670082, + "kl": 0.08953857421875, + "learning_rate": 5.693400819141669e-07, + "loss": 8.954511577030644e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2320, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 78.33333492279053, + "completions/min_length": 33.25, + "epoch": 4.613551749813849, + "grad_norm": 0.0032498174063719976, + "kl": 0.082763671875, + "learning_rate": 5.690276322875236e-07, + "loss": 8.275630534626544e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2321, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 77.11458587646484, + "completions/min_length": 33.0, + "epoch": 4.615537354182179, + "grad_norm": 0.004086336713738296, + "kl": 0.08404541015625, + "learning_rate": 5.687151551795054e-07, + "loss": 8.408527355641127e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2322, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.875, + "completions/mean_length": 82.73958683013916, + "completions/min_length": 34.0, + "epoch": 4.617522958550508, + "grad_norm": 0.01248841140481593, + "kl": 0.13299560546875, + "learning_rate": 5.684026507145165e-07, + "loss": 0.0001329867372987792, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2323, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.75, + "completions/mean_length": 65.54166841506958, + "completions/min_length": 32.375, + "epoch": 4.619508562918838, + "grad_norm": 3.1658015235706767, + "kl": 0.09576416015625, + "learning_rate": 5.68090119016971e-07, + "loss": -0.007310189306735992, + "memory(GiB)": 94.21, + "reward": 1.5729166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.5729166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2324, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.875, + "completions/mean_length": 78.60416889190674, + "completions/min_length": 37.375, + "epoch": 4.621494167287168, + "grad_norm": 0.003460455846155925, + "kl": 0.11016845703125, + "learning_rate": 5.677775602112947e-07, + "loss": 0.00011014618212357163, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2325, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 73.8229193687439, + "completions/min_length": 33.875, + "epoch": 4.623479771655497, + "grad_norm": 0.983220531646428, + "kl": 0.1031494140625, + "learning_rate": 5.674649744219242e-07, + "loss": -0.0007950937142595649, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2326, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 74.39583587646484, + "completions/min_length": 33.625, + "epoch": 4.625465376023827, + "grad_norm": 1.396096258057355, + "kl": 0.09417724609375, + "learning_rate": 5.671523617733064e-07, + "loss": 0.007858018390834332, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2327, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 83.89583539962769, + "completions/min_length": 34.0, + "epoch": 4.627450980392156, + "grad_norm": 1.0866659667101322, + "kl": 0.11163330078125, + "learning_rate": 5.66839722389899e-07, + "loss": -0.003320117946714163, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2328, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.875, + "completions/mean_length": 76.93750238418579, + "completions/min_length": 34.5, + "epoch": 4.629436584760486, + "grad_norm": 0.004133248965090325, + "kl": 0.089813232421875, + "learning_rate": 5.665270563961702e-07, + "loss": 8.97534191608429e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2329, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 74.84375238418579, + "completions/min_length": 37.875, + "epoch": 4.631422189128816, + "grad_norm": 3.6598942616515813, + "kl": 0.108184814453125, + "learning_rate": 5.662143639165995e-07, + "loss": 0.0006956271827220917, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2330, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 75.42708492279053, + "completions/min_length": 34.0, + "epoch": 4.633407793497145, + "grad_norm": 0.003285613511303384, + "kl": 0.09954833984375, + "learning_rate": 5.659016450756761e-07, + "loss": 9.94454458123073e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2331, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.375, + "completions/mean_length": 88.41667079925537, + "completions/min_length": 38.25, + "epoch": 4.635393397865475, + "grad_norm": 1.3189134547690207, + "kl": 0.19110107421875, + "learning_rate": 5.655888999979004e-07, + "loss": -0.013224013149738312, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2332, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 78.29166841506958, + "completions/min_length": 33.125, + "epoch": 4.637379002233805, + "grad_norm": 0.00324602327054682, + "kl": 0.07659912109375, + "learning_rate": 5.652761288077824e-07, + "loss": 7.664500299142674e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2333, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.125, + "completions/mean_length": 76.05208492279053, + "completions/min_length": 33.625, + "epoch": 4.639364606602134, + "grad_norm": 1.2506648342000495, + "kl": 0.22784423828125, + "learning_rate": 5.649633316298435e-07, + "loss": 0.0016061998903751373, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2334, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.625, + "completions/mean_length": 82.34375238418579, + "completions/min_length": 36.375, + "epoch": 4.641350210970464, + "grad_norm": 0.004870658101811156, + "kl": 0.101318359375, + "learning_rate": 5.646505085886144e-07, + "loss": 0.0001012769207591191, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2335, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 70.12500286102295, + "completions/min_length": 32.125, + "epoch": 4.643335815338793, + "grad_norm": 0.006618151616686567, + "kl": 0.088531494140625, + "learning_rate": 5.643376598086371e-07, + "loss": 8.857453940436244e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2336, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 61.37500238418579, + "completions/min_length": 24.25, + "epoch": 4.645321419707123, + "grad_norm": 0.0035250140365492284, + "kl": 0.08172607421875, + "learning_rate": 5.640247854144633e-07, + "loss": 8.166702173184603e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2337, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 69.39583587646484, + "completions/min_length": 28.125, + "epoch": 4.647307024075453, + "grad_norm": 0.005895554204687453, + "kl": 0.0992431640625, + "learning_rate": 5.637118855306547e-07, + "loss": 9.923591278493404e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2338, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.5, + "completions/mean_length": 76.33333587646484, + "completions/min_length": 33.875, + "epoch": 4.649292628443782, + "grad_norm": 0.7507075993821026, + "kl": 0.13653564453125, + "learning_rate": 5.633989602817837e-07, + "loss": 0.004025675356388092, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2339, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 72.27083492279053, + "completions/min_length": 32.25, + "epoch": 4.651278232812112, + "grad_norm": 0.004642095660157392, + "kl": 0.0936279296875, + "learning_rate": 5.630860097924325e-07, + "loss": 9.374831279274076e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2340, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.25, + "completions/mean_length": 79.15625190734863, + "completions/min_length": 29.25, + "epoch": 4.653263837180441, + "grad_norm": 1.2879447231176964, + "kl": 0.11737060546875, + "learning_rate": 5.627730341871933e-07, + "loss": 0.00011727835226338357, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2341, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.125, + "completions/mean_length": 79.21875238418579, + "completions/min_length": 33.75, + "epoch": 4.655249441548771, + "grad_norm": 0.7877443988396042, + "kl": 0.09991455078125, + "learning_rate": 5.624600335906681e-07, + "loss": 0.008169095031917095, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2342, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.875, + "completions/mean_length": 79.91666984558105, + "completions/min_length": 28.125, + "epoch": 4.657235045917101, + "grad_norm": 0.00591320841549025, + "kl": 0.09527587890625, + "learning_rate": 5.621470081274698e-07, + "loss": 9.53097696765326e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2343, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 72.66666984558105, + "completions/min_length": 30.5, + "epoch": 4.65922065028543, + "grad_norm": 0.003958699016839167, + "kl": 0.09844970703125, + "learning_rate": 5.6183395792222e-07, + "loss": 9.841163409873843e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2344, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.625, + "completions/mean_length": 73.81250190734863, + "completions/min_length": 28.5, + "epoch": 4.66120625465376, + "grad_norm": 0.005329246216295835, + "kl": 0.075164794921875, + "learning_rate": 5.615208830995508e-07, + "loss": 7.521644874941558e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2345, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 74.98958539962769, + "completions/min_length": 35.375, + "epoch": 4.66319185902209, + "grad_norm": 0.006392068411079872, + "kl": 0.09625244140625, + "learning_rate": 5.612077837841039e-07, + "loss": 9.623055666452274e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2346, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 73.33333587646484, + "completions/min_length": 31.125, + "epoch": 4.665177463390419, + "grad_norm": 0.004098474073644894, + "kl": 0.087005615234375, + "learning_rate": 5.608946601005311e-07, + "loss": 8.697745943209156e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2347, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 83.95833683013916, + "completions/min_length": 38.75, + "epoch": 4.667163067758749, + "grad_norm": 1.227646445616175, + "kl": 0.10247802734375, + "learning_rate": 5.605815121734934e-07, + "loss": -0.012014409527182579, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2348, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.75, + "completions/mean_length": 82.06250238418579, + "completions/min_length": 29.375, + "epoch": 4.669148672127078, + "grad_norm": 0.004814142082146612, + "kl": 0.088348388671875, + "learning_rate": 5.602683401276614e-07, + "loss": 8.838576468406245e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2349, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.5, + "completions/mean_length": 83.78125286102295, + "completions/min_length": 34.875, + "epoch": 4.671134276495408, + "grad_norm": 0.005736769257701222, + "kl": 0.10888671875, + "learning_rate": 5.599551440877161e-07, + "loss": 0.00010884978109970689, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2350, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.25, + "completions/mean_length": 81.69791984558105, + "completions/min_length": 41.625, + "epoch": 4.673119880863738, + "grad_norm": 0.005309514502676998, + "kl": 0.10150146484375, + "learning_rate": 5.596419241783474e-07, + "loss": 0.00010142278915736824, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2351, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 67.97916841506958, + "completions/min_length": 27.875, + "epoch": 4.675105485232067, + "grad_norm": 0.005548726933792474, + "kl": 0.10076904296875, + "learning_rate": 5.593286805242549e-07, + "loss": 0.00010085056419484317, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2352, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.875, + "completions/mean_length": 81.47916889190674, + "completions/min_length": 28.75, + "epoch": 4.677091089600397, + "grad_norm": 0.0030455917706678484, + "kl": 0.09954833984375, + "learning_rate": 5.590154132501472e-07, + "loss": 9.947115177055821e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2353, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 75.59375238418579, + "completions/min_length": 32.25, + "epoch": 4.679076693968726, + "grad_norm": 0.7006896297815685, + "kl": 0.10076904296875, + "learning_rate": 5.58702122480743e-07, + "loss": -0.006216554902493954, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2354, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.75, + "completions/mean_length": 82.11458587646484, + "completions/min_length": 32.375, + "epoch": 4.681062298337056, + "grad_norm": 0.003947702664324214, + "kl": 0.108642578125, + "learning_rate": 5.583888083407699e-07, + "loss": 0.00010863847273867577, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2355, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 73.052086353302, + "completions/min_length": 32.125, + "epoch": 4.683047902705386, + "grad_norm": 0.0034927833041282865, + "kl": 0.08538818359375, + "learning_rate": 5.580754709549652e-07, + "loss": 8.53494493640028e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2356, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.375, + "completions/mean_length": 84.04166984558105, + "completions/min_length": 32.25, + "epoch": 4.685033507073715, + "grad_norm": 2.4961354573388714, + "kl": 0.1226806640625, + "learning_rate": 5.577621104480751e-07, + "loss": 0.012847809121012688, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.7708333432674408, + "rewards/CineAccuracyORM/std": 0.2592903971672058, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2357, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.375, + "completions/mean_length": 80.07291841506958, + "completions/min_length": 31.625, + "epoch": 4.687019111442045, + "grad_norm": 0.0043750363380664565, + "kl": 0.10748291015625, + "learning_rate": 5.574487269448549e-07, + "loss": 0.00010759155702544376, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2358, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 83.01041793823242, + "completions/min_length": 32.375, + "epoch": 4.689004715810375, + "grad_norm": 0.004391053659948604, + "kl": 0.11566162109375, + "learning_rate": 5.571353205700695e-07, + "loss": 0.00011556669778656214, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2359, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 74.58333539962769, + "completions/min_length": 31.625, + "epoch": 4.690990320178704, + "grad_norm": 0.006677941367474188, + "kl": 0.120849609375, + "learning_rate": 5.568218914484925e-07, + "loss": 0.00012101449829060584, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2360, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 75.1354193687439, + "completions/min_length": 37.5, + "epoch": 4.692975924547034, + "grad_norm": 0.004430730118602707, + "kl": 0.0965576171875, + "learning_rate": 5.565084397049071e-07, + "loss": 9.643529483582824e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2361, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 81.96875143051147, + "completions/min_length": 30.375, + "epoch": 4.694961528915364, + "grad_norm": 0.004668489739607124, + "kl": 0.0992431640625, + "learning_rate": 5.561949654641046e-07, + "loss": 9.912320820149034e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2362, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 82.67708587646484, + "completions/min_length": 38.0, + "epoch": 4.696947133283693, + "grad_norm": 0.004168253751659089, + "kl": 0.08251953125, + "learning_rate": 5.558814688508862e-07, + "loss": 8.25101014925167e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2363, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 70.03125047683716, + "completions/min_length": 31.0, + "epoch": 4.698932737652023, + "grad_norm": 1.067279898281127, + "kl": 0.1177978515625, + "learning_rate": 5.555679499900613e-07, + "loss": -0.004482237156480551, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2364, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.5, + "completions/mean_length": 72.15625143051147, + "completions/min_length": 33.0, + "epoch": 4.700918342020352, + "grad_norm": 1.6388835772280552, + "kl": 0.1346435546875, + "learning_rate": 5.552544090064487e-07, + "loss": 0.001115697785280645, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.3624799847602844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2365, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.875, + "completions/mean_length": 72.08333492279053, + "completions/min_length": 38.75, + "epoch": 4.702903946388682, + "grad_norm": 0.006825047979680221, + "kl": 0.086669921875, + "learning_rate": 5.549408460248757e-07, + "loss": 8.665671339258552e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2366, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 77.40625238418579, + "completions/min_length": 37.0, + "epoch": 4.704889550757011, + "grad_norm": 0.00532596327968206, + "kl": 0.0960693359375, + "learning_rate": 5.546272611701783e-07, + "loss": 9.604192746337503e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2367, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 77.17708587646484, + "completions/min_length": 37.5, + "epoch": 4.706875155125341, + "grad_norm": 0.005383832134177418, + "kl": 0.08392333984375, + "learning_rate": 5.543136545672014e-07, + "loss": 8.388089190702885e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2368, + "train_speed(iter/s)": 0.022705 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.375, + "completions/mean_length": 84.57291841506958, + "completions/min_length": 33.125, + "epoch": 4.708860759493671, + "grad_norm": 0.9234181661280052, + "kl": 0.091705322265625, + "learning_rate": 5.540000263407986e-07, + "loss": -0.0066925231367349625, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2369, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 73.13541889190674, + "completions/min_length": 32.875, + "epoch": 4.710846363862, + "grad_norm": 0.015995415525439965, + "kl": 0.12982177734375, + "learning_rate": 5.536863766158317e-07, + "loss": 0.00012981262989342213, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2370, + "train_speed(iter/s)": 0.022705 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.25, + "completions/mean_length": 79.79166984558105, + "completions/min_length": 27.25, + "epoch": 4.71283196823033, + "grad_norm": 0.011202974540784661, + "kl": 0.103607177734375, + "learning_rate": 5.533727055171717e-07, + "loss": 0.00010354029654990882, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2371, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.0, + "completions/mean_length": 77.26041889190674, + "completions/min_length": 33.375, + "epoch": 4.71481757259866, + "grad_norm": 0.022467234398754667, + "kl": 0.114990234375, + "learning_rate": 5.530590131696978e-07, + "loss": 0.00011488457676023245, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2372, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 75.20833539962769, + "completions/min_length": 33.625, + "epoch": 4.716803176966989, + "grad_norm": 0.0052564242479786515, + "kl": 0.08538818359375, + "learning_rate": 5.527452996982973e-07, + "loss": 8.54157842695713e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2373, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 73.40625333786011, + "completions/min_length": 36.625, + "epoch": 4.718788781335319, + "grad_norm": 0.0034781322116524535, + "kl": 0.109893798828125, + "learning_rate": 5.524315652278663e-07, + "loss": 0.00010982695675920695, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2374, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 74.05208683013916, + "completions/min_length": 31.5, + "epoch": 4.720774385703649, + "grad_norm": 0.012881657197212185, + "kl": 0.103057861328125, + "learning_rate": 5.521178098833092e-07, + "loss": 0.00010311169899068773, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2375, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 66.91666889190674, + "completions/min_length": 29.0, + "epoch": 4.722759990071978, + "grad_norm": 0.009953129755388168, + "kl": 0.087158203125, + "learning_rate": 5.518040337895391e-07, + "loss": 8.716151205589995e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2376, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 77.64583492279053, + "completions/min_length": 34.0, + "epoch": 4.724745594440308, + "grad_norm": 0.00759740992684897, + "kl": 0.111328125, + "learning_rate": 5.514902370714763e-07, + "loss": 0.00011120867566205561, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2377, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 81.552086353302, + "completions/min_length": 32.125, + "epoch": 4.726731198808637, + "grad_norm": 0.003968374011660823, + "kl": 0.107421875, + "learning_rate": 5.511764198540505e-07, + "loss": 0.00010742887388914824, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2378, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.5, + "completions/mean_length": 74.63541984558105, + "completions/min_length": 29.125, + "epoch": 4.728716803176967, + "grad_norm": 0.0043210659978762415, + "kl": 0.08184814453125, + "learning_rate": 5.508625822621988e-07, + "loss": 8.181139128282666e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2379, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.875, + "completions/mean_length": 84.54166984558105, + "completions/min_length": 33.5, + "epoch": 4.730702407545296, + "grad_norm": 0.8589729055032919, + "kl": 0.09381103515625, + "learning_rate": 5.505487244208667e-07, + "loss": 0.004366949200630188, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2380, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 80.70833683013916, + "completions/min_length": 35.625, + "epoch": 4.732688011913626, + "grad_norm": 0.003663083710401756, + "kl": 0.119384765625, + "learning_rate": 5.502348464550077e-07, + "loss": 0.00011942406854359433, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2381, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 71.59375143051147, + "completions/min_length": 32.625, + "epoch": 4.734673616281956, + "grad_norm": 0.0067909253536695404, + "kl": 0.09637451171875, + "learning_rate": 5.499209484895833e-07, + "loss": 9.640395728638396e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2382, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 83.11458587646484, + "completions/min_length": 36.875, + "epoch": 4.736659220650285, + "grad_norm": 0.003723763070110564, + "kl": 0.123870849609375, + "learning_rate": 5.496070306495632e-07, + "loss": 0.0001238843979081139, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2383, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 69.68750143051147, + "completions/min_length": 31.125, + "epoch": 4.738644825018615, + "grad_norm": 0.9046098062435918, + "kl": 0.1024169921875, + "learning_rate": 5.492930930599244e-07, + "loss": 0.018296249210834503, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2384, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 76.59375095367432, + "completions/min_length": 31.625, + "epoch": 4.740630429386945, + "grad_norm": 1.1791352675626274, + "kl": 0.10150146484375, + "learning_rate": 5.489791358456526e-07, + "loss": 0.019950827583670616, + "memory(GiB)": 94.21, + "reward": 1.8750000149011612, + "reward_std": 0.05103103630244732, + "rewards/CineAccuracyORM/mean": 0.8750000027939677, + "rewards/CineAccuracyORM/std": 0.07216878235340118, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2385, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 69.864586353302, + "completions/min_length": 32.625, + "epoch": 4.742616033755274, + "grad_norm": 0.0032040085308207853, + "kl": 0.08612060546875, + "learning_rate": 5.486651591317405e-07, + "loss": 8.610550867160782e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2386, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 74.45833587646484, + "completions/min_length": 30.75, + "epoch": 4.744601638123604, + "grad_norm": 1.2950493324088936, + "kl": 0.098785400390625, + "learning_rate": 5.48351163043189e-07, + "loss": 0.01045030914247036, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2387, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.875, + "completions/mean_length": 70.07291841506958, + "completions/min_length": 33.375, + "epoch": 4.746587242491934, + "grad_norm": 1.1716671097453595, + "kl": 0.09332275390625, + "learning_rate": 5.48037147705007e-07, + "loss": 9.33458431973122e-05, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666679084301, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2388, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 68.05208492279053, + "completions/min_length": 32.125, + "epoch": 4.748572846860263, + "grad_norm": 0.006718216519998036, + "kl": 0.09088134765625, + "learning_rate": 5.477231132422102e-07, + "loss": 9.086485079023987e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2389, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 72.12500238418579, + "completions/min_length": 34.0, + "epoch": 4.750558451228593, + "grad_norm": 0.005850786240599144, + "kl": 0.1046142578125, + "learning_rate": 5.474090597798228e-07, + "loss": 0.00010463615763001144, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2390, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 66.87500238418579, + "completions/min_length": 30.375, + "epoch": 4.752544055596922, + "grad_norm": 0.005589889461778325, + "kl": 0.091949462890625, + "learning_rate": 5.47094987442876e-07, + "loss": 9.180587949231267e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2391, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.125, + "completions/mean_length": 70.38541841506958, + "completions/min_length": 34.375, + "epoch": 4.754529659965252, + "grad_norm": 0.006014159457760767, + "kl": 0.10833740234375, + "learning_rate": 5.467808963564089e-07, + "loss": 0.00010835006833076477, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2392, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 66.30208492279053, + "completions/min_length": 30.25, + "epoch": 4.756515264333581, + "grad_norm": 0.005456182113695637, + "kl": 0.11688232421875, + "learning_rate": 5.464667866454677e-07, + "loss": 0.00011701976472977549, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2393, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 72.28125190734863, + "completions/min_length": 32.5, + "epoch": 4.758500868701911, + "grad_norm": 0.0060403999481007265, + "kl": 0.0926513671875, + "learning_rate": 5.46152658435106e-07, + "loss": 9.255832992494106e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2394, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 72.17708444595337, + "completions/min_length": 33.875, + "epoch": 4.760486473070241, + "grad_norm": 2.210610458798055, + "kl": 0.11981201171875, + "learning_rate": 5.458385118503854e-07, + "loss": 0.0004693021473940462, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2395, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 79.81250333786011, + "completions/min_length": 36.375, + "epoch": 4.76247207743857, + "grad_norm": 0.005084701552266223, + "kl": 0.111572265625, + "learning_rate": 5.455243470163741e-07, + "loss": 0.00011156110849697143, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2396, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 82.96875047683716, + "completions/min_length": 36.25, + "epoch": 4.7644576818069, + "grad_norm": 0.005731457334040926, + "kl": 0.087615966796875, + "learning_rate": 5.452101640581479e-07, + "loss": 8.76723206602037e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2397, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.375, + "completions/mean_length": 81.26041841506958, + "completions/min_length": 33.375, + "epoch": 4.76644328617523, + "grad_norm": 0.9592539950105011, + "kl": 0.54248046875, + "learning_rate": 5.448959631007897e-07, + "loss": 0.0005427635624073446, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2398, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 65.7916693687439, + "completions/min_length": 30.0, + "epoch": 4.768428890543559, + "grad_norm": 1.1409983457833175, + "kl": 0.11627197265625, + "learning_rate": 5.445817442693895e-07, + "loss": 0.009158136323094368, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2399, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 69.81250190734863, + "completions/min_length": 33.125, + "epoch": 4.770414494911889, + "grad_norm": 0.007583821969683654, + "kl": 0.09735107421875, + "learning_rate": 5.44267507689045e-07, + "loss": 9.73366986727342e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2400, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.375, + "completions/mean_length": 76.16666793823242, + "completions/min_length": 32.75, + "epoch": 4.772400099280219, + "grad_norm": 0.007561538167937902, + "kl": 0.10980224609375, + "learning_rate": 5.439532534848598e-07, + "loss": 0.00010980182560160756, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2401, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 74.42708587646484, + "completions/min_length": 30.5, + "epoch": 4.774385703648548, + "grad_norm": 0.005302285364055428, + "kl": 0.095703125, + "learning_rate": 5.436389817819458e-07, + "loss": 9.568034147378057e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2402, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.75, + "completions/mean_length": 75.15625190734863, + "completions/min_length": 36.25, + "epoch": 4.776371308016878, + "grad_norm": 0.007283842877613333, + "kl": 0.114013671875, + "learning_rate": 5.43324692705421e-07, + "loss": 0.00011406959674786776, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2403, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 69.81250238418579, + "completions/min_length": 33.875, + "epoch": 4.778356912385207, + "grad_norm": 0.005518262783046529, + "kl": 0.11669921875, + "learning_rate": 5.430103863804107e-07, + "loss": 0.00011671232641674578, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2404, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 77.95833492279053, + "completions/min_length": 35.0, + "epoch": 4.780342516753537, + "grad_norm": 0.0064720898847713485, + "kl": 0.0914306640625, + "learning_rate": 5.426960629320466e-07, + "loss": 9.163098002318293e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2405, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.625, + "completions/mean_length": 70.39583444595337, + "completions/min_length": 29.75, + "epoch": 4.782328121121866, + "grad_norm": 0.0071481501454178165, + "kl": 0.103851318359375, + "learning_rate": 5.423817224854681e-07, + "loss": 0.00010394788114354014, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2406, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 67.07291889190674, + "completions/min_length": 29.875, + "epoch": 4.784313725490196, + "grad_norm": 0.007921547386444372, + "kl": 0.11151123046875, + "learning_rate": 5.420673651658206e-07, + "loss": 0.00011142025323351845, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2407, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 63.73958492279053, + "completions/min_length": 28.25, + "epoch": 4.786299329858526, + "grad_norm": 0.007132908701328116, + "kl": 0.102020263671875, + "learning_rate": 5.417529910982566e-07, + "loss": 0.00010212243068963289, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2408, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 69.65625143051147, + "completions/min_length": 31.25, + "epoch": 4.788284934226855, + "grad_norm": 0.004370914955831252, + "kl": 0.084197998046875, + "learning_rate": 5.414386004079348e-07, + "loss": 8.41338187456131e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2409, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 65.62500190734863, + "completions/min_length": 29.5, + "epoch": 4.790270538595185, + "grad_norm": 0.007441647857881992, + "kl": 0.1043701171875, + "learning_rate": 5.411241932200212e-07, + "loss": 0.00010443673090776429, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2410, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 71.62500286102295, + "completions/min_length": 29.875, + "epoch": 4.792256142963515, + "grad_norm": 1.6594604408765041, + "kl": 0.093017578125, + "learning_rate": 5.408097696596879e-07, + "loss": -0.008023375645279884, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2411, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 67.62500095367432, + "completions/min_length": 29.75, + "epoch": 4.794241747331844, + "grad_norm": 0.0049650770774216785, + "kl": 0.10498046875, + "learning_rate": 5.404953298521136e-07, + "loss": 0.00010494540038052946, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2412, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.0, + "completions/mean_length": 60.93750190734863, + "completions/min_length": 28.875, + "epoch": 4.796227351700174, + "grad_norm": 0.005936533204147861, + "kl": 0.1175537109375, + "learning_rate": 5.401808739224836e-07, + "loss": 0.00011726385855581611, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2413, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 70.489586353302, + "completions/min_length": 29.75, + "epoch": 4.798212956068504, + "grad_norm": 0.005918967887232658, + "kl": 0.10546875, + "learning_rate": 5.398664019959893e-07, + "loss": 0.00010532489977777004, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2414, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 75.80208587646484, + "completions/min_length": 32.0, + "epoch": 4.800198560436833, + "grad_norm": 1.6532817596178317, + "kl": 0.09588623046875, + "learning_rate": 5.395519141978288e-07, + "loss": -0.011720061302185059, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.8645833432674408, + "rewards/CineAccuracyORM/std": 0.1783013828098774, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2415, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 61.52083492279053, + "completions/min_length": 30.25, + "epoch": 4.802184164805163, + "grad_norm": 0.006584208519040118, + "kl": 0.09381103515625, + "learning_rate": 5.392374106532067e-07, + "loss": 9.39446035772562e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2416, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 68.53125190734863, + "completions/min_length": 36.25, + "epoch": 4.804169769173492, + "grad_norm": 0.004054651644895156, + "kl": 0.109771728515625, + "learning_rate": 5.389228914873333e-07, + "loss": 0.00010964569810312241, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2417, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 62.69791889190674, + "completions/min_length": 26.75, + "epoch": 4.806155373541822, + "grad_norm": 0.0051225988957680855, + "kl": 0.0938720703125, + "learning_rate": 5.386083568254252e-07, + "loss": 9.3840055342298e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2418, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 60.81250190734863, + "completions/min_length": 23.625, + "epoch": 4.808140977910151, + "grad_norm": 0.004022710665534642, + "kl": 0.09423828125, + "learning_rate": 5.382938067927056e-07, + "loss": 9.437235712539405e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2419, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 67.75000238418579, + "completions/min_length": 27.0, + "epoch": 4.810126582278481, + "grad_norm": 0.0044085375323783095, + "kl": 0.10601806640625, + "learning_rate": 5.379792415144039e-07, + "loss": 0.00010604178532958031, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2420, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.875, + "completions/mean_length": 67.56250238418579, + "completions/min_length": 33.5, + "epoch": 4.812112186646811, + "grad_norm": 0.0037846223981376536, + "kl": 0.086639404296875, + "learning_rate": 5.376646611157547e-07, + "loss": 8.663708285894245e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2421, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 68.2916693687439, + "completions/min_length": 30.875, + "epoch": 4.81409779101514, + "grad_norm": 1.2514813350739247, + "kl": 0.115234375, + "learning_rate": 5.373500657219994e-07, + "loss": 0.0035899742506444454, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2422, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 64.82291793823242, + "completions/min_length": 25.5, + "epoch": 4.81608339538347, + "grad_norm": 0.0061356395441369655, + "kl": 0.0989990234375, + "learning_rate": 5.370354554583851e-07, + "loss": 9.900087025016546e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2423, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 61.10416841506958, + "completions/min_length": 28.125, + "epoch": 4.8180689997518, + "grad_norm": 0.010043134892153693, + "kl": 0.12725830078125, + "learning_rate": 5.367208304501651e-07, + "loss": 0.0001272726512979716, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2424, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 70.46875190734863, + "completions/min_length": 29.125, + "epoch": 4.820054604120129, + "grad_norm": 0.011813061415614057, + "kl": 0.096405029296875, + "learning_rate": 5.364061908225979e-07, + "loss": 9.641156066209078e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2425, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.5, + "completions/mean_length": 73.01041889190674, + "completions/min_length": 30.125, + "epoch": 4.822040208488459, + "grad_norm": 0.0033979085263042023, + "kl": 0.09051513671875, + "learning_rate": 5.360915367009487e-07, + "loss": 9.04013286344707e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2426, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 66.75000286102295, + "completions/min_length": 30.5, + "epoch": 4.824025812856789, + "grad_norm": 0.015786099070162915, + "kl": 0.1123046875, + "learning_rate": 5.35776868210488e-07, + "loss": 0.00011224078480154276, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2427, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 67.53125143051147, + "completions/min_length": 29.75, + "epoch": 4.826011417225118, + "grad_norm": 0.01813570278831089, + "kl": 0.11529541015625, + "learning_rate": 5.354621854764918e-07, + "loss": 0.00011539125989656895, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2428, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 67.84375190734863, + "completions/min_length": 31.625, + "epoch": 4.827997021593448, + "grad_norm": 1.3404214233406921, + "kl": 0.13311767578125, + "learning_rate": 5.351474886242419e-07, + "loss": 0.0008415021002292633, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2429, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 77.84375286102295, + "completions/min_length": 33.875, + "epoch": 4.829982625961777, + "grad_norm": 0.008204466360237073, + "kl": 0.1256103515625, + "learning_rate": 5.348327777790261e-07, + "loss": 0.00012558323214761913, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2430, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 70.88541841506958, + "completions/min_length": 35.0, + "epoch": 4.831968230330107, + "grad_norm": 0.030086811251245887, + "kl": 0.14935302734375, + "learning_rate": 5.345180530661376e-07, + "loss": 0.0001494913303758949, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2431, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.125, + "completions/mean_length": 73.28125190734863, + "completions/min_length": 31.375, + "epoch": 4.833953834698436, + "grad_norm": 0.005192728882534674, + "kl": 0.1104736328125, + "learning_rate": 5.342033146108747e-07, + "loss": 0.00011036378418793902, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2432, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.75, + "completions/mean_length": 58.781251430511475, + "completions/min_length": 30.75, + "epoch": 4.835939439066766, + "grad_norm": 1.6542870348295509, + "kl": 0.09246826171875, + "learning_rate": 5.338885625385419e-07, + "loss": 0.005943110212683678, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2433, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 69.92708492279053, + "completions/min_length": 30.375, + "epoch": 4.837925043435096, + "grad_norm": 1.4833259338393363, + "kl": 0.13726806640625, + "learning_rate": 5.335737969744484e-07, + "loss": -0.0009108378435485065, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2434, + "train_speed(iter/s)": 0.022705 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 72.83333587646484, + "completions/min_length": 29.75, + "epoch": 4.839910647803425, + "grad_norm": 0.6162476187060693, + "kl": 0.1016845703125, + "learning_rate": 5.332590180439093e-07, + "loss": 0.0042025865986943245, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2435, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 76.55208587646484, + "completions/min_length": 30.5, + "epoch": 4.841896252171755, + "grad_norm": 0.009750830485556718, + "kl": 0.0950927734375, + "learning_rate": 5.329442258722446e-07, + "loss": 9.516437421552837e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2436, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.25, + "completions/mean_length": 57.114585399627686, + "completions/min_length": 28.25, + "epoch": 4.843881856540085, + "grad_norm": 0.0036481253289747276, + "kl": 0.103515625, + "learning_rate": 5.326294205847799e-07, + "loss": 0.00010345465125283226, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2437, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 71.28125190734863, + "completions/min_length": 27.875, + "epoch": 4.845867460908414, + "grad_norm": 0.005900224576327768, + "kl": 0.0970458984375, + "learning_rate": 5.323146023068459e-07, + "loss": 9.70427063293755e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2438, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 67.11458539962769, + "completions/min_length": 26.25, + "epoch": 4.847853065276744, + "grad_norm": 0.900041579322121, + "kl": 0.15936279296875, + "learning_rate": 5.319997711637785e-07, + "loss": -0.00158620101865381, + "memory(GiB)": 94.21, + "reward": 1.6041666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6041666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2439, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 72.01041889190674, + "completions/min_length": 32.875, + "epoch": 4.849838669645074, + "grad_norm": 0.03612891042038138, + "kl": 0.11517333984375, + "learning_rate": 5.31684927280919e-07, + "loss": 0.00011502523557282984, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2440, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.5, + "completions/mean_length": 60.41666841506958, + "completions/min_length": 27.25, + "epoch": 4.851824274013403, + "grad_norm": 0.39799840829665395, + "kl": 0.144287109375, + "learning_rate": 5.313700707836128e-07, + "loss": 0.00014442240353673697, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2441, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 72.92708539962769, + "completions/min_length": 27.5, + "epoch": 4.853809878381733, + "grad_norm": 0.0037649828080419364, + "kl": 0.08746337890625, + "learning_rate": 5.310552017972115e-07, + "loss": 8.744312799535692e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2442, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 76.42708587646484, + "completions/min_length": 30.5, + "epoch": 4.855795482750062, + "grad_norm": 0.006160073643950107, + "kl": 0.091217041015625, + "learning_rate": 5.307403204470711e-07, + "loss": 9.12101095309481e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2443, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.125, + "completions/mean_length": 65.114586353302, + "completions/min_length": 31.25, + "epoch": 4.857781087118392, + "grad_norm": 0.0038459676714678154, + "kl": 0.09649658203125, + "learning_rate": 5.304254268585525e-07, + "loss": 9.642692020861432e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2444, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 70.25000095367432, + "completions/min_length": 35.125, + "epoch": 4.859766691486721, + "grad_norm": 0.004112866337073631, + "kl": 0.09722900390625, + "learning_rate": 5.301105211570215e-07, + "loss": 9.71705885604024e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2445, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 77.20833587646484, + "completions/min_length": 30.875, + "epoch": 4.861752295855051, + "grad_norm": 0.005534104946098873, + "kl": 0.0982666015625, + "learning_rate": 5.297956034678489e-07, + "loss": 9.842171857599169e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2446, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 68.34375238418579, + "completions/min_length": 34.0, + "epoch": 4.863737900223381, + "grad_norm": 0.005883966607315339, + "kl": 0.1197509765625, + "learning_rate": 5.294806739164103e-07, + "loss": 0.00011970134801231325, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2447, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 71.9166693687439, + "completions/min_length": 33.0, + "epoch": 4.86572350459171, + "grad_norm": 0.005637366120279593, + "kl": 0.105712890625, + "learning_rate": 5.291657326280856e-07, + "loss": 0.00010571972234174609, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2448, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 77.21875190734863, + "completions/min_length": 30.375, + "epoch": 4.86770910896004, + "grad_norm": 0.005492113734642495, + "kl": 0.117431640625, + "learning_rate": 5.288507797282598e-07, + "loss": 0.0001174115895992145, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2449, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 64.65625238418579, + "completions/min_length": 28.625, + "epoch": 4.86969471332837, + "grad_norm": 0.006394802479059686, + "kl": 0.09759521484375, + "learning_rate": 5.285358153423222e-07, + "loss": 9.768750169314444e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2450, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 73.53125286102295, + "completions/min_length": 31.875, + "epoch": 4.871680317696699, + "grad_norm": 0.004573515631246396, + "kl": 0.10382080078125, + "learning_rate": 5.282208395956672e-07, + "loss": 0.00010383043991168961, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2451, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 70.07291793823242, + "completions/min_length": 28.0, + "epoch": 4.873665922065029, + "grad_norm": 0.0039011895581462595, + "kl": 0.085845947265625, + "learning_rate": 5.279058526136932e-07, + "loss": 8.593181701144204e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2452, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.375, + "completions/mean_length": 65.65625190734863, + "completions/min_length": 32.625, + "epoch": 4.875651526433359, + "grad_norm": 0.004034131568846768, + "kl": 0.096527099609375, + "learning_rate": 5.275908545218031e-07, + "loss": 9.660809155320749e-05, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2453, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 69.42708492279053, + "completions/min_length": 33.75, + "epoch": 4.877637130801688, + "grad_norm": 0.7483998493559936, + "kl": 0.1092529296875, + "learning_rate": 5.272758454454046e-07, + "loss": -0.0015418616821989417, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2454, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.875, + "completions/mean_length": 78.88541889190674, + "completions/min_length": 31.875, + "epoch": 4.879622735170018, + "grad_norm": 0.005481427973184682, + "kl": 0.089599609375, + "learning_rate": 5.269608255099093e-07, + "loss": 8.953303040470928e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2455, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 72.9479193687439, + "completions/min_length": 34.125, + "epoch": 4.881608339538347, + "grad_norm": 0.005171843779926468, + "kl": 0.10986328125, + "learning_rate": 5.266457948407335e-07, + "loss": 0.00010994139302056283, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2456, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.625, + "completions/mean_length": 72.95833539962769, + "completions/min_length": 31.625, + "epoch": 4.883593943906677, + "grad_norm": 0.9559258584998357, + "kl": 0.0894775390625, + "learning_rate": 5.263307535632977e-07, + "loss": 0.008330987766385078, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2457, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 67.45833587646484, + "completions/min_length": 32.375, + "epoch": 4.885579548275006, + "grad_norm": 1.628687917754534, + "kl": 0.11138916015625, + "learning_rate": 5.260157018030265e-07, + "loss": 0.012987165711820126, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2458, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 71.69791984558105, + "completions/min_length": 35.875, + "epoch": 4.887565152643336, + "grad_norm": 0.003774112920988199, + "kl": 0.0972900390625, + "learning_rate": 5.257006396853487e-07, + "loss": 9.727019642014056e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2459, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 79.36458683013916, + "completions/min_length": 36.75, + "epoch": 4.889550757011666, + "grad_norm": 1.920496296676164, + "kl": 0.125518798828125, + "learning_rate": 5.253855673356974e-07, + "loss": 0.0001255422830581665, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2460, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.5, + "completions/mean_length": 64.71875190734863, + "completions/min_length": 30.875, + "epoch": 4.891536361379995, + "grad_norm": 1.0317472636902048, + "kl": 0.09527587890625, + "learning_rate": 5.250704848795093e-07, + "loss": -0.007099680602550507, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2461, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 200.375, + "completions/mean_length": 80.33333492279053, + "completions/min_length": 31.25, + "epoch": 4.893521965748325, + "grad_norm": 0.005030844334698776, + "kl": 0.090576171875, + "learning_rate": 5.247553924422259e-07, + "loss": 9.064136247616261e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2462, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 67.96875143051147, + "completions/min_length": 30.875, + "epoch": 4.895507570116655, + "grad_norm": 0.006903793937059978, + "kl": 0.0970458984375, + "learning_rate": 5.244402901492917e-07, + "loss": 9.697987115941942e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2463, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 63.95833492279053, + "completions/min_length": 30.0, + "epoch": 4.897493174484984, + "grad_norm": 1.2059173504728449, + "kl": 0.08685302734375, + "learning_rate": 5.241251781261563e-07, + "loss": -0.00625160988420248, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2464, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 74.57291984558105, + "completions/min_length": 28.875, + "epoch": 4.899478778853314, + "grad_norm": 0.0054901913850022395, + "kl": 0.1116943359375, + "learning_rate": 5.23810056498272e-07, + "loss": 0.00011166576587129384, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2465, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.125, + "completions/mean_length": 61.45833492279053, + "completions/min_length": 32.625, + "epoch": 4.901464383221644, + "grad_norm": 1.609978848652296, + "kl": 0.15814208984375, + "learning_rate": 5.234949253910957e-07, + "loss": -0.012825253419578075, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2466, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.125, + "completions/mean_length": 60.145835399627686, + "completions/min_length": 29.625, + "epoch": 4.903449987589973, + "grad_norm": 1.4608919173600083, + "kl": 0.122802734375, + "learning_rate": 5.231797849300878e-07, + "loss": 0.008462455123662949, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2467, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.0, + "completions/mean_length": 60.34375190734863, + "completions/min_length": 30.25, + "epoch": 4.905435591958303, + "grad_norm": 0.9285653224750158, + "kl": 0.12066650390625, + "learning_rate": 5.228646352407123e-07, + "loss": 0.011191550642251968, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2468, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.125, + "completions/mean_length": 58.75000190734863, + "completions/min_length": 29.25, + "epoch": 4.907421196326632, + "grad_norm": 0.008073963425392131, + "kl": 0.13873291015625, + "learning_rate": 5.225494764484372e-07, + "loss": 0.0001389403478242457, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2469, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.125, + "completions/mean_length": 57.00000286102295, + "completions/min_length": 33.0, + "epoch": 4.909406800694962, + "grad_norm": 0.008614501150336497, + "kl": 0.097015380859375, + "learning_rate": 5.222343086787338e-07, + "loss": 9.70488108578138e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2470, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 68.39583539962769, + "completions/min_length": 32.875, + "epoch": 4.911392405063291, + "grad_norm": 0.006892782724553454, + "kl": 0.08892822265625, + "learning_rate": 5.219191320570773e-07, + "loss": 8.886732393875718e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2471, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.375, + "completions/mean_length": 66.87500190734863, + "completions/min_length": 32.0, + "epoch": 4.913378009431621, + "grad_norm": 0.9343885714201815, + "kl": 0.1260986328125, + "learning_rate": 5.216039467089462e-07, + "loss": 0.00012606134987436235, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2472, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 66.81250190734863, + "completions/min_length": 25.125, + "epoch": 4.915363613799951, + "grad_norm": 0.005987858396304022, + "kl": 0.09649658203125, + "learning_rate": 5.212887527598224e-07, + "loss": 9.650029096519575e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2473, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 66.92708444595337, + "completions/min_length": 31.5, + "epoch": 4.91734921816828, + "grad_norm": 0.007730737343364597, + "kl": 0.102142333984375, + "learning_rate": 5.209735503351913e-07, + "loss": 0.00010213504720013589, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2474, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.25, + "completions/mean_length": 59.875001430511475, + "completions/min_length": 29.75, + "epoch": 4.91933482253661, + "grad_norm": 0.006081899602236584, + "kl": 0.093048095703125, + "learning_rate": 5.20658339560542e-07, + "loss": 9.307700383942574e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2475, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 67.15625143051147, + "completions/min_length": 32.25, + "epoch": 4.9213204269049395, + "grad_norm": 0.004609696009971894, + "kl": 0.11077880859375, + "learning_rate": 5.203431205613663e-07, + "loss": 0.00011090931366197765, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2476, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 68.11458539962769, + "completions/min_length": 30.625, + "epoch": 4.923306031273269, + "grad_norm": 0.004358524050330704, + "kl": 0.110565185546875, + "learning_rate": 5.200278934631599e-07, + "loss": 0.00011049947352148592, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2477, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 65.84375286102295, + "completions/min_length": 29.75, + "epoch": 4.9252916356415986, + "grad_norm": 0.007875402272716112, + "kl": 0.11273193359375, + "learning_rate": 5.197126583914213e-07, + "loss": 0.00011280830949544907, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2478, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 64.03125190734863, + "completions/min_length": 30.5, + "epoch": 4.9272772400099285, + "grad_norm": 0.0068509544153078855, + "kl": 0.11297607421875, + "learning_rate": 5.193974154716523e-07, + "loss": 0.00011290029942756519, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2479, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 58.70833396911621, + "completions/min_length": 27.625, + "epoch": 4.929262844378258, + "grad_norm": 0.0070226259483188004, + "kl": 0.09039306640625, + "learning_rate": 5.190821648293579e-07, + "loss": 9.039058932103217e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2480, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 96.5, + "completions/mean_length": 52.59375238418579, + "completions/min_length": 29.75, + "epoch": 4.9312484487465875, + "grad_norm": 0.8564621064781582, + "kl": 0.084075927734375, + "learning_rate": 5.187669065900461e-07, + "loss": -0.005659153684973717, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2481, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 65.82291889190674, + "completions/min_length": 31.75, + "epoch": 4.933234053114917, + "grad_norm": 0.9206816560403929, + "kl": 0.107421875, + "learning_rate": 5.18451640879228e-07, + "loss": 0.00010737528646131977, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2482, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.625, + "completions/mean_length": 59.843751430511475, + "completions/min_length": 25.75, + "epoch": 4.9352196574832465, + "grad_norm": 0.005017533976612992, + "kl": 0.0865478515625, + "learning_rate": 5.181363678224175e-07, + "loss": 8.650618110550568e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2483, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 68.54166889190674, + "completions/min_length": 35.5, + "epoch": 4.937205261851576, + "grad_norm": 1.5375153223407034, + "kl": 0.12274169921875, + "learning_rate": 5.178210875451317e-07, + "loss": 0.0116494819521904, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.2357477955520153, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2484, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 60.562501430511475, + "completions/min_length": 31.25, + "epoch": 4.9391908662199056, + "grad_norm": 0.0042275951610442905, + "kl": 0.09979248046875, + "learning_rate": 5.175058001728901e-07, + "loss": 9.977295849239454e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2485, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.25, + "completions/mean_length": 59.906251430511475, + "completions/min_length": 29.125, + "epoch": 4.9411764705882355, + "grad_norm": 1.9155401228502713, + "kl": 0.103973388671875, + "learning_rate": 5.171905058312156e-07, + "loss": 0.004133741371333599, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2486, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.0, + "completions/mean_length": 64.1666693687439, + "completions/min_length": 31.75, + "epoch": 4.943162074956565, + "grad_norm": 1.4105166445347717, + "kl": 0.112548828125, + "learning_rate": 5.168752046456335e-07, + "loss": 0.00011263291526120156, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2487, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.125, + "completions/mean_length": 70.8541693687439, + "completions/min_length": 32.125, + "epoch": 4.9451476793248945, + "grad_norm": 1.4548254308666484, + "kl": 0.120361328125, + "learning_rate": 5.165598967416721e-07, + "loss": 0.003328974125906825, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2488, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.5, + "completions/mean_length": 62.14583492279053, + "completions/min_length": 32.375, + "epoch": 4.9471332836932245, + "grad_norm": 0.005293333214922288, + "kl": 0.10101318359375, + "learning_rate": 5.162445822448622e-07, + "loss": 0.00010100413783220574, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2489, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 102.375, + "completions/mean_length": 54.687501430511475, + "completions/min_length": 27.625, + "epoch": 4.9491188880615535, + "grad_norm": 0.0041782129467817985, + "kl": 0.094268798828125, + "learning_rate": 5.159292612807368e-07, + "loss": 9.41972466534935e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2490, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.875, + "completions/mean_length": 56.531251430511475, + "completions/min_length": 27.875, + "epoch": 4.9511044924298835, + "grad_norm": 0.01139219662701482, + "kl": 0.0928955078125, + "learning_rate": 5.156139339748325e-07, + "loss": 9.280835365643725e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2491, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 64.51041841506958, + "completions/min_length": 30.25, + "epoch": 4.953090096798213, + "grad_norm": 0.007939537988386461, + "kl": 0.09747314453125, + "learning_rate": 5.152986004526874e-07, + "loss": 9.744924318511039e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2492, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 66.72916889190674, + "completions/min_length": 29.875, + "epoch": 4.9550757011665425, + "grad_norm": 0.006610433613237356, + "kl": 0.10546875, + "learning_rate": 5.149832608398429e-07, + "loss": 0.00010541768278926611, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2493, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 64.06250190734863, + "completions/min_length": 27.125, + "epoch": 4.9570613055348725, + "grad_norm": 0.0061374842737937935, + "kl": 0.11907958984375, + "learning_rate": 5.14667915261842e-07, + "loss": 0.00011894351337105036, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2494, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 66.03125286102295, + "completions/min_length": 33.0, + "epoch": 4.9590469099032015, + "grad_norm": 0.003986665833275534, + "kl": 0.099822998046875, + "learning_rate": 5.143525638442308e-07, + "loss": 9.979990863939747e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2495, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.5, + "completions/mean_length": 68.11458539962769, + "completions/min_length": 30.125, + "epoch": 4.9610325142715315, + "grad_norm": 0.00541753933868002, + "kl": 0.11151123046875, + "learning_rate": 5.140372067125574e-07, + "loss": 0.00011152663500979543, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2496, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.25, + "completions/mean_length": 69.62500238418579, + "completions/min_length": 29.375, + "epoch": 4.9630181186398605, + "grad_norm": 0.007136145583559292, + "kl": 0.1041259765625, + "learning_rate": 5.137218439923724e-07, + "loss": 0.00010399991151643917, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2497, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 59.75000286102295, + "completions/min_length": 24.625, + "epoch": 4.9650037230081905, + "grad_norm": 0.004386399532308572, + "kl": 0.09515380859375, + "learning_rate": 5.134064758092279e-07, + "loss": 9.512354154139757e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2498, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 65.00000095367432, + "completions/min_length": 29.75, + "epoch": 4.96698932737652, + "grad_norm": 1.2146127555719877, + "kl": 0.1259765625, + "learning_rate": 5.130911022886791e-07, + "loss": 0.01988319493830204, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2499, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 65.19791841506958, + "completions/min_length": 28.75, + "epoch": 4.9689749317448495, + "grad_norm": 0.004769253733993392, + "kl": 0.10015869140625, + "learning_rate": 5.12775723556283e-07, + "loss": 0.0001002311910269782, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2500, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 64.46875190734863, + "completions/min_length": 32.375, + "epoch": 4.9709605361131795, + "grad_norm": 0.0048734435667527045, + "kl": 0.0955810546875, + "learning_rate": 5.124603397375984e-07, + "loss": 9.563114144839346e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2501, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 62.10416793823242, + "completions/min_length": 29.375, + "epoch": 4.972946140481509, + "grad_norm": 0.006828807784757375, + "kl": 0.10101318359375, + "learning_rate": 5.121449509581864e-07, + "loss": 0.00010100104555021971, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2502, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 69.08333492279053, + "completions/min_length": 31.375, + "epoch": 4.9749317448498385, + "grad_norm": 1.626345787410207, + "kl": 0.11474609375, + "learning_rate": 5.118295573436099e-07, + "loss": 0.004872969351708889, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2503, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 60.15625238418579, + "completions/min_length": 27.625, + "epoch": 4.976917349218168, + "grad_norm": 1.62017652658355, + "kl": 0.14483642578125, + "learning_rate": 5.115141590194341e-07, + "loss": -0.012070084922015667, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.18335824459791183, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2504, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 57.35416841506958, + "completions/min_length": 25.625, + "epoch": 4.978902953586498, + "grad_norm": 2.8349164905215627, + "kl": 0.087493896484375, + "learning_rate": 5.111987561112255e-07, + "loss": 0.009440974332392216, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.8645833432674408, + "rewards/CineAccuracyORM/std": 0.1783013790845871, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2505, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 72.67708587646484, + "completions/min_length": 31.25, + "epoch": 4.980888557954827, + "grad_norm": 0.005239150046712194, + "kl": 0.109619140625, + "learning_rate": 5.108833487445531e-07, + "loss": 0.00010942328663077205, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2506, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.0, + "completions/mean_length": 60.666667461395264, + "completions/min_length": 33.875, + "epoch": 4.982874162323157, + "grad_norm": 0.006901586637242084, + "kl": 0.09808349609375, + "learning_rate": 5.10567937044987e-07, + "loss": 9.804704313864931e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2507, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.5, + "completions/mean_length": 57.05208492279053, + "completions/min_length": 27.375, + "epoch": 4.9848597666914864, + "grad_norm": 0.0037095853385381385, + "kl": 0.117156982421875, + "learning_rate": 5.102525211380993e-07, + "loss": 0.0001172378397313878, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2508, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.375, + "completions/mean_length": 68.05208492279053, + "completions/min_length": 29.625, + "epoch": 4.986845371059816, + "grad_norm": 0.008658319096287257, + "kl": 0.1197509765625, + "learning_rate": 5.09937101149464e-07, + "loss": 0.00011973457731073722, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2509, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 60.468751430511475, + "completions/min_length": 25.75, + "epoch": 4.9888309754281455, + "grad_norm": 0.007608958178471858, + "kl": 0.1156005859375, + "learning_rate": 5.096216772046566e-07, + "loss": 0.0001156711223302409, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2510, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 69.27083539962769, + "completions/min_length": 26.625, + "epoch": 4.990816579796475, + "grad_norm": 0.008256515988531997, + "kl": 0.12310791015625, + "learning_rate": 5.09306249429254e-07, + "loss": 0.00012306516873650253, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2511, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.375, + "completions/mean_length": 57.906250953674316, + "completions/min_length": 21.5, + "epoch": 4.992802184164805, + "grad_norm": 0.011586377581446278, + "kl": 0.100341796875, + "learning_rate": 5.089908179488346e-07, + "loss": 0.0001003802681225352, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2512, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.25, + "completions/mean_length": 59.16666793823242, + "completions/min_length": 27.375, + "epoch": 4.994787788533134, + "grad_norm": 0.006509803796658807, + "kl": 0.113433837890625, + "learning_rate": 5.086753828889787e-07, + "loss": 0.00011339227057760581, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2513, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 66.36458396911621, + "completions/min_length": 29.25, + "epoch": 4.996773392901464, + "grad_norm": 1.0638713380695677, + "kl": 0.10284423828125, + "learning_rate": 5.083599443752674e-07, + "loss": -0.004439922980964184, + "memory(GiB)": 94.21, + "reward": 1.5520833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.5520833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2514, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 61.145835399627686, + "completions/min_length": 29.625, + "epoch": 4.998758997269794, + "grad_norm": 1.263638038105288, + "kl": 0.09698486328125, + "learning_rate": 5.080445025332837e-07, + "loss": -0.00285749277099967, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2515, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 60.38541793823242, + "completions/min_length": 30.5, + "epoch": 5.00198560436833, + "grad_norm": 0.00495686285155662, + "kl": 0.08428955078125, + "learning_rate": 5.077290574886117e-07, + "loss": 8.42295921756886e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2516, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.875, + "completions/mean_length": 58.625001430511475, + "completions/min_length": 27.625, + "epoch": 5.003971208736659, + "grad_norm": 0.004849195416877281, + "kl": 0.10198974609375, + "learning_rate": 5.074136093668371e-07, + "loss": 0.00010205099533777684, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2517, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 69.84375143051147, + "completions/min_length": 32.125, + "epoch": 5.005956813104989, + "grad_norm": 0.004124959605075942, + "kl": 0.098876953125, + "learning_rate": 5.070981582935461e-07, + "loss": 9.879124991130084e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2518, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.375, + "completions/mean_length": 67.52083492279053, + "completions/min_length": 30.5, + "epoch": 5.007942417473318, + "grad_norm": 0.007869952992425459, + "kl": 0.1163330078125, + "learning_rate": 5.067827043943265e-07, + "loss": 0.00011634735710686073, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2519, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.125, + "completions/mean_length": 60.76041841506958, + "completions/min_length": 29.0, + "epoch": 5.009928021841648, + "grad_norm": 0.004514639479132769, + "kl": 0.0968017578125, + "learning_rate": 5.064672477947674e-07, + "loss": 9.674839384388179e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2520, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 70.2916693687439, + "completions/min_length": 29.5, + "epoch": 5.011913626209978, + "grad_norm": 2.6740408899220762, + "kl": 0.11004638671875, + "learning_rate": 5.061517886204592e-07, + "loss": 0.006533198058605194, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.06846532225608826, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.17834587395191193, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2521, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 63.43750238418579, + "completions/min_length": 31.75, + "epoch": 5.013899230578307, + "grad_norm": 0.006898258000778394, + "kl": 0.1104736328125, + "learning_rate": 5.058363269969921e-07, + "loss": 0.00011039745004381984, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2522, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 70.04166889190674, + "completions/min_length": 31.5, + "epoch": 5.015884834946637, + "grad_norm": 0.00654909731383044, + "kl": 0.111785888671875, + "learning_rate": 5.055208630499588e-07, + "loss": 0.00011178172280779108, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2523, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 55.947917461395264, + "completions/min_length": 29.0, + "epoch": 5.017870439314967, + "grad_norm": 2.7380943180690016, + "kl": 0.0997314453125, + "learning_rate": 5.052053969049519e-07, + "loss": 0.005968490149825811, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2524, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.875, + "completions/mean_length": 64.73958539962769, + "completions/min_length": 30.375, + "epoch": 5.019856043683296, + "grad_norm": 0.007934944139124055, + "kl": 0.1082763671875, + "learning_rate": 5.048899286875655e-07, + "loss": 0.00010820224997587502, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2525, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.875, + "completions/mean_length": 59.35416793823242, + "completions/min_length": 28.375, + "epoch": 5.021841648051626, + "grad_norm": 0.0064653226573720135, + "kl": 0.10858154296875, + "learning_rate": 5.045744585233937e-07, + "loss": 0.00010868739627767354, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2526, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.875, + "completions/mean_length": 62.572919845581055, + "completions/min_length": 31.375, + "epoch": 5.023827252419955, + "grad_norm": 2.1455342436718032, + "kl": 0.1002197265625, + "learning_rate": 5.042589865380325e-07, + "loss": 0.0008290037512779236, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2527, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 60.55208492279053, + "completions/min_length": 31.125, + "epoch": 5.025812856788285, + "grad_norm": 0.005044840531814492, + "kl": 0.093353271484375, + "learning_rate": 5.039435128570778e-07, + "loss": 9.343147394247353e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2528, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 73.46875286102295, + "completions/min_length": 28.875, + "epoch": 5.027798461156615, + "grad_norm": 0.006163497835603591, + "kl": 0.100372314453125, + "learning_rate": 5.036280376061264e-07, + "loss": 0.00010037500032922253, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2529, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.875, + "completions/mean_length": 69.97916793823242, + "completions/min_length": 30.375, + "epoch": 5.029784065524944, + "grad_norm": 0.007583189517541824, + "kl": 0.1162109375, + "learning_rate": 5.033125609107757e-07, + "loss": 0.00011631062807282433, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2530, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 66.57291889190674, + "completions/min_length": 30.375, + "epoch": 5.031769669893274, + "grad_norm": 0.004714270366371135, + "kl": 0.105712890625, + "learning_rate": 5.029970828966236e-07, + "loss": 0.0001056095861713402, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2531, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.75, + "completions/mean_length": 83.739586353302, + "completions/min_length": 31.125, + "epoch": 5.033755274261603, + "grad_norm": 0.005708425257391247, + "kl": 0.12255859375, + "learning_rate": 5.02681603689269e-07, + "loss": 0.00012249842984601855, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2532, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 64.87500143051147, + "completions/min_length": 28.5, + "epoch": 5.035740878629933, + "grad_norm": 0.004570930730190933, + "kl": 0.100189208984375, + "learning_rate": 5.023661234143106e-07, + "loss": 0.00010015173756983131, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2533, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 55.562500953674316, + "completions/min_length": 27.25, + "epoch": 5.037726482998263, + "grad_norm": 0.006616829847489677, + "kl": 0.109161376953125, + "learning_rate": 5.020506421973479e-07, + "loss": 0.00010916464816546068, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2534, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.625, + "completions/mean_length": 65.89583539962769, + "completions/min_length": 27.125, + "epoch": 5.039712087366592, + "grad_norm": 0.0034828922211539307, + "kl": 0.0919189453125, + "learning_rate": 5.017351601639808e-07, + "loss": 9.19168523978442e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2535, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 63.44791889190674, + "completions/min_length": 31.0, + "epoch": 5.041697691734922, + "grad_norm": 0.0051874652623181425, + "kl": 0.097686767578125, + "learning_rate": 5.014196774398093e-07, + "loss": 9.773996862350032e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2536, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 65.20833492279053, + "completions/min_length": 29.75, + "epoch": 5.043683296103252, + "grad_norm": 1.9653830777598411, + "kl": 0.38671875, + "learning_rate": 5.01104194150434e-07, + "loss": -0.0030101314187049866, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2537, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 61.875000953674316, + "completions/min_length": 28.375, + "epoch": 5.045668900471581, + "grad_norm": 0.007699093583365529, + "kl": 0.09490966796875, + "learning_rate": 5.007887104214553e-07, + "loss": 9.496343409409747e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2538, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.75, + "completions/mean_length": 66.62500095367432, + "completions/min_length": 29.75, + "epoch": 5.047654504839911, + "grad_norm": 0.003927864273868547, + "kl": 0.098236083984375, + "learning_rate": 5.004732263784741e-07, + "loss": 9.817550017032772e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2539, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 67.05208492279053, + "completions/min_length": 29.125, + "epoch": 5.04964010920824, + "grad_norm": 1.2703279602329227, + "kl": 0.10308837890625, + "learning_rate": 5.001577421470915e-07, + "loss": -0.009337382391095161, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166669771075, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2540, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 67.42708492279053, + "completions/min_length": 32.875, + "epoch": 5.05162571357657, + "grad_norm": 1.6264383387918246, + "kl": 0.1051025390625, + "learning_rate": 4.998422578529084e-07, + "loss": -0.003819418605417013, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2541, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 72.43750190734863, + "completions/min_length": 31.75, + "epoch": 5.0536113179449, + "grad_norm": 0.004149466904815951, + "kl": 0.094024658203125, + "learning_rate": 4.995267736215257e-07, + "loss": 9.400384442415088e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2542, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 65.57291889190674, + "completions/min_length": 29.625, + "epoch": 5.055596922313229, + "grad_norm": 0.020291987098361323, + "kl": 0.115631103515625, + "learning_rate": 4.992112895785447e-07, + "loss": 0.00011562337022041902, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2543, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.875, + "completions/mean_length": 67.29166841506958, + "completions/min_length": 28.625, + "epoch": 5.057582526681559, + "grad_norm": 0.023178408926272404, + "kl": 0.1390380859375, + "learning_rate": 4.98895805849566e-07, + "loss": 0.0001389832905260846, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2544, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 67.63541841506958, + "completions/min_length": 32.375, + "epoch": 5.059568131049888, + "grad_norm": 0.004945961044770626, + "kl": 0.10650634765625, + "learning_rate": 4.985803225601908e-07, + "loss": 0.00010636872320901603, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2545, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 70.38541889190674, + "completions/min_length": 32.5, + "epoch": 5.061553735418218, + "grad_norm": 0.003946838572823769, + "kl": 0.10302734375, + "learning_rate": 4.982648398360193e-07, + "loss": 0.00010314557584933937, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2546, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.875, + "completions/mean_length": 61.81250190734863, + "completions/min_length": 31.5, + "epoch": 5.063539339786548, + "grad_norm": 0.004165742620855547, + "kl": 0.101654052734375, + "learning_rate": 4.979493578026522e-07, + "loss": 0.00010163609113078564, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2547, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.25, + "completions/mean_length": 69.52083539962769, + "completions/min_length": 29.75, + "epoch": 5.065524944154877, + "grad_norm": 0.004493888874728538, + "kl": 0.08056640625, + "learning_rate": 4.976338765856895e-07, + "loss": 8.054426871240139e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2548, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.75, + "completions/mean_length": 66.97916889190674, + "completions/min_length": 32.0, + "epoch": 5.067510548523207, + "grad_norm": 0.006237478390847987, + "kl": 0.123779296875, + "learning_rate": 4.97318396310731e-07, + "loss": 0.0001237476826645434, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2549, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 68.70833587646484, + "completions/min_length": 29.0, + "epoch": 5.069496152891537, + "grad_norm": 0.004238779272408308, + "kl": 0.124847412109375, + "learning_rate": 4.970029171033763e-07, + "loss": 0.00012498561409302056, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2550, + "train_speed(iter/s)": 0.022723 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 246.125, + "completions/mean_length": 76.25000238418579, + "completions/min_length": 30.625, + "epoch": 5.071481757259866, + "grad_norm": 1.0745390531959413, + "kl": 0.14013671875, + "learning_rate": 4.966874390892243e-07, + "loss": 0.02296457812190056, + "memory(GiB)": 94.21, + "reward": 1.5833333432674408, + "reward_std": 0.08330589532852173, + "rewards/CineAccuracyORM/mean": 0.5937500074505806, + "rewards/CineAccuracyORM/std": 0.30885961651802063, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 2551, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.625, + "completions/mean_length": 68.57291889190674, + "completions/min_length": 29.875, + "epoch": 5.073467361628196, + "grad_norm": 0.0032185030285077965, + "kl": 0.082061767578125, + "learning_rate": 4.963719623938737e-07, + "loss": 8.208003418985754e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2552, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.625, + "completions/mean_length": 64.27083444595337, + "completions/min_length": 31.0, + "epoch": 5.075452965996525, + "grad_norm": 0.003599994897597821, + "kl": 0.0941162109375, + "learning_rate": 4.960564871429222e-07, + "loss": 9.41462567425333e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2553, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 63.94791889190674, + "completions/min_length": 31.125, + "epoch": 5.077438570364855, + "grad_norm": 0.004342573103799313, + "kl": 0.09063720703125, + "learning_rate": 4.957410134619675e-07, + "loss": 9.062183380592614e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2554, + "train_speed(iter/s)": 0.022722 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 70.37500238418579, + "completions/min_length": 27.375, + "epoch": 5.079424174733185, + "grad_norm": 0.005822693555720189, + "kl": 0.099609375, + "learning_rate": 4.954255414766061e-07, + "loss": 9.948282968252897e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2555, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 71.44791841506958, + "completions/min_length": 32.125, + "epoch": 5.081409779101514, + "grad_norm": 0.00457032319740269, + "kl": 0.0821533203125, + "learning_rate": 4.951100713124345e-07, + "loss": 8.20490822661668e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2556, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.5, + "completions/mean_length": 73.79166889190674, + "completions/min_length": 31.125, + "epoch": 5.083395383469844, + "grad_norm": 0.006224522880348564, + "kl": 0.115478515625, + "learning_rate": 4.94794603095048e-07, + "loss": 0.00011529694893397391, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2557, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 62.89583492279053, + "completions/min_length": 30.375, + "epoch": 5.085380987838173, + "grad_norm": 0.004684628436406768, + "kl": 0.0975341796875, + "learning_rate": 4.944791369500413e-07, + "loss": 9.747844160301611e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2558, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 72.29166841506958, + "completions/min_length": 33.25, + "epoch": 5.087366592206503, + "grad_norm": 0.003619857538395175, + "kl": 0.0911865234375, + "learning_rate": 4.941636730030078e-07, + "loss": 9.106974903261289e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2559, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 70.57291889190674, + "completions/min_length": 32.625, + "epoch": 5.089352196574833, + "grad_norm": 1.2921302040655736, + "kl": 0.126220703125, + "learning_rate": 4.93848211379541e-07, + "loss": 0.002803318202495575, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2560, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 68.22916984558105, + "completions/min_length": 28.0, + "epoch": 5.091337800943162, + "grad_norm": 1.1764317021676807, + "kl": 0.13055419921875, + "learning_rate": 4.935327522052325e-07, + "loss": -0.0032629664056003094, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2561, + "train_speed(iter/s)": 0.022721 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.375, + "completions/mean_length": 78.27083444595337, + "completions/min_length": 28.375, + "epoch": 5.093323405311492, + "grad_norm": 0.9938929282267596, + "kl": 0.10723876953125, + "learning_rate": 4.932172956056734e-07, + "loss": 0.010715566575527191, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2562, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 70.97916841506958, + "completions/min_length": 34.625, + "epoch": 5.095309009679822, + "grad_norm": 0.003754987558670675, + "kl": 0.10089111328125, + "learning_rate": 4.929018417064539e-07, + "loss": 0.00010075596946990117, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2563, + "train_speed(iter/s)": 0.02272 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.875, + "completions/mean_length": 71.34375190734863, + "completions/min_length": 30.25, + "epoch": 5.097294614048151, + "grad_norm": 0.008900418972150936, + "kl": 0.10845947265625, + "learning_rate": 4.92586390633163e-07, + "loss": 0.00010846446821233258, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2564, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 70.76041793823242, + "completions/min_length": 32.125, + "epoch": 5.099280218416481, + "grad_norm": 0.03464225694956673, + "kl": 0.092041015625, + "learning_rate": 4.922709425113883e-07, + "loss": 9.195441816700622e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2565, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 64.87500095367432, + "completions/min_length": 28.75, + "epoch": 5.10126582278481, + "grad_norm": 1.5202377386924777, + "kl": 0.236419677734375, + "learning_rate": 4.919554974667164e-07, + "loss": -0.008266448974609375, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2566, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 72.78125238418579, + "completions/min_length": 28.125, + "epoch": 5.10325142715314, + "grad_norm": 0.0064623974494811675, + "kl": 0.11212158203125, + "learning_rate": 4.916400556247327e-07, + "loss": 0.00011209688091184944, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2567, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 70.33333539962769, + "completions/min_length": 31.25, + "epoch": 5.10523703152147, + "grad_norm": 0.004991632533892316, + "kl": 0.107666015625, + "learning_rate": 4.913246171110215e-07, + "loss": 0.0001076382104656659, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2568, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.625, + "completions/mean_length": 77.48958539962769, + "completions/min_length": 30.375, + "epoch": 5.107222635889799, + "grad_norm": 0.005269814049016017, + "kl": 0.12255859375, + "learning_rate": 4.910091820511653e-07, + "loss": 0.00012264422548469156, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2569, + "train_speed(iter/s)": 0.022719 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 71.30208492279053, + "completions/min_length": 32.5, + "epoch": 5.109208240258129, + "grad_norm": 0.9829264788720672, + "kl": 0.0919189453125, + "learning_rate": 4.90693750570746e-07, + "loss": -0.008390005677938461, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2570, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.375, + "completions/mean_length": 75.31250238418579, + "completions/min_length": 32.75, + "epoch": 5.111193844626458, + "grad_norm": 1.396398857368396, + "kl": 0.1234130859375, + "learning_rate": 4.903783227953433e-07, + "loss": -0.00040790438652038574, + "memory(GiB)": 94.21, + "reward": 1.6458333432674408, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.645833333954215, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2571, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 67.64583444595337, + "completions/min_length": 23.5, + "epoch": 5.113179448994788, + "grad_norm": 0.004563685966345267, + "kl": 0.090576171875, + "learning_rate": 4.900628988505359e-07, + "loss": 9.065106132766232e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2572, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 65.6041693687439, + "completions/min_length": 30.0, + "epoch": 5.115165053363118, + "grad_norm": 0.0050242112121208484, + "kl": 0.096832275390625, + "learning_rate": 4.897474788619006e-07, + "loss": 9.683077223598957e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2573, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 66.46875143051147, + "completions/min_length": 27.75, + "epoch": 5.117150657731447, + "grad_norm": 0.0034882707821251575, + "kl": 0.11846923828125, + "learning_rate": 4.894320629550132e-07, + "loss": 0.00011829341383418068, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2574, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 63.031251430511475, + "completions/min_length": 28.625, + "epoch": 5.119136262099777, + "grad_norm": 0.010017185729204422, + "kl": 0.0941162109375, + "learning_rate": 4.891166512554471e-07, + "loss": 9.421360300621018e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2575, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 69.20833587646484, + "completions/min_length": 32.25, + "epoch": 5.121121866468107, + "grad_norm": 0.9940637560083075, + "kl": 0.101318359375, + "learning_rate": 4.888012438887744e-07, + "loss": 0.0015658674528822303, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2576, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 71.22916841506958, + "completions/min_length": 30.75, + "epoch": 5.123107470836436, + "grad_norm": 0.00349634987423695, + "kl": 0.088592529296875, + "learning_rate": 4.884858409805659e-07, + "loss": 8.854376937961206e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2577, + "train_speed(iter/s)": 0.022718 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.75, + "completions/mean_length": 82.92708539962769, + "completions/min_length": 39.0, + "epoch": 5.125093075204766, + "grad_norm": 1.320022766440065, + "kl": 0.1160888671875, + "learning_rate": 4.881704426563902e-07, + "loss": -0.004320295061916113, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2578, + "train_speed(iter/s)": 0.022717 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.0, + "completions/mean_length": 80.23958492279053, + "completions/min_length": 33.375, + "epoch": 5.127078679573095, + "grad_norm": 0.0036361284087622525, + "kl": 0.10821533203125, + "learning_rate": 4.878550490418137e-07, + "loss": 0.00010807962098624557, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2579, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 69.00000286102295, + "completions/min_length": 26.875, + "epoch": 5.129064283941425, + "grad_norm": 0.004746080629299705, + "kl": 0.1199951171875, + "learning_rate": 4.875396602624017e-07, + "loss": 0.00011998966510873288, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2580, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 77.73958587646484, + "completions/min_length": 35.25, + "epoch": 5.131049888309755, + "grad_norm": 0.007535678592219246, + "kl": 0.10858154296875, + "learning_rate": 4.872242764437171e-07, + "loss": 0.00010861566261155531, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2581, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.5, + "completions/mean_length": 82.71875286102295, + "completions/min_length": 31.375, + "epoch": 5.133035492678084, + "grad_norm": 0.8705752531512639, + "kl": 0.09991455078125, + "learning_rate": 4.869088977113207e-07, + "loss": 9.991849947255105e-05, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2582, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.625, + "completions/mean_length": 73.58333539962769, + "completions/min_length": 33.0, + "epoch": 5.135021097046414, + "grad_norm": 0.004163078036784345, + "kl": 0.08905029296875, + "learning_rate": 4.86593524190772e-07, + "loss": 8.906162111088634e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2583, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.75, + "completions/mean_length": 70.66666841506958, + "completions/min_length": 32.125, + "epoch": 5.137006701414743, + "grad_norm": 0.011931836470542755, + "kl": 0.11639404296875, + "learning_rate": 4.862781560076276e-07, + "loss": 0.00011633748363237828, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2584, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.0, + "completions/mean_length": 78.9791693687439, + "completions/min_length": 34.375, + "epoch": 5.138992305783073, + "grad_norm": 0.0037652022100950295, + "kl": 0.108123779296875, + "learning_rate": 4.859627932874425e-07, + "loss": 0.0001079783687600866, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2585, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.625, + "completions/mean_length": 77.00000238418579, + "completions/min_length": 29.0, + "epoch": 5.140977910151403, + "grad_norm": 0.003763644546763978, + "kl": 0.09405517578125, + "learning_rate": 4.856474361557691e-07, + "loss": 9.412574581801891e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2586, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 73.93750143051147, + "completions/min_length": 28.25, + "epoch": 5.142963514519732, + "grad_norm": 0.0036903668686288885, + "kl": 0.09393310546875, + "learning_rate": 4.85332084738158e-07, + "loss": 9.381485142512247e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2587, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 70.145836353302, + "completions/min_length": 29.875, + "epoch": 5.144949118888062, + "grad_norm": 0.004692676291984332, + "kl": 0.085723876953125, + "learning_rate": 4.850167391601573e-07, + "loss": 8.578070992371067e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2588, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.375, + "completions/mean_length": 67.31250190734863, + "completions/min_length": 30.625, + "epoch": 5.146934723256392, + "grad_norm": 0.16010160207592325, + "kl": 0.183502197265625, + "learning_rate": 4.847013995473124e-07, + "loss": 0.00018308368453290313, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2589, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 73.32291889190674, + "completions/min_length": 31.125, + "epoch": 5.148920327624721, + "grad_norm": 0.0036904766295732475, + "kl": 0.10711669921875, + "learning_rate": 4.843860660251675e-07, + "loss": 0.00010705438035074621, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2590, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.5, + "completions/mean_length": 72.10417032241821, + "completions/min_length": 28.375, + "epoch": 5.150905931993051, + "grad_norm": 0.00852112985528789, + "kl": 0.11773681640625, + "learning_rate": 4.840707387192631e-07, + "loss": 0.0001178392194560729, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2591, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 73.41666793823242, + "completions/min_length": 32.625, + "epoch": 5.15289153636138, + "grad_norm": 0.005945406501459239, + "kl": 0.09442138671875, + "learning_rate": 4.837554177551379e-07, + "loss": 9.440961002837867e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2592, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.875, + "completions/mean_length": 74.03125286102295, + "completions/min_length": 37.25, + "epoch": 5.15487714072971, + "grad_norm": 0.005295513267288697, + "kl": 0.11273193359375, + "learning_rate": 4.834401032583279e-07, + "loss": 0.0001126797214965336, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2593, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 72.15625286102295, + "completions/min_length": 33.875, + "epoch": 5.1568627450980395, + "grad_norm": 0.004223853939786181, + "kl": 0.1029052734375, + "learning_rate": 4.831247953543665e-07, + "loss": 0.00010293864033883438, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2594, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 63.54166841506958, + "completions/min_length": 28.125, + "epoch": 5.158848349466369, + "grad_norm": 0.007317113593867267, + "kl": 0.104248046875, + "learning_rate": 4.828094941687845e-07, + "loss": 0.00010427868983242661, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2595, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 69.96875238418579, + "completions/min_length": 32.5, + "epoch": 5.160833953834699, + "grad_norm": 0.006002254546329571, + "kl": 0.09857177734375, + "learning_rate": 4.824941998271098e-07, + "loss": 9.859363490249962e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2596, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.75, + "completions/mean_length": 66.10416841506958, + "completions/min_length": 28.125, + "epoch": 5.162819558203028, + "grad_norm": 0.006945710791103894, + "kl": 0.09759521484375, + "learning_rate": 4.821789124548684e-07, + "loss": 9.772012708708644e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2597, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 66.2291693687439, + "completions/min_length": 28.0, + "epoch": 5.164805162571358, + "grad_norm": 0.043844183191015844, + "kl": 0.1370849609375, + "learning_rate": 4.818636321775826e-07, + "loss": 0.00013703471631743014, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2598, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 64.84375190734863, + "completions/min_length": 26.875, + "epoch": 5.1667907669396875, + "grad_norm": 0.710353268314275, + "kl": 0.0999755859375, + "learning_rate": 4.81548359120772e-07, + "loss": -0.001494353055022657, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2599, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 72.10416793823242, + "completions/min_length": 33.0, + "epoch": 5.168776371308017, + "grad_norm": 1.2268984208000684, + "kl": 0.107269287109375, + "learning_rate": 4.812330934099539e-07, + "loss": -0.007672019302845001, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2600, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.25, + "completions/mean_length": 73.50000190734863, + "completions/min_length": 34.125, + "epoch": 5.1707619756763465, + "grad_norm": 0.004836426166943891, + "kl": 0.0980224609375, + "learning_rate": 4.809178351706421e-07, + "loss": 9.809209586819634e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2601, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 74.95833492279053, + "completions/min_length": 34.0, + "epoch": 5.1727475800446765, + "grad_norm": 0.0046030617352338585, + "kl": 0.107666015625, + "learning_rate": 4.806025845283478e-07, + "loss": 0.00010756956180557609, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2602, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.5, + "completions/mean_length": 73.67708587646484, + "completions/min_length": 30.125, + "epoch": 5.174733184413006, + "grad_norm": 0.005731457060869514, + "kl": 0.09820556640625, + "learning_rate": 4.802873416085787e-07, + "loss": 9.825353481573984e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2603, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 66.82291746139526, + "completions/min_length": 30.5, + "epoch": 5.1767187887813355, + "grad_norm": 0.007159933235794413, + "kl": 0.11114501953125, + "learning_rate": 4.7997210653684e-07, + "loss": 0.00011110660852864385, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2604, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 68.46875190734863, + "completions/min_length": 32.5, + "epoch": 5.178704393149665, + "grad_norm": 0.006762124669427222, + "kl": 0.109954833984375, + "learning_rate": 4.796568794386337e-07, + "loss": 0.00010990996088366956, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2605, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 77.30208539962769, + "completions/min_length": 30.0, + "epoch": 5.1806899975179945, + "grad_norm": 0.0034161967942836265, + "kl": 0.10052490234375, + "learning_rate": 4.793416604394581e-07, + "loss": 0.00010052922152681276, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2606, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 74.36458539962769, + "completions/min_length": 32.625, + "epoch": 5.1826756018863245, + "grad_norm": 1.7620352293194859, + "kl": 0.11260986328125, + "learning_rate": 4.790264496648087e-07, + "loss": -0.003977940417826176, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2607, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.625, + "completions/mean_length": 73.13541793823242, + "completions/min_length": 34.25, + "epoch": 5.1846612062546535, + "grad_norm": 0.006612331233886834, + "kl": 0.1142578125, + "learning_rate": 4.787112472401778e-07, + "loss": 0.00011414119944674894, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2608, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.25, + "completions/mean_length": 63.97916841506958, + "completions/min_length": 36.25, + "epoch": 5.1866468106229835, + "grad_norm": 0.004612474305952471, + "kl": 0.093505859375, + "learning_rate": 4.783960532910539e-07, + "loss": 9.349848551210016e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2609, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 66.16666889190674, + "completions/min_length": 30.125, + "epoch": 5.188632414991313, + "grad_norm": 0.0071084556070459696, + "kl": 0.09454345703125, + "learning_rate": 4.780808679429227e-07, + "loss": 9.448091441299766e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2610, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.625, + "completions/mean_length": 59.250001430511475, + "completions/min_length": 28.5, + "epoch": 5.1906180193596425, + "grad_norm": 0.005476488372901364, + "kl": 0.101776123046875, + "learning_rate": 4.777656913212661e-07, + "loss": 0.00010169532470172271, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2611, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 72.82291889190674, + "completions/min_length": 34.125, + "epoch": 5.1926036237279725, + "grad_norm": 1.0597091670071848, + "kl": 0.09930419921875, + "learning_rate": 4.774505235515627e-07, + "loss": -0.00698669021949172, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2612, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 78.31250286102295, + "completions/min_length": 32.25, + "epoch": 5.1945892280963015, + "grad_norm": 0.0038470947091824846, + "kl": 0.11309814453125, + "learning_rate": 4.771353647592877e-07, + "loss": 0.00011326118692522869, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2613, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.875, + "completions/mean_length": 72.12500238418579, + "completions/min_length": 30.375, + "epoch": 5.1965748324646315, + "grad_norm": 0.005765951218875941, + "kl": 0.105224609375, + "learning_rate": 4.7682021506991234e-07, + "loss": 0.00010529119754210114, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2614, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.75, + "completions/mean_length": 64.2916669845581, + "completions/min_length": 28.0, + "epoch": 5.198560436832961, + "grad_norm": 0.005293132131858503, + "kl": 0.09222412109375, + "learning_rate": 4.765050746089044e-07, + "loss": 9.2321504780557e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2615, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 70.47916793823242, + "completions/min_length": 32.75, + "epoch": 5.2005460412012905, + "grad_norm": 0.005480880290834572, + "kl": 0.0919189453125, + "learning_rate": 4.7618994350172804e-07, + "loss": 9.197047620546073e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2616, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 80.48958587646484, + "completions/min_length": 35.0, + "epoch": 5.2025316455696204, + "grad_norm": 0.005516743594659246, + "kl": 0.10064697265625, + "learning_rate": 4.758748218738437e-07, + "loss": 0.00010073870362248272, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2617, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.875, + "completions/mean_length": 76.38541841506958, + "completions/min_length": 32.375, + "epoch": 5.2045172499379495, + "grad_norm": 0.006437585680345584, + "kl": 0.1337890625, + "learning_rate": 4.755597098507081e-07, + "loss": 0.00013380989548750222, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2618, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 68.94792032241821, + "completions/min_length": 33.625, + "epoch": 5.2065028543062795, + "grad_norm": 0.0051204374356839475, + "kl": 0.12884521484375, + "learning_rate": 4.7524460755777416e-07, + "loss": 0.00012870071805082262, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2619, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 61.343751430511475, + "completions/min_length": 30.625, + "epoch": 5.208488458674609, + "grad_norm": 0.0047993589187518, + "kl": 0.103759765625, + "learning_rate": 4.7492951512049067e-07, + "loss": 0.0001037592810462229, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2620, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 64.15625143051147, + "completions/min_length": 32.875, + "epoch": 5.2104740630429385, + "grad_norm": 0.006071364234428801, + "kl": 0.08392333984375, + "learning_rate": 4.7461443266430266e-07, + "loss": 8.384877583011985e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2621, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.875, + "completions/mean_length": 61.70833492279053, + "completions/min_length": 27.0, + "epoch": 5.212459667411268, + "grad_norm": 0.006307880013792626, + "kl": 0.09686279296875, + "learning_rate": 4.742993603146514e-07, + "loss": 9.688713180366904e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2622, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 70.18750190734863, + "completions/min_length": 30.625, + "epoch": 5.2144452717795975, + "grad_norm": 1.1016312246869608, + "kl": 0.466827392578125, + "learning_rate": 4.7398429819697363e-07, + "loss": 0.0013472680002450943, + "memory(GiB)": 94.21, + "reward": 1.6145833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6145833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2623, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.5, + "completions/mean_length": 64.12500333786011, + "completions/min_length": 23.25, + "epoch": 5.216430876147927, + "grad_norm": 0.005868808569508863, + "kl": 0.107269287109375, + "learning_rate": 4.7366924643670213e-07, + "loss": 0.00010730304347816855, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2624, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.0, + "completions/mean_length": 70.79166984558105, + "completions/min_length": 29.625, + "epoch": 5.218416480516257, + "grad_norm": 0.004022996270207801, + "kl": 0.102569580078125, + "learning_rate": 4.7335420515926643e-07, + "loss": 0.00010262371506541967, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2625, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 67.0729193687439, + "completions/min_length": 32.75, + "epoch": 5.2204020848845865, + "grad_norm": 0.004658142665238755, + "kl": 0.09912109375, + "learning_rate": 4.7303917449009075e-07, + "loss": 9.901373414322734e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2626, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.375, + "completions/mean_length": 68.71875238418579, + "completions/min_length": 27.375, + "epoch": 5.222387689252916, + "grad_norm": 0.00399450224956877, + "kl": 0.105987548828125, + "learning_rate": 4.7272415455459545e-07, + "loss": 0.00010581130482023582, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2627, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 57.156250953674316, + "completions/min_length": 28.625, + "epoch": 5.224373293621246, + "grad_norm": 0.005320007114002999, + "kl": 0.101318359375, + "learning_rate": 4.724091454781969e-07, + "loss": 0.00010140084486920387, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2628, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.5, + "completions/mean_length": 65.96875238418579, + "completions/min_length": 32.375, + "epoch": 5.226358897989575, + "grad_norm": 0.003597931263653341, + "kl": 0.08306884765625, + "learning_rate": 4.7209414738630684e-07, + "loss": 8.3007755165454e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2629, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 72.72916841506958, + "completions/min_length": 31.625, + "epoch": 5.228344502357905, + "grad_norm": 0.8677619530771741, + "kl": 0.129425048828125, + "learning_rate": 4.7177916040433285e-07, + "loss": -1.3140961527824402e-05, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166669771075, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2630, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 65.65625190734863, + "completions/min_length": 31.875, + "epoch": 5.230330106726234, + "grad_norm": 0.601444733348804, + "kl": 0.11334228515625, + "learning_rate": 4.714641846576776e-07, + "loss": 0.015820711851119995, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2631, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 69.09375381469727, + "completions/min_length": 31.625, + "epoch": 5.232315711094564, + "grad_norm": 0.004478085129754446, + "kl": 0.1082763671875, + "learning_rate": 4.7114922027174014e-07, + "loss": 0.00010826517245732248, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2632, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 69.72916793823242, + "completions/min_length": 35.0, + "epoch": 5.234301315462894, + "grad_norm": 1.7281601441009582, + "kl": 0.09722900390625, + "learning_rate": 4.7083426737191433e-07, + "loss": 0.0013518178602680564, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.3624799847602844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2633, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.75, + "completions/mean_length": 62.64583492279053, + "completions/min_length": 29.875, + "epoch": 5.236286919831223, + "grad_norm": 0.009172960753181211, + "kl": 0.085479736328125, + "learning_rate": 4.705193260835898e-07, + "loss": 8.555130625609308e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2634, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 74.26041793823242, + "completions/min_length": 27.625, + "epoch": 5.238272524199553, + "grad_norm": 0.008078826512667574, + "kl": 0.133392333984375, + "learning_rate": 4.702043965321511e-07, + "loss": 0.00013350852532312274, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2635, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.0, + "completions/mean_length": 64.20833587646484, + "completions/min_length": 27.375, + "epoch": 5.240258128567882, + "grad_norm": 0.005743604620532447, + "kl": 0.08721923828125, + "learning_rate": 4.698894788429785e-07, + "loss": 8.715804142411798e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2636, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 70.03125190734863, + "completions/min_length": 36.125, + "epoch": 5.242243732936212, + "grad_norm": 2.7837359727853075, + "kl": 0.113037109375, + "learning_rate": 4.6957457314144763e-07, + "loss": 0.0014959839172661304, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2637, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 109.625, + "completions/mean_length": 62.19791889190674, + "completions/min_length": 30.875, + "epoch": 5.244229337304542, + "grad_norm": 0.003928589652051801, + "kl": 0.10589599609375, + "learning_rate": 4.692596795529289e-07, + "loss": 0.00010605131683405489, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2638, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 64.34375238418579, + "completions/min_length": 28.875, + "epoch": 5.246214941672871, + "grad_norm": 0.006335306633372392, + "kl": 0.10504150390625, + "learning_rate": 4.689447982027884e-07, + "loss": 0.00010493789159227163, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2639, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 69.61458539962769, + "completions/min_length": 27.75, + "epoch": 5.248200546041201, + "grad_norm": 0.00806029531397031, + "kl": 0.11553955078125, + "learning_rate": 4.6862992921638715e-07, + "loss": 0.00011543689470272511, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2640, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.25, + "completions/mean_length": 69.75000095367432, + "completions/min_length": 29.5, + "epoch": 5.250186150409531, + "grad_norm": 0.7667616665562859, + "kl": 0.090789794921875, + "learning_rate": 4.683150727190811e-07, + "loss": -0.002860710024833679, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2641, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 71.0416693687439, + "completions/min_length": 34.125, + "epoch": 5.25217175477786, + "grad_norm": 0.005673957075700008, + "kl": 0.12005615234375, + "learning_rate": 4.6800022883622146e-07, + "loss": 0.00012004860764136538, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2642, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.875, + "completions/mean_length": 65.00000238418579, + "completions/min_length": 34.875, + "epoch": 5.25415735914619, + "grad_norm": 0.9480174734421917, + "kl": 0.1240234375, + "learning_rate": 4.676853976931541e-07, + "loss": -0.0024216333404183388, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2643, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 74.87500238418579, + "completions/min_length": 28.0, + "epoch": 5.256142963514519, + "grad_norm": 0.011234550629193851, + "kl": 0.1002197265625, + "learning_rate": 4.673705794152202e-07, + "loss": 0.00010017223394243047, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2644, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 70.79166841506958, + "completions/min_length": 32.5, + "epoch": 5.258128567882849, + "grad_norm": 0.9134315240616419, + "kl": 0.1092529296875, + "learning_rate": 4.670557741277554e-07, + "loss": 6.292884791037068e-05, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2645, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 71.61458587646484, + "completions/min_length": 36.125, + "epoch": 5.260114172251179, + "grad_norm": 0.0038829953407312034, + "kl": 0.09124755859375, + "learning_rate": 4.667409819560908e-07, + "loss": 9.123456402448937e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2646, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.0, + "completions/mean_length": 58.677085876464844, + "completions/min_length": 26.125, + "epoch": 5.262099776619508, + "grad_norm": 0.003763861756350485, + "kl": 0.1295166015625, + "learning_rate": 4.6642620302555156e-07, + "loss": 0.00012935773702338338, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2647, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 65.19791793823242, + "completions/min_length": 27.875, + "epoch": 5.264085380987838, + "grad_norm": 0.004885265924074659, + "kl": 0.104888916015625, + "learning_rate": 4.661114374614581e-07, + "loss": 0.00010491409193491563, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2648, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 66.70833539962769, + "completions/min_length": 26.875, + "epoch": 5.266070985356167, + "grad_norm": 0.8058727246602714, + "kl": 0.1195068359375, + "learning_rate": 4.657966853891252e-07, + "loss": 0.006727383937686682, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2649, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 70.91666984558105, + "completions/min_length": 32.25, + "epoch": 5.268056589724497, + "grad_norm": 1.1097048789467958, + "kl": 0.08563232421875, + "learning_rate": 4.6548194693386253e-07, + "loss": -0.004129012115299702, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2650, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 65.71875095367432, + "completions/min_length": 32.0, + "epoch": 5.270042194092827, + "grad_norm": 0.0034455468977714772, + "kl": 0.10345458984375, + "learning_rate": 4.6516722222097375e-07, + "loss": 0.00010354255209676921, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2651, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 69.05208492279053, + "completions/min_length": 30.0, + "epoch": 5.272027798461156, + "grad_norm": 1.207912891456734, + "kl": 0.120361328125, + "learning_rate": 4.6485251137575804e-07, + "loss": 0.007184509187936783, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166669771075, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2652, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 69.70833444595337, + "completions/min_length": 33.5, + "epoch": 5.274013402829486, + "grad_norm": 0.0052535366879299245, + "kl": 0.105133056640625, + "learning_rate": 4.6453781452350825e-07, + "loss": 0.00010509626008570194, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2653, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 72.72916984558105, + "completions/min_length": 30.875, + "epoch": 5.275999007197816, + "grad_norm": 1.0883078647094717, + "kl": 0.1103515625, + "learning_rate": 4.642231317895121e-07, + "loss": -0.0037160050123929977, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2654, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 70.31250238418579, + "completions/min_length": 30.125, + "epoch": 5.277984611566145, + "grad_norm": 0.004094744670164528, + "kl": 0.114990234375, + "learning_rate": 4.639084632990512e-07, + "loss": 0.00011494129284983501, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2655, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 67.22916889190674, + "completions/min_length": 29.75, + "epoch": 5.279970215934475, + "grad_norm": 0.0034838891279340436, + "kl": 0.09283447265625, + "learning_rate": 4.63593809177402e-07, + "loss": 9.281394159188494e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2656, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.125, + "completions/mean_length": 64.04166984558105, + "completions/min_length": 30.75, + "epoch": 5.281955820302804, + "grad_norm": 0.00645498781148834, + "kl": 0.08660888671875, + "learning_rate": 4.63279169549835e-07, + "loss": 8.655428973725066e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2657, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 67.07291793823242, + "completions/min_length": 28.375, + "epoch": 5.283941424671134, + "grad_norm": 0.010495825577192846, + "kl": 0.097991943359375, + "learning_rate": 4.629645445416148e-07, + "loss": 9.803565626498312e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2658, + "train_speed(iter/s)": 0.022716 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 71.11458539962769, + "completions/min_length": 32.25, + "epoch": 5.285927029039464, + "grad_norm": 0.0038338828230228164, + "kl": 0.082305908203125, + "learning_rate": 4.626499342780006e-07, + "loss": 8.228716615121812e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2659, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 67.64583444595337, + "completions/min_length": 34.625, + "epoch": 5.287912633407793, + "grad_norm": 0.003817949493188409, + "kl": 0.0902099609375, + "learning_rate": 4.623353388842453e-07, + "loss": 9.016209514811635e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2660, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 77.76041984558105, + "completions/min_length": 33.375, + "epoch": 5.289898237776123, + "grad_norm": 0.0039011975311036514, + "kl": 0.1116943359375, + "learning_rate": 4.6202075848559615e-07, + "loss": 0.0001117331994464621, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2661, + "train_speed(iter/s)": 0.022715 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 65.81250238418579, + "completions/min_length": 30.125, + "epoch": 5.291883842144452, + "grad_norm": 0.006911450215343453, + "kl": 0.091064453125, + "learning_rate": 4.6170619320729435e-07, + "loss": 9.098846203414723e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2662, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 70.17708396911621, + "completions/min_length": 34.875, + "epoch": 5.293869446512782, + "grad_norm": 0.003645681409957642, + "kl": 0.0985107421875, + "learning_rate": 4.613916431745749e-07, + "loss": 9.846442117122933e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2663, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.875, + "completions/mean_length": 75.00000238418579, + "completions/min_length": 34.125, + "epoch": 5.295855050881112, + "grad_norm": 1.0842185962248985, + "kl": 0.092529296875, + "learning_rate": 4.610771085126669e-07, + "loss": 0.01603594794869423, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2664, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.0, + "completions/mean_length": 71.48958539962769, + "completions/min_length": 33.25, + "epoch": 5.297840655249441, + "grad_norm": 0.006304608783192985, + "kl": 0.10736083984375, + "learning_rate": 4.6076258934679326e-07, + "loss": 0.00010729036148404703, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2665, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 77.70833683013916, + "completions/min_length": 36.875, + "epoch": 5.299826259617771, + "grad_norm": 1.0294134706974216, + "kl": 0.10662841796875, + "learning_rate": 4.60448085802171e-07, + "loss": 0.008762389421463013, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2666, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 65.6354193687439, + "completions/min_length": 30.875, + "epoch": 5.301811863986101, + "grad_norm": 0.0034014275666769452, + "kl": 0.10137939453125, + "learning_rate": 4.6013359800401066e-07, + "loss": 0.00010149140143766999, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2667, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 65.114586353302, + "completions/min_length": 27.5, + "epoch": 5.30379746835443, + "grad_norm": 0.7136705891887115, + "kl": 0.092559814453125, + "learning_rate": 4.5981912607751644e-07, + "loss": 0.005484148394316435, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2668, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.5, + "completions/mean_length": 77.72916984558105, + "completions/min_length": 30.0, + "epoch": 5.30578307272276, + "grad_norm": 0.0057599309424786705, + "kl": 0.10968017578125, + "learning_rate": 4.5950467014788635e-07, + "loss": 0.00010955911420751363, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2669, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 67.95833444595337, + "completions/min_length": 28.375, + "epoch": 5.307768677091089, + "grad_norm": 0.005557647229112153, + "kl": 0.11376953125, + "learning_rate": 4.591902303403122e-07, + "loss": 0.00011361856013536453, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2670, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 68.7291693687439, + "completions/min_length": 30.125, + "epoch": 5.309754281459419, + "grad_norm": 0.0051339243785557525, + "kl": 0.10882568359375, + "learning_rate": 4.588758067799788e-07, + "loss": 0.00010882370406761765, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2671, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.25, + "completions/mean_length": 69.29166841506958, + "completions/min_length": 31.625, + "epoch": 5.311739885827749, + "grad_norm": 0.006618099108824196, + "kl": 0.10284423828125, + "learning_rate": 4.58561399592065e-07, + "loss": 0.00010289880447089672, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2672, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 72.29166793823242, + "completions/min_length": 35.5, + "epoch": 5.313725490196078, + "grad_norm": 1.4420723541614189, + "kl": 0.0989990234375, + "learning_rate": 4.582470089017434e-07, + "loss": -0.0015166203957051039, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2673, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 74.03125190734863, + "completions/min_length": 30.875, + "epoch": 5.315711094564408, + "grad_norm": 1.0284370583636606, + "kl": 0.1016845703125, + "learning_rate": 4.579326348341794e-07, + "loss": -0.006248476915061474, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2674, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 74.48958539962769, + "completions/min_length": 36.5, + "epoch": 5.317696698932737, + "grad_norm": 0.0046209224330496, + "kl": 0.09393310546875, + "learning_rate": 4.576182775145319e-07, + "loss": 9.3873604782857e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2675, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 62.47916793823242, + "completions/min_length": 28.875, + "epoch": 5.319682303301067, + "grad_norm": 0.004443433702788989, + "kl": 0.083984375, + "learning_rate": 4.573039370679534e-07, + "loss": 8.400942897424102e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2676, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 72.489586353302, + "completions/min_length": 34.875, + "epoch": 5.321667907669397, + "grad_norm": 0.004462116333901407, + "kl": 0.093505859375, + "learning_rate": 4.569896136195895e-07, + "loss": 9.333356138085946e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2677, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.25, + "completions/mean_length": 63.145835399627686, + "completions/min_length": 29.25, + "epoch": 5.323653512037726, + "grad_norm": 0.00423805904934543, + "kl": 0.104736328125, + "learning_rate": 4.566753072945791e-07, + "loss": 0.0001045277458615601, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2678, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.125, + "completions/mean_length": 71.29166889190674, + "completions/min_length": 33.875, + "epoch": 5.325639116406056, + "grad_norm": 0.005030354068867479, + "kl": 0.09124755859375, + "learning_rate": 4.5636101821805416e-07, + "loss": 9.109963139053434e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2679, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.75, + "completions/mean_length": 65.43750143051147, + "completions/min_length": 29.0, + "epoch": 5.327624720774386, + "grad_norm": 0.005062764091960789, + "kl": 0.08856201171875, + "learning_rate": 4.560467465151401e-07, + "loss": 8.857058128342032e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2680, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.625, + "completions/mean_length": 63.69791889190674, + "completions/min_length": 26.0, + "epoch": 5.329610325142715, + "grad_norm": 0.004766491730719321, + "kl": 0.0863037109375, + "learning_rate": 4.5573249231095506e-07, + "loss": 8.619282016297802e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2681, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 64.46875238418579, + "completions/min_length": 26.875, + "epoch": 5.331595929511045, + "grad_norm": 0.004526369587408466, + "kl": 0.09234619140625, + "learning_rate": 4.5541825573061045e-07, + "loss": 9.22142862691544e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2682, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 240.875, + "completions/mean_length": 78.552086353302, + "completions/min_length": 33.5, + "epoch": 5.333581533879374, + "grad_norm": 1.421181422291279, + "kl": 0.0811767578125, + "learning_rate": 4.551040368992104e-07, + "loss": 0.03504526615142822, + "memory(GiB)": 94.21, + "reward": 1.8437500149011612, + "reward_std": 0.0765465572476387, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.20272701978683472, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 2683, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 69.43750286102295, + "completions/min_length": 30.0, + "epoch": 5.335567138247704, + "grad_norm": 0.004339324024029077, + "kl": 0.11724853515625, + "learning_rate": 4.547898359418522e-07, + "loss": 0.00011722946510417387, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2684, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 66.90625095367432, + "completions/min_length": 31.625, + "epoch": 5.337552742616034, + "grad_norm": 0.7031222032366499, + "kl": 0.1109619140625, + "learning_rate": 4.54475652983626e-07, + "loss": -0.007756437640637159, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2685, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 61.07291793823242, + "completions/min_length": 30.375, + "epoch": 5.339538346984363, + "grad_norm": 0.004755163514806301, + "kl": 0.091583251953125, + "learning_rate": 4.541614881496146e-07, + "loss": 9.161501657217741e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2686, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 68.19791984558105, + "completions/min_length": 30.625, + "epoch": 5.341523951352693, + "grad_norm": 0.0042779028794131795, + "kl": 0.091796875, + "learning_rate": 4.5384734156489394e-07, + "loss": 9.182580834021792e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2687, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 72.37500286102295, + "completions/min_length": 33.25, + "epoch": 5.343509555721022, + "grad_norm": 0.004643375402621083, + "kl": 0.13031005859375, + "learning_rate": 4.5353321335453244e-07, + "loss": 0.00013027619570493698, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2688, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.25, + "completions/mean_length": 67.09375190734863, + "completions/min_length": 32.625, + "epoch": 5.345495160089352, + "grad_norm": 1.250908987366685, + "kl": 0.08953857421875, + "learning_rate": 4.5321910364359115e-07, + "loss": 0.002979150740429759, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2689, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 74.89583587646484, + "completions/min_length": 30.5, + "epoch": 5.347480764457682, + "grad_norm": 0.005494014440971535, + "kl": 0.1156005859375, + "learning_rate": 4.529050125571241e-07, + "loss": 0.00011558862024685368, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2690, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 70.1354193687439, + "completions/min_length": 35.75, + "epoch": 5.349466368826011, + "grad_norm": 0.009284451392173784, + "kl": 0.11395263671875, + "learning_rate": 4.5259094022017735e-07, + "loss": 0.00011395406909286976, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2691, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.75, + "completions/mean_length": 58.56250190734863, + "completions/min_length": 28.5, + "epoch": 5.351451973194341, + "grad_norm": 0.003516243504162406, + "kl": 0.076904296875, + "learning_rate": 4.5227688675778993e-07, + "loss": 7.687011384405196e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2692, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.375, + "completions/mean_length": 70.93750238418579, + "completions/min_length": 27.75, + "epoch": 5.353437577562671, + "grad_norm": 1.0759825607814095, + "kl": 0.1058349609375, + "learning_rate": 4.519628522949931e-07, + "loss": -0.007826106622815132, + "memory(GiB)": 94.21, + "reward": 1.5833333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.583333333954215, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2693, + "train_speed(iter/s)": 0.022705 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.25, + "completions/mean_length": 60.78125190734863, + "completions/min_length": 24.25, + "epoch": 5.355423181931, + "grad_norm": 0.0038787620834825893, + "kl": 0.09002685546875, + "learning_rate": 4.516488369568109e-07, + "loss": 8.985436579678208e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2694, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 75.93750286102295, + "completions/min_length": 31.5, + "epoch": 5.35740878629933, + "grad_norm": 0.0037901288279317247, + "kl": 0.0902099609375, + "learning_rate": 4.513348408682596e-07, + "loss": 9.014080569613725e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2695, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.625, + "completions/mean_length": 73.89583492279053, + "completions/min_length": 28.75, + "epoch": 5.359394390667659, + "grad_norm": 0.005275113504058885, + "kl": 0.1119384765625, + "learning_rate": 4.510208641543475e-07, + "loss": 0.00011191405064892024, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2696, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 68.31250143051147, + "completions/min_length": 29.25, + "epoch": 5.361379995035989, + "grad_norm": 0.021379974501245513, + "kl": 0.12518310546875, + "learning_rate": 4.5070690694007554e-07, + "loss": 0.0001250390923814848, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2697, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 63.052086353302, + "completions/min_length": 25.75, + "epoch": 5.363365599404319, + "grad_norm": 0.00412966554760557, + "kl": 0.0975341796875, + "learning_rate": 4.503929693504368e-07, + "loss": 9.764985588844866e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2698, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.125, + "completions/mean_length": 64.75000333786011, + "completions/min_length": 32.25, + "epoch": 5.365351203772648, + "grad_norm": 0.003937340794297293, + "kl": 0.093841552734375, + "learning_rate": 4.5007905151041667e-07, + "loss": 9.39401361392811e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2699, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.125, + "completions/mean_length": 69.92708587646484, + "completions/min_length": 30.625, + "epoch": 5.367336808140978, + "grad_norm": 0.0037077765540929667, + "kl": 0.118408203125, + "learning_rate": 4.4976515354499215e-07, + "loss": 0.00011831421579699963, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2700, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.75, + "completions/mean_length": 69.2916693687439, + "completions/min_length": 26.625, + "epoch": 5.369322412509307, + "grad_norm": 0.0040773690346505055, + "kl": 0.0986328125, + "learning_rate": 4.494512755791332e-07, + "loss": 9.857376426225528e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2701, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 61.88541889190674, + "completions/min_length": 28.625, + "epoch": 5.371308016877637, + "grad_norm": 0.0047566332830194895, + "kl": 0.09454345703125, + "learning_rate": 4.4913741773780123e-07, + "loss": 9.452088852412999e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2702, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 63.64583396911621, + "completions/min_length": 30.875, + "epoch": 5.373293621245967, + "grad_norm": 5.475690907559695, + "kl": 0.0948486328125, + "learning_rate": 4.4882358014594953e-07, + "loss": 0.008709586225450039, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.08330589346587658, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.2231760062277317, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2703, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.375, + "completions/mean_length": 62.47916793823242, + "completions/min_length": 34.25, + "epoch": 5.375279225614296, + "grad_norm": 0.9947492539809185, + "kl": 0.096893310546875, + "learning_rate": 4.485097629285237e-07, + "loss": -0.0009502036264166236, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2704, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 69.28125190734863, + "completions/min_length": 30.5, + "epoch": 5.377264829982626, + "grad_norm": 0.007534213449500174, + "kl": 0.092926025390625, + "learning_rate": 4.4819596621046104e-07, + "loss": 9.282486280426383e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2705, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 64.7916693687439, + "completions/min_length": 30.25, + "epoch": 5.379250434350956, + "grad_norm": 0.8825768328386991, + "kl": 0.105712890625, + "learning_rate": 4.478821901166907e-07, + "loss": -0.021621834486722946, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2706, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 62.19791793823242, + "completions/min_length": 33.25, + "epoch": 5.381236038719285, + "grad_norm": 2.2184819558525875, + "kl": 0.177001953125, + "learning_rate": 4.4756843477213365e-07, + "loss": 0.0025391201488673687, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2707, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 78.98958492279053, + "completions/min_length": 35.25, + "epoch": 5.383221643087615, + "grad_norm": 0.005625743754760762, + "kl": 0.111572265625, + "learning_rate": 4.472547003017027e-07, + "loss": 0.00011153100058436394, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2708, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 79.97916889190674, + "completions/min_length": 37.875, + "epoch": 5.385207247455944, + "grad_norm": 0.0048962897343131015, + "kl": 0.1190185546875, + "learning_rate": 4.469409868303022e-07, + "loss": 0.0001189793911180459, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2709, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 70.6666693687439, + "completions/min_length": 30.125, + "epoch": 5.387192851824274, + "grad_norm": 0.0041510375051012375, + "kl": 0.09808349609375, + "learning_rate": 4.466272944828282e-07, + "loss": 9.807931201066822e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2710, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.5, + "completions/mean_length": 69.52083587646484, + "completions/min_length": 34.25, + "epoch": 5.389178456192604, + "grad_norm": 0.003736015138750805, + "kl": 0.11456298828125, + "learning_rate": 4.4631362338416824e-07, + "loss": 0.00011433372128522024, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2711, + "train_speed(iter/s)": 0.022714 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 66.81250190734863, + "completions/min_length": 27.125, + "epoch": 5.391164060560933, + "grad_norm": 0.004311131701960796, + "kl": 0.104339599609375, + "learning_rate": 4.459999736592015e-07, + "loss": 0.00010438440222060308, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2712, + "train_speed(iter/s)": 0.022713 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.010416666666666666, + "completions/max_length": 248.625, + "completions/mean_length": 82.19791984558105, + "completions/min_length": 34.25, + "epoch": 5.393149664929263, + "grad_norm": 0.47187730217754664, + "kl": 0.11151123046875, + "learning_rate": 4.456863454327986e-07, + "loss": 0.02150268852710724, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 2713, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 72.59375143051147, + "completions/min_length": 33.5, + "epoch": 5.395135269297592, + "grad_norm": 0.6022694409384836, + "kl": 0.11199951171875, + "learning_rate": 4.453727388298217e-07, + "loss": 0.014322001487016678, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2714, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.125, + "completions/mean_length": 68.22916793823242, + "completions/min_length": 35.25, + "epoch": 5.397120873665922, + "grad_norm": 0.0035347124586965705, + "kl": 0.097137451171875, + "learning_rate": 4.4505915397512433e-07, + "loss": 9.70976470853202e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2715, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 72.82291841506958, + "completions/min_length": 32.0, + "epoch": 5.399106478034252, + "grad_norm": 0.004458249324434249, + "kl": 0.10284423828125, + "learning_rate": 4.447455909935513e-07, + "loss": 0.0001026418904075399, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2716, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 71.56250381469727, + "completions/min_length": 34.875, + "epoch": 5.401092082402581, + "grad_norm": 0.004972111275595303, + "kl": 0.10711669921875, + "learning_rate": 4.444320500099387e-07, + "loss": 0.00010711140203056857, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2717, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 77.63541984558105, + "completions/min_length": 37.25, + "epoch": 5.403077686770911, + "grad_norm": 0.005823599494328186, + "kl": 0.1009521484375, + "learning_rate": 4.441185311491139e-07, + "loss": 0.00010087410919368267, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2718, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 74.04166984558105, + "completions/min_length": 34.125, + "epoch": 5.405063291139241, + "grad_norm": 1.1652621577955118, + "kl": 0.12432861328125, + "learning_rate": 4.438050345358955e-07, + "loss": 0.00012446939945220947, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2719, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 71.26041746139526, + "completions/min_length": 31.25, + "epoch": 5.40704889550757, + "grad_norm": 0.008644861278789832, + "kl": 0.123779296875, + "learning_rate": 4.434915602950931e-07, + "loss": 0.0001238467521034181, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2720, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.0, + "completions/mean_length": 69.22916889190674, + "completions/min_length": 35.5, + "epoch": 5.4090344998759, + "grad_norm": 0.9950152246198813, + "kl": 0.11181640625, + "learning_rate": 4.431781085515073e-07, + "loss": 0.00025595849729143083, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2721, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.25, + "completions/mean_length": 71.8229193687439, + "completions/min_length": 28.5, + "epoch": 5.411020104244229, + "grad_norm": 0.00530408725328939, + "kl": 0.10540771484375, + "learning_rate": 4.428646794299305e-07, + "loss": 0.00010551625018706545, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2722, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 67.77083587646484, + "completions/min_length": 28.5, + "epoch": 5.413005708612559, + "grad_norm": 0.004754276706151285, + "kl": 0.1080322265625, + "learning_rate": 4.425512730551451e-07, + "loss": 0.00010791717795655131, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2723, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.0, + "completions/mean_length": 74.69791793823242, + "completions/min_length": 30.75, + "epoch": 5.414991312980889, + "grad_norm": 0.004741860412861385, + "kl": 0.0977783203125, + "learning_rate": 4.4223788955192496e-07, + "loss": 9.790381591301411e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2724, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.625, + "completions/mean_length": 73.89583539962769, + "completions/min_length": 36.875, + "epoch": 5.416976917349218, + "grad_norm": 1.3053207298235663, + "kl": 0.12725830078125, + "learning_rate": 4.419245290450347e-07, + "loss": 0.0027800463140010834, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2725, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.375, + "completions/mean_length": 76.20833492279053, + "completions/min_length": 26.5, + "epoch": 5.418962521717548, + "grad_norm": 0.0053440894425301165, + "kl": 0.08514404296875, + "learning_rate": 4.4161119165923e-07, + "loss": 8.523924043402076e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2726, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 73.80208396911621, + "completions/min_length": 34.375, + "epoch": 5.420948126085877, + "grad_norm": 0.004176322705832965, + "kl": 0.104095458984375, + "learning_rate": 4.412978775192569e-07, + "loss": 0.00010409795504529029, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2727, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 65.66666889190674, + "completions/min_length": 32.375, + "epoch": 5.422933730454207, + "grad_norm": 0.005299010607605375, + "kl": 0.1090087890625, + "learning_rate": 4.4098458674985273e-07, + "loss": 0.00010888499673455954, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2728, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.875, + "completions/mean_length": 73.36458587646484, + "completions/min_length": 31.75, + "epoch": 5.424919334822537, + "grad_norm": 0.003750018330106704, + "kl": 0.12127685546875, + "learning_rate": 4.406713194757451e-07, + "loss": 0.0001212742063216865, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2729, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 74.18750381469727, + "completions/min_length": 31.75, + "epoch": 5.426904939190866, + "grad_norm": 0.9352347713699581, + "kl": 0.10321044921875, + "learning_rate": 4.403580758216525e-07, + "loss": 0.015215136110782623, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2730, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.25, + "completions/mean_length": 80.07291984558105, + "completions/min_length": 37.25, + "epoch": 5.428890543559196, + "grad_norm": 0.0041428450854698005, + "kl": 0.11962890625, + "learning_rate": 4.400448559122838e-07, + "loss": 0.00011956026719417423, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2731, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 68.04166841506958, + "completions/min_length": 31.0, + "epoch": 5.430876147927526, + "grad_norm": 0.008000993279135282, + "kl": 0.10699462890625, + "learning_rate": 4.397316598723385e-07, + "loss": 0.00010696284880395979, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2732, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.0, + "completions/mean_length": 62.46875190734863, + "completions/min_length": 30.25, + "epoch": 5.432861752295855, + "grad_norm": 0.0044862733685761165, + "kl": 0.08282470703125, + "learning_rate": 4.3941848782650676e-07, + "loss": 8.273782441392541e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2733, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 69.37500238418579, + "completions/min_length": 24.5, + "epoch": 5.434847356664185, + "grad_norm": 0.004645467035789828, + "kl": 0.08270263671875, + "learning_rate": 4.391053398994689e-07, + "loss": 8.269631507573649e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2734, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 73.18750143051147, + "completions/min_length": 32.375, + "epoch": 5.436832961032514, + "grad_norm": 0.0047027933561566105, + "kl": 0.09796142578125, + "learning_rate": 4.38792216215896e-07, + "loss": 9.794151992537081e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2735, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 74.46875286102295, + "completions/min_length": 33.25, + "epoch": 5.438818565400844, + "grad_norm": 0.005357913022830147, + "kl": 0.091552734375, + "learning_rate": 4.384791169004492e-07, + "loss": 9.156801388598979e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2736, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 68.66666841506958, + "completions/min_length": 26.75, + "epoch": 5.440804169769174, + "grad_norm": 0.004121777672769144, + "kl": 0.10382080078125, + "learning_rate": 4.3816604207777997e-07, + "loss": 0.00010377737635280937, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2737, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 78.52083492279053, + "completions/min_length": 36.875, + "epoch": 5.442789774137503, + "grad_norm": 0.003545212904099309, + "kl": 0.10894775390625, + "learning_rate": 4.3785299187253014e-07, + "loss": 0.00010903298243647441, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2738, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 69.07291889190674, + "completions/min_length": 27.5, + "epoch": 5.444775378505833, + "grad_norm": 1.3973487238593616, + "kl": 0.10791015625, + "learning_rate": 4.375399664093318e-07, + "loss": 0.008710963651537895, + "memory(GiB)": 94.21, + "reward": 1.8125000149011612, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.8125000074505806, + "rewards/CineAccuracyORM/std": 0.23100870847702026, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2739, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 73.98958492279053, + "completions/min_length": 34.625, + "epoch": 5.446760982874162, + "grad_norm": 0.00561296944755905, + "kl": 0.1016845703125, + "learning_rate": 4.372269658128069e-07, + "loss": 0.00010171587928198278, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2740, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.5, + "completions/mean_length": 64.69791746139526, + "completions/min_length": 34.875, + "epoch": 5.448746587242492, + "grad_norm": 0.0033485004630473252, + "kl": 0.07318115234375, + "learning_rate": 4.369139902075674e-07, + "loss": 7.320643635466695e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2741, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 75.42708539962769, + "completions/min_length": 34.25, + "epoch": 5.450732191610822, + "grad_norm": 0.1101558695389743, + "kl": 0.117431640625, + "learning_rate": 4.3660103971821627e-07, + "loss": 0.00011730985715985298, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2742, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.5, + "completions/mean_length": 66.33333539962769, + "completions/min_length": 29.125, + "epoch": 5.452717795979151, + "grad_norm": 0.005121904845751762, + "kl": 0.10894775390625, + "learning_rate": 4.362881144693453e-07, + "loss": 0.00010876153100980446, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2743, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 63.052085399627686, + "completions/min_length": 30.625, + "epoch": 5.454703400347481, + "grad_norm": 0.00422751582903777, + "kl": 0.11529541015625, + "learning_rate": 4.3597521458553674e-07, + "loss": 0.00011522645218065009, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2744, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.875, + "completions/mean_length": 64.34375143051147, + "completions/min_length": 27.75, + "epoch": 5.456689004715811, + "grad_norm": 0.004916915939521794, + "kl": 0.103546142578125, + "learning_rate": 4.3566234019136284e-07, + "loss": 0.00010361853492213413, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2745, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 67.89583539962769, + "completions/min_length": 36.375, + "epoch": 5.45867460908414, + "grad_norm": 0.003740552757059603, + "kl": 0.10321044921875, + "learning_rate": 4.3534949141138553e-07, + "loss": 0.00010333474347135052, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2746, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 75.80208539962769, + "completions/min_length": 33.0, + "epoch": 5.46066021345247, + "grad_norm": 0.004279682183233835, + "kl": 0.12255859375, + "learning_rate": 4.350366683701567e-07, + "loss": 0.00012245573452673852, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2747, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 76.8229193687439, + "completions/min_length": 30.125, + "epoch": 5.462645817820799, + "grad_norm": 0.0038114538094619556, + "kl": 0.09649658203125, + "learning_rate": 4.347238711922175e-07, + "loss": 9.651340951677412e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2748, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 69.45833492279053, + "completions/min_length": 33.0, + "epoch": 5.464631422189129, + "grad_norm": 0.009061094087616113, + "kl": 0.08917236328125, + "learning_rate": 4.344111000020996e-07, + "loss": 8.919953688746318e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2749, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 69.16666889190674, + "completions/min_length": 30.5, + "epoch": 5.466617026557459, + "grad_norm": 0.006235587790266846, + "kl": 0.10162353515625, + "learning_rate": 4.340983549243238e-07, + "loss": 0.00010143526014871895, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2750, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 71.69791841506958, + "completions/min_length": 28.75, + "epoch": 5.468602630925788, + "grad_norm": 1.640871944411773, + "kl": 0.089813232421875, + "learning_rate": 4.337856360834005e-07, + "loss": -0.01011543907225132, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2751, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 65.31250143051147, + "completions/min_length": 29.375, + "epoch": 5.470588235294118, + "grad_norm": 0.005109223648818194, + "kl": 0.09039306640625, + "learning_rate": 4.3347294360382974e-07, + "loss": 9.038949792739004e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2752, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 71.51041793823242, + "completions/min_length": 30.0, + "epoch": 5.472573839662447, + "grad_norm": 0.004346783368402475, + "kl": 0.123291015625, + "learning_rate": 4.3316027761010115e-07, + "loss": 0.00012337403313722461, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2753, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.375, + "completions/mean_length": 72.895836353302, + "completions/min_length": 30.5, + "epoch": 5.474559444030777, + "grad_norm": 0.0032625851940395133, + "kl": 0.09619140625, + "learning_rate": 4.328476382266937e-07, + "loss": 9.625362872611731e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2754, + "train_speed(iter/s)": 0.022712 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 73.75000095367432, + "completions/min_length": 30.5, + "epoch": 5.476545048399107, + "grad_norm": 0.9956874669307478, + "kl": 0.084747314453125, + "learning_rate": 4.325350255780757e-07, + "loss": -0.01096474938094616, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2755, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 72.46875286102295, + "completions/min_length": 29.625, + "epoch": 5.478530652767436, + "grad_norm": 0.004596652531736091, + "kl": 0.10223388671875, + "learning_rate": 4.3222243978870514e-07, + "loss": 0.00010214014764642343, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2756, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.125, + "completions/mean_length": 71.08333396911621, + "completions/min_length": 31.5, + "epoch": 5.480516257135766, + "grad_norm": 1.3504932140077563, + "kl": 0.0909423828125, + "learning_rate": 4.31909880983029e-07, + "loss": 0.006385432090610266, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2757, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 77.87500190734863, + "completions/min_length": 33.125, + "epoch": 5.482501861504096, + "grad_norm": 0.004577171923515002, + "kl": 0.11993408203125, + "learning_rate": 4.315973492854836e-07, + "loss": 0.00011998624540865421, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2758, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 74.75000286102295, + "completions/min_length": 31.875, + "epoch": 5.484487465872425, + "grad_norm": 0.003835642536919521, + "kl": 0.10211181640625, + "learning_rate": 4.312848448204946e-07, + "loss": 0.00010209978063357994, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2759, + "train_speed(iter/s)": 0.022711 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.125, + "completions/mean_length": 78.73958539962769, + "completions/min_length": 33.875, + "epoch": 5.486473070240755, + "grad_norm": 0.006161170813168482, + "kl": 0.09149169921875, + "learning_rate": 4.3097236771247653e-07, + "loss": 9.150305413641036e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2760, + "train_speed(iter/s)": 0.02271 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 78.98958587646484, + "completions/min_length": 31.25, + "epoch": 5.488458674609084, + "grad_norm": 0.006641730782599897, + "kl": 0.099761962890625, + "learning_rate": 4.306599180858332e-07, + "loss": 9.973629494197667e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2761, + "train_speed(iter/s)": 0.022709 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 74.32291889190674, + "completions/min_length": 33.125, + "epoch": 5.490444278977414, + "grad_norm": 1.1114238278661688, + "kl": 0.09893798828125, + "learning_rate": 4.3034749606495754e-07, + "loss": -0.012792940251529217, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2762, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 76.52083587646484, + "completions/min_length": 31.0, + "epoch": 5.492429883345744, + "grad_norm": 0.00834590355009779, + "kl": 0.12042236328125, + "learning_rate": 4.300351017742315e-07, + "loss": 0.00012039339344482869, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2763, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 72.32291793823242, + "completions/min_length": 31.625, + "epoch": 5.494415487714073, + "grad_norm": 0.007555199005939186, + "kl": 0.100433349609375, + "learning_rate": 4.2972273533802584e-07, + "loss": 0.00010044292139355093, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2764, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 71.71875190734863, + "completions/min_length": 29.0, + "epoch": 5.496401092082403, + "grad_norm": 0.006999279441102167, + "kl": 0.11468505859375, + "learning_rate": 4.294103968807003e-07, + "loss": 0.00011442082177381963, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2765, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 76.43750190734863, + "completions/min_length": 30.5, + "epoch": 5.498386696450732, + "grad_norm": 0.0053074686751306, + "kl": 0.0948486328125, + "learning_rate": 4.2909808652660355e-07, + "loss": 9.486137423664331e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2766, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.25, + "completions/mean_length": 74.71875286102295, + "completions/min_length": 36.125, + "epoch": 5.500372300819062, + "grad_norm": 1.5960427560363841, + "kl": 0.10015869140625, + "learning_rate": 4.2878580440007313e-07, + "loss": 0.00593077577650547, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2767, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.875, + "completions/mean_length": 78.03125238418579, + "completions/min_length": 32.375, + "epoch": 5.502357905187392, + "grad_norm": 0.004398314903651077, + "kl": 0.08953857421875, + "learning_rate": 4.284735506254349e-07, + "loss": 8.946903835749254e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2768, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 71.1666693687439, + "completions/min_length": 34.0, + "epoch": 5.504343509555721, + "grad_norm": 0.9460714344328963, + "kl": 0.12957763671875, + "learning_rate": 4.2816132532700377e-07, + "loss": 0.001300673931837082, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2769, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.25, + "completions/mean_length": 67.60416746139526, + "completions/min_length": 32.875, + "epoch": 5.506329113924051, + "grad_norm": 0.004698637822917459, + "kl": 0.10321044921875, + "learning_rate": 4.2784912862908377e-07, + "loss": 0.00010317980195395648, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2770, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.375, + "completions/mean_length": 79.76041889190674, + "completions/min_length": 29.75, + "epoch": 5.508314718292381, + "grad_norm": 0.0055203803039646756, + "kl": 0.10186767578125, + "learning_rate": 4.275369606559667e-07, + "loss": 0.00010178033699048683, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2771, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 79.1979193687439, + "completions/min_length": 37.0, + "epoch": 5.51030032266071, + "grad_norm": 0.007044136385528007, + "kl": 0.09283447265625, + "learning_rate": 4.2722482153193336e-07, + "loss": 9.287783177569509e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2772, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 75.80208587646484, + "completions/min_length": 34.875, + "epoch": 5.51228592702904, + "grad_norm": 0.015216403875062002, + "kl": 0.1114501953125, + "learning_rate": 4.2691271138125296e-07, + "loss": 0.00011135396198369563, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2773, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.125, + "completions/mean_length": 76.53125238418579, + "completions/min_length": 34.75, + "epoch": 5.514271531397369, + "grad_norm": 0.011715117900308119, + "kl": 0.12481689453125, + "learning_rate": 4.266006303281833e-07, + "loss": 0.0001247400650754571, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2774, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 68.90625095367432, + "completions/min_length": 24.125, + "epoch": 5.516257135765699, + "grad_norm": 0.012060594862211975, + "kl": 0.09033203125, + "learning_rate": 4.262885784969705e-07, + "loss": 9.040775330504403e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2775, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 73.10416889190674, + "completions/min_length": 31.25, + "epoch": 5.518242740134029, + "grad_norm": 0.003547073998586838, + "kl": 0.08782958984375, + "learning_rate": 4.259765560118489e-07, + "loss": 8.780855569057167e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2776, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 105.25, + "completions/mean_length": 61.208335399627686, + "completions/min_length": 32.875, + "epoch": 5.520228344502358, + "grad_norm": 0.006252713256961044, + "kl": 0.094024658203125, + "learning_rate": 4.256645629970416e-07, + "loss": 9.388947364641353e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2777, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.375, + "completions/mean_length": 79.94791984558105, + "completions/min_length": 25.75, + "epoch": 5.522213948870688, + "grad_norm": 0.8101613636592477, + "kl": 0.09478759765625, + "learning_rate": 4.253525995767595e-07, + "loss": 0.010251425206661224, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2778, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 75.40625143051147, + "completions/min_length": 36.25, + "epoch": 5.524199553239017, + "grad_norm": 0.7213241319177761, + "kl": 0.1138916015625, + "learning_rate": 4.2504066587520206e-07, + "loss": 0.007435914129018784, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2779, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 73.79166889190674, + "completions/min_length": 29.375, + "epoch": 5.526185157607347, + "grad_norm": 0.004234806914973846, + "kl": 0.083099365234375, + "learning_rate": 4.247287620165565e-07, + "loss": 8.310205157613382e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2780, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 76.73958492279053, + "completions/min_length": 33.375, + "epoch": 5.528170761975677, + "grad_norm": 0.0035572609430518137, + "kl": 0.112060546875, + "learning_rate": 4.244168881249986e-07, + "loss": 0.00011203760368516669, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2781, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.125, + "completions/mean_length": 87.70833683013916, + "completions/min_length": 37.375, + "epoch": 5.530156366344006, + "grad_norm": 0.00333882602804208, + "kl": 0.10821533203125, + "learning_rate": 4.241050443246919e-07, + "loss": 0.00010833704436663538, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2782, + "train_speed(iter/s)": 0.022708 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.125, + "completions/mean_length": 77.35416889190674, + "completions/min_length": 26.5, + "epoch": 5.532141970712336, + "grad_norm": 0.005040117171171672, + "kl": 0.10223388671875, + "learning_rate": 4.23793230739788e-07, + "loss": 0.00010223597928415984, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2783, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.75, + "completions/mean_length": 81.36458492279053, + "completions/min_length": 34.125, + "epoch": 5.5341275750806656, + "grad_norm": 0.00737583101403247, + "kl": 0.10992431640625, + "learning_rate": 4.234814474944269e-07, + "loss": 0.00010996578203048557, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2784, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 71.40625286102295, + "completions/min_length": 24.625, + "epoch": 5.536113179448995, + "grad_norm": 0.0037289528468716316, + "kl": 0.09576416015625, + "learning_rate": 4.231696947127358e-07, + "loss": 9.570604743203148e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2785, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 77.92708444595337, + "completions/min_length": 33.75, + "epoch": 5.538098783817325, + "grad_norm": 0.0034856449937609364, + "kl": 0.090240478515625, + "learning_rate": 4.228579725188304e-07, + "loss": 9.02874962775968e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2786, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.625, + "completions/mean_length": 79.90625286102295, + "completions/min_length": 30.75, + "epoch": 5.540084388185654, + "grad_norm": 0.8492295518933224, + "kl": 0.097869873046875, + "learning_rate": 4.2254628103681395e-07, + "loss": -0.001284400699660182, + "memory(GiB)": 94.21, + "reward": 1.5729166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.5729166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2787, + "train_speed(iter/s)": 0.022707 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 81.81250190734863, + "completions/min_length": 30.625, + "epoch": 5.542069992553984, + "grad_norm": 0.7979358263391226, + "kl": 0.09454345703125, + "learning_rate": 4.222346203907773e-07, + "loss": 0.0003351215273141861, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2788, + "train_speed(iter/s)": 0.022705 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 63.031250953674316, + "completions/min_length": 22.5, + "epoch": 5.5440555969223135, + "grad_norm": 0.004731990258344243, + "kl": 0.080230712890625, + "learning_rate": 4.2192299070479923e-07, + "loss": 8.015319326659665e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2789, + "train_speed(iter/s)": 0.022705 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.5, + "completions/mean_length": 75.18750190734863, + "completions/min_length": 39.0, + "epoch": 5.546041201290643, + "grad_norm": 0.003992872859329794, + "kl": 0.0897216796875, + "learning_rate": 4.216113921029462e-07, + "loss": 8.980886195786297e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2790, + "train_speed(iter/s)": 0.022706 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.625, + "completions/mean_length": 83.18750286102295, + "completions/min_length": 32.0, + "epoch": 5.5480268056589725, + "grad_norm": 0.0034274734631158763, + "kl": 0.0958251953125, + "learning_rate": 4.212998247092724e-07, + "loss": 9.573270654072985e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2791, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.875, + "completions/mean_length": 87.88541889190674, + "completions/min_length": 35.125, + "epoch": 5.550012410027302, + "grad_norm": 0.004416743496037694, + "kl": 0.084869384765625, + "learning_rate": 4.2098828864781937e-07, + "loss": 8.485731086693704e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2792, + "train_speed(iter/s)": 0.022704 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 78.50000190734863, + "completions/min_length": 31.125, + "epoch": 5.551998014395632, + "grad_norm": 0.0035557824975480583, + "kl": 0.07843017578125, + "learning_rate": 4.206767840426163e-07, + "loss": 7.842038758099079e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2793, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.125, + "completions/mean_length": 71.81250190734863, + "completions/min_length": 26.625, + "epoch": 5.5539836187639615, + "grad_norm": 0.004262223429072593, + "kl": 0.07781982421875, + "learning_rate": 4.203653110176798e-07, + "loss": 7.77176464907825e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2794, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.625, + "completions/mean_length": 90.82291984558105, + "completions/min_length": 33.75, + "epoch": 5.555969223132291, + "grad_norm": 0.7148260267243509, + "kl": 0.09979248046875, + "learning_rate": 4.2005386969701395e-07, + "loss": 0.00037801143480464816, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2795, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.0, + "completions/mean_length": 78.78125095367432, + "completions/min_length": 34.375, + "epoch": 5.5579548275006205, + "grad_norm": 0.004525739703181718, + "kl": 0.0941162109375, + "learning_rate": 4.197424602046103e-07, + "loss": 9.414648229721934e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2796, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.0, + "completions/mean_length": 78.79166984558105, + "completions/min_length": 30.0, + "epoch": 5.5599404318689505, + "grad_norm": 0.0036919676371623025, + "kl": 0.1055908203125, + "learning_rate": 4.1943108266444716e-07, + "loss": 0.0001056083056028001, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2797, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 77.02083587646484, + "completions/min_length": 28.5, + "epoch": 5.5619260362372795, + "grad_norm": 0.004006692445778771, + "kl": 0.080718994140625, + "learning_rate": 4.1911973720049117e-07, + "loss": 8.070625335676596e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2798, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.875, + "completions/mean_length": 77.58333539962769, + "completions/min_length": 29.0, + "epoch": 5.5639116406056095, + "grad_norm": 0.0036904961820925907, + "kl": 0.11065673828125, + "learning_rate": 4.1880842393669543e-07, + "loss": 0.00011045071005355567, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2799, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 76.54166841506958, + "completions/min_length": 29.125, + "epoch": 5.565897244973939, + "grad_norm": 0.00476950471355126, + "kl": 0.09918212890625, + "learning_rate": 4.1849714299700024e-07, + "loss": 9.918311843648553e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2800, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 71.39583587646484, + "completions/min_length": 29.0, + "epoch": 5.5678828493422685, + "grad_norm": 0.004399608508489987, + "kl": 0.084075927734375, + "learning_rate": 4.1818589450533323e-07, + "loss": 8.404294203501195e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2801, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 76.79167079925537, + "completions/min_length": 34.375, + "epoch": 5.5698684537105985, + "grad_norm": 0.0045738558151656875, + "kl": 0.08685302734375, + "learning_rate": 4.178746785856092e-07, + "loss": 8.682149928063154e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2802, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 73.31250286102295, + "completions/min_length": 31.75, + "epoch": 5.5718540580789275, + "grad_norm": 0.0040530072078604545, + "kl": 0.093994140625, + "learning_rate": 4.1756349536172967e-07, + "loss": 9.394960943609476e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2803, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 73.16666793823242, + "completions/min_length": 33.625, + "epoch": 5.5738396624472575, + "grad_norm": 0.003492079602908078, + "kl": 0.086578369140625, + "learning_rate": 4.1725234495758355e-07, + "loss": 8.657629950903356e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2804, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 68.26041889190674, + "completions/min_length": 31.125, + "epoch": 5.5758252668155865, + "grad_norm": 0.004189735944043726, + "kl": 0.09124755859375, + "learning_rate": 4.169412274970463e-07, + "loss": 9.126631630351767e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2805, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.75, + "completions/mean_length": 75.71875095367432, + "completions/min_length": 28.25, + "epoch": 5.5778108711839165, + "grad_norm": 0.0039298656870085154, + "kl": 0.09027099609375, + "learning_rate": 4.1663014310398053e-07, + "loss": 9.025847248267382e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2806, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 71.39583587646484, + "completions/min_length": 27.0, + "epoch": 5.5797964755522464, + "grad_norm": 0.004541390025132296, + "kl": 0.08673095703125, + "learning_rate": 4.163190919022356e-07, + "loss": 8.66981572471559e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2807, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.875, + "completions/mean_length": 83.48958587646484, + "completions/min_length": 32.75, + "epoch": 5.5817820799205755, + "grad_norm": 0.003499435353091427, + "kl": 0.10589599609375, + "learning_rate": 4.1600807401564754e-07, + "loss": 0.00010582594404695556, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2808, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 78.18750238418579, + "completions/min_length": 33.375, + "epoch": 5.5837676842889055, + "grad_norm": 0.00460714383538939, + "kl": 0.08837890625, + "learning_rate": 4.1569708956803917e-07, + "loss": 8.837709901854396e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2809, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.625, + "completions/mean_length": 79.85416984558105, + "completions/min_length": 28.0, + "epoch": 5.585753288657235, + "grad_norm": 0.0037842346853757293, + "kl": 0.09576416015625, + "learning_rate": 4.1538613868322e-07, + "loss": 9.578990284353495e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2810, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 69.76041841506958, + "completions/min_length": 36.0, + "epoch": 5.5877388930255645, + "grad_norm": 0.004071194172173444, + "kl": 0.083221435546875, + "learning_rate": 4.150752214849864e-07, + "loss": 8.315254672197625e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2811, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 74.0104193687439, + "completions/min_length": 29.75, + "epoch": 5.589724497393894, + "grad_norm": 0.003568085591484155, + "kl": 0.096405029296875, + "learning_rate": 4.1476433809712117e-07, + "loss": 9.640575444791466e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2812, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 64.27083444595337, + "completions/min_length": 24.25, + "epoch": 5.5917101017622235, + "grad_norm": 0.003298590255078025, + "kl": 0.07684326171875, + "learning_rate": 4.144534886433935e-07, + "loss": 7.677805842831731e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2813, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 75.95833683013916, + "completions/min_length": 30.875, + "epoch": 5.5936957061305534, + "grad_norm": 0.00348809985468097, + "kl": 0.08978271484375, + "learning_rate": 4.141426732475592e-07, + "loss": 8.975945820566267e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2814, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 71.85416889190674, + "completions/min_length": 31.375, + "epoch": 5.595681310498883, + "grad_norm": 0.004052753480553658, + "kl": 0.09381103515625, + "learning_rate": 4.138318920333605e-07, + "loss": 9.384001896250993e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2815, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 72.07291793823242, + "completions/min_length": 27.125, + "epoch": 5.5976669148672125, + "grad_norm": 0.004321734421488672, + "kl": 0.08721923828125, + "learning_rate": 4.135211451245264e-07, + "loss": 8.720854384591803e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2816, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 66.76041889190674, + "completions/min_length": 31.625, + "epoch": 5.599652519235542, + "grad_norm": 2.400922390158963, + "kl": 0.10882568359375, + "learning_rate": 4.1321043264477107e-07, + "loss": 0.011667689308524132, + "memory(GiB)": 94.21, + "reward": 1.9062500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.9062500074505806, + "rewards/CineAccuracyORM/std": 0.12591182813048363, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2817, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.625, + "completions/mean_length": 69.61458396911621, + "completions/min_length": 29.0, + "epoch": 5.6016381236038715, + "grad_norm": 0.003499673092992823, + "kl": 0.087249755859375, + "learning_rate": 4.1289975471779653e-07, + "loss": 8.720988989807665e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2818, + "train_speed(iter/s)": 0.022698 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 69.833336353302, + "completions/min_length": 31.375, + "epoch": 5.603623727972201, + "grad_norm": 0.0040166137241982106, + "kl": 0.085693359375, + "learning_rate": 4.125891114672902e-07, + "loss": 8.561991853639483e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2819, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 80.64583587646484, + "completions/min_length": 35.125, + "epoch": 5.605609332340531, + "grad_norm": 0.003384935053344692, + "kl": 0.08905029296875, + "learning_rate": 4.122785030169255e-07, + "loss": 8.904648711904883e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2820, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 74.00000190734863, + "completions/min_length": 32.625, + "epoch": 5.6075949367088604, + "grad_norm": 0.0034837593373750813, + "kl": 0.094940185546875, + "learning_rate": 4.119679294903625e-07, + "loss": 9.492408571531996e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2821, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.25, + "completions/mean_length": 67.51041841506958, + "completions/min_length": 27.75, + "epoch": 5.60958054107719, + "grad_norm": 0.6566340598572169, + "kl": 0.10760498046875, + "learning_rate": 4.1165739101124704e-07, + "loss": 0.013741843402385712, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2822, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.375, + "completions/mean_length": 78.98958587646484, + "completions/min_length": 32.875, + "epoch": 5.61156614544552, + "grad_norm": 0.003500954751752791, + "kl": 0.085540771484375, + "learning_rate": 4.1134688770321117e-07, + "loss": 8.559721754863858e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2823, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 74.39583492279053, + "completions/min_length": 29.5, + "epoch": 5.613551749813849, + "grad_norm": 1.1100773902979642, + "kl": 0.09246826171875, + "learning_rate": 4.110364196898728e-07, + "loss": -0.009677091613411903, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2824, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 71.12500286102295, + "completions/min_length": 30.25, + "epoch": 5.615537354182179, + "grad_norm": 0.003721097562736777, + "kl": 0.092376708984375, + "learning_rate": 4.1072598709483606e-07, + "loss": 9.241603402188048e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2825, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.125, + "completions/mean_length": 75.18750143051147, + "completions/min_length": 32.375, + "epoch": 5.617522958550508, + "grad_norm": 0.008131444471014607, + "kl": 0.085968017578125, + "learning_rate": 4.1041559004169073e-07, + "loss": 8.589683420723304e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2826, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 68.38541889190674, + "completions/min_length": 29.875, + "epoch": 5.619508562918838, + "grad_norm": 0.00364861352457814, + "kl": 0.092376708984375, + "learning_rate": 4.1010522865401257e-07, + "loss": 9.24561609281227e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2827, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.125, + "completions/mean_length": 69.64583492279053, + "completions/min_length": 24.25, + "epoch": 5.621494167287168, + "grad_norm": 0.0038094154290808113, + "kl": 0.0887451171875, + "learning_rate": 4.097949030553629e-07, + "loss": 8.8644286734052e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2828, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 65.2604193687439, + "completions/min_length": 19.0, + "epoch": 5.623479771655497, + "grad_norm": 0.0038007389427531196, + "kl": 0.08843994140625, + "learning_rate": 4.094846133692891e-07, + "loss": 8.835688640829176e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2829, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.25, + "completions/mean_length": 86.66666984558105, + "completions/min_length": 30.875, + "epoch": 5.625465376023827, + "grad_norm": 0.003930123066646798, + "kl": 0.11083984375, + "learning_rate": 4.0917435971932403e-07, + "loss": 0.00011081757838837802, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2830, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.125, + "completions/mean_length": 65.3541693687439, + "completions/min_length": 27.875, + "epoch": 5.627450980392156, + "grad_norm": 0.0046369701533612705, + "kl": 0.091552734375, + "learning_rate": 4.0886414222898626e-07, + "loss": 9.159876935882494e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2831, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 75.15625190734863, + "completions/min_length": 26.75, + "epoch": 5.629436584760486, + "grad_norm": 0.0036160326580972763, + "kl": 0.10333251953125, + "learning_rate": 4.085539610217802e-07, + "loss": 0.00010320844012312591, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2832, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 74.73958539962769, + "completions/min_length": 30.625, + "epoch": 5.631422189128816, + "grad_norm": 0.003160076416520772, + "kl": 0.0965576171875, + "learning_rate": 4.0824381622119543e-07, + "loss": 9.652607695898041e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2833, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 63.53125333786011, + "completions/min_length": 23.25, + "epoch": 5.633407793497145, + "grad_norm": 0.9661848731161083, + "kl": 0.11309814453125, + "learning_rate": 4.0793370795070737e-07, + "loss": -0.008300778456032276, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2834, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.125, + "completions/mean_length": 62.98958492279053, + "completions/min_length": 28.125, + "epoch": 5.635393397865475, + "grad_norm": 0.004041444232851166, + "kl": 0.0953369140625, + "learning_rate": 4.076236363337766e-07, + "loss": 9.529919771011919e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2835, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 73.53125190734863, + "completions/min_length": 29.5, + "epoch": 5.637379002233805, + "grad_norm": 0.003670685330173102, + "kl": 0.09912109375, + "learning_rate": 4.073136014938495e-07, + "loss": 9.907546336762607e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2836, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 71.68750286102295, + "completions/min_length": 31.875, + "epoch": 5.639364606602134, + "grad_norm": 0.004118533487192548, + "kl": 0.0882568359375, + "learning_rate": 4.070036035543572e-07, + "loss": 8.822607196634635e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2837, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 70.91666889190674, + "completions/min_length": 29.0, + "epoch": 5.641350210970464, + "grad_norm": 2.8281828765365593, + "kl": 0.0960693359375, + "learning_rate": 4.0669364263871655e-07, + "loss": -0.005358380731195211, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.10518955811858177, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2838, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.375, + "completions/mean_length": 76.59375190734863, + "completions/min_length": 29.0, + "epoch": 5.643335815338793, + "grad_norm": 0.0032349761202531387, + "kl": 0.0941162109375, + "learning_rate": 4.0638371887032996e-07, + "loss": 9.409929043613374e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2839, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 65.90625190734863, + "completions/min_length": 27.125, + "epoch": 5.645321419707123, + "grad_norm": 0.005501191730010586, + "kl": 0.106201171875, + "learning_rate": 4.0607383237258445e-07, + "loss": 0.00010618512169457972, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2840, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.625, + "completions/mean_length": 63.43750190734863, + "completions/min_length": 26.125, + "epoch": 5.647307024075453, + "grad_norm": 0.0033587813387234294, + "kl": 0.08978271484375, + "learning_rate": 4.057639832688525e-07, + "loss": 8.96783167263493e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2841, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 75.51041889190674, + "completions/min_length": 29.0, + "epoch": 5.649292628443782, + "grad_norm": 0.005688036601464512, + "kl": 0.10888671875, + "learning_rate": 4.0545417168249157e-07, + "loss": 0.00010891951387748122, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2842, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.25, + "completions/mean_length": 82.26041889190674, + "completions/min_length": 28.375, + "epoch": 5.651278232812112, + "grad_norm": 1.2719389705932045, + "kl": 0.122711181640625, + "learning_rate": 4.051443977368444e-07, + "loss": 0.014759061858057976, + "memory(GiB)": 94.21, + "reward": 1.6145833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6145833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2843, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 69.020836353302, + "completions/min_length": 24.5, + "epoch": 5.653263837180441, + "grad_norm": 0.7673907687171013, + "kl": 0.088897705078125, + "learning_rate": 4.048346615552387e-07, + "loss": -0.005343483295291662, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2844, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 76.86458683013916, + "completions/min_length": 30.125, + "epoch": 5.655249441548771, + "grad_norm": 1.261682669100062, + "kl": 0.0936279296875, + "learning_rate": 4.045249632609865e-07, + "loss": 0.003070330247282982, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2845, + "train_speed(iter/s)": 0.022703 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.125, + "completions/mean_length": 79.85416984558105, + "completions/min_length": 26.875, + "epoch": 5.657235045917101, + "grad_norm": 2.1128298929039504, + "kl": 0.11700439453125, + "learning_rate": 4.0421530297738603e-07, + "loss": -0.006457652896642685, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2846, + "train_speed(iter/s)": 0.022702 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 80.33333396911621, + "completions/min_length": 32.125, + "epoch": 5.65922065028543, + "grad_norm": 0.00367759112604411, + "kl": 0.0732421875, + "learning_rate": 4.039056808277194e-07, + "loss": 7.310794899240136e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2847, + "train_speed(iter/s)": 0.022701 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.875, + "completions/mean_length": 73.37500286102295, + "completions/min_length": 28.0, + "epoch": 5.66120625465376, + "grad_norm": 0.021929967897152867, + "kl": 0.1116943359375, + "learning_rate": 4.035960969352537e-07, + "loss": 0.00011159037239849567, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2848, + "train_speed(iter/s)": 0.0227 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.875, + "completions/mean_length": 82.08333587646484, + "completions/min_length": 29.5, + "epoch": 5.66319185902209, + "grad_norm": 0.007933532407083242, + "kl": 0.09375, + "learning_rate": 4.0328655142324097e-07, + "loss": 9.373571811011061e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2849, + "train_speed(iter/s)": 0.022699 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 77.85416841506958, + "completions/min_length": 35.5, + "epoch": 5.665177463390419, + "grad_norm": 0.01950427463338215, + "kl": 0.09857177734375, + "learning_rate": 4.029770444149178e-07, + "loss": 9.865294850897044e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2850, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 75.82291984558105, + "completions/min_length": 26.375, + "epoch": 5.667163067758749, + "grad_norm": 1.113681173242187, + "kl": 0.107666015625, + "learning_rate": 4.0266757603350565e-07, + "loss": 0.0012330388417467475, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2851, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.125, + "completions/mean_length": 84.84375286102295, + "completions/min_length": 33.875, + "epoch": 5.669148672127078, + "grad_norm": 0.012310216612396499, + "kl": 0.111328125, + "learning_rate": 4.023581464022103e-07, + "loss": 0.00011126259050797671, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2852, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 73.71875190734863, + "completions/min_length": 25.875, + "epoch": 5.671134276495408, + "grad_norm": 1.967958842481573, + "kl": 0.1077880859375, + "learning_rate": 4.020487556442227e-07, + "loss": -0.0020981894340366125, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2853, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.875, + "completions/mean_length": 85.06250190734863, + "completions/min_length": 30.875, + "epoch": 5.673119880863738, + "grad_norm": 0.7212718210509244, + "kl": 0.128662109375, + "learning_rate": 4.0173940388271755e-07, + "loss": 0.006483552046120167, + "memory(GiB)": 94.21, + "reward": 1.6041666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6041666679084301, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2854, + "train_speed(iter/s)": 0.022697 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 81.28125190734863, + "completions/min_length": 34.125, + "epoch": 5.675105485232067, + "grad_norm": 0.0076694494668766485, + "kl": 0.089111328125, + "learning_rate": 4.014300912408545e-07, + "loss": 8.910287579055876e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2855, + "train_speed(iter/s)": 0.022696 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 76.34375190734863, + "completions/min_length": 25.875, + "epoch": 5.677091089600397, + "grad_norm": 0.008991046188857488, + "kl": 0.0914306640625, + "learning_rate": 4.0112081784177767e-07, + "loss": 9.137419692706317e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2856, + "train_speed(iter/s)": 0.022696 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.375, + "completions/mean_length": 78.48958539962769, + "completions/min_length": 26.0, + "epoch": 5.679076693968726, + "grad_norm": 0.007259986515532278, + "kl": 0.10211181640625, + "learning_rate": 4.008115838086151e-07, + "loss": 0.00010215988731943071, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2857, + "train_speed(iter/s)": 0.022695 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 214.75, + "completions/mean_length": 84.71875190734863, + "completions/min_length": 31.5, + "epoch": 5.681062298337056, + "grad_norm": 0.009064280885433934, + "kl": 0.10443115234375, + "learning_rate": 4.0050238926447974e-07, + "loss": 0.00010429555550217628, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2858, + "train_speed(iter/s)": 0.022694 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.375, + "completions/mean_length": 75.20833492279053, + "completions/min_length": 20.625, + "epoch": 5.683047902705386, + "grad_norm": 0.009373881983971306, + "kl": 0.0899658203125, + "learning_rate": 4.001932343324683e-07, + "loss": 8.994461677502841e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2859, + "train_speed(iter/s)": 0.022694 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.625, + "completions/mean_length": 81.21875286102295, + "completions/min_length": 27.375, + "epoch": 5.685033507073715, + "grad_norm": 0.0163795483525699, + "kl": 0.10748291015625, + "learning_rate": 3.998841191356622e-07, + "loss": 0.0001074122847057879, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2860, + "train_speed(iter/s)": 0.022693 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.125, + "completions/mean_length": 87.7604193687439, + "completions/min_length": 41.125, + "epoch": 5.687019111442045, + "grad_norm": 0.005258456943394062, + "kl": 0.09344482421875, + "learning_rate": 3.9957504379712667e-07, + "loss": 9.331519686384127e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2861, + "train_speed(iter/s)": 0.022692 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 76.42708683013916, + "completions/min_length": 26.625, + "epoch": 5.689004715810375, + "grad_norm": 0.004046060953278138, + "kl": 0.0819091796875, + "learning_rate": 3.992660084399112e-07, + "loss": 8.185824117390439e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2862, + "train_speed(iter/s)": 0.022692 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.0, + "completions/mean_length": 74.79166889190674, + "completions/min_length": 30.75, + "epoch": 5.690990320178704, + "grad_norm": 2.1318269900085824, + "kl": 0.09063720703125, + "learning_rate": 3.989570131870494e-07, + "loss": -0.001156588434241712, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2863, + "train_speed(iter/s)": 0.022692 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 77.25000190734863, + "completions/min_length": 32.5, + "epoch": 5.692975924547034, + "grad_norm": 0.007583302065831584, + "kl": 0.105712890625, + "learning_rate": 3.986480581615591e-07, + "loss": 0.0001056445762515068, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2864, + "train_speed(iter/s)": 0.022692 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 79.58333587646484, + "completions/min_length": 23.875, + "epoch": 5.694961528915364, + "grad_norm": 0.004385651748224059, + "kl": 0.1146240234375, + "learning_rate": 3.983391434864414e-07, + "loss": 0.00011460146924946457, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2865, + "train_speed(iter/s)": 0.022692 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.875, + "completions/mean_length": 88.16666889190674, + "completions/min_length": 31.125, + "epoch": 5.696947133283693, + "grad_norm": 0.0034099568272014983, + "kl": 0.088470458984375, + "learning_rate": 3.9803026928468205e-07, + "loss": 8.846410491969436e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2866, + "train_speed(iter/s)": 0.022691 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.625, + "completions/mean_length": 80.43750095367432, + "completions/min_length": 35.125, + "epoch": 5.698932737652023, + "grad_norm": 0.011822260535962208, + "kl": 0.11077880859375, + "learning_rate": 3.9772143567925076e-07, + "loss": 0.00011074334906879812, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2867, + "train_speed(iter/s)": 0.022691 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.25, + "completions/mean_length": 87.53125190734863, + "completions/min_length": 30.5, + "epoch": 5.700918342020352, + "grad_norm": 0.0034870339685979966, + "kl": 0.093597412109375, + "learning_rate": 3.9741264279310047e-07, + "loss": 9.35839198064059e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2868, + "train_speed(iter/s)": 0.02269 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 85.07291793823242, + "completions/min_length": 29.5, + "epoch": 5.702903946388682, + "grad_norm": 0.005243673408938612, + "kl": 0.07794189453125, + "learning_rate": 3.9710389074916825e-07, + "loss": 7.790450763422996e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2869, + "train_speed(iter/s)": 0.022688 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 77.06250095367432, + "completions/min_length": 32.0, + "epoch": 5.704889550757011, + "grad_norm": 0.005938725781157538, + "kl": 0.101531982421875, + "learning_rate": 3.9679517967037495e-07, + "loss": 0.00010142349492525682, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2870, + "train_speed(iter/s)": 0.022689 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 74.79166841506958, + "completions/min_length": 28.25, + "epoch": 5.706875155125341, + "grad_norm": 0.004702679930715288, + "kl": 0.090087890625, + "learning_rate": 3.9648650967962505e-07, + "loss": 9.016798867378384e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2871, + "train_speed(iter/s)": 0.022688 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 80.41666889190674, + "completions/min_length": 27.625, + "epoch": 5.708860759493671, + "grad_norm": 0.003765407027215818, + "kl": 0.09271240234375, + "learning_rate": 3.961778808998065e-07, + "loss": 9.270670125260949e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2872, + "train_speed(iter/s)": 0.022688 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 75.32291889190674, + "completions/min_length": 29.5, + "epoch": 5.710846363862, + "grad_norm": 1.1173074582664202, + "kl": 0.10137939453125, + "learning_rate": 3.9586929345379127e-07, + "loss": 0.017305200919508934, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2873, + "train_speed(iter/s)": 0.022687 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 74.98958539962769, + "completions/min_length": 29.875, + "epoch": 5.71283196823033, + "grad_norm": 0.0040203168515333355, + "kl": 0.1070556640625, + "learning_rate": 3.955607474644345e-07, + "loss": 0.00010717045370256528, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2874, + "train_speed(iter/s)": 0.022688 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 78.739586353302, + "completions/min_length": 24.75, + "epoch": 5.71481757259866, + "grad_norm": 0.004629111068471344, + "kl": 0.086578369140625, + "learning_rate": 3.9525224305457495e-07, + "loss": 8.650859672343358e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2875, + "train_speed(iter/s)": 0.022688 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.5, + "completions/mean_length": 87.72916746139526, + "completions/min_length": 34.375, + "epoch": 5.716803176966989, + "grad_norm": 0.005799682580273611, + "kl": 0.08709716796875, + "learning_rate": 3.949437803470349e-07, + "loss": 8.704730134923011e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2876, + "train_speed(iter/s)": 0.022688 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 69.96875095367432, + "completions/min_length": 28.625, + "epoch": 5.718788781335319, + "grad_norm": 0.005373147582449111, + "kl": 0.08203125, + "learning_rate": 3.9463535946461974e-07, + "loss": 8.200074080377817e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2877, + "train_speed(iter/s)": 0.022688 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.5, + "completions/mean_length": 77.01041889190674, + "completions/min_length": 31.75, + "epoch": 5.720774385703649, + "grad_norm": 0.004989757980520973, + "kl": 0.093048095703125, + "learning_rate": 3.9432698053011855e-07, + "loss": 9.297170618083328e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2878, + "train_speed(iter/s)": 0.022685 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 79.98958587646484, + "completions/min_length": 32.5, + "epoch": 5.722759990071978, + "grad_norm": 1.1965771845272757, + "kl": 0.10614013671875, + "learning_rate": 3.940186436663033e-07, + "loss": -0.004934785421937704, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2879, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 81.76041889190674, + "completions/min_length": 33.875, + "epoch": 5.724745594440308, + "grad_norm": 1.3269164319492914, + "kl": 0.1026611328125, + "learning_rate": 3.9371034899593e-07, + "loss": 0.00010267397010466084, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2880, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 82.06250190734863, + "completions/min_length": 30.875, + "epoch": 5.726731198808637, + "grad_norm": 0.004972271735632945, + "kl": 0.10040283203125, + "learning_rate": 3.9340209664173693e-07, + "loss": 0.00010049731645267457, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2881, + "train_speed(iter/s)": 0.022685 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.5, + "completions/mean_length": 90.46875238418579, + "completions/min_length": 36.0, + "epoch": 5.728716803176967, + "grad_norm": 0.0038756212511577215, + "kl": 0.09271240234375, + "learning_rate": 3.930938867264461e-07, + "loss": 9.273842442780733e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2882, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.0, + "completions/mean_length": 80.75000286102295, + "completions/min_length": 23.0, + "epoch": 5.730702407545296, + "grad_norm": 0.0035390928132083372, + "kl": 0.09417724609375, + "learning_rate": 3.9278571937276247e-07, + "loss": 9.413848601980135e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2883, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 73.8541693687439, + "completions/min_length": 27.25, + "epoch": 5.732688011913626, + "grad_norm": 0.0069725602021814435, + "kl": 0.1033935546875, + "learning_rate": 3.9247759470337403e-07, + "loss": 0.00010328639473300427, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2884, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 79.145836353302, + "completions/min_length": 33.25, + "epoch": 5.734673616281956, + "grad_norm": 0.0037228136304540502, + "kl": 0.08056640625, + "learning_rate": 3.921695128409517e-07, + "loss": 8.065340807661414e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2885, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 77.18750190734863, + "completions/min_length": 37.375, + "epoch": 5.736659220650285, + "grad_norm": 0.005487361410448833, + "kl": 0.084808349609375, + "learning_rate": 3.918614739081493e-07, + "loss": 8.481842087348923e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2886, + "train_speed(iter/s)": 0.022684 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 80.15625286102295, + "completions/min_length": 33.875, + "epoch": 5.738644825018615, + "grad_norm": 0.00400914441986354, + "kl": 0.10321044921875, + "learning_rate": 3.915534780276042e-07, + "loss": 0.0001032254658639431, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2887, + "train_speed(iter/s)": 0.022682 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.875, + "completions/mean_length": 79.78125238418579, + "completions/min_length": 30.75, + "epoch": 5.740630429386945, + "grad_norm": 0.004728472274261351, + "kl": 0.109771728515625, + "learning_rate": 3.912455253219358e-07, + "loss": 0.0001098291395464912, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2888, + "train_speed(iter/s)": 0.022682 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 76.1354193687439, + "completions/min_length": 29.5, + "epoch": 5.742616033755274, + "grad_norm": 0.040205722024986866, + "kl": 0.14013671875, + "learning_rate": 3.9093761591374675e-07, + "loss": 0.00014028666191734374, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2889, + "train_speed(iter/s)": 0.022681 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.5, + "completions/mean_length": 76.59375238418579, + "completions/min_length": 30.125, + "epoch": 5.744601638123604, + "grad_norm": 0.003686857667146047, + "kl": 0.086822509765625, + "learning_rate": 3.9062974992562224e-07, + "loss": 8.68609276949428e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2890, + "train_speed(iter/s)": 0.022681 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 194.75, + "completions/mean_length": 84.60416984558105, + "completions/min_length": 27.125, + "epoch": 5.746587242491934, + "grad_norm": 0.003979245273432298, + "kl": 0.0970458984375, + "learning_rate": 3.9032192748013043e-07, + "loss": 9.707448771223426e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2891, + "train_speed(iter/s)": 0.02268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.375, + "completions/mean_length": 67.33333587646484, + "completions/min_length": 21.75, + "epoch": 5.748572846860263, + "grad_norm": 0.004204862100027973, + "kl": 0.0811767578125, + "learning_rate": 3.9001414869982206e-07, + "loss": 8.112274372251704e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2892, + "train_speed(iter/s)": 0.022681 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 78.67708683013916, + "completions/min_length": 24.25, + "epoch": 5.750558451228593, + "grad_norm": 1.7226839420927158, + "kl": 0.0838623046875, + "learning_rate": 3.8970641370723e-07, + "loss": 0.00878245010972023, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.8333333395421505, + "rewards/CineAccuracyORM/std": 0.17548104748129845, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2893, + "train_speed(iter/s)": 0.022681 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 78.46875286102295, + "completions/min_length": 30.0, + "epoch": 5.752544055596922, + "grad_norm": 0.0036201103180731925, + "kl": 0.1190185546875, + "learning_rate": 3.893987226248707e-07, + "loss": 0.00011911365436390042, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2894, + "train_speed(iter/s)": 0.02268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 77.76041841506958, + "completions/min_length": 31.75, + "epoch": 5.754529659965252, + "grad_norm": 0.0039046202016866354, + "kl": 0.09246826171875, + "learning_rate": 3.890910755752424e-07, + "loss": 9.243890963261947e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2895, + "train_speed(iter/s)": 0.02268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.875, + "completions/mean_length": 91.23958492279053, + "completions/min_length": 40.75, + "epoch": 5.756515264333581, + "grad_norm": 0.8238213184679289, + "kl": 0.0931396484375, + "learning_rate": 3.8878347268082577e-07, + "loss": -0.007115792483091354, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2896, + "train_speed(iter/s)": 0.02268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.875, + "completions/mean_length": 78.89583539962769, + "completions/min_length": 26.875, + "epoch": 5.758500868701911, + "grad_norm": 0.0038819953302039055, + "kl": 0.09844970703125, + "learning_rate": 3.884759140640842e-07, + "loss": 9.849973139353096e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2897, + "train_speed(iter/s)": 0.02268 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 74.84375190734863, + "completions/min_length": 29.5, + "epoch": 5.760486473070241, + "grad_norm": 0.004409141246914084, + "kl": 0.08538818359375, + "learning_rate": 3.881683998474633e-07, + "loss": 8.543799049220979e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2898, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.5, + "completions/mean_length": 83.93750190734863, + "completions/min_length": 33.125, + "epoch": 5.76247207743857, + "grad_norm": 1.359126264867561, + "kl": 0.106353759765625, + "learning_rate": 3.878609301533912e-07, + "loss": -0.0023418504279106855, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2899, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 76.76041793823242, + "completions/min_length": 29.75, + "epoch": 5.7644576818069, + "grad_norm": 0.004656560504781321, + "kl": 0.10186767578125, + "learning_rate": 3.875535051042778e-07, + "loss": 0.00010198411473538727, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2900, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 70.09375333786011, + "completions/min_length": 21.875, + "epoch": 5.76644328617523, + "grad_norm": 0.0043039560461887606, + "kl": 0.09051513671875, + "learning_rate": 3.87246124822516e-07, + "loss": 9.051974484464154e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2901, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.75, + "completions/mean_length": 83.41666984558105, + "completions/min_length": 30.75, + "epoch": 5.768428890543559, + "grad_norm": 0.003974606641506099, + "kl": 0.10223388671875, + "learning_rate": 3.8693878943048025e-07, + "loss": 0.0001022075884975493, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2902, + "train_speed(iter/s)": 0.022678 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 68.92708539962769, + "completions/min_length": 28.125, + "epoch": 5.770414494911889, + "grad_norm": 1.147784167066906, + "kl": 0.095062255859375, + "learning_rate": 3.8663149905052737e-07, + "loss": -0.0020088721066713333, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2903, + "train_speed(iter/s)": 0.022677 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 79.052086353302, + "completions/min_length": 28.875, + "epoch": 5.772400099280219, + "grad_norm": 0.0033709819089969145, + "kl": 0.09381103515625, + "learning_rate": 3.8632425380499635e-07, + "loss": 9.371935448143631e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2904, + "train_speed(iter/s)": 0.022677 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 76.93750190734863, + "completions/min_length": 28.5, + "epoch": 5.774385703648548, + "grad_norm": 0.8454195716256399, + "kl": 0.08197021484375, + "learning_rate": 3.8601705381620774e-07, + "loss": 0.014056676998734474, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2905, + "train_speed(iter/s)": 0.022677 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 70.92708587646484, + "completions/min_length": 18.625, + "epoch": 5.776371308016878, + "grad_norm": 0.01833428620684871, + "kl": 0.12359619140625, + "learning_rate": 3.857098992064647e-07, + "loss": 0.00012341572437435389, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2906, + "train_speed(iter/s)": 0.022676 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 73.82292032241821, + "completions/min_length": 22.75, + "epoch": 5.778356912385207, + "grad_norm": 0.003682226585944467, + "kl": 0.0858154296875, + "learning_rate": 3.8540279009805185e-07, + "loss": 8.579586574342102e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2907, + "train_speed(iter/s)": 0.022676 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.875, + "completions/mean_length": 73.44791889190674, + "completions/min_length": 20.125, + "epoch": 5.780342516753537, + "grad_norm": 0.0037552103918148017, + "kl": 0.08489990234375, + "learning_rate": 3.850957266132361e-07, + "loss": 8.486880687996745e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2908, + "train_speed(iter/s)": 0.022675 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.25, + "completions/mean_length": 80.55208396911621, + "completions/min_length": 21.25, + "epoch": 5.782328121121866, + "grad_norm": 1.7545861382134982, + "kl": 0.107666015625, + "learning_rate": 3.847887088742659e-07, + "loss": 0.00010767951607704163, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2909, + "train_speed(iter/s)": 0.022675 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.25, + "completions/mean_length": 73.12500143051147, + "completions/min_length": 26.25, + "epoch": 5.784313725490196, + "grad_norm": 1.0189915019535853, + "kl": 0.087982177734375, + "learning_rate": 3.844817370033716e-07, + "loss": 0.016254860907793045, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2910, + "train_speed(iter/s)": 0.022673 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.875, + "completions/mean_length": 75.33333587646484, + "completions/min_length": 27.25, + "epoch": 5.786299329858526, + "grad_norm": 0.0040009282264094085, + "kl": 0.1044921875, + "learning_rate": 3.841748111227651e-07, + "loss": 0.00010461873898748308, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2911, + "train_speed(iter/s)": 0.022674 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 70.85416841506958, + "completions/min_length": 31.5, + "epoch": 5.788284934226855, + "grad_norm": 0.004523081990784025, + "kl": 0.094451904296875, + "learning_rate": 3.838679313546405e-07, + "loss": 9.434594539925456e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2912, + "train_speed(iter/s)": 0.022674 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.625, + "completions/mean_length": 80.58333492279053, + "completions/min_length": 34.0, + "epoch": 5.790270538595185, + "grad_norm": 1.9379524164896957, + "kl": 0.0933837890625, + "learning_rate": 3.8356109782117275e-07, + "loss": 9.336074435850605e-05, + "memory(GiB)": 94.21, + "reward": 1.6458333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6458333358168602, + "rewards/CineAccuracyORM/std": 0.3879413418471813, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2913, + "train_speed(iter/s)": 0.022674 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 79.83333492279053, + "completions/min_length": 27.75, + "epoch": 5.792256142963515, + "grad_norm": 1.6970084821016755, + "kl": 0.089599609375, + "learning_rate": 3.832543106445188e-07, + "loss": -0.0005534527590498328, + "memory(GiB)": 94.21, + "reward": 1.6145833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6145833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2914, + "train_speed(iter/s)": 0.022673 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 64.33333539962769, + "completions/min_length": 27.25, + "epoch": 5.794241747331844, + "grad_norm": 0.01129079340426373, + "kl": 0.113037109375, + "learning_rate": 3.8294756994681776e-07, + "loss": 0.00011286174412816763, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2915, + "train_speed(iter/s)": 0.022673 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 75.19791889190674, + "completions/min_length": 26.125, + "epoch": 5.796227351700174, + "grad_norm": 1.5064358724183429, + "kl": 0.090789794921875, + "learning_rate": 3.8264087585018924e-07, + "loss": 0.007380732800811529, + "memory(GiB)": 94.21, + "reward": 1.7500000149011612, + "reward_std": 0.05103103630244732, + "rewards/CineAccuracyORM/mean": 0.7500000074505806, + "rewards/CineAccuracyORM/std": 0.29628782719373703, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2916, + "train_speed(iter/s)": 0.022673 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 201.875, + "completions/mean_length": 85.20833492279053, + "completions/min_length": 31.25, + "epoch": 5.798212956068504, + "grad_norm": 0.003846152313106342, + "kl": 0.09326171875, + "learning_rate": 3.8233422847673475e-07, + "loss": 9.313251212006435e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2917, + "train_speed(iter/s)": 0.022672 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.375, + "completions/mean_length": 83.46875286102295, + "completions/min_length": 37.25, + "epoch": 5.800198560436833, + "grad_norm": 1.0992639251272731, + "kl": 0.1005859375, + "learning_rate": 3.8202762794853715e-07, + "loss": 0.02382952719926834, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.375051774084568, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2918, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 75.71875190734863, + "completions/min_length": 25.625, + "epoch": 5.802184164805163, + "grad_norm": 1.743163953784932, + "kl": 0.08819580078125, + "learning_rate": 3.8172107438766076e-07, + "loss": -0.01659288816154003, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.05974817834794521, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.12089945748448372, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2919, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.375, + "completions/mean_length": 82.79166889190674, + "completions/min_length": 27.375, + "epoch": 5.804169769173492, + "grad_norm": 0.004523808945714486, + "kl": 0.1009521484375, + "learning_rate": 3.81414567916151e-07, + "loss": 0.00010098607162944973, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2920, + "train_speed(iter/s)": 0.022668 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.25, + "completions/mean_length": 72.57291841506958, + "completions/min_length": 27.5, + "epoch": 5.806155373541822, + "grad_norm": 0.24544418874165735, + "kl": 0.212646484375, + "learning_rate": 3.811081086560346e-07, + "loss": 0.00021282854140736163, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2921, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.25, + "completions/mean_length": 71.62500381469727, + "completions/min_length": 28.125, + "epoch": 5.808140977910151, + "grad_norm": 0.0043468580674483745, + "kl": 0.099853515625, + "learning_rate": 3.808016967293197e-07, + "loss": 9.988778037950397e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2922, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 68.51041841506958, + "completions/min_length": 29.125, + "epoch": 5.810126582278481, + "grad_norm": 0.0039887505140692205, + "kl": 0.07208251953125, + "learning_rate": 3.8049533225799534e-07, + "loss": 7.210955664049834e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2923, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.5, + "completions/mean_length": 78.75000333786011, + "completions/min_length": 24.375, + "epoch": 5.812112186646811, + "grad_norm": 0.0043856681120003765, + "kl": 0.1015625, + "learning_rate": 3.8018901536403194e-07, + "loss": 0.00010158519580727443, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2924, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 69.5104193687439, + "completions/min_length": 31.875, + "epoch": 5.81409779101514, + "grad_norm": 1.3953477739577373, + "kl": 0.094268798828125, + "learning_rate": 3.7988274616938043e-07, + "loss": -0.004629717208445072, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2925, + "train_speed(iter/s)": 0.022671 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.125, + "completions/mean_length": 70.69791841506958, + "completions/min_length": 19.875, + "epoch": 5.81608339538347, + "grad_norm": 0.006662755789561431, + "kl": 0.08282470703125, + "learning_rate": 3.7957652479597333e-07, + "loss": 8.284844079753384e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2926, + "train_speed(iter/s)": 0.022671 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 66.833336353302, + "completions/min_length": 25.0, + "epoch": 5.8180689997518, + "grad_norm": 0.003897694013395826, + "kl": 0.073394775390625, + "learning_rate": 3.7927035136572393e-07, + "loss": 7.329390064114705e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2927, + "train_speed(iter/s)": 0.022671 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 75.46875238418579, + "completions/min_length": 30.875, + "epoch": 5.820054604120129, + "grad_norm": 0.0034434875166394164, + "kl": 0.09765625, + "learning_rate": 3.7896422600052625e-07, + "loss": 9.767961455509067e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2928, + "train_speed(iter/s)": 0.022671 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 71.92708444595337, + "completions/min_length": 25.25, + "epoch": 5.822040208488459, + "grad_norm": 1.9162569131966805, + "kl": 0.087982177734375, + "learning_rate": 3.786581488222556e-07, + "loss": -0.015171239152550697, + "memory(GiB)": 94.21, + "reward": 1.9791666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9791666716337204, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2929, + "train_speed(iter/s)": 0.022671 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.125, + "completions/mean_length": 72.42708444595337, + "completions/min_length": 27.625, + "epoch": 5.824025812856789, + "grad_norm": 0.0039399923731347425, + "kl": 0.0806884765625, + "learning_rate": 3.7835211995276765e-07, + "loss": 8.071921183727682e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2930, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.5, + "completions/mean_length": 74.645836353302, + "completions/min_length": 21.875, + "epoch": 5.826011417225118, + "grad_norm": 0.00324747499578154, + "kl": 0.07037353515625, + "learning_rate": 3.780461395138991e-07, + "loss": 7.030913548078388e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2931, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 68.27083587646484, + "completions/min_length": 22.375, + "epoch": 5.827997021593448, + "grad_norm": 0.00431383145438088, + "kl": 0.07781982421875, + "learning_rate": 3.7774020762746745e-07, + "loss": 7.780019222991541e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2932, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 72.47916889190674, + "completions/min_length": 25.0, + "epoch": 5.829982625961777, + "grad_norm": 1.9090636097110216, + "kl": 0.09674072265625, + "learning_rate": 3.774343244152704e-07, + "loss": 0.0058203041553497314, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.18335824459791183, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2933, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 79.52083492279053, + "completions/min_length": 28.75, + "epoch": 5.831968230330107, + "grad_norm": 0.006316364041589908, + "kl": 0.08758544921875, + "learning_rate": 3.7712848999908676e-07, + "loss": 8.758005424169824e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2934, + "train_speed(iter/s)": 0.02267 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.0, + "completions/mean_length": 71.81250143051147, + "completions/min_length": 22.125, + "epoch": 5.833953834698436, + "grad_norm": 0.0037255999544836727, + "kl": 0.0833740234375, + "learning_rate": 3.768227045006756e-07, + "loss": 8.34009115351364e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2935, + "train_speed(iter/s)": 0.022668 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.375, + "completions/mean_length": 75.97916984558105, + "completions/min_length": 25.5, + "epoch": 5.835939439066766, + "grad_norm": 0.0035334653910409583, + "kl": 0.0794677734375, + "learning_rate": 3.765169680417769e-07, + "loss": 7.949472637847066e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2936, + "train_speed(iter/s)": 0.022668 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 83.92708492279053, + "completions/min_length": 28.875, + "epoch": 5.837925043435096, + "grad_norm": 0.003771749128448991, + "kl": 0.0833740234375, + "learning_rate": 3.7621128074411076e-07, + "loss": 8.329372212756425e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2937, + "train_speed(iter/s)": 0.022668 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 192.875, + "completions/mean_length": 73.61458587646484, + "completions/min_length": 22.5, + "epoch": 5.839910647803425, + "grad_norm": 0.022936256618794247, + "kl": 0.1143798828125, + "learning_rate": 3.759056427293778e-07, + "loss": 0.00011428249854361638, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2938, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.5, + "completions/mean_length": 70.61458587646484, + "completions/min_length": 23.125, + "epoch": 5.841896252171755, + "grad_norm": 0.00970710425053875, + "kl": 0.1025390625, + "learning_rate": 3.756000541192591e-07, + "loss": 0.00010258870315738022, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2939, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 70.95833587646484, + "completions/min_length": 25.625, + "epoch": 5.843881856540085, + "grad_norm": 0.0061579946628700895, + "kl": 0.0809326171875, + "learning_rate": 3.752945150354159e-07, + "loss": 8.097306999843568e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2940, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.75, + "completions/mean_length": 80.84375190734863, + "completions/min_length": 30.25, + "epoch": 5.845867460908414, + "grad_norm": 0.003636028911850159, + "kl": 0.08221435546875, + "learning_rate": 3.7498902559949006e-07, + "loss": 8.218929724534974e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2941, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.75, + "completions/mean_length": 79.06250238418579, + "completions/min_length": 31.375, + "epoch": 5.847853065276744, + "grad_norm": 0.003120829864230285, + "kl": 0.07861328125, + "learning_rate": 3.7468358593310303e-07, + "loss": 7.856819865992293e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2942, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 73.90625190734863, + "completions/min_length": 25.875, + "epoch": 5.849838669645074, + "grad_norm": 0.003801615504914575, + "kl": 0.0906982421875, + "learning_rate": 3.743781961578573e-07, + "loss": 9.066134225577116e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2943, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 72.88541841506958, + "completions/min_length": 26.875, + "epoch": 5.851824274013403, + "grad_norm": 0.0036436166747744335, + "kl": 0.08294677734375, + "learning_rate": 3.7407285639533505e-07, + "loss": 8.297446765936911e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2944, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 74.6041693687439, + "completions/min_length": 21.25, + "epoch": 5.853809878381733, + "grad_norm": 1.5290246542422399, + "kl": 0.09075927734375, + "learning_rate": 3.737675667670983e-07, + "loss": 0.01472728606313467, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2945, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.5, + "completions/mean_length": 84.37500190734863, + "completions/min_length": 31.0, + "epoch": 5.855795482750062, + "grad_norm": 0.005983407809149179, + "kl": 0.09405517578125, + "learning_rate": 3.7346232739468944e-07, + "loss": 9.40181635087356e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2946, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 73.17708539962769, + "completions/min_length": 21.0, + "epoch": 5.857781087118392, + "grad_norm": 0.003353061161646867, + "kl": 0.08172607421875, + "learning_rate": 3.731571383996308e-07, + "loss": 8.175710536306724e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2947, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.375, + "completions/mean_length": 62.468750953674316, + "completions/min_length": 23.375, + "epoch": 5.859766691486721, + "grad_norm": 0.004986112007044284, + "kl": 0.081817626953125, + "learning_rate": 3.7285199990342465e-07, + "loss": 8.168508065864444e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2948, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 70.25000190734863, + "completions/min_length": 29.75, + "epoch": 5.861752295855051, + "grad_norm": 0.005495352875061472, + "kl": 0.08514404296875, + "learning_rate": 3.725469120275532e-07, + "loss": 8.517235983163118e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2949, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 68.47916889190674, + "completions/min_length": 20.875, + "epoch": 5.863737900223381, + "grad_norm": 0.006157837253065416, + "kl": 0.083221435546875, + "learning_rate": 3.7224187489347844e-07, + "loss": 8.318398613482714e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2950, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 65.63541889190674, + "completions/min_length": 24.0, + "epoch": 5.86572350459171, + "grad_norm": 0.006483468559431403, + "kl": 0.09039306640625, + "learning_rate": 3.7193688862264214e-07, + "loss": 9.046150080394e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2951, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.25, + "completions/mean_length": 77.72916984558105, + "completions/min_length": 20.875, + "epoch": 5.86770910896004, + "grad_norm": 0.007216988320940811, + "kl": 0.0992431640625, + "learning_rate": 3.7163195333646594e-07, + "loss": 9.906500781653449e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2952, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 67.7916693687439, + "completions/min_length": 23.875, + "epoch": 5.86969471332837, + "grad_norm": 0.006972280389571689, + "kl": 0.089508056640625, + "learning_rate": 3.7132706915635083e-07, + "loss": 8.944849105319008e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2953, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.75, + "completions/mean_length": 80.79167032241821, + "completions/min_length": 28.0, + "epoch": 5.871680317696699, + "grad_norm": 0.005931778356708559, + "kl": 0.089874267578125, + "learning_rate": 3.710222362036779e-07, + "loss": 9.001302532851696e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2954, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 68.46875190734863, + "completions/min_length": 19.375, + "epoch": 5.873665922065029, + "grad_norm": 0.005969014872074029, + "kl": 0.0859375, + "learning_rate": 3.707174545998076e-07, + "loss": 8.594428072683513e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2955, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.875, + "completions/mean_length": 71.67708492279053, + "completions/min_length": 23.125, + "epoch": 5.875651526433359, + "grad_norm": 0.09805706996430967, + "kl": 0.27642822265625, + "learning_rate": 3.7041272446608007e-07, + "loss": 0.000276578008197248, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2956, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.875, + "completions/mean_length": 81.66666984558105, + "completions/min_length": 30.125, + "epoch": 5.877637130801688, + "grad_norm": 0.006089357713340864, + "kl": 0.08837890625, + "learning_rate": 3.701080459238148e-07, + "loss": 8.834289474179968e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2957, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.875, + "completions/mean_length": 79.95833587646484, + "completions/min_length": 22.375, + "epoch": 5.879622735170018, + "grad_norm": 0.0048544631225078535, + "kl": 0.09759521484375, + "learning_rate": 3.6980341909431103e-07, + "loss": 9.758198575582355e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2958, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.625, + "completions/mean_length": 78.30208492279053, + "completions/min_length": 19.625, + "epoch": 5.881608339538347, + "grad_norm": 0.004803929641272386, + "kl": 0.11053466796875, + "learning_rate": 3.69498844098847e-07, + "loss": 0.00011050906323362142, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2959, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 77.79166984558105, + "completions/min_length": 26.5, + "epoch": 5.883593943906677, + "grad_norm": 0.009623782732732174, + "kl": 0.086151123046875, + "learning_rate": 3.6919432105868053e-07, + "loss": 8.609489304944873e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2960, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 70.57291889190674, + "completions/min_length": 21.625, + "epoch": 5.885579548275006, + "grad_norm": 0.005301205789896645, + "kl": 0.0797119140625, + "learning_rate": 3.688898500950489e-07, + "loss": 7.974475738592446e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2961, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 76.63541889190674, + "completions/min_length": 31.0, + "epoch": 5.887565152643336, + "grad_norm": 0.003450906596375903, + "kl": 0.08489990234375, + "learning_rate": 3.6858543132916806e-07, + "loss": 8.487096056342125e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2962, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.875, + "completions/mean_length": 67.30208539962769, + "completions/min_length": 23.375, + "epoch": 5.889550757011666, + "grad_norm": 0.005052504895692179, + "kl": 0.071685791015625, + "learning_rate": 3.6828106488223427e-07, + "loss": 7.170936441980302e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2963, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.0, + "completions/mean_length": 73.06250143051147, + "completions/min_length": 24.875, + "epoch": 5.891536361379995, + "grad_norm": 0.005036433030709688, + "kl": 0.08502197265625, + "learning_rate": 3.67976750875422e-07, + "loss": 8.504463767167181e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2964, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.75, + "completions/mean_length": 61.791667461395264, + "completions/min_length": 19.75, + "epoch": 5.893521965748325, + "grad_norm": 0.00408712473934292, + "kl": 0.078704833984375, + "learning_rate": 3.6767248942988514e-07, + "loss": 7.877701864344999e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2965, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.75, + "completions/mean_length": 71.03125286102295, + "completions/min_length": 27.625, + "epoch": 5.895507570116655, + "grad_norm": 0.00573599659279665, + "kl": 0.086181640625, + "learning_rate": 3.6736828066675664e-07, + "loss": 8.620007429271936e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2966, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 73.78125143051147, + "completions/min_length": 22.25, + "epoch": 5.897493174484984, + "grad_norm": 0.005248226277860616, + "kl": 0.0999755859375, + "learning_rate": 3.6706412470714856e-07, + "loss": 0.00010005956573877484, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2967, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 75.28125190734863, + "completions/min_length": 28.375, + "epoch": 5.899478778853314, + "grad_norm": 0.004118834077966699, + "kl": 0.07537841796875, + "learning_rate": 3.667600216721519e-07, + "loss": 7.537403871538118e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2968, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.625, + "completions/mean_length": 77.16666841506958, + "completions/min_length": 28.25, + "epoch": 5.901464383221644, + "grad_norm": 0.003818959198151138, + "kl": 0.0887451171875, + "learning_rate": 3.6645597168283636e-07, + "loss": 8.871472527971491e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2969, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.875, + "completions/mean_length": 76.39583492279053, + "completions/min_length": 27.5, + "epoch": 5.903449987589973, + "grad_norm": 0.007421889798294304, + "kl": 0.11346435546875, + "learning_rate": 3.661519748602511e-07, + "loss": 0.00011346233077347279, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2970, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 63.843751430511475, + "completions/min_length": 26.0, + "epoch": 5.905435591958303, + "grad_norm": 1.3520482868020578, + "kl": 0.088348388671875, + "learning_rate": 3.6584803132542356e-07, + "loss": 0.016340874135494232, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2971, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 69.927086353302, + "completions/min_length": 30.375, + "epoch": 5.907421196326632, + "grad_norm": 0.007912083235969224, + "kl": 0.09619140625, + "learning_rate": 3.655441411993603e-07, + "loss": 9.617401519790292e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2972, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 68.7916669845581, + "completions/min_length": 21.0, + "epoch": 5.909406800694962, + "grad_norm": 0.00868182488605808, + "kl": 0.09735107421875, + "learning_rate": 3.652403046030462e-07, + "loss": 9.739540837472305e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2973, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.875, + "completions/mean_length": 76.89583492279053, + "completions/min_length": 17.5, + "epoch": 5.911392405063291, + "grad_norm": 0.008532889612779303, + "kl": 0.10479736328125, + "learning_rate": 3.649365216574453e-07, + "loss": 0.00010475765157025307, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2974, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.75, + "completions/mean_length": 69.51041889190674, + "completions/min_length": 20.125, + "epoch": 5.913378009431621, + "grad_norm": 0.003483703631857241, + "kl": 0.08673095703125, + "learning_rate": 3.646327924835e-07, + "loss": 8.664555934956297e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2975, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.5, + "completions/mean_length": 82.14583683013916, + "completions/min_length": 24.125, + "epoch": 5.915363613799951, + "grad_norm": 0.004752179572459591, + "kl": 0.07733154296875, + "learning_rate": 3.6432911720213124e-07, + "loss": 7.731119694653898e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2976, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 69.20833444595337, + "completions/min_length": 18.625, + "epoch": 5.91734921816828, + "grad_norm": 0.5853186624109498, + "kl": 0.20550537109375, + "learning_rate": 3.6402549593423893e-07, + "loss": 0.016102951020002365, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2977, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.0, + "completions/mean_length": 87.23958587646484, + "completions/min_length": 33.5, + "epoch": 5.91933482253661, + "grad_norm": 0.7611127935842638, + "kl": 0.11114501953125, + "learning_rate": 3.6372192880070097e-07, + "loss": 0.0026620272547006607, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2978, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.625, + "completions/mean_length": 85.13541984558105, + "completions/min_length": 26.75, + "epoch": 5.9213204269049395, + "grad_norm": 0.005794860863805745, + "kl": 0.08740234375, + "learning_rate": 3.6341841592237407e-07, + "loss": 8.740430348552763e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2979, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.375, + "completions/mean_length": 76.7291693687439, + "completions/min_length": 31.0, + "epoch": 5.923306031273269, + "grad_norm": 1.21158157663013, + "kl": 0.097900390625, + "learning_rate": 3.6311495742009304e-07, + "loss": 0.002060196129605174, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2980, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 77.25000143051147, + "completions/min_length": 26.875, + "epoch": 5.9252916356415986, + "grad_norm": 0.004366902761875255, + "kl": 0.0908203125, + "learning_rate": 3.628115534146714e-07, + "loss": 9.082164615392685e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2981, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 74.98958587646484, + "completions/min_length": 27.375, + "epoch": 5.9272772400099285, + "grad_norm": 0.0034984088361081018, + "kl": 0.09613037109375, + "learning_rate": 3.6250820402690053e-07, + "loss": 9.61034675128758e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2982, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 249.25, + "completions/mean_length": 88.87500286102295, + "completions/min_length": 22.875, + "epoch": 5.929262844378258, + "grad_norm": 0.9122087914842973, + "kl": 0.1951904296875, + "learning_rate": 3.622049093775501e-07, + "loss": 0.010865895077586174, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2983, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 74.1354193687439, + "completions/min_length": 25.25, + "epoch": 5.9312484487465875, + "grad_norm": 0.004693587594116673, + "kl": 0.096893310546875, + "learning_rate": 3.619016695873689e-07, + "loss": 9.692844469100237e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2984, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.875, + "completions/mean_length": 76.0729193687439, + "completions/min_length": 16.0, + "epoch": 5.933234053114917, + "grad_norm": 0.004897663390481458, + "kl": 0.09503173828125, + "learning_rate": 3.6159848477708255e-07, + "loss": 9.503068577032536e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2985, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.25, + "completions/mean_length": 83.36458587646484, + "completions/min_length": 27.125, + "epoch": 5.9352196574832465, + "grad_norm": 0.004857511985454761, + "kl": 0.080780029296875, + "learning_rate": 3.612953550673957e-07, + "loss": 8.082823478616774e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2986, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.875, + "completions/mean_length": 75.15625143051147, + "completions/min_length": 25.375, + "epoch": 5.937205261851576, + "grad_norm": 0.011694055729766768, + "kl": 0.079345703125, + "learning_rate": 3.6099228057899055e-07, + "loss": 7.924922101665288e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2987, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 70.23958587646484, + "completions/min_length": 22.25, + "epoch": 5.9391908662199056, + "grad_norm": 0.00624293029874596, + "kl": 0.10369873046875, + "learning_rate": 3.6068926143252774e-07, + "loss": 0.00010362219472881407, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2988, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 67.33333587646484, + "completions/min_length": 21.625, + "epoch": 5.9411764705882355, + "grad_norm": 0.004475921118529464, + "kl": 0.074798583984375, + "learning_rate": 3.603862977486456e-07, + "loss": 7.480620843125507e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2989, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.0, + "completions/mean_length": 72.81250238418579, + "completions/min_length": 17.375, + "epoch": 5.943162074956565, + "grad_norm": 0.0050260122735072315, + "kl": 0.1019287109375, + "learning_rate": 3.6008338964796013e-07, + "loss": 0.00010196936636930332, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2990, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 71.89583492279053, + "completions/min_length": 24.875, + "epoch": 5.9451476793248945, + "grad_norm": 0.005172651937816761, + "kl": 0.11724853515625, + "learning_rate": 3.59780537251066e-07, + "loss": 0.00011721058399416506, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2991, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 74.13541984558105, + "completions/min_length": 23.375, + "epoch": 5.9471332836932245, + "grad_norm": 0.005079963609127899, + "kl": 0.10150146484375, + "learning_rate": 3.594777406785351e-07, + "loss": 0.00010150492744287476, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2992, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.0, + "completions/mean_length": 80.89583587646484, + "completions/min_length": 24.875, + "epoch": 5.9491188880615535, + "grad_norm": 0.00582672905249592, + "kl": 0.09539794921875, + "learning_rate": 3.5917500005091704e-07, + "loss": 9.530916577205062e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2993, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.5, + "completions/mean_length": 77.36458492279053, + "completions/min_length": 23.75, + "epoch": 5.9511044924298835, + "grad_norm": 0.005135815814550793, + "kl": 0.080596923828125, + "learning_rate": 3.5887231548873935e-07, + "loss": 8.065038127824664e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2994, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 73.51041793823242, + "completions/min_length": 19.625, + "epoch": 5.953090096798213, + "grad_norm": 0.004943173991657411, + "kl": 0.09637451171875, + "learning_rate": 3.5856968711250735e-07, + "loss": 9.636885806685314e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2995, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 185.75, + "completions/mean_length": 81.71875286102295, + "completions/min_length": 25.25, + "epoch": 5.9550757011665425, + "grad_norm": 0.8318914149732651, + "kl": 0.10394287109375, + "learning_rate": 3.5826711504270376e-07, + "loss": 0.00010397534060757607, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2996, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 68.93750095367432, + "completions/min_length": 22.5, + "epoch": 5.9570613055348725, + "grad_norm": 0.8835777121408408, + "kl": 0.177490234375, + "learning_rate": 3.5796459939978893e-07, + "loss": 0.003880441188812256, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2997, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.375, + "completions/mean_length": 69.79166889190674, + "completions/min_length": 21.375, + "epoch": 5.9590469099032015, + "grad_norm": 0.00442838891107234, + "kl": 0.08392333984375, + "learning_rate": 3.5766214030420095e-07, + "loss": 8.392141171498224e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2998, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.625, + "completions/mean_length": 70.45833492279053, + "completions/min_length": 19.625, + "epoch": 5.9610325142715315, + "grad_norm": 1.039928644425254, + "kl": 0.10028076171875, + "learning_rate": 3.573597378763552e-07, + "loss": -0.0047088852152228355, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 2999, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 68.16666841506958, + "completions/min_length": 22.0, + "epoch": 5.9630181186398605, + "grad_norm": 2.005029702131463, + "kl": 0.10845947265625, + "learning_rate": 3.5705739223664455e-07, + "loss": -0.011583349667489529, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3000, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 215.875, + "completions/mean_length": 76.35416841506958, + "completions/min_length": 23.5, + "epoch": 5.9650037230081905, + "grad_norm": 1.3729627300662692, + "kl": 0.08941650390625, + "learning_rate": 3.5675510350543933e-07, + "loss": 0.0010114660253748298, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3001, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 69.68750286102295, + "completions/min_length": 16.25, + "epoch": 5.96698932737652, + "grad_norm": 0.0037574323421503265, + "kl": 0.09136962890625, + "learning_rate": 3.564528718030869e-07, + "loss": 9.133273124461994e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3002, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.25, + "completions/mean_length": 69.65625333786011, + "completions/min_length": 21.875, + "epoch": 5.9689749317448495, + "grad_norm": 1.9937825834877494, + "kl": 0.09912109375, + "learning_rate": 3.561506972499123e-07, + "loss": 9.93019639281556e-05, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3003, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.0, + "completions/mean_length": 52.59375190734863, + "completions/min_length": 16.25, + "epoch": 5.9709605361131795, + "grad_norm": 0.0052650178946769405, + "kl": 0.077423095703125, + "learning_rate": 3.5584857996621766e-07, + "loss": 7.736920088063926e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3004, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 61.44791793823242, + "completions/min_length": 21.375, + "epoch": 5.972946140481509, + "grad_norm": 1.8411907169207193, + "kl": 0.1083984375, + "learning_rate": 3.5554652007228236e-07, + "loss": -0.00985028874129057, + "memory(GiB)": 94.21, + "reward": 1.7500000149011612, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.7500000074505806, + "rewards/CineAccuracyORM/std": 0.306039284914732, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3005, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.625, + "completions/mean_length": 70.37500286102295, + "completions/min_length": 19.875, + "epoch": 5.9749317448498385, + "grad_norm": 0.005944787733696136, + "kl": 0.10211181640625, + "learning_rate": 3.552445176883629e-07, + "loss": 0.00010209568426944315, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3006, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 70.83333492279053, + "completions/min_length": 24.375, + "epoch": 5.976917349218168, + "grad_norm": 0.004626703184411186, + "kl": 0.1005859375, + "learning_rate": 3.5494257293469285e-07, + "loss": 0.00010056763130705804, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3007, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.625, + "completions/mean_length": 67.75000190734863, + "completions/min_length": 23.875, + "epoch": 5.978902953586498, + "grad_norm": 0.004506350776103028, + "kl": 0.094818115234375, + "learning_rate": 3.546406859314829e-07, + "loss": 9.477618732489645e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3008, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 67.96875190734863, + "completions/min_length": 18.0, + "epoch": 5.980888557954827, + "grad_norm": 0.00680046156960817, + "kl": 0.08392333984375, + "learning_rate": 3.5433885679892075e-07, + "loss": 8.388960122829303e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3009, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.5, + "completions/mean_length": 70.0104193687439, + "completions/min_length": 20.875, + "epoch": 5.982874162323157, + "grad_norm": 0.004347740317199421, + "kl": 0.0872802734375, + "learning_rate": 3.5403708565717086e-07, + "loss": 8.725229417905211e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3010, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 73.93750190734863, + "completions/min_length": 24.125, + "epoch": 5.9848597666914864, + "grad_norm": 1.0430656049182414, + "kl": 0.2037353515625, + "learning_rate": 3.5373537262637465e-07, + "loss": -0.008169452659785748, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3011, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 71.73958444595337, + "completions/min_length": 27.625, + "epoch": 5.986845371059816, + "grad_norm": 0.016795101580157106, + "kl": 0.13067626953125, + "learning_rate": 3.5343371782665105e-07, + "loss": 0.00013058530748821795, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3012, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 73.42708587646484, + "completions/min_length": 22.875, + "epoch": 5.9888309754281455, + "grad_norm": 0.8903363707388529, + "kl": 0.11309814453125, + "learning_rate": 3.531321213780949e-07, + "loss": 0.005973357707262039, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3013, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 63.94791841506958, + "completions/min_length": 15.5, + "epoch": 5.990816579796475, + "grad_norm": 0.007654375298453226, + "kl": 0.09576416015625, + "learning_rate": 3.528305834007782e-07, + "loss": 9.577290620654821e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3014, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 66.73958587646484, + "completions/min_length": 20.375, + "epoch": 5.992802184164805, + "grad_norm": 1.5989800342398168, + "kl": 0.1014404296875, + "learning_rate": 3.525291040147498e-07, + "loss": -0.00199575605802238, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3015, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 64.54166889190674, + "completions/min_length": 17.0, + "epoch": 5.994787788533134, + "grad_norm": 0.00716979784356222, + "kl": 0.1171875, + "learning_rate": 3.522276833400349e-07, + "loss": 0.00011689918756019324, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3016, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 70.18750190734863, + "completions/min_length": 22.875, + "epoch": 5.996773392901464, + "grad_norm": 1.0281974600293744, + "kl": 0.10400390625, + "learning_rate": 3.519263214966355e-07, + "loss": -0.0014040371170267463, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.10518955811858177, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3017, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.375, + "completions/mean_length": 58.92708444595337, + "completions/min_length": 21.375, + "epoch": 5.998758997269794, + "grad_norm": 0.007848128392484928, + "kl": 0.0992431640625, + "learning_rate": 3.5162501860453044e-07, + "loss": 9.92728746496141e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3018, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.0, + "completions/mean_length": 69.36458396911621, + "completions/min_length": 17.375, + "epoch": 6.00198560436833, + "grad_norm": 0.007164685475978047, + "kl": 0.091064453125, + "learning_rate": 3.513237747836747e-07, + "loss": 9.108962694881484e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3019, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 66.86458587646484, + "completions/min_length": 19.75, + "epoch": 6.003971208736659, + "grad_norm": 0.7847226339114398, + "kl": 0.08758544921875, + "learning_rate": 3.510225901539998e-07, + "loss": -0.005176226608455181, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3020, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.875, + "completions/mean_length": 71.91666889190674, + "completions/min_length": 26.5, + "epoch": 6.005956813104989, + "grad_norm": 0.013268198316585754, + "kl": 0.093505859375, + "learning_rate": 3.507214648354141e-07, + "loss": 9.360029071103781e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3021, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 68.18750047683716, + "completions/min_length": 24.25, + "epoch": 6.007942417473318, + "grad_norm": 0.0062959331303120945, + "kl": 0.095458984375, + "learning_rate": 3.504203989478015e-07, + "loss": 9.544835484120995e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3022, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 73.75000095367432, + "completions/min_length": 19.75, + "epoch": 6.009928021841648, + "grad_norm": 2.4589892143615204, + "kl": 0.11370849609375, + "learning_rate": 3.501193926110231e-07, + "loss": -0.0061433217488229275, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.7395833432674408, + "rewards/CineAccuracyORM/std": 0.30885962024331093, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3023, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.75, + "completions/mean_length": 77.90625143051147, + "completions/min_length": 22.75, + "epoch": 6.011913626209978, + "grad_norm": 0.0074195218885132735, + "kl": 0.08648681640625, + "learning_rate": 3.4981844594491577e-07, + "loss": 8.649392839288339e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3024, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.0, + "completions/mean_length": 72.75000333786011, + "completions/min_length": 19.875, + "epoch": 6.013899230578307, + "grad_norm": 0.008399255612897923, + "kl": 0.10888671875, + "learning_rate": 3.49517559069293e-07, + "loss": 0.00010886974632740021, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3025, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.875, + "completions/mean_length": 80.30208587646484, + "completions/min_length": 28.875, + "epoch": 6.015884834946637, + "grad_norm": 0.006814297355306778, + "kl": 0.107666015625, + "learning_rate": 3.492167321039442e-07, + "loss": 0.00010780902812257409, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3026, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 66.27083539962769, + "completions/min_length": 18.625, + "epoch": 6.017870439314967, + "grad_norm": 0.010580291487204947, + "kl": 0.098388671875, + "learning_rate": 3.4891596516863505e-07, + "loss": 9.841322753345594e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3027, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 60.020836353302, + "completions/min_length": 22.375, + "epoch": 6.019856043683296, + "grad_norm": 0.006138909046486966, + "kl": 0.07861328125, + "learning_rate": 3.486152583831072e-07, + "loss": 7.861968333600089e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3028, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 59.708335876464844, + "completions/min_length": 17.125, + "epoch": 6.021841648051626, + "grad_norm": 0.0073077468221807215, + "kl": 0.104248046875, + "learning_rate": 3.4831461186707854e-07, + "loss": 0.00010423504863865674, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3029, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.25, + "completions/mean_length": 70.6041693687439, + "completions/min_length": 26.375, + "epoch": 6.023827252419955, + "grad_norm": 0.0047598606273438785, + "kl": 0.0849609375, + "learning_rate": 3.4801402574024284e-07, + "loss": 8.498937677359208e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3030, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 67.70833492279053, + "completions/min_length": 20.125, + "epoch": 6.025812856788285, + "grad_norm": 0.005005653858880377, + "kl": 0.103240966796875, + "learning_rate": 3.477135001222695e-07, + "loss": 0.00010328226198907942, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3031, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.0, + "completions/mean_length": 80.33333539962769, + "completions/min_length": 26.375, + "epoch": 6.027798461156615, + "grad_norm": 0.003975941244766172, + "kl": 0.10101318359375, + "learning_rate": 3.4741303513280493e-07, + "loss": 0.00010101804218720645, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3032, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 61.270835399627686, + "completions/min_length": 21.0, + "epoch": 6.029784065524944, + "grad_norm": 0.006204868818693939, + "kl": 0.0948486328125, + "learning_rate": 3.4711263089147015e-07, + "loss": 9.481459710514173e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3033, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 66.60416984558105, + "completions/min_length": 21.5, + "epoch": 6.031769669893274, + "grad_norm": 0.006210913413401623, + "kl": 0.09979248046875, + "learning_rate": 3.4681228751786255e-07, + "loss": 9.9724973551929e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3034, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.75, + "completions/mean_length": 60.59375238418579, + "completions/min_length": 15.875, + "epoch": 6.033755274261603, + "grad_norm": 0.690022699096484, + "kl": 0.07293701171875, + "learning_rate": 3.4651200513155535e-07, + "loss": -0.005853736307471991, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3035, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 215.5, + "completions/mean_length": 76.83333396911621, + "completions/min_length": 21.875, + "epoch": 6.035740878629933, + "grad_norm": 0.009130938690803664, + "kl": 0.100341796875, + "learning_rate": 3.462117838520974e-07, + "loss": 0.00010030520934378728, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3036, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 60.73958492279053, + "completions/min_length": 20.375, + "epoch": 6.037726482998263, + "grad_norm": 0.02838405887534334, + "kl": 0.08935546875, + "learning_rate": 3.45911623799013e-07, + "loss": 8.938732207752764e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3037, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.875, + "completions/mean_length": 84.92708683013916, + "completions/min_length": 26.625, + "epoch": 6.039712087366592, + "grad_norm": 0.0037929333154969602, + "kl": 0.10345458984375, + "learning_rate": 3.4561152509180234e-07, + "loss": 0.0001035341338138096, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3038, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.75, + "completions/mean_length": 72.96875190734863, + "completions/min_length": 20.875, + "epoch": 6.041697691734922, + "grad_norm": 1.2769924363734104, + "kl": 0.097747802734375, + "learning_rate": 3.4531148784994135e-07, + "loss": 0.014284975826740265, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3039, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 66.60416841506958, + "completions/min_length": 23.25, + "epoch": 6.043683296103252, + "grad_norm": 0.00956282812315992, + "kl": 0.10498046875, + "learning_rate": 3.450115121928812e-07, + "loss": 0.00010505445970920846, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3040, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.625, + "completions/mean_length": 75.11458444595337, + "completions/min_length": 17.5, + "epoch": 6.045668900471581, + "grad_norm": 0.631342415016975, + "kl": 0.10052490234375, + "learning_rate": 3.447115982400485e-07, + "loss": -0.0039808619767427444, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3041, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.625, + "completions/mean_length": 73.12500238418579, + "completions/min_length": 19.375, + "epoch": 6.047654504839911, + "grad_norm": 0.7969507650453275, + "kl": 0.09503173828125, + "learning_rate": 3.4441174611084536e-07, + "loss": 0.0029293957632035017, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3042, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.25, + "completions/mean_length": 68.47916746139526, + "completions/min_length": 16.625, + "epoch": 6.04964010920824, + "grad_norm": 0.009191736156857703, + "kl": 0.10650634765625, + "learning_rate": 3.4411195592464936e-07, + "loss": 0.0001064623793354258, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3043, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.0, + "completions/mean_length": 61.40625047683716, + "completions/min_length": 21.625, + "epoch": 6.05162571357657, + "grad_norm": 0.775572556156759, + "kl": 0.098602294921875, + "learning_rate": 3.438122278008134e-07, + "loss": 0.0008362072403542697, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666679084301, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3044, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 68.94791793823242, + "completions/min_length": 18.25, + "epoch": 6.0536113179449, + "grad_norm": 0.004270799095794153, + "kl": 0.082427978515625, + "learning_rate": 3.435125618586656e-07, + "loss": 8.24808594188653e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3045, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 66.41666841506958, + "completions/min_length": 18.75, + "epoch": 6.055596922313229, + "grad_norm": 0.0076512697958888536, + "kl": 0.0960693359375, + "learning_rate": 3.4321295821750943e-07, + "loss": 9.598436736268923e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3046, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 52.29166841506958, + "completions/min_length": 14.5, + "epoch": 6.057582526681559, + "grad_norm": 0.00961010614499657, + "kl": 0.082244873046875, + "learning_rate": 3.4291341699662357e-07, + "loss": 8.218455332098529e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3047, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 60.395835399627686, + "completions/min_length": 21.5, + "epoch": 6.059568131049888, + "grad_norm": 0.009199954859576568, + "kl": 0.1094970703125, + "learning_rate": 3.4261393831526165e-07, + "loss": 0.00010940534411929548, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3048, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.125, + "completions/mean_length": 62.28125047683716, + "completions/min_length": 24.0, + "epoch": 6.061553735418218, + "grad_norm": 0.004586109928473245, + "kl": 0.08966064453125, + "learning_rate": 3.423145222926527e-07, + "loss": 8.965361485024914e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3049, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 66.05208444595337, + "completions/min_length": 21.375, + "epoch": 6.063539339786548, + "grad_norm": 0.007509344100748538, + "kl": 0.0994873046875, + "learning_rate": 3.4201516904800044e-07, + "loss": 9.94074362097308e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3050, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 71.677086353302, + "completions/min_length": 22.875, + "epoch": 6.065524944154877, + "grad_norm": 0.009464178818630315, + "kl": 0.09075927734375, + "learning_rate": 3.417158787004838e-07, + "loss": 9.082347969524562e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3051, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 69.39583396911621, + "completions/min_length": 25.625, + "epoch": 6.067510548523207, + "grad_norm": 2.1232579850193622, + "kl": 0.087371826171875, + "learning_rate": 3.4141665136925657e-07, + "loss": 0.0032394803129136562, + "memory(GiB)": 94.21, + "reward": 1.7916666865348816, + "reward_std": 0.08330589532852173, + "rewards/CineAccuracyORM/mean": 0.791666679084301, + "rewards/CineAccuracyORM/std": 0.27966488897800446, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3052, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 57.94791793823242, + "completions/min_length": 18.25, + "epoch": 6.069496152891537, + "grad_norm": 0.003931791342280391, + "kl": 0.075592041015625, + "learning_rate": 3.411174871734479e-07, + "loss": 7.559516234323382e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3053, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 69.96875238418579, + "completions/min_length": 23.75, + "epoch": 6.071481757259866, + "grad_norm": 0.004388787428620289, + "kl": 0.0919189453125, + "learning_rate": 3.4081838623216117e-07, + "loss": 9.189383126795292e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3054, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 61.927085399627686, + "completions/min_length": 22.5, + "epoch": 6.073467361628196, + "grad_norm": 0.005364773065299665, + "kl": 0.08087158203125, + "learning_rate": 3.4051934866447495e-07, + "loss": 8.09452249086462e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3055, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 67.520836353302, + "completions/min_length": 23.625, + "epoch": 6.075452965996525, + "grad_norm": 0.004018455230183852, + "kl": 0.12127685546875, + "learning_rate": 3.402203745894425e-07, + "loss": 0.00012133817654103041, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3056, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.375, + "completions/mean_length": 72.90625238418579, + "completions/min_length": 25.25, + "epoch": 6.077438570364855, + "grad_norm": 0.009318553599931523, + "kl": 0.11895751953125, + "learning_rate": 3.3992146412609166e-07, + "loss": 0.00011895672651007771, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3057, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.375, + "completions/mean_length": 68.70833492279053, + "completions/min_length": 25.75, + "epoch": 6.079424174733185, + "grad_norm": 0.005132911519764411, + "kl": 0.08837890625, + "learning_rate": 3.396226173934253e-07, + "loss": 8.842172974254936e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3058, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.0, + "completions/mean_length": 58.114585876464844, + "completions/min_length": 24.125, + "epoch": 6.081409779101514, + "grad_norm": 0.004618095086480621, + "kl": 0.0836181640625, + "learning_rate": 3.393238345104202e-07, + "loss": 8.366788824787363e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3059, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 66.42708587646484, + "completions/min_length": 24.5, + "epoch": 6.083395383469844, + "grad_norm": 0.0051910155184405745, + "kl": 0.094024658203125, + "learning_rate": 3.3902511559602876e-07, + "loss": 9.40771060413681e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3060, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 65.84375238418579, + "completions/min_length": 29.25, + "epoch": 6.085380987838173, + "grad_norm": 1.414046222039776, + "kl": 0.1005859375, + "learning_rate": 3.387264607691772e-07, + "loss": 0.01586427353322506, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3061, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 69.10416841506958, + "completions/min_length": 25.25, + "epoch": 6.087366592206503, + "grad_norm": 2.696646432201644, + "kl": 0.109222412109375, + "learning_rate": 3.3842787014876635e-07, + "loss": -0.003164414083585143, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3062, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 64.55208444595337, + "completions/min_length": 23.75, + "epoch": 6.089352196574833, + "grad_norm": 0.003736785452553901, + "kl": 0.09490966796875, + "learning_rate": 3.3812934385367143e-07, + "loss": 9.490475349593908e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3063, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 64.17708539962769, + "completions/min_length": 16.125, + "epoch": 6.091337800943162, + "grad_norm": 0.008472093439139752, + "kl": 0.10333251953125, + "learning_rate": 3.3783088200274214e-07, + "loss": 0.00010339477739762515, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3064, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.5, + "completions/mean_length": 72.69791793823242, + "completions/min_length": 21.375, + "epoch": 6.093323405311492, + "grad_norm": 0.004180293151649204, + "kl": 0.10955810546875, + "learning_rate": 3.375324847148027e-07, + "loss": 0.00010954905883409083, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3065, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 59.34375190734863, + "completions/min_length": 25.875, + "epoch": 6.095309009679822, + "grad_norm": 0.005980884123344347, + "kl": 0.0999755859375, + "learning_rate": 3.372341521086511e-07, + "loss": 9.992992272600532e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3066, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.25, + "completions/mean_length": 73.40625286102295, + "completions/min_length": 23.375, + "epoch": 6.097294614048151, + "grad_norm": 0.0056832568749368645, + "kl": 0.11175537109375, + "learning_rate": 3.369358843030603e-07, + "loss": 0.00011180696310475469, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3067, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 66.25000143051147, + "completions/min_length": 23.875, + "epoch": 6.099280218416481, + "grad_norm": 0.0066111654280472144, + "kl": 0.084564208984375, + "learning_rate": 3.3663768141677693e-07, + "loss": 8.459092350676656e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3068, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 58.90625190734863, + "completions/min_length": 22.5, + "epoch": 6.10126582278481, + "grad_norm": 0.004695928888639055, + "kl": 0.08880615234375, + "learning_rate": 3.36339543568522e-07, + "loss": 8.8829779997468e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3069, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 70.20833444595337, + "completions/min_length": 25.875, + "epoch": 6.10325142715314, + "grad_norm": 0.0070060813514598655, + "kl": 0.0997314453125, + "learning_rate": 3.360414708769904e-07, + "loss": 9.97364113572985e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3070, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 59.44791889190674, + "completions/min_length": 21.25, + "epoch": 6.10523703152147, + "grad_norm": 0.004990590257159498, + "kl": 0.08837890625, + "learning_rate": 3.357434634608513e-07, + "loss": 8.825818076729774e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3071, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 64.71875143051147, + "completions/min_length": 29.625, + "epoch": 6.107222635889799, + "grad_norm": 0.004008167002824629, + "kl": 0.0865478515625, + "learning_rate": 3.354455214387479e-07, + "loss": 8.653854456497356e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3072, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.5, + "completions/mean_length": 61.395835876464844, + "completions/min_length": 23.75, + "epoch": 6.109208240258129, + "grad_norm": 0.005212367280413786, + "kl": 0.080078125, + "learning_rate": 3.35147644929297e-07, + "loss": 7.999087392818183e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3073, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 66.22916841506958, + "completions/min_length": 25.75, + "epoch": 6.111193844626458, + "grad_norm": 0.004117133300155993, + "kl": 0.09332275390625, + "learning_rate": 3.3484983405109e-07, + "loss": 9.326735744252801e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3074, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 61.16666841506958, + "completions/min_length": 19.75, + "epoch": 6.113179448994788, + "grad_norm": 0.00493495322908631, + "kl": 0.093658447265625, + "learning_rate": 3.345520889226916e-07, + "loss": 9.373845387017354e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3075, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 72.72916889190674, + "completions/min_length": 29.625, + "epoch": 6.115165053363118, + "grad_norm": 0.00420416598356941, + "kl": 0.08868408203125, + "learning_rate": 3.3425440966264046e-07, + "loss": 8.86603957042098e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3076, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.875, + "completions/mean_length": 55.572917461395264, + "completions/min_length": 21.25, + "epoch": 6.117150657731447, + "grad_norm": 0.00451635606535441, + "kl": 0.090179443359375, + "learning_rate": 3.3395679638944905e-07, + "loss": 9.020412107929587e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3077, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 67.18750190734863, + "completions/min_length": 24.75, + "epoch": 6.119136262099777, + "grad_norm": 0.0036372871808990986, + "kl": 0.09808349609375, + "learning_rate": 3.336592492216038e-07, + "loss": 9.809157199924812e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3078, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 66.12500238418579, + "completions/min_length": 27.5, + "epoch": 6.121121866468107, + "grad_norm": 1.8198851418740167, + "kl": 0.09490966796875, + "learning_rate": 3.333617682775642e-07, + "loss": -0.0024086367338895798, + "memory(GiB)": 94.21, + "reward": 1.7812500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.7812500055879354, + "rewards/CineAccuracyORM/std": 0.1783013790845871, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3079, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.75, + "completions/mean_length": 62.29166793823242, + "completions/min_length": 19.0, + "epoch": 6.123107470836436, + "grad_norm": 0.0047328098487623635, + "kl": 0.085968017578125, + "learning_rate": 3.3306435367576374e-07, + "loss": 8.594350947532803e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3080, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 72.0416693687439, + "completions/min_length": 27.875, + "epoch": 6.125093075204766, + "grad_norm": 0.003855597711700682, + "kl": 0.0885009765625, + "learning_rate": 3.327670055346101e-07, + "loss": 8.844790136208758e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3081, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 69.87500190734863, + "completions/min_length": 26.5, + "epoch": 6.127078679573095, + "grad_norm": 0.007619012137971114, + "kl": 0.1019287109375, + "learning_rate": 3.324697239724834e-07, + "loss": 0.00010188139276579022, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3082, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.125, + "completions/mean_length": 66.06250286102295, + "completions/min_length": 21.75, + "epoch": 6.129064283941425, + "grad_norm": 0.004541770081438353, + "kl": 0.09552001953125, + "learning_rate": 3.321725091077381e-07, + "loss": 9.55801151576452e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3083, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.5, + "completions/mean_length": 55.97916889190674, + "completions/min_length": 22.875, + "epoch": 6.131049888309755, + "grad_norm": 0.006358819020512164, + "kl": 0.096405029296875, + "learning_rate": 3.318753610587015e-07, + "loss": 9.647863771533594e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3084, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.125, + "completions/mean_length": 59.87500190734863, + "completions/min_length": 29.625, + "epoch": 6.133035492678084, + "grad_norm": 0.0046408929976012115, + "kl": 0.094482421875, + "learning_rate": 3.315782799436747e-07, + "loss": 9.442068403586745e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3085, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 62.59375190734863, + "completions/min_length": 26.875, + "epoch": 6.135021097046414, + "grad_norm": 0.8790198056599116, + "kl": 0.081817626953125, + "learning_rate": 3.312812658809323e-07, + "loss": 0.008015388622879982, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3086, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 59.57291793823242, + "completions/min_length": 25.375, + "epoch": 6.137006701414743, + "grad_norm": 0.006116939070145271, + "kl": 0.071868896484375, + "learning_rate": 3.3098431898872124e-07, + "loss": 7.179949170676991e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3087, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 65.22916889190674, + "completions/min_length": 29.375, + "epoch": 6.138992305783073, + "grad_norm": 0.005717984469833963, + "kl": 0.10540771484375, + "learning_rate": 3.3068743938526323e-07, + "loss": 0.00010539943468756974, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3088, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 58.63541793823242, + "completions/min_length": 24.375, + "epoch": 6.140977910151403, + "grad_norm": 0.004018240263837148, + "kl": 0.090789794921875, + "learning_rate": 3.3039062718875206e-07, + "loss": 9.088363003684208e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3089, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 65.47916889190674, + "completions/min_length": 25.0, + "epoch": 6.142963514519732, + "grad_norm": 0.006415860515539578, + "kl": 0.0791015625, + "learning_rate": 3.3009388251735487e-07, + "loss": 7.90932826930657e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3090, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 218.5, + "completions/mean_length": 76.72916793823242, + "completions/min_length": 24.375, + "epoch": 6.144949118888062, + "grad_norm": 0.003787808274003879, + "kl": 0.08837890625, + "learning_rate": 3.297972054892122e-07, + "loss": 8.840192458592355e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3091, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 72.80208587646484, + "completions/min_length": 26.125, + "epoch": 6.146934723256392, + "grad_norm": 0.01912692166296288, + "kl": 0.10272216796875, + "learning_rate": 3.2950059622243744e-07, + "loss": 0.0001027283287839964, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3092, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.75, + "completions/mean_length": 63.239585876464844, + "completions/min_length": 21.75, + "epoch": 6.148920327624721, + "grad_norm": 1.686119681058139, + "kl": 0.08721923828125, + "learning_rate": 3.29204054835117e-07, + "loss": 0.00876564346253872, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3093, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.25, + "completions/mean_length": 56.28125238418579, + "completions/min_length": 22.125, + "epoch": 6.150905931993051, + "grad_norm": 0.004917477590137736, + "kl": 0.080841064453125, + "learning_rate": 3.2890758144531054e-07, + "loss": 8.092878124443814e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3094, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 69.83333587646484, + "completions/min_length": 22.875, + "epoch": 6.15289153636138, + "grad_norm": 0.003942004129715165, + "kl": 0.0992431640625, + "learning_rate": 3.2861117617105037e-07, + "loss": 9.917528223013505e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3095, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 69.36458587646484, + "completions/min_length": 27.5, + "epoch": 6.15487714072971, + "grad_norm": 0.004561870749420421, + "kl": 0.09515380859375, + "learning_rate": 3.2831483913034173e-07, + "loss": 9.505756315775216e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3096, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 61.114585399627686, + "completions/min_length": 18.5, + "epoch": 6.1568627450980395, + "grad_norm": 1.054855954888787, + "kl": 0.073089599609375, + "learning_rate": 3.2801857044116276e-07, + "loss": -0.009976135566830635, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3097, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.5, + "completions/mean_length": 61.114585399627686, + "completions/min_length": 24.625, + "epoch": 6.158848349466369, + "grad_norm": 0.0074671436207640425, + "kl": 0.1092529296875, + "learning_rate": 3.277223702214645e-07, + "loss": 0.00010932209261227399, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3098, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.75, + "completions/mean_length": 63.22916841506958, + "completions/min_length": 22.125, + "epoch": 6.160833953834699, + "grad_norm": 1.1157098221300585, + "kl": 0.10870361328125, + "learning_rate": 3.2742623858917026e-07, + "loss": -0.0015669962158426642, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3099, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 55.44791793823242, + "completions/min_length": 21.625, + "epoch": 6.162819558203028, + "grad_norm": 0.004786355941594205, + "kl": 0.082794189453125, + "learning_rate": 3.2713017566217626e-07, + "loss": 8.283949864562601e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3100, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 58.46875190734863, + "completions/min_length": 24.0, + "epoch": 6.164805162571358, + "grad_norm": 0.006038470188776243, + "kl": 0.08477783203125, + "learning_rate": 3.268341815583522e-07, + "loss": 8.474091009702533e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3101, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 218.75, + "completions/mean_length": 74.73958492279053, + "completions/min_length": 27.375, + "epoch": 6.1667907669396875, + "grad_norm": 0.006091407069440559, + "kl": 0.090240478515625, + "learning_rate": 3.26538256395539e-07, + "loss": 9.038918506121263e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3102, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.875, + "completions/mean_length": 60.750001430511475, + "completions/min_length": 30.75, + "epoch": 6.168776371308017, + "grad_norm": 1.6498493795873581, + "kl": 0.101470947265625, + "learning_rate": 3.262424002915509e-07, + "loss": 0.00010155017662327737, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3103, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.25, + "completions/mean_length": 57.312501430511475, + "completions/min_length": 25.5, + "epoch": 6.1707619756763465, + "grad_norm": 0.005529079432820773, + "kl": 0.09033203125, + "learning_rate": 3.259466133641748e-07, + "loss": 9.037533891387284e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3104, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.875, + "completions/mean_length": 61.27083444595337, + "completions/min_length": 19.5, + "epoch": 6.1727475800446765, + "grad_norm": 0.006956608097374181, + "kl": 0.105316162109375, + "learning_rate": 3.256508957311695e-07, + "loss": 0.00010536552872508764, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3105, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 65.64583492279053, + "completions/min_length": 26.25, + "epoch": 6.174733184413006, + "grad_norm": 0.003881511516277086, + "kl": 0.086669921875, + "learning_rate": 3.253552475102668e-07, + "loss": 8.665473433211446e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3106, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 63.58333444595337, + "completions/min_length": 21.75, + "epoch": 6.1767187887813355, + "grad_norm": 0.0042699233008304804, + "kl": 0.0902099609375, + "learning_rate": 3.2505966881917e-07, + "loss": 9.024595783557743e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3107, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 65.87500238418579, + "completions/min_length": 23.75, + "epoch": 6.178704393149665, + "grad_norm": 0.9087596886124952, + "kl": 0.09161376953125, + "learning_rate": 3.247641597755559e-07, + "loss": -0.005222773179411888, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3108, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.0, + "completions/mean_length": 55.968750953674316, + "completions/min_length": 22.875, + "epoch": 6.1806899975179945, + "grad_norm": 0.003960734513593447, + "kl": 0.0855712890625, + "learning_rate": 3.244687204970729e-07, + "loss": 8.55414109537378e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3109, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 66.14583492279053, + "completions/min_length": 19.75, + "epoch": 6.1826756018863245, + "grad_norm": 0.01626856857880739, + "kl": 0.104248046875, + "learning_rate": 3.2417335110134135e-07, + "loss": 0.00010417350858915597, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3110, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 76.01041889190674, + "completions/min_length": 27.375, + "epoch": 6.1846612062546535, + "grad_norm": 0.008821150174212686, + "kl": 0.0928955078125, + "learning_rate": 3.238780517059544e-07, + "loss": 9.291309106629342e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3111, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 66.43750333786011, + "completions/min_length": 26.125, + "epoch": 6.1866468106229835, + "grad_norm": 0.005613471856572807, + "kl": 0.101318359375, + "learning_rate": 3.235828224284769e-07, + "loss": 0.00010137448407476768, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3112, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.125, + "completions/mean_length": 62.27083492279053, + "completions/min_length": 22.5, + "epoch": 6.188632414991313, + "grad_norm": 0.01502929158721786, + "kl": 0.101165771484375, + "learning_rate": 3.2328766338644594e-07, + "loss": 0.00010128612484550104, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3113, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.75, + "completions/mean_length": 66.73958587646484, + "completions/min_length": 26.0, + "epoch": 6.1906180193596425, + "grad_norm": 0.022065479931969258, + "kl": 0.0838623046875, + "learning_rate": 3.229925746973706e-07, + "loss": 8.390971197513863e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3114, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 58.16666793823242, + "completions/min_length": 23.125, + "epoch": 6.1926036237279725, + "grad_norm": 0.0049344301030036525, + "kl": 0.08892822265625, + "learning_rate": 3.2269755647873214e-07, + "loss": 8.899138629203662e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3115, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 71.38541889190674, + "completions/min_length": 19.75, + "epoch": 6.1945892280963015, + "grad_norm": 0.008573771948103145, + "kl": 0.0977783203125, + "learning_rate": 3.2240260884798354e-07, + "loss": 9.785166184883565e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3116, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 72.9791693687439, + "completions/min_length": 26.0, + "epoch": 6.1965748324646315, + "grad_norm": 0.01391361123301328, + "kl": 0.1021728515625, + "learning_rate": 3.221077319225499e-07, + "loss": 0.00010208313324255869, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3117, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 64.10416889190674, + "completions/min_length": 26.625, + "epoch": 6.198560436832961, + "grad_norm": 0.004979520402488125, + "kl": 0.108642578125, + "learning_rate": 3.2181292581982775e-07, + "loss": 0.00010855172149604186, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3118, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.875, + "completions/mean_length": 64.13541841506958, + "completions/min_length": 23.375, + "epoch": 6.2005460412012905, + "grad_norm": 0.003786666186186788, + "kl": 0.078826904296875, + "learning_rate": 3.215181906571858e-07, + "loss": 7.883564830990508e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3119, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 63.541667461395264, + "completions/min_length": 24.625, + "epoch": 6.2025316455696204, + "grad_norm": 0.008177802740061323, + "kl": 0.077545166015625, + "learning_rate": 3.2122352655196446e-07, + "loss": 7.75569787947461e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3120, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.125, + "completions/mean_length": 80.57291984558105, + "completions/min_length": 35.375, + "epoch": 6.2045172499379495, + "grad_norm": 0.00405138316536812, + "kl": 0.09844970703125, + "learning_rate": 3.2092893362147564e-07, + "loss": 9.840886195888743e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3121, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.625, + "completions/mean_length": 58.60416793823242, + "completions/min_length": 23.25, + "epoch": 6.2065028543062795, + "grad_norm": 0.0038341338556492975, + "kl": 0.099761962890625, + "learning_rate": 3.2063441198300333e-07, + "loss": 9.983066411223263e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3122, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 68.07291889190674, + "completions/min_length": 26.375, + "epoch": 6.208488458674609, + "grad_norm": 0.013338144597359672, + "kl": 0.1317138671875, + "learning_rate": 3.203399617538027e-07, + "loss": 0.00013176453649066389, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3123, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 62.72916793823242, + "completions/min_length": 23.75, + "epoch": 6.2104740630429385, + "grad_norm": 0.005667677148609116, + "kl": 0.08251953125, + "learning_rate": 3.2004558305110084e-07, + "loss": 8.24426970211789e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3124, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.125, + "completions/mean_length": 55.11458396911621, + "completions/min_length": 23.125, + "epoch": 6.212459667411268, + "grad_norm": 0.003959666188874111, + "kl": 0.08245849609375, + "learning_rate": 3.197512759920962e-07, + "loss": 8.23399459477514e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3125, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.75, + "completions/mean_length": 62.04166841506958, + "completions/min_length": 19.25, + "epoch": 6.2144452717795975, + "grad_norm": 0.003780881275547536, + "kl": 0.084503173828125, + "learning_rate": 3.194570406939585e-07, + "loss": 8.46053590066731e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3126, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 71.05208587646484, + "completions/min_length": 22.0, + "epoch": 6.216430876147927, + "grad_norm": 0.004401607326832969, + "kl": 0.098480224609375, + "learning_rate": 3.1916287727382925e-07, + "loss": 9.85489969025366e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3127, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 71.27083539962769, + "completions/min_length": 20.25, + "epoch": 6.218416480516257, + "grad_norm": 0.003568702121712919, + "kl": 0.1007080078125, + "learning_rate": 3.1886878584882086e-07, + "loss": 0.00010078072955366224, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3128, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 64.01041841506958, + "completions/min_length": 16.875, + "epoch": 6.2204020848845865, + "grad_norm": 0.0335251654545457, + "kl": 0.1275634765625, + "learning_rate": 3.1857476653601807e-07, + "loss": 0.00012765468272846192, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3129, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.125, + "completions/mean_length": 61.31250238418579, + "completions/min_length": 22.625, + "epoch": 6.222387689252916, + "grad_norm": 0.003775006586297897, + "kl": 0.081756591796875, + "learning_rate": 3.1828081945247576e-07, + "loss": 8.166694169631228e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3130, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 66.770836353302, + "completions/min_length": 26.25, + "epoch": 6.224373293621246, + "grad_norm": 1.7876240371486267, + "kl": 0.46697998046875, + "learning_rate": 3.179869447152206e-07, + "loss": -0.015796799212694168, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.30977265536785126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3131, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 63.13541841506958, + "completions/min_length": 22.125, + "epoch": 6.226358897989575, + "grad_norm": 0.007546557296293014, + "kl": 0.0853271484375, + "learning_rate": 3.176931424412505e-07, + "loss": 8.531093772035092e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3132, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 66.44791889190674, + "completions/min_length": 29.375, + "epoch": 6.228344502357905, + "grad_norm": 1.149030383377944, + "kl": 0.18267822265625, + "learning_rate": 3.173994127475344e-07, + "loss": 0.0008500342955812812, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3133, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 70.55208587646484, + "completions/min_length": 24.75, + "epoch": 6.230330106726234, + "grad_norm": 1.023121400909042, + "kl": 0.08843994140625, + "learning_rate": 3.171057557510124e-07, + "loss": -0.0018992971163243055, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3134, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 63.81250190734863, + "completions/min_length": 23.75, + "epoch": 6.232315711094564, + "grad_norm": 0.004095120055570903, + "kl": 0.092498779296875, + "learning_rate": 3.168121715685953e-07, + "loss": 9.244475950254127e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3135, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 65.36458492279053, + "completions/min_length": 25.125, + "epoch": 6.234301315462894, + "grad_norm": 0.009297164469579195, + "kl": 0.1077880859375, + "learning_rate": 3.1651866031716565e-07, + "loss": 0.00010797010327223688, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3136, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 71.65625190734863, + "completions/min_length": 31.5, + "epoch": 6.236286919831223, + "grad_norm": 0.00600505478165713, + "kl": 0.086944580078125, + "learning_rate": 3.162252221135766e-07, + "loss": 8.684572821948677e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3137, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.75, + "completions/mean_length": 64.00000143051147, + "completions/min_length": 22.5, + "epoch": 6.238272524199553, + "grad_norm": 1.5793968582535947, + "kl": 0.108154296875, + "learning_rate": 3.159318570746518e-07, + "loss": -0.011972403153777122, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3138, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.0, + "completions/mean_length": 57.156251430511475, + "completions/min_length": 23.375, + "epoch": 6.240258128567882, + "grad_norm": 0.005971067252399188, + "kl": 0.09368896484375, + "learning_rate": 3.156385653171862e-07, + "loss": 9.376247180625796e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3139, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 67.31250143051147, + "completions/min_length": 24.0, + "epoch": 6.242243732936212, + "grad_norm": 0.005082649895943942, + "kl": 0.0849609375, + "learning_rate": 3.153453469579458e-07, + "loss": 8.502871787641197e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3140, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 64.46875238418579, + "completions/min_length": 19.75, + "epoch": 6.244229337304542, + "grad_norm": 0.004809543612420979, + "kl": 0.09869384765625, + "learning_rate": 3.150522021136668e-07, + "loss": 9.877372940536588e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3141, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 65.13541793823242, + "completions/min_length": 21.75, + "epoch": 6.246214941672871, + "grad_norm": 0.005891090358056983, + "kl": 0.086822509765625, + "learning_rate": 3.1475913090105646e-07, + "loss": 8.68485658429563e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3142, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 69.25000238418579, + "completions/min_length": 23.375, + "epoch": 6.248200546041201, + "grad_norm": 0.004289105541940191, + "kl": 0.09405517578125, + "learning_rate": 3.1446613343679285e-07, + "loss": 9.39553719945252e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3143, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.625, + "completions/mean_length": 65.57291793823242, + "completions/min_length": 20.875, + "epoch": 6.250186150409531, + "grad_norm": 1.1534487605935733, + "kl": 0.10833740234375, + "learning_rate": 3.141732098375245e-07, + "loss": 0.0024584531784057617, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3144, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 63.8541693687439, + "completions/min_length": 26.125, + "epoch": 6.25217175477786, + "grad_norm": 0.006726557592677674, + "kl": 0.092559814453125, + "learning_rate": 3.138803602198704e-07, + "loss": 9.255253098672256e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3145, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 73.19791889190674, + "completions/min_length": 26.125, + "epoch": 6.25415735914619, + "grad_norm": 0.0037410068523653145, + "kl": 0.093231201171875, + "learning_rate": 3.1358758470042045e-07, + "loss": 9.316120122093707e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3146, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 61.46875190734863, + "completions/min_length": 23.25, + "epoch": 6.256142963514519, + "grad_norm": 0.005160159337807292, + "kl": 0.073211669921875, + "learning_rate": 3.1329488339573464e-07, + "loss": 7.324671605601907e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3147, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 65.94791984558105, + "completions/min_length": 21.5, + "epoch": 6.258128567882849, + "grad_norm": 0.004274866184939987, + "kl": 0.0909423828125, + "learning_rate": 3.130022564223436e-07, + "loss": 9.085440979106352e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3148, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.0, + "completions/mean_length": 65.01041841506958, + "completions/min_length": 25.25, + "epoch": 6.260114172251179, + "grad_norm": 1.473045619044801, + "kl": 0.083404541015625, + "learning_rate": 3.127097038967483e-07, + "loss": 8.341297507286072e-05, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3149, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 61.53125286102295, + "completions/min_length": 28.625, + "epoch": 6.262099776619508, + "grad_norm": 0.016537414305204987, + "kl": 0.08648681640625, + "learning_rate": 3.124172259354206e-07, + "loss": 8.64926478243433e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3150, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 62.45833444595337, + "completions/min_length": 15.75, + "epoch": 6.264085380987838, + "grad_norm": 0.006032334743327232, + "kl": 0.0772705078125, + "learning_rate": 3.1212482265480177e-07, + "loss": 7.726001786068082e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3151, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.75, + "completions/mean_length": 76.48958492279053, + "completions/min_length": 33.25, + "epoch": 6.266070985356167, + "grad_norm": 0.004036827760249734, + "kl": 0.081878662109375, + "learning_rate": 3.118324941713041e-07, + "loss": 8.187924686353654e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3152, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 64.11458539962769, + "completions/min_length": 22.0, + "epoch": 6.268056589724497, + "grad_norm": 0.005858453117168549, + "kl": 0.084381103515625, + "learning_rate": 3.1154024060130956e-07, + "loss": 8.434015035163611e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3153, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.125, + "completions/mean_length": 67.52083539962769, + "completions/min_length": 26.0, + "epoch": 6.270042194092827, + "grad_norm": 0.003850758601447791, + "kl": 0.101776123046875, + "learning_rate": 3.1124806206117076e-07, + "loss": 0.00010177911462960765, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3154, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.375, + "completions/mean_length": 73.93750286102295, + "completions/min_length": 25.25, + "epoch": 6.272027798461156, + "grad_norm": 0.793783841303115, + "kl": 0.09716796875, + "learning_rate": 3.1095595866721005e-07, + "loss": -0.0011603438761085272, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3155, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 69.47916889190674, + "completions/min_length": 29.5, + "epoch": 6.274013402829486, + "grad_norm": 0.005801707410151214, + "kl": 0.08837890625, + "learning_rate": 3.106639305357198e-07, + "loss": 8.832634193822742e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3156, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 69.10416793823242, + "completions/min_length": 21.125, + "epoch": 6.275999007197816, + "grad_norm": 0.003441453882323851, + "kl": 0.075042724609375, + "learning_rate": 3.103719777829633e-07, + "loss": 7.503099914174527e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3157, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.625, + "completions/mean_length": 70.81250286102295, + "completions/min_length": 22.375, + "epoch": 6.277984611566145, + "grad_norm": 0.005261579875530533, + "kl": 0.10870361328125, + "learning_rate": 3.1008010052517263e-07, + "loss": 0.00010861671034945175, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3158, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 64.79166841506958, + "completions/min_length": 19.625, + "epoch": 6.279970215934475, + "grad_norm": 0.009419882186383302, + "kl": 0.08233642578125, + "learning_rate": 3.097882988785506e-07, + "loss": 8.234622509917244e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3159, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 66.91666984558105, + "completions/min_length": 24.25, + "epoch": 6.281955820302804, + "grad_norm": 0.004074886471241036, + "kl": 0.09979248046875, + "learning_rate": 3.094965729592697e-07, + "loss": 9.988283272832632e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3160, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 64.37500190734863, + "completions/min_length": 25.125, + "epoch": 6.283941424671134, + "grad_norm": 0.009121828539919732, + "kl": 0.0975341796875, + "learning_rate": 3.09204922883472e-07, + "loss": 9.764295828063041e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3161, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 70.07291793823242, + "completions/min_length": 29.875, + "epoch": 6.285927029039464, + "grad_norm": 0.006788249231196899, + "kl": 0.0780029296875, + "learning_rate": 3.089133487672698e-07, + "loss": 7.801067840773612e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3162, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 73.71875286102295, + "completions/min_length": 30.5, + "epoch": 6.287912633407793, + "grad_norm": 0.004614706270702021, + "kl": 0.089111328125, + "learning_rate": 3.0862185072674496e-07, + "loss": 8.905110007617623e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3163, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 69.47916889190674, + "completions/min_length": 23.875, + "epoch": 6.289898237776123, + "grad_norm": 0.005489914410934841, + "kl": 0.07635498046875, + "learning_rate": 3.0833042887794915e-07, + "loss": 7.634704525116831e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3164, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 71.43750286102295, + "completions/min_length": 17.875, + "epoch": 6.291883842144452, + "grad_norm": 0.015235608702993992, + "kl": 0.0916748046875, + "learning_rate": 3.080390833369036e-07, + "loss": 9.160333866020665e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3165, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 68.17708396911621, + "completions/min_length": 30.0, + "epoch": 6.293869446512782, + "grad_norm": 0.0037352158411413285, + "kl": 0.0762939453125, + "learning_rate": 3.0774781421959927e-07, + "loss": 7.629330502822995e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3166, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 74.645836353302, + "completions/min_length": 27.5, + "epoch": 6.295855050881112, + "grad_norm": 0.003958134861588333, + "kl": 0.07147216796875, + "learning_rate": 3.074566216419964e-07, + "loss": 7.147947326302528e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3167, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.25, + "completions/mean_length": 56.145835399627686, + "completions/min_length": 20.0, + "epoch": 6.297840655249441, + "grad_norm": 0.003947371774646639, + "kl": 0.074981689453125, + "learning_rate": 3.07165505720025e-07, + "loss": 7.496902981074527e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3168, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 72.45833444595337, + "completions/min_length": 23.5, + "epoch": 6.299826259617771, + "grad_norm": 0.003002157482170515, + "kl": 0.08294677734375, + "learning_rate": 3.068744665695846e-07, + "loss": 8.296724263345823e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3169, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.0, + "completions/mean_length": 77.73958444595337, + "completions/min_length": 23.0, + "epoch": 6.301811863986101, + "grad_norm": 0.005636080634464851, + "kl": 0.091400146484375, + "learning_rate": 3.0658350430654423e-07, + "loss": 9.134468564298004e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3170, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.25, + "completions/mean_length": 74.84375286102295, + "completions/min_length": 27.875, + "epoch": 6.30379746835443, + "grad_norm": 0.004474416035252437, + "kl": 0.08782958984375, + "learning_rate": 3.0629261904674203e-07, + "loss": 8.786410035099834e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3171, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 64.18750143051147, + "completions/min_length": 20.125, + "epoch": 6.30578307272276, + "grad_norm": 0.0038355680455892317, + "kl": 0.089324951171875, + "learning_rate": 3.060018109059857e-07, + "loss": 8.934485231293365e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3172, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.75, + "completions/mean_length": 60.322917461395264, + "completions/min_length": 23.75, + "epoch": 6.307768677091089, + "grad_norm": 0.00626091361326418, + "kl": 0.090972900390625, + "learning_rate": 3.057110800000522e-07, + "loss": 9.085195779334754e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3173, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 66.78125190734863, + "completions/min_length": 21.0, + "epoch": 6.309754281459419, + "grad_norm": 0.005614855591660481, + "kl": 0.0816650390625, + "learning_rate": 3.054204264446877e-07, + "loss": 8.169886859832332e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3174, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 67.80208587646484, + "completions/min_length": 22.625, + "epoch": 6.311739885827749, + "grad_norm": 1.3443157770217442, + "kl": 0.0947265625, + "learning_rate": 3.051298503556075e-07, + "loss": 0.012410818599164486, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.833333333954215, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3175, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 65.90625238418579, + "completions/min_length": 24.625, + "epoch": 6.313725490196078, + "grad_norm": 1.5680957958398518, + "kl": 0.10076904296875, + "learning_rate": 3.04839351848496e-07, + "loss": -0.0017739187460392714, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3176, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 63.520835876464844, + "completions/min_length": 20.5, + "epoch": 6.315711094564408, + "grad_norm": 0.003410419424258827, + "kl": 0.076080322265625, + "learning_rate": 3.0454893103900735e-07, + "loss": 7.616783841513097e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3177, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 74.62500190734863, + "completions/min_length": 29.625, + "epoch": 6.317696698932737, + "grad_norm": 0.007302252948029951, + "kl": 0.081024169921875, + "learning_rate": 3.042585880427639e-07, + "loss": 8.102280116872862e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3178, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 61.16666841506958, + "completions/min_length": 19.625, + "epoch": 6.319682303301067, + "grad_norm": 0.003403570888287035, + "kl": 0.08074951171875, + "learning_rate": 3.039683229753575e-07, + "loss": 8.084949513431638e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3179, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 65.15625047683716, + "completions/min_length": 19.75, + "epoch": 6.321667907669397, + "grad_norm": 2.1251951247506966, + "kl": 0.081329345703125, + "learning_rate": 3.0367813595234883e-07, + "loss": 0.014659504406154156, + "memory(GiB)": 94.21, + "reward": 1.7500000149011612, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.7500000074505806, + "rewards/CineAccuracyORM/std": 0.2592903971672058, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3180, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 68.03125190734863, + "completions/min_length": 24.125, + "epoch": 6.323653512037726, + "grad_norm": 0.004987353934230566, + "kl": 0.07977294921875, + "learning_rate": 3.033880270892676e-07, + "loss": 7.975176413310692e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3181, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 65.39583539962769, + "completions/min_length": 25.5, + "epoch": 6.325639116406056, + "grad_norm": 0.003433414165108702, + "kl": 0.072967529296875, + "learning_rate": 3.0309799650161227e-07, + "loss": 7.288772030733526e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3182, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.25, + "completions/mean_length": 67.39583587646484, + "completions/min_length": 18.875, + "epoch": 6.327624720774386, + "grad_norm": 0.004526580003160603, + "kl": 0.09686279296875, + "learning_rate": 3.0280804430485017e-07, + "loss": 9.676550689619035e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3183, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 65.78125190734863, + "completions/min_length": 22.75, + "epoch": 6.329610325142715, + "grad_norm": 0.0033851830381136086, + "kl": 0.06414794921875, + "learning_rate": 3.0251817061441776e-07, + "loss": 6.408228364307433e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3184, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 55.895835399627686, + "completions/min_length": 18.625, + "epoch": 6.331595929511045, + "grad_norm": 0.006131008702751606, + "kl": 0.077880859375, + "learning_rate": 3.0222837554571967e-07, + "loss": 7.786209607729688e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3185, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.625, + "completions/mean_length": 75.28125190734863, + "completions/min_length": 26.375, + "epoch": 6.333581533879374, + "grad_norm": 0.7290514364235661, + "kl": 0.1021728515625, + "learning_rate": 3.0193865921412963e-07, + "loss": 0.01342483889311552, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3186, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 64.42708444595337, + "completions/min_length": 20.75, + "epoch": 6.335567138247704, + "grad_norm": 0.00534200322926987, + "kl": 0.0909423828125, + "learning_rate": 3.0164902173498986e-07, + "loss": 9.094739652937278e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3187, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 67.45833444595337, + "completions/min_length": 27.75, + "epoch": 6.337552742616034, + "grad_norm": 0.004052765995189815, + "kl": 0.076995849609375, + "learning_rate": 3.0135946322361114e-07, + "loss": 7.689397898502648e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3188, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 105.875, + "completions/mean_length": 58.239585399627686, + "completions/min_length": 22.75, + "epoch": 6.339538346984363, + "grad_norm": 0.007492461380118997, + "kl": 0.093170166015625, + "learning_rate": 3.0106998379527296e-07, + "loss": 9.316718205809593e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3189, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 62.71875238418579, + "completions/min_length": 27.0, + "epoch": 6.341523951352693, + "grad_norm": 0.006452160772387257, + "kl": 0.086334228515625, + "learning_rate": 3.0078058356522325e-07, + "loss": 8.632532262708992e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3190, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.75, + "completions/mean_length": 65.06250238418579, + "completions/min_length": 22.125, + "epoch": 6.343509555721022, + "grad_norm": 1.2068679914908, + "kl": 0.092987060546875, + "learning_rate": 3.0049126264867846e-07, + "loss": 0.001519040553830564, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3191, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 76.177086353302, + "completions/min_length": 25.875, + "epoch": 6.345495160089352, + "grad_norm": 0.007136411488001945, + "kl": 0.09564208984375, + "learning_rate": 3.0020202116082347e-07, + "loss": 9.564954962115735e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3192, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 61.91666841506958, + "completions/min_length": 19.125, + "epoch": 6.347480764457682, + "grad_norm": 1.3304155625428375, + "kl": 0.097320556640625, + "learning_rate": 2.999128592168114e-07, + "loss": 0.006263755261898041, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3193, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 66.34375143051147, + "completions/min_length": 25.375, + "epoch": 6.349466368826011, + "grad_norm": 0.0112759389636487, + "kl": 0.096405029296875, + "learning_rate": 2.99623776931764e-07, + "loss": 9.63092315942049e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3194, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 67.67708539962769, + "completions/min_length": 23.875, + "epoch": 6.351451973194341, + "grad_norm": 0.005925655658320405, + "kl": 0.0809326171875, + "learning_rate": 2.9933477442077084e-07, + "loss": 8.095103839877993e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3195, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 65.17708444595337, + "completions/min_length": 26.25, + "epoch": 6.353437577562671, + "grad_norm": 0.004298581437508858, + "kl": 0.0694580078125, + "learning_rate": 2.990458517988901e-07, + "loss": 6.947157817194238e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3196, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 66.36458444595337, + "completions/min_length": 23.75, + "epoch": 6.355423181931, + "grad_norm": 0.0031663668112983856, + "kl": 0.07598876953125, + "learning_rate": 2.9875700918114786e-07, + "loss": 7.596309296786785e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3197, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 74.50000286102295, + "completions/min_length": 37.25, + "epoch": 6.35740878629933, + "grad_norm": 1.2698018778047133, + "kl": 0.08929443359375, + "learning_rate": 2.9846824668253887e-07, + "loss": 0.0009202770888805389, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3198, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 57.73958444595337, + "completions/min_length": 23.5, + "epoch": 6.359394390667659, + "grad_norm": 1.9044026818443813, + "kl": 0.09881591796875, + "learning_rate": 2.981795644180255e-07, + "loss": 0.007204392924904823, + "memory(GiB)": 94.21, + "reward": 1.9062500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.9062500074505806, + "rewards/CineAccuracyORM/std": 0.15001969039440155, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3199, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 66.93750238418579, + "completions/min_length": 23.375, + "epoch": 6.361379995035989, + "grad_norm": 0.0040278264469864634, + "kl": 0.089141845703125, + "learning_rate": 2.978909625025383e-07, + "loss": 8.910940960049629e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3200, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.375, + "completions/mean_length": 65.3854193687439, + "completions/min_length": 22.0, + "epoch": 6.363365599404319, + "grad_norm": 0.006368258610943689, + "kl": 0.09259033203125, + "learning_rate": 2.9760244105097585e-07, + "loss": 9.261471859645098e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3201, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.25, + "completions/mean_length": 61.60416889190674, + "completions/min_length": 23.5, + "epoch": 6.365351203772648, + "grad_norm": 0.008213145947016, + "kl": 0.10150146484375, + "learning_rate": 2.9731400017820484e-07, + "loss": 0.00010158667282667011, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3202, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 64.3229193687439, + "completions/min_length": 19.875, + "epoch": 6.367336808140978, + "grad_norm": 0.005862015906315552, + "kl": 0.08489990234375, + "learning_rate": 2.970256399990596e-07, + "loss": 8.489063475281e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3203, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 66.53125238418579, + "completions/min_length": 22.75, + "epoch": 6.369322412509307, + "grad_norm": 0.00636125243233647, + "kl": 0.0858154296875, + "learning_rate": 2.9673736062834233e-07, + "loss": 8.585414616391063e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3204, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.5, + "completions/mean_length": 61.18750190734863, + "completions/min_length": 27.875, + "epoch": 6.371308016877637, + "grad_norm": 0.9202924284098827, + "kl": 0.2208251953125, + "learning_rate": 2.964491621808235e-07, + "loss": -0.009702635928988457, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3205, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 68.16666889190674, + "completions/min_length": 24.375, + "epoch": 6.373293621245967, + "grad_norm": 0.006685737713924595, + "kl": 0.11236572265625, + "learning_rate": 2.96161044771241e-07, + "loss": 0.00011227602226426825, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3206, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.375, + "completions/mean_length": 59.81250238418579, + "completions/min_length": 20.875, + "epoch": 6.375279225614296, + "grad_norm": 0.007893269074993735, + "kl": 0.09259033203125, + "learning_rate": 2.9587300851430053e-07, + "loss": 9.266478446079418e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3207, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 64.14583539962769, + "completions/min_length": 25.0, + "epoch": 6.377264829982626, + "grad_norm": 0.005524452127413556, + "kl": 0.084869384765625, + "learning_rate": 2.955850535246753e-07, + "loss": 8.495587098877877e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3208, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 66.31250190734863, + "completions/min_length": 22.25, + "epoch": 6.379250434350956, + "grad_norm": 0.005304418706715149, + "kl": 0.09014892578125, + "learning_rate": 2.9529717991700654e-07, + "loss": 9.00080194696784e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3209, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.5, + "completions/mean_length": 54.312500953674316, + "completions/min_length": 25.0, + "epoch": 6.381236038719285, + "grad_norm": 1.6934375785685654, + "kl": 0.083740234375, + "learning_rate": 2.950093878059027e-07, + "loss": 0.001064905314706266, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3210, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 65.84375143051147, + "completions/min_length": 28.125, + "epoch": 6.383221643087615, + "grad_norm": 0.00813484045008924, + "kl": 0.09820556640625, + "learning_rate": 2.947216773059401e-07, + "loss": 9.817527461564168e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3211, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 61.98958444595337, + "completions/min_length": 25.5, + "epoch": 6.385207247455944, + "grad_norm": 0.006731930220970469, + "kl": 0.108154296875, + "learning_rate": 2.944340485316624e-07, + "loss": 0.000108105901745148, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3212, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 61.916666984558105, + "completions/min_length": 24.375, + "epoch": 6.387192851824274, + "grad_norm": 0.00810795712192299, + "kl": 0.083831787109375, + "learning_rate": 2.9414650159758083e-07, + "loss": 8.381686348002404e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3213, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 60.427085399627686, + "completions/min_length": 20.125, + "epoch": 6.389178456192604, + "grad_norm": 0.006882810262558977, + "kl": 0.1248779296875, + "learning_rate": 2.93859036618174e-07, + "loss": 0.00012484054605010897, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3214, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 62.645835399627686, + "completions/min_length": 24.125, + "epoch": 6.391164060560933, + "grad_norm": 0.0048933111916164085, + "kl": 0.089141845703125, + "learning_rate": 2.9357165370788763e-07, + "loss": 8.914059435483068e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3215, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.5, + "completions/mean_length": 63.218750953674316, + "completions/min_length": 27.375, + "epoch": 6.393149664929263, + "grad_norm": 0.007082944969145969, + "kl": 0.11102294921875, + "learning_rate": 2.932843529811352e-07, + "loss": 0.00011099546100012958, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3216, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 67.46875286102295, + "completions/min_length": 27.125, + "epoch": 6.395135269297592, + "grad_norm": 0.006149838922129725, + "kl": 0.0863037109375, + "learning_rate": 2.9299713455229706e-07, + "loss": 8.632345998194069e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3217, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 68.36458539962769, + "completions/min_length": 21.75, + "epoch": 6.397120873665922, + "grad_norm": 0.007126567103740635, + "kl": 0.0880126953125, + "learning_rate": 2.9270999853572115e-07, + "loss": 8.799279748927802e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3218, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 64.82291746139526, + "completions/min_length": 23.0, + "epoch": 6.399106478034252, + "grad_norm": 0.0063088092896884706, + "kl": 0.1097412109375, + "learning_rate": 2.9242294504572253e-07, + "loss": 0.0001097028361982666, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3219, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.375, + "completions/mean_length": 59.14583492279053, + "completions/min_length": 25.5, + "epoch": 6.401092082402581, + "grad_norm": 0.007119185629899003, + "kl": 0.07855224609375, + "learning_rate": 2.9213597419658314e-07, + "loss": 7.84529693191871e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3220, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 71.6666693687439, + "completions/min_length": 26.25, + "epoch": 6.403077686770911, + "grad_norm": 0.005209946067006763, + "kl": 0.095458984375, + "learning_rate": 2.918490861025523e-07, + "loss": 9.556749137118459e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3221, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.375, + "completions/mean_length": 67.04166889190674, + "completions/min_length": 23.875, + "epoch": 6.405063291139241, + "grad_norm": 0.007251241966328845, + "kl": 0.08367919921875, + "learning_rate": 2.9156228087784653e-07, + "loss": 8.368380804313347e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3222, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 54.20833492279053, + "completions/min_length": 16.625, + "epoch": 6.40704889550757, + "grad_norm": 0.00458563815799741, + "kl": 0.09002685546875, + "learning_rate": 2.912755586366485e-07, + "loss": 9.006025356939062e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3223, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 68.020836353302, + "completions/min_length": 24.125, + "epoch": 6.4090344998759, + "grad_norm": 0.0045176839309874805, + "kl": 0.084991455078125, + "learning_rate": 2.909889194931091e-07, + "loss": 8.502715354552492e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3224, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.625, + "completions/mean_length": 58.33333444595337, + "completions/min_length": 28.0, + "epoch": 6.411020104244229, + "grad_norm": 0.9724183104346384, + "kl": 0.089080810546875, + "learning_rate": 2.90702363561345e-07, + "loss": 0.006806063931435347, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3225, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 60.083335876464844, + "completions/min_length": 24.5, + "epoch": 6.413005708612559, + "grad_norm": 0.006325012243000106, + "kl": 0.08892822265625, + "learning_rate": 2.904158909554405e-07, + "loss": 8.890218305168673e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3226, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.0, + "completions/mean_length": 69.28125238418579, + "completions/min_length": 24.375, + "epoch": 6.414991312980889, + "grad_norm": 0.003616058769709026, + "kl": 0.106201171875, + "learning_rate": 2.901295017894466e-07, + "loss": 0.000106100516859442, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3227, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 65.02083539962769, + "completions/min_length": 22.375, + "epoch": 6.416976917349218, + "grad_norm": 0.007624530511207928, + "kl": 0.09033203125, + "learning_rate": 2.8984319617738083e-07, + "loss": 9.03638283489272e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3228, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 70.2291693687439, + "completions/min_length": 25.25, + "epoch": 6.418962521717548, + "grad_norm": 0.005822088922609145, + "kl": 0.108123779296875, + "learning_rate": 2.8955697423322723e-07, + "loss": 0.00010817102156579494, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3229, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 68.06250190734863, + "completions/min_length": 26.0, + "epoch": 6.420948126085877, + "grad_norm": 0.003536410411496393, + "kl": 0.095703125, + "learning_rate": 2.892708360709374e-07, + "loss": 9.572417184244841e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3230, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 65.38541984558105, + "completions/min_length": 26.625, + "epoch": 6.422933730454207, + "grad_norm": 0.005904712799171937, + "kl": 0.103790283203125, + "learning_rate": 2.8898478180442853e-07, + "loss": 0.00010377258149674162, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3231, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 67.53125238418579, + "completions/min_length": 23.75, + "epoch": 6.424919334822537, + "grad_norm": 0.004280373173076877, + "kl": 0.0972900390625, + "learning_rate": 2.8869881154758527e-07, + "loss": 9.732063335832208e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3232, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 62.18750238418579, + "completions/min_length": 22.75, + "epoch": 6.426904939190866, + "grad_norm": 0.00457266950693342, + "kl": 0.08758544921875, + "learning_rate": 2.8841292541425856e-07, + "loss": 8.750433335080743e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3233, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 68.23958587646484, + "completions/min_length": 23.125, + "epoch": 6.428890543559196, + "grad_norm": 0.004023876628912124, + "kl": 0.085906982421875, + "learning_rate": 2.881271235182655e-07, + "loss": 8.594644896220416e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3234, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.625, + "completions/mean_length": 59.60416889190674, + "completions/min_length": 27.125, + "epoch": 6.430876147927526, + "grad_norm": 0.003938395012429837, + "kl": 0.08551025390625, + "learning_rate": 2.8784140597339023e-07, + "loss": 8.552710642106831e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3235, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.5, + "completions/mean_length": 54.687501430511475, + "completions/min_length": 19.0, + "epoch": 6.432861752295855, + "grad_norm": 0.006045316535094096, + "kl": 0.07733154296875, + "learning_rate": 2.875557728933826e-07, + "loss": 7.739596185274422e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3236, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.5, + "completions/mean_length": 63.375001430511475, + "completions/min_length": 21.25, + "epoch": 6.434847356664185, + "grad_norm": 0.007648025007786075, + "kl": 0.0816650390625, + "learning_rate": 2.872702243919598e-07, + "loss": 8.164472819771618e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3237, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 61.427085876464844, + "completions/min_length": 26.375, + "epoch": 6.436832961032514, + "grad_norm": 0.014048365116325687, + "kl": 0.08197021484375, + "learning_rate": 2.869847605828042e-07, + "loss": 8.192491804948077e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3238, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 59.770835399627686, + "completions/min_length": 20.625, + "epoch": 6.438818565400844, + "grad_norm": 0.007596259507577925, + "kl": 0.10968017578125, + "learning_rate": 2.866993815795653e-07, + "loss": 0.00010956473124679178, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3239, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 66.35416889190674, + "completions/min_length": 26.5, + "epoch": 6.440804169769174, + "grad_norm": 0.004828051474948282, + "kl": 0.0782470703125, + "learning_rate": 2.8641408749585884e-07, + "loss": 7.840296166250482e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3240, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 60.52083492279053, + "completions/min_length": 27.625, + "epoch": 6.442789774137503, + "grad_norm": 0.005629559002850129, + "kl": 0.07391357421875, + "learning_rate": 2.8612887844526615e-07, + "loss": 7.402936171274632e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3241, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 67.14583492279053, + "completions/min_length": 24.125, + "epoch": 6.444775378505833, + "grad_norm": 0.0039585856233579336, + "kl": 0.09979248046875, + "learning_rate": 2.8584375454133503e-07, + "loss": 9.972216503228992e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3242, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 68.18750143051147, + "completions/min_length": 22.625, + "epoch": 6.446760982874162, + "grad_norm": 0.005487321365180753, + "kl": 0.09295654296875, + "learning_rate": 2.855587158975796e-07, + "loss": 9.301453246735036e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3243, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 57.97916841506958, + "completions/min_length": 19.875, + "epoch": 6.448746587242492, + "grad_norm": 0.005562677396413367, + "kl": 0.071624755859375, + "learning_rate": 2.852737626274797e-07, + "loss": 7.15503265382722e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3244, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 64.71875095367432, + "completions/min_length": 19.5, + "epoch": 6.450732191610822, + "grad_norm": 0.004315901498022904, + "kl": 0.08074951171875, + "learning_rate": 2.849888948444812e-07, + "loss": 8.06492316769436e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3245, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 67.78125190734863, + "completions/min_length": 24.625, + "epoch": 6.452717795979151, + "grad_norm": 0.004295211493529179, + "kl": 0.0836181640625, + "learning_rate": 2.847041126619964e-07, + "loss": 8.36151884868741e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3246, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.625, + "completions/mean_length": 62.770835399627686, + "completions/min_length": 26.125, + "epoch": 6.454703400347481, + "grad_norm": 0.003492843423629508, + "kl": 0.087158203125, + "learning_rate": 2.844194161934028e-07, + "loss": 8.721269841771573e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3247, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 68.71875333786011, + "completions/min_length": 31.5, + "epoch": 6.456689004715811, + "grad_norm": 0.0046016102960333245, + "kl": 0.1064453125, + "learning_rate": 2.841348055520445e-07, + "loss": 0.00010640530672390014, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3248, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 56.51041841506958, + "completions/min_length": 15.0, + "epoch": 6.45867460908414, + "grad_norm": 0.0033017773213425985, + "kl": 0.07391357421875, + "learning_rate": 2.8385028085123084e-07, + "loss": 7.382655167020857e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3249, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 66.73958587646484, + "completions/min_length": 25.25, + "epoch": 6.46066021345247, + "grad_norm": 0.007200043131407117, + "kl": 0.087799072265625, + "learning_rate": 2.8356584220423706e-07, + "loss": 8.77869242685847e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3250, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.0, + "completions/mean_length": 66.81250238418579, + "completions/min_length": 25.125, + "epoch": 6.462645817820799, + "grad_norm": 0.003764279457579972, + "kl": 0.0916748046875, + "learning_rate": 2.8328148972430463e-07, + "loss": 9.156690066447482e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3251, + "train_speed(iter/s)": 0.022647 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 64.89583444595337, + "completions/min_length": 24.125, + "epoch": 6.464631422189129, + "grad_norm": 0.011476157212154402, + "kl": 0.10333251953125, + "learning_rate": 2.8299722352463994e-07, + "loss": 0.00010348884097766131, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3252, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.625, + "completions/mean_length": 66.72916793823242, + "completions/min_length": 21.25, + "epoch": 6.466617026557459, + "grad_norm": 0.003595420393572264, + "kl": 0.100616455078125, + "learning_rate": 2.8271304371841565e-07, + "loss": 0.0001006111124297604, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3253, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.0, + "completions/mean_length": 53.354166984558105, + "completions/min_length": 16.375, + "epoch": 6.468602630925788, + "grad_norm": 0.004320378633937529, + "kl": 0.10955810546875, + "learning_rate": 2.8242895041877004e-07, + "loss": 0.00010947669215966016, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3254, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 62.46875190734863, + "completions/min_length": 21.0, + "epoch": 6.470588235294118, + "grad_norm": 0.004914148393736551, + "kl": 0.09356689453125, + "learning_rate": 2.821449437388062e-07, + "loss": 9.368818427901715e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3255, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.875, + "completions/mean_length": 64.03125143051147, + "completions/min_length": 26.875, + "epoch": 6.472573839662447, + "grad_norm": 0.005483349875600164, + "kl": 0.0869140625, + "learning_rate": 2.8186102379159376e-07, + "loss": 8.685316424816847e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3256, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.5, + "completions/mean_length": 60.83333444595337, + "completions/min_length": 25.875, + "epoch": 6.474559444030777, + "grad_norm": 0.003428552988963893, + "kl": 0.07391357421875, + "learning_rate": 2.8157719069016703e-07, + "loss": 7.392094994429499e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3257, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 63.562501430511475, + "completions/min_length": 21.0, + "epoch": 6.476545048399107, + "grad_norm": 0.0043699131111029485, + "kl": 0.081634521484375, + "learning_rate": 2.81293444547526e-07, + "loss": 8.163400343619287e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3258, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 58.54166793823242, + "completions/min_length": 19.25, + "epoch": 6.478530652767436, + "grad_norm": 0.0088400725748611, + "kl": 0.0804443359375, + "learning_rate": 2.810097854766361e-07, + "loss": 8.042525587370619e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3259, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 61.00000238418579, + "completions/min_length": 25.125, + "epoch": 6.480516257135766, + "grad_norm": 0.003855738016349987, + "kl": 0.0684814453125, + "learning_rate": 2.8072621359042837e-07, + "loss": 6.852075603092089e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3260, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 70.75000143051147, + "completions/min_length": 26.25, + "epoch": 6.482501861504096, + "grad_norm": 1.1249144863329308, + "kl": 0.08453369140625, + "learning_rate": 2.8044272900179835e-07, + "loss": 0.012785809114575386, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3261, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.25, + "completions/mean_length": 56.91666841506958, + "completions/min_length": 25.5, + "epoch": 6.484487465872425, + "grad_norm": 1.853775272476962, + "kl": 0.103271484375, + "learning_rate": 2.801593318236077e-07, + "loss": 0.0030850651673972607, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3262, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 63.07291841506958, + "completions/min_length": 14.75, + "epoch": 6.486473070240755, + "grad_norm": 0.016466420466611575, + "kl": 0.0888671875, + "learning_rate": 2.7987602216868255e-07, + "loss": 8.875381899997592e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3263, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.125, + "completions/mean_length": 68.770836353302, + "completions/min_length": 22.25, + "epoch": 6.488458674609084, + "grad_norm": 0.005128073103485684, + "kl": 0.1048583984375, + "learning_rate": 2.79592800149815e-07, + "loss": 0.00010473666770849377, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3264, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.5, + "completions/mean_length": 59.25000238418579, + "completions/min_length": 22.375, + "epoch": 6.490444278977414, + "grad_norm": 0.08252447953572653, + "kl": 0.21771240234375, + "learning_rate": 2.7930966587976143e-07, + "loss": 0.00021706405095756054, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3265, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 67.020836353302, + "completions/min_length": 25.25, + "epoch": 6.492429883345744, + "grad_norm": 0.0030416034477067193, + "kl": 0.0877685546875, + "learning_rate": 2.7902661947124333e-07, + "loss": 8.780603820923716e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3266, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.875, + "completions/mean_length": 65.71875238418579, + "completions/min_length": 26.125, + "epoch": 6.494415487714073, + "grad_norm": 0.0034813193820282407, + "kl": 0.07928466796875, + "learning_rate": 2.787436610369483e-07, + "loss": 7.926626858534291e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3267, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 63.06250190734863, + "completions/min_length": 24.875, + "epoch": 6.496401092082403, + "grad_norm": 0.010940672892437874, + "kl": 0.084136962890625, + "learning_rate": 2.784607906895278e-07, + "loss": 8.40155262267217e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3268, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 59.79166793823242, + "completions/min_length": 19.375, + "epoch": 6.498386696450732, + "grad_norm": 0.0033375829341551135, + "kl": 0.083587646484375, + "learning_rate": 2.7817800854159815e-07, + "loss": 8.355158934136853e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3269, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 58.10416793823242, + "completions/min_length": 23.5, + "epoch": 6.500372300819062, + "grad_norm": 2.0072254655143227, + "kl": 0.069976806640625, + "learning_rate": 2.778953147057416e-07, + "loss": 7.003545761108398e-05, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393530294299126, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3270, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.25, + "completions/mean_length": 59.91666841506958, + "completions/min_length": 23.5, + "epoch": 6.502357905187392, + "grad_norm": 0.004254714512047353, + "kl": 0.074066162109375, + "learning_rate": 2.776127092945041e-07, + "loss": 7.399563764920458e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3271, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.375, + "completions/mean_length": 58.60416889190674, + "completions/min_length": 24.25, + "epoch": 6.504343509555721, + "grad_norm": 2.4980405785880406, + "kl": 0.0784912109375, + "learning_rate": 2.7733019242039735e-07, + "loss": 7.837265729904175e-05, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.17921442165970802, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3272, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 70.5729193687439, + "completions/min_length": 25.25, + "epoch": 6.506329113924051, + "grad_norm": 0.004838239962596815, + "kl": 0.0928955078125, + "learning_rate": 2.770477641958968e-07, + "loss": 9.30547685129568e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3273, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 60.302085399627686, + "completions/min_length": 15.0, + "epoch": 6.508314718292381, + "grad_norm": 0.004417981794306718, + "kl": 0.0765380859375, + "learning_rate": 2.767654247334436e-07, + "loss": 7.65023214626126e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3274, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.5, + "completions/mean_length": 68.36458492279053, + "completions/min_length": 21.625, + "epoch": 6.51030032266071, + "grad_norm": 0.9052182285975585, + "kl": 0.093994140625, + "learning_rate": 2.7648317414544315e-07, + "loss": 0.0064375679939985275, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3275, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 64.27083444595337, + "completions/min_length": 26.125, + "epoch": 6.51228592702904, + "grad_norm": 0.004279239580923891, + "kl": 0.078399658203125, + "learning_rate": 2.762010125442651e-07, + "loss": 7.841864135116339e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3276, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.25, + "completions/mean_length": 55.447916984558105, + "completions/min_length": 20.0, + "epoch": 6.514271531397369, + "grad_norm": 0.008940178048317862, + "kl": 0.105072021484375, + "learning_rate": 2.7591894004224436e-07, + "loss": 0.0001050223654601723, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3277, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 59.67708492279053, + "completions/min_length": 20.125, + "epoch": 6.516257135765699, + "grad_norm": 0.008766169059350301, + "kl": 0.0819091796875, + "learning_rate": 2.756369567516799e-07, + "loss": 8.19417618913576e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3278, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 59.54166841506958, + "completions/min_length": 22.375, + "epoch": 6.518242740134029, + "grad_norm": 0.003470903820932684, + "kl": 0.080810546875, + "learning_rate": 2.753550627848349e-07, + "loss": 8.078533574007452e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3279, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 65.11458492279053, + "completions/min_length": 21.375, + "epoch": 6.520228344502358, + "grad_norm": 0.006296316365484303, + "kl": 0.093658447265625, + "learning_rate": 2.7507325825393783e-07, + "loss": 9.377703827340156e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3280, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.0, + "completions/mean_length": 51.83333396911621, + "completions/min_length": 19.0, + "epoch": 6.522213948870688, + "grad_norm": 0.008882777556686077, + "kl": 0.07305908203125, + "learning_rate": 2.7479154327118117e-07, + "loss": 7.30404062778689e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3281, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 63.56250238418579, + "completions/min_length": 26.0, + "epoch": 6.524199553239017, + "grad_norm": 0.0064920878592217976, + "kl": 0.094879150390625, + "learning_rate": 2.7450991794872127e-07, + "loss": 9.492039680480957e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3282, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.625, + "completions/mean_length": 67.36458539962769, + "completions/min_length": 22.25, + "epoch": 6.526185157607347, + "grad_norm": 0.00742882390242665, + "kl": 0.10125732421875, + "learning_rate": 2.742283823986795e-07, + "loss": 0.00010122793901246041, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3283, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.875, + "completions/mean_length": 70.04166984558105, + "completions/min_length": 23.25, + "epoch": 6.528170761975677, + "grad_norm": 0.0037649788742524547, + "kl": 0.082366943359375, + "learning_rate": 2.7394693673314094e-07, + "loss": 8.222565520554781e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3284, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.375, + "completions/mean_length": 60.46875047683716, + "completions/min_length": 22.75, + "epoch": 6.530156366344006, + "grad_norm": 0.003939096813068536, + "kl": 0.100494384765625, + "learning_rate": 2.736655810641555e-07, + "loss": 0.0001004367513814941, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3285, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 66.92708492279053, + "completions/min_length": 22.0, + "epoch": 6.532141970712336, + "grad_norm": 1.5327424197118356, + "kl": 0.27783203125, + "learning_rate": 2.733843155037366e-07, + "loss": -0.003415369428694248, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3286, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 61.8854193687439, + "completions/min_length": 14.625, + "epoch": 6.5341275750806656, + "grad_norm": 0.00946429346113309, + "kl": 0.10809326171875, + "learning_rate": 2.7310314016386163e-07, + "loss": 0.00010801161988638341, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3287, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 67.29166746139526, + "completions/min_length": 22.0, + "epoch": 6.536113179448995, + "grad_norm": 0.8902188756007005, + "kl": 0.091064453125, + "learning_rate": 2.728220551564734e-07, + "loss": -0.015449434518814087, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3288, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 63.031251430511475, + "completions/min_length": 17.875, + "epoch": 6.538098783817325, + "grad_norm": 0.007049569568722197, + "kl": 0.0780029296875, + "learning_rate": 2.7254106059347746e-07, + "loss": 7.805964560247958e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3289, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.5, + "completions/mean_length": 61.10416841506958, + "completions/min_length": 20.0, + "epoch": 6.540084388185654, + "grad_norm": 1.1789705562562038, + "kl": 0.0927734375, + "learning_rate": 2.722601565867435e-07, + "loss": 0.0015071779489517212, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3290, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 69.81250238418579, + "completions/min_length": 23.625, + "epoch": 6.542069992553984, + "grad_norm": 2.562191279643471, + "kl": 0.10760498046875, + "learning_rate": 2.719793432481058e-07, + "loss": -0.00011565785098355263, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.057790376245975494, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3291, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 59.697917461395264, + "completions/min_length": 22.875, + "epoch": 6.5440555969223135, + "grad_norm": 1.4417215365683687, + "kl": 0.094573974609375, + "learning_rate": 2.716986206893618e-07, + "loss": 0.00494935130700469, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3292, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.5, + "completions/mean_length": 52.02083396911621, + "completions/min_length": 17.75, + "epoch": 6.546041201290643, + "grad_norm": 0.007345456597503264, + "kl": 0.10198974609375, + "learning_rate": 2.714179890222734e-07, + "loss": 0.00010200271208304912, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3293, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.5, + "completions/mean_length": 69.58333492279053, + "completions/min_length": 20.5, + "epoch": 6.5480268056589725, + "grad_norm": 1.4443127585743225, + "kl": 0.12353515625, + "learning_rate": 2.711374483585658e-07, + "loss": -0.00837962981313467, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3294, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 70.57291793823242, + "completions/min_length": 24.875, + "epoch": 6.550012410027302, + "grad_norm": 1.4237256159871454, + "kl": 0.081573486328125, + "learning_rate": 2.7085699880992845e-07, + "loss": 0.007895395159721375, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3295, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 57.56250238418579, + "completions/min_length": 19.75, + "epoch": 6.551998014395632, + "grad_norm": 0.006446528284835301, + "kl": 0.087432861328125, + "learning_rate": 2.7057664048801445e-07, + "loss": 8.73694516485557e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3296, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 60.94791889190674, + "completions/min_length": 20.875, + "epoch": 6.5539836187639615, + "grad_norm": 0.004534005863694354, + "kl": 0.07562255859375, + "learning_rate": 2.7029637350444026e-07, + "loss": 7.55086075514555e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3297, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.0, + "completions/mean_length": 56.93750190734863, + "completions/min_length": 19.5, + "epoch": 6.555969223132291, + "grad_norm": 0.006758925072616362, + "kl": 0.073638916015625, + "learning_rate": 2.700161979707859e-07, + "loss": 7.370335515588522e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3298, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 59.80208444595337, + "completions/min_length": 23.125, + "epoch": 6.5579548275006205, + "grad_norm": 0.005869538046115024, + "kl": 0.112152099609375, + "learning_rate": 2.6973611399859564e-07, + "loss": 0.00011218262807233259, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3299, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 57.75000286102295, + "completions/min_length": 18.625, + "epoch": 6.5599404318689505, + "grad_norm": 0.00731370251179893, + "kl": 0.08160400390625, + "learning_rate": 2.694561216993766e-07, + "loss": 8.1530241004657e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3300, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 53.28125190734863, + "completions/min_length": 20.5, + "epoch": 6.5619260362372795, + "grad_norm": 1.879212668413089, + "kl": 0.0811767578125, + "learning_rate": 2.691762211845997e-07, + "loss": -0.0052892486564815044, + "memory(GiB)": 94.21, + "reward": 1.9479166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9479166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3301, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 68.54166793823242, + "completions/min_length": 23.125, + "epoch": 6.5639116406056095, + "grad_norm": 0.006342730881970545, + "kl": 0.093017578125, + "learning_rate": 2.6889641256569973e-07, + "loss": 9.30671812966466e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3302, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 57.97916841506958, + "completions/min_length": 18.875, + "epoch": 6.565897244973939, + "grad_norm": 0.005479099147503255, + "kl": 0.09716796875, + "learning_rate": 2.686166959540739e-07, + "loss": 9.717326611280441e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3303, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 69.11458444595337, + "completions/min_length": 24.75, + "epoch": 6.5678828493422685, + "grad_norm": 0.004939207796209544, + "kl": 0.075286865234375, + "learning_rate": 2.6833707146108386e-07, + "loss": 7.521603401983157e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3304, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.0, + "completions/mean_length": 67.20833539962769, + "completions/min_length": 23.375, + "epoch": 6.5698684537105985, + "grad_norm": 0.2797970189304334, + "kl": 0.197479248046875, + "learning_rate": 2.68057539198054e-07, + "loss": 0.00019732053624466062, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3305, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.875, + "completions/mean_length": 68.28125238418579, + "completions/min_length": 23.25, + "epoch": 6.5718540580789275, + "grad_norm": 0.00428974557960698, + "kl": 0.081756591796875, + "learning_rate": 2.677780992762716e-07, + "loss": 8.18474218249321e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3306, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 69.63541889190674, + "completions/min_length": 25.875, + "epoch": 6.5738396624472575, + "grad_norm": 0.00429191783697208, + "kl": 0.07598876953125, + "learning_rate": 2.674987518069883e-07, + "loss": 7.596724026370794e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3307, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 60.42708492279053, + "completions/min_length": 23.25, + "epoch": 6.5758252668155865, + "grad_norm": 0.003882740737015487, + "kl": 0.10540771484375, + "learning_rate": 2.6721949690141776e-07, + "loss": 0.00010550117440288886, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3308, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 65.63541889190674, + "completions/min_length": 27.125, + "epoch": 6.5778108711839165, + "grad_norm": 0.003647339858864909, + "kl": 0.080902099609375, + "learning_rate": 2.669403346707376e-07, + "loss": 8.098404214251786e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3309, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 61.28125190734863, + "completions/min_length": 24.625, + "epoch": 6.5797964755522464, + "grad_norm": 0.005204546559226765, + "kl": 0.088958740234375, + "learning_rate": 2.666612652260885e-07, + "loss": 8.892134064808488e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3310, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 59.02083396911621, + "completions/min_length": 24.125, + "epoch": 6.5817820799205755, + "grad_norm": 1.922672390197199, + "kl": 0.0869140625, + "learning_rate": 2.6638228867857347e-07, + "loss": 0.00024019306874834, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3311, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 60.656251430511475, + "completions/min_length": 19.5, + "epoch": 6.5837676842889055, + "grad_norm": 0.0032595686262476106, + "kl": 0.0914306640625, + "learning_rate": 2.661034051392595e-07, + "loss": 9.139993926510215e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3312, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 61.520835399627686, + "completions/min_length": 15.125, + "epoch": 6.585753288657235, + "grad_norm": 0.006045836019464943, + "kl": 0.082916259765625, + "learning_rate": 2.658246147191756e-07, + "loss": 8.286805677926168e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3313, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 65.15625238418579, + "completions/min_length": 18.625, + "epoch": 6.5877388930255645, + "grad_norm": 0.0038154608461361413, + "kl": 0.11724853515625, + "learning_rate": 2.6554591752931455e-07, + "loss": 0.00011719104077201337, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3314, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 58.11458492279053, + "completions/min_length": 20.75, + "epoch": 6.589724497393894, + "grad_norm": 0.005446723226356524, + "kl": 0.078704833984375, + "learning_rate": 2.652673136806317e-07, + "loss": 7.86786840762943e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3315, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 59.54166793823242, + "completions/min_length": 19.375, + "epoch": 6.5917101017622235, + "grad_norm": 0.0034012921118688104, + "kl": 0.087188720703125, + "learning_rate": 2.649888032840448e-07, + "loss": 8.71704105520621e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3316, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.875, + "completions/mean_length": 75.50000286102295, + "completions/min_length": 22.375, + "epoch": 6.5936957061305534, + "grad_norm": 0.0036895044989605275, + "kl": 0.0858154296875, + "learning_rate": 2.647103864504353e-07, + "loss": 8.579499990446493e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3317, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.625, + "completions/mean_length": 56.3229193687439, + "completions/min_length": 19.125, + "epoch": 6.595681310498883, + "grad_norm": 0.004282274991072171, + "kl": 0.079315185546875, + "learning_rate": 2.644320632906466e-07, + "loss": 7.92255304986611e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3318, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.25, + "completions/mean_length": 66.55208539962769, + "completions/min_length": 19.0, + "epoch": 6.5976669148672125, + "grad_norm": 0.0031926142548439115, + "kl": 0.079437255859375, + "learning_rate": 2.6415383391548494e-07, + "loss": 7.944124809000641e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3319, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.25, + "completions/mean_length": 56.48958444595337, + "completions/min_length": 20.625, + "epoch": 6.599652519235542, + "grad_norm": 0.003399086594998678, + "kl": 0.092681884765625, + "learning_rate": 2.638756984357198e-07, + "loss": 9.269505244446918e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3320, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 64.33333539962769, + "completions/min_length": 22.875, + "epoch": 6.6016381236038715, + "grad_norm": 0.0038492704191697984, + "kl": 0.08599853515625, + "learning_rate": 2.635976569620823e-07, + "loss": 8.592945232521743e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3321, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 62.04166793823242, + "completions/min_length": 21.5, + "epoch": 6.603623727972201, + "grad_norm": 0.004115888856919297, + "kl": 0.072998046875, + "learning_rate": 2.6331970960526704e-07, + "loss": 7.300818833755329e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3322, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 62.57291841506958, + "completions/min_length": 19.0, + "epoch": 6.605609332340531, + "grad_norm": 0.0071639383081339245, + "kl": 0.090667724609375, + "learning_rate": 2.6304185647593105e-07, + "loss": 9.062886238098145e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3323, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 71.81250238418579, + "completions/min_length": 18.5, + "epoch": 6.6075949367088604, + "grad_norm": 0.0039402091299118825, + "kl": 0.091644287109375, + "learning_rate": 2.62764097684693e-07, + "loss": 9.170065459329635e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3324, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 77.41666889190674, + "completions/min_length": 25.875, + "epoch": 6.60958054107719, + "grad_norm": 0.008417197344632511, + "kl": 0.1055908203125, + "learning_rate": 2.6248643334213513e-07, + "loss": 0.00010556657798588276, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3325, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.375, + "completions/mean_length": 62.44791841506958, + "completions/min_length": 29.875, + "epoch": 6.61156614544552, + "grad_norm": 0.0036738559349019147, + "kl": 0.075714111328125, + "learning_rate": 2.6220886355880126e-07, + "loss": 7.577511132694781e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3326, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.375, + "completions/mean_length": 63.07291841506958, + "completions/min_length": 21.375, + "epoch": 6.613551749813849, + "grad_norm": 0.004403329476001133, + "kl": 0.086395263671875, + "learning_rate": 2.619313884451978e-07, + "loss": 8.63686072989367e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3327, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.25, + "completions/mean_length": 62.645835399627686, + "completions/min_length": 19.75, + "epoch": 6.615537354182179, + "grad_norm": 0.003608955352380806, + "kl": 0.0836181640625, + "learning_rate": 2.6165400811179363e-07, + "loss": 8.356331090908498e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3328, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.125, + "completions/mean_length": 72.13541889190674, + "completions/min_length": 24.75, + "epoch": 6.617522958550508, + "grad_norm": 0.0028169912923870478, + "kl": 0.074249267578125, + "learning_rate": 2.6137672266901986e-07, + "loss": 7.409360841847956e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3329, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 66.3854193687439, + "completions/min_length": 16.75, + "epoch": 6.619508562918838, + "grad_norm": 0.003873397827965879, + "kl": 0.0819091796875, + "learning_rate": 2.610995322272696e-07, + "loss": 8.192495442926884e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3330, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 63.708335399627686, + "completions/min_length": 20.5, + "epoch": 6.621494167287168, + "grad_norm": 2.7306234425268365, + "kl": 0.093902587890625, + "learning_rate": 2.6082243689689854e-07, + "loss": -0.010180055163800716, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3331, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 59.91666793823242, + "completions/min_length": 16.375, + "epoch": 6.623479771655497, + "grad_norm": 0.056881296270132796, + "kl": 0.103759765625, + "learning_rate": 2.605454367882238e-07, + "loss": 0.00010373154509579763, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3332, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 67.44791889190674, + "completions/min_length": 18.0, + "epoch": 6.625465376023827, + "grad_norm": 1.6922141995416082, + "kl": 0.09228515625, + "learning_rate": 2.6026853201152553e-07, + "loss": -0.009174630045890808, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666679084301, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3333, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 61.89583396911621, + "completions/min_length": 25.25, + "epoch": 6.627450980392156, + "grad_norm": 1.4010296660145436, + "kl": 0.098602294921875, + "learning_rate": 2.599917226770453e-07, + "loss": -0.0017798136686906219, + "memory(GiB)": 94.21, + "reward": 1.6458333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6458333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3334, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 57.98958444595337, + "completions/min_length": 15.75, + "epoch": 6.629436584760486, + "grad_norm": 0.8062404108832326, + "kl": 0.08477783203125, + "learning_rate": 2.5971500889498623e-07, + "loss": -0.012669868767261505, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3335, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.5, + "completions/mean_length": 72.43750190734863, + "completions/min_length": 21.375, + "epoch": 6.631422189128816, + "grad_norm": 0.005339388316280159, + "kl": 0.07958984375, + "learning_rate": 2.5943839077551487e-07, + "loss": 7.962968084029853e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3336, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 55.05208492279053, + "completions/min_length": 18.0, + "epoch": 6.633407793497145, + "grad_norm": 0.007697588192921968, + "kl": 0.092041015625, + "learning_rate": 2.5916186842875855e-07, + "loss": 9.184511145576835e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3337, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 187.5, + "completions/mean_length": 68.23958587646484, + "completions/min_length": 20.125, + "epoch": 6.635393397865475, + "grad_norm": 0.004779643424237663, + "kl": 0.0899658203125, + "learning_rate": 2.5888544196480625e-07, + "loss": 9.000241698231548e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3338, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 69.427086353302, + "completions/min_length": 25.375, + "epoch": 6.637379002233805, + "grad_norm": 0.004951351674302059, + "kl": 0.07647705078125, + "learning_rate": 2.586091114937099e-07, + "loss": 7.65608056099154e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3339, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.375, + "completions/mean_length": 56.82291793823242, + "completions/min_length": 26.625, + "epoch": 6.639364606602134, + "grad_norm": 0.00418829974208351, + "kl": 0.100006103515625, + "learning_rate": 2.5833287712548197e-07, + "loss": 0.00010003871284425259, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3340, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.0, + "completions/mean_length": 61.41666889190674, + "completions/min_length": 22.5, + "epoch": 6.641350210970464, + "grad_norm": 2.106885645935163, + "kl": 0.084716796875, + "learning_rate": 2.580567389700978e-07, + "loss": 0.002495020627975464, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.05653337761759758, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3341, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 66.79166841506958, + "completions/min_length": 22.125, + "epoch": 6.643335815338793, + "grad_norm": 0.005040883730130993, + "kl": 0.095458984375, + "learning_rate": 2.577806971374934e-07, + "loss": 9.541861800244078e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3342, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.375, + "completions/mean_length": 53.52083468437195, + "completions/min_length": 23.25, + "epoch": 6.645321419707123, + "grad_norm": 0.013046118876608146, + "kl": 0.084259033203125, + "learning_rate": 2.575047517375671e-07, + "loss": 8.429917215835303e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3343, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 56.44791793823242, + "completions/min_length": 19.0, + "epoch": 6.647307024075453, + "grad_norm": 0.9421490556089949, + "kl": 0.08135986328125, + "learning_rate": 2.5722890288017906e-07, + "loss": -0.00587873300537467, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3344, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 60.55208396911621, + "completions/min_length": 24.75, + "epoch": 6.649292628443782, + "grad_norm": 0.006296653356111127, + "kl": 0.072509765625, + "learning_rate": 2.5695315067515014e-07, + "loss": 7.25698919268325e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3345, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.625, + "completions/mean_length": 60.864586353302, + "completions/min_length": 19.75, + "epoch": 6.651278232812112, + "grad_norm": 0.004095511979683474, + "kl": 0.08154296875, + "learning_rate": 2.566774952322631e-07, + "loss": 8.152994996635243e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3346, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 61.65625190734863, + "completions/min_length": 18.0, + "epoch": 6.653263837180441, + "grad_norm": 0.005288149977675071, + "kl": 0.0723876953125, + "learning_rate": 2.5640193666126277e-07, + "loss": 7.223733700811863e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3347, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.375, + "completions/mean_length": 62.88541889190674, + "completions/min_length": 23.25, + "epoch": 6.655249441548771, + "grad_norm": 0.00449370621506626, + "kl": 0.08258056640625, + "learning_rate": 2.5612647507185426e-07, + "loss": 8.245091157732531e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3348, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.625, + "completions/mean_length": 65.17708587646484, + "completions/min_length": 22.125, + "epoch": 6.657235045917101, + "grad_norm": 0.9276794784041164, + "kl": 0.090484619140625, + "learning_rate": 2.558511105737051e-07, + "loss": -0.01506928913295269, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3349, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.125, + "completions/mean_length": 63.906251430511475, + "completions/min_length": 14.375, + "epoch": 6.65922065028543, + "grad_norm": 0.004896746325986625, + "kl": 0.077728271484375, + "learning_rate": 2.555758432764439e-07, + "loss": 7.765968621242791e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3350, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 58.36458492279053, + "completions/min_length": 21.375, + "epoch": 6.66120625465376, + "grad_norm": 0.003631481854431352, + "kl": 0.07855224609375, + "learning_rate": 2.553006732896601e-07, + "loss": 7.859925972297788e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3351, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.75, + "completions/mean_length": 60.75000238418579, + "completions/min_length": 24.75, + "epoch": 6.66319185902209, + "grad_norm": 0.004907615047471998, + "kl": 0.0843505859375, + "learning_rate": 2.550256007229051e-07, + "loss": 8.43419911689125e-05, + "memory(GiB)": 94.21, + "reward": 1.4375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.4375, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3352, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 57.64583444595337, + "completions/min_length": 24.0, + "epoch": 6.665177463390419, + "grad_norm": 0.0037962988035943708, + "kl": 0.08966064453125, + "learning_rate": 2.547506256856907e-07, + "loss": 8.961845742305741e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3353, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.125, + "completions/mean_length": 58.63541889190674, + "completions/min_length": 22.25, + "epoch": 6.667163067758749, + "grad_norm": 0.004024698613476663, + "kl": 0.063323974609375, + "learning_rate": 2.5447574828749094e-07, + "loss": 6.329286406980827e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3354, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 62.4479193687439, + "completions/min_length": 21.875, + "epoch": 6.669148672127078, + "grad_norm": 0.005221276344265532, + "kl": 0.078155517578125, + "learning_rate": 2.5420096863774e-07, + "loss": 7.819536403985694e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3355, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 64.05208492279053, + "completions/min_length": 24.375, + "epoch": 6.671134276495408, + "grad_norm": 0.0036717399578703115, + "kl": 0.0992431640625, + "learning_rate": 2.5392628684583326e-07, + "loss": 9.928403596859425e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3356, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 63.87500190734863, + "completions/min_length": 24.5, + "epoch": 6.673119880863738, + "grad_norm": 0.9452003011181197, + "kl": 0.073333740234375, + "learning_rate": 2.536517030211281e-07, + "loss": 0.0017337091267108917, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3357, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 63.145835399627686, + "completions/min_length": 19.25, + "epoch": 6.675105485232067, + "grad_norm": 0.0036780416918719954, + "kl": 0.065460205078125, + "learning_rate": 2.5337721727294183e-07, + "loss": 6.55170006211847e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3358, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 57.572917461395264, + "completions/min_length": 19.125, + "epoch": 6.677091089600397, + "grad_norm": 0.0041497013124343314, + "kl": 0.0875244140625, + "learning_rate": 2.531028297105529e-07, + "loss": 8.741334022488445e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3359, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.875, + "completions/mean_length": 68.39583587646484, + "completions/min_length": 20.375, + "epoch": 6.679076693968726, + "grad_norm": 0.003483883629828705, + "kl": 0.0755615234375, + "learning_rate": 2.528285404432013e-07, + "loss": 7.548509893240407e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3360, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 66.55208539962769, + "completions/min_length": 22.0, + "epoch": 6.681062298337056, + "grad_norm": 0.0046859575805675395, + "kl": 0.067352294921875, + "learning_rate": 2.52554349580087e-07, + "loss": 6.732526526320726e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3361, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 58.687500953674316, + "completions/min_length": 17.0, + "epoch": 6.683047902705386, + "grad_norm": 0.007134520061754884, + "kl": 0.10400390625, + "learning_rate": 2.522802572303716e-07, + "loss": 0.00010390947863925248, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3362, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.625, + "completions/mean_length": 63.635419845581055, + "completions/min_length": 19.0, + "epoch": 6.685033507073715, + "grad_norm": 0.00442518071558769, + "kl": 0.102996826171875, + "learning_rate": 2.520062635031768e-07, + "loss": 0.00010302726877853274, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3363, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 57.01041841506958, + "completions/min_length": 26.5, + "epoch": 6.687019111442045, + "grad_norm": 2.2339347833199987, + "kl": 0.09173583984375, + "learning_rate": 2.517323685075855e-07, + "loss": 0.0046552326530218124, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3364, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 67.09375190734863, + "completions/min_length": 26.25, + "epoch": 6.689004715810375, + "grad_norm": 0.005528153582910281, + "kl": 0.08135986328125, + "learning_rate": 2.514585723526414e-07, + "loss": 8.139129204209894e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3365, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.5, + "completions/mean_length": 56.156250953674316, + "completions/min_length": 20.625, + "epoch": 6.690990320178704, + "grad_norm": 0.006064638235015257, + "kl": 0.072509765625, + "learning_rate": 2.5118487514734843e-07, + "loss": 7.242577703436837e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3366, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.75, + "completions/mean_length": 69.78125381469727, + "completions/min_length": 23.375, + "epoch": 6.692975924547034, + "grad_norm": 0.0037611521765138862, + "kl": 0.09722900390625, + "learning_rate": 2.5091127700067094e-07, + "loss": 9.72005509538576e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3367, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.25, + "completions/mean_length": 57.66666841506958, + "completions/min_length": 21.625, + "epoch": 6.694961528915364, + "grad_norm": 0.006795767843164217, + "kl": 0.06707763671875, + "learning_rate": 2.5063777802153477e-07, + "loss": 6.716744974255562e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3368, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 64.2916693687439, + "completions/min_length": 27.625, + "epoch": 6.696947133283693, + "grad_norm": 0.00515757239988111, + "kl": 0.08392333984375, + "learning_rate": 2.503643783188251e-07, + "loss": 8.38606501929462e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3369, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 68.114586353302, + "completions/min_length": 18.875, + "epoch": 6.698932737652023, + "grad_norm": 1.3370124218827615, + "kl": 0.10272216796875, + "learning_rate": 2.5009107800138864e-07, + "loss": -0.0020703524351119995, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3370, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 67.21875238418579, + "completions/min_length": 20.875, + "epoch": 6.700918342020352, + "grad_norm": 0.005385096448298331, + "kl": 0.09234619140625, + "learning_rate": 2.4981787717803206e-07, + "loss": 9.226000838680193e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3371, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 67.89583539962769, + "completions/min_length": 23.25, + "epoch": 6.702903946388682, + "grad_norm": 0.003069569510925218, + "kl": 0.074554443359375, + "learning_rate": 2.4954477595752215e-07, + "loss": 7.460590859409422e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3372, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 62.45833492279053, + "completions/min_length": 20.875, + "epoch": 6.704889550757011, + "grad_norm": 0.0031521617109663107, + "kl": 0.076873779296875, + "learning_rate": 2.492717744485868e-07, + "loss": 7.6853517384734e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3373, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 60.375000953674316, + "completions/min_length": 24.0, + "epoch": 6.706875155125341, + "grad_norm": 0.008846982970644401, + "kl": 0.0711669921875, + "learning_rate": 2.4899887275991344e-07, + "loss": 7.109522266546264e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3374, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.5, + "completions/mean_length": 71.54166889190674, + "completions/min_length": 28.75, + "epoch": 6.708860759493671, + "grad_norm": 0.0032348623832891216, + "kl": 0.09027099609375, + "learning_rate": 2.4872607100014984e-07, + "loss": 9.030763612827286e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3375, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 67.41666793823242, + "completions/min_length": 20.0, + "epoch": 6.710846363862, + "grad_norm": 0.004185812365160031, + "kl": 0.0830078125, + "learning_rate": 2.484533692779047e-07, + "loss": 8.305534720420837e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3376, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.5, + "completions/mean_length": 60.91666793823242, + "completions/min_length": 21.25, + "epoch": 6.71283196823033, + "grad_norm": 0.0035172018483580576, + "kl": 0.09246826171875, + "learning_rate": 2.48180767701746e-07, + "loss": 9.239085193257779e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3377, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 69.02083539962769, + "completions/min_length": 20.0, + "epoch": 6.71481757259866, + "grad_norm": 0.003868128395804147, + "kl": 0.11822509765625, + "learning_rate": 2.479082663802024e-07, + "loss": 0.0001183371277875267, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3378, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 58.19791841506958, + "completions/min_length": 21.625, + "epoch": 6.716803176966989, + "grad_norm": 0.005296618629163508, + "kl": 0.065673828125, + "learning_rate": 2.476358654217627e-07, + "loss": 6.562774069607258e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3379, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.875, + "completions/mean_length": 57.000001430511475, + "completions/min_length": 22.0, + "epoch": 6.718788781335319, + "grad_norm": 0.003523144574405198, + "kl": 0.085205078125, + "learning_rate": 2.4736356493487516e-07, + "loss": 8.507106394972652e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3380, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 71.76041889190674, + "completions/min_length": 22.875, + "epoch": 6.720774385703649, + "grad_norm": 0.0031552549941445437, + "kl": 0.0738525390625, + "learning_rate": 2.4709136502794875e-07, + "loss": 7.378502778010443e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3381, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 103.0, + "completions/mean_length": 51.98958396911621, + "completions/min_length": 20.125, + "epoch": 6.722759990071978, + "grad_norm": 0.006089569665799314, + "kl": 0.074676513671875, + "learning_rate": 2.4681926580935196e-07, + "loss": 7.465011003660038e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3382, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 60.33333444595337, + "completions/min_length": 25.875, + "epoch": 6.724745594440308, + "grad_norm": 0.008489593534347837, + "kl": 0.07373046875, + "learning_rate": 2.4654726738741294e-07, + "loss": 7.368314254563302e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3383, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 65.91666793823242, + "completions/min_length": 20.625, + "epoch": 6.726731198808637, + "grad_norm": 0.004086129068409202, + "kl": 0.079833984375, + "learning_rate": 2.462753698704207e-07, + "loss": 7.98147520981729e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3384, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 71.9166693687439, + "completions/min_length": 23.0, + "epoch": 6.728716803176967, + "grad_norm": 0.003876205627064784, + "kl": 0.06903076171875, + "learning_rate": 2.4600357336662317e-07, + "loss": 6.896528066135943e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3385, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 61.15625190734863, + "completions/min_length": 23.125, + "epoch": 6.730702407545296, + "grad_norm": 0.011433983871249859, + "kl": 0.112091064453125, + "learning_rate": 2.4573187798422814e-07, + "loss": 0.0001121356908697635, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3386, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 64.36458396911621, + "completions/min_length": 26.125, + "epoch": 6.732688011913626, + "grad_norm": 0.0036052930835619276, + "kl": 0.067901611328125, + "learning_rate": 2.454602838314037e-07, + "loss": 6.791093619540334e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3387, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.25, + "completions/mean_length": 66.645836353302, + "completions/min_length": 22.5, + "epoch": 6.734673616281956, + "grad_norm": 0.0029979367074048175, + "kl": 0.084320068359375, + "learning_rate": 2.4518879101627695e-07, + "loss": 8.429453009739518e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3388, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 65.08333539962769, + "completions/min_length": 26.375, + "epoch": 6.736659220650285, + "grad_norm": 1.275504371865935, + "kl": 0.086669921875, + "learning_rate": 2.449173996469353e-07, + "loss": 0.005607105791568756, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3389, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 58.13541793823242, + "completions/min_length": 23.25, + "epoch": 6.738644825018615, + "grad_norm": 0.004657554891619219, + "kl": 0.079132080078125, + "learning_rate": 2.4464610983142507e-07, + "loss": 7.919049676274881e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3390, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 63.281251430511475, + "completions/min_length": 20.875, + "epoch": 6.740630429386945, + "grad_norm": 0.03311615389314626, + "kl": 0.072235107421875, + "learning_rate": 2.443749216777528e-07, + "loss": 7.219881808850914e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3391, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 71.06250143051147, + "completions/min_length": 21.125, + "epoch": 6.742616033755274, + "grad_norm": 0.00642804698533361, + "kl": 0.1016845703125, + "learning_rate": 2.441038352938844e-07, + "loss": 0.0001016240130411461, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3392, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 185.125, + "completions/mean_length": 71.6354193687439, + "completions/min_length": 27.375, + "epoch": 6.744601638123604, + "grad_norm": 0.004152023186907524, + "kl": 0.091552734375, + "learning_rate": 2.4383285078774487e-07, + "loss": 9.157097520073876e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3393, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 64.78125190734863, + "completions/min_length": 25.875, + "epoch": 6.746587242491934, + "grad_norm": 0.00548445749648638, + "kl": 0.09490966796875, + "learning_rate": 2.4356196826721913e-07, + "loss": 9.482722089160234e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3394, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 64.34375095367432, + "completions/min_length": 24.75, + "epoch": 6.748572846860263, + "grad_norm": 1.9910135055442078, + "kl": 0.13055419921875, + "learning_rate": 2.4329118784015125e-07, + "loss": -0.006759150885045528, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3395, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.125, + "completions/mean_length": 59.80208492279053, + "completions/min_length": 23.125, + "epoch": 6.750558451228593, + "grad_norm": 0.8985845256202774, + "kl": 0.0772705078125, + "learning_rate": 2.4302050961434443e-07, + "loss": 0.003503883257508278, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3396, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.0, + "completions/mean_length": 70.57291889190674, + "completions/min_length": 21.875, + "epoch": 6.752544055596922, + "grad_norm": 0.003441792077707185, + "kl": 0.076263427734375, + "learning_rate": 2.427499336975618e-07, + "loss": 7.62996933190152e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3397, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 64.82291793823242, + "completions/min_length": 25.125, + "epoch": 6.754529659965252, + "grad_norm": 0.004492297001124366, + "kl": 0.0665283203125, + "learning_rate": 2.424794601975254e-07, + "loss": 6.658166239503771e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3398, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 68.22916889190674, + "completions/min_length": 26.75, + "epoch": 6.756515264333581, + "grad_norm": 1.105991332780493, + "kl": 0.0843505859375, + "learning_rate": 2.4220908922191625e-07, + "loss": -0.012495587579905987, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3399, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.875, + "completions/mean_length": 49.833335399627686, + "completions/min_length": 18.25, + "epoch": 6.758500868701911, + "grad_norm": 1.2618422887222154, + "kl": 0.09228515625, + "learning_rate": 2.4193882087837514e-07, + "loss": 0.0025725262239575386, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3400, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.5, + "completions/mean_length": 72.08333587646484, + "completions/min_length": 24.875, + "epoch": 6.760486473070241, + "grad_norm": 0.004245976249590136, + "kl": 0.091400146484375, + "learning_rate": 2.416686552745013e-07, + "loss": 9.141544433077797e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3401, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.75, + "completions/mean_length": 65.00000190734863, + "completions/min_length": 24.5, + "epoch": 6.76247207743857, + "grad_norm": 0.003321200138939367, + "kl": 0.064697265625, + "learning_rate": 2.413985925178538e-07, + "loss": 6.467117054853588e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3402, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.0, + "completions/mean_length": 65.91666841506958, + "completions/min_length": 21.25, + "epoch": 6.7644576818069, + "grad_norm": 0.004712657343597449, + "kl": 0.09051513671875, + "learning_rate": 2.411286327159503e-07, + "loss": 9.06178611330688e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3403, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 63.04166889190674, + "completions/min_length": 24.375, + "epoch": 6.76644328617523, + "grad_norm": 0.14265477761747114, + "kl": 0.110382080078125, + "learning_rate": 2.4085877597626704e-07, + "loss": 0.00011040962999686599, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3404, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 66.48958444595337, + "completions/min_length": 25.75, + "epoch": 6.768428890543559, + "grad_norm": 0.007692668588512472, + "kl": 0.09771728515625, + "learning_rate": 2.4058902240624056e-07, + "loss": 9.764514834387228e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3405, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.625, + "completions/mean_length": 55.45833444595337, + "completions/min_length": 20.375, + "epoch": 6.770414494911889, + "grad_norm": 0.008334446388096196, + "kl": 0.116455078125, + "learning_rate": 2.403193721132652e-07, + "loss": 0.00011653061665128917, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3406, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 59.50000190734863, + "completions/min_length": 19.875, + "epoch": 6.772400099280219, + "grad_norm": 0.006609193410780301, + "kl": 0.076904296875, + "learning_rate": 2.400498252046942e-07, + "loss": 7.69533944549039e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3407, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 60.020835399627686, + "completions/min_length": 26.375, + "epoch": 6.774385703648548, + "grad_norm": 0.007581474157891919, + "kl": 0.08856201171875, + "learning_rate": 2.3978038178784043e-07, + "loss": 8.853618055582047e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3408, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.875, + "completions/mean_length": 55.11458492279053, + "completions/min_length": 20.5, + "epoch": 6.776371308016878, + "grad_norm": 0.008603655093382775, + "kl": 0.08026123046875, + "learning_rate": 2.395110419699746e-07, + "loss": 8.022796828299761e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3409, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 65.614586353302, + "completions/min_length": 25.25, + "epoch": 6.778356912385207, + "grad_norm": 0.007818040047445268, + "kl": 0.09033203125, + "learning_rate": 2.3924180585832707e-07, + "loss": 9.022171434480697e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3410, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.25, + "completions/mean_length": 65.0729193687439, + "completions/min_length": 26.5, + "epoch": 6.780342516753537, + "grad_norm": 0.005496575624380883, + "kl": 0.0943603515625, + "learning_rate": 2.3897267356008617e-07, + "loss": 9.42759943427518e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3411, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 61.48958492279053, + "completions/min_length": 22.0, + "epoch": 6.782328121121866, + "grad_norm": 0.006980595328709287, + "kl": 0.08282470703125, + "learning_rate": 2.3870364518239925e-07, + "loss": 8.279483154183254e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3412, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.0, + "completions/mean_length": 59.83333492279053, + "completions/min_length": 26.0, + "epoch": 6.784313725490196, + "grad_norm": 0.00396278425089312, + "kl": 0.0894775390625, + "learning_rate": 2.384347208323726e-07, + "loss": 8.951907511800528e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3413, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 64.20833539962769, + "completions/min_length": 20.5, + "epoch": 6.786299329858526, + "grad_norm": 0.007165150411159889, + "kl": 0.080841064453125, + "learning_rate": 2.381659006170705e-07, + "loss": 8.075873483903706e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3414, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.0, + "completions/mean_length": 70.61458587646484, + "completions/min_length": 21.375, + "epoch": 6.788284934226855, + "grad_norm": 0.9550250805260357, + "kl": 0.10546875, + "learning_rate": 2.3789718464351577e-07, + "loss": 0.009096808731555939, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3415, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 64.48958539962769, + "completions/min_length": 20.5, + "epoch": 6.790270538595185, + "grad_norm": 0.004040958340010559, + "kl": 0.07293701171875, + "learning_rate": 2.3762857301869045e-07, + "loss": 7.294019451364875e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3416, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 66.59375143051147, + "completions/min_length": 20.625, + "epoch": 6.792256142963515, + "grad_norm": 0.004940515684547405, + "kl": 0.091949462890625, + "learning_rate": 2.373600658495341e-07, + "loss": 9.194164158543572e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3417, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 66.29166984558105, + "completions/min_length": 22.75, + "epoch": 6.794241747331844, + "grad_norm": 1.5329658316527761, + "kl": 0.1015625, + "learning_rate": 2.3709166324294545e-07, + "loss": -0.004124955274164677, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3418, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.625, + "completions/mean_length": 58.69791841506958, + "completions/min_length": 21.75, + "epoch": 6.796227351700174, + "grad_norm": 0.005483250933679256, + "kl": 0.07818603515625, + "learning_rate": 2.3682336530578139e-07, + "loss": 7.810754323145375e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3419, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 66.32291841506958, + "completions/min_length": 23.0, + "epoch": 6.798212956068504, + "grad_norm": 1.1923667757231087, + "kl": 0.115966796875, + "learning_rate": 2.3655517214485677e-07, + "loss": 0.014420520514249802, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3420, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.875, + "completions/mean_length": 61.42708444595337, + "completions/min_length": 23.5, + "epoch": 6.800198560436833, + "grad_norm": 0.10072646462925985, + "kl": 0.32781982421875, + "learning_rate": 2.3628708386694536e-07, + "loss": 0.0003270826709922403, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3421, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 63.6666693687439, + "completions/min_length": 23.25, + "epoch": 6.802184164805163, + "grad_norm": 0.00380862300248481, + "kl": 0.0791015625, + "learning_rate": 2.360191005787786e-07, + "loss": 7.902109064161777e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3422, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 62.88541793823242, + "completions/min_length": 25.0, + "epoch": 6.804169769173492, + "grad_norm": 0.0051991632425972874, + "kl": 0.086761474609375, + "learning_rate": 2.3575122238704627e-07, + "loss": 8.665939094498754e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3423, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 64.36458539962769, + "completions/min_length": 20.875, + "epoch": 6.806155373541822, + "grad_norm": 0.00412328354140257, + "kl": 0.076751708984375, + "learning_rate": 2.3548344939839666e-07, + "loss": 7.666759483981878e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3424, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 59.10416889190674, + "completions/min_length": 21.75, + "epoch": 6.808140977910151, + "grad_norm": 0.00577668054951878, + "kl": 0.0816650390625, + "learning_rate": 2.3521578171943562e-07, + "loss": 8.16772153484635e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3425, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.625, + "completions/mean_length": 60.71875190734863, + "completions/min_length": 24.5, + "epoch": 6.810126582278481, + "grad_norm": 0.003848301691714587, + "kl": 0.0677490234375, + "learning_rate": 2.3494821945672754e-07, + "loss": 6.783010030630976e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3426, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 60.927085399627686, + "completions/min_length": 24.625, + "epoch": 6.812112186646811, + "grad_norm": 0.0033841753454069427, + "kl": 0.09063720703125, + "learning_rate": 2.3468076271679487e-07, + "loss": 9.07600624486804e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3427, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.0, + "completions/mean_length": 64.58333492279053, + "completions/min_length": 26.625, + "epoch": 6.81409779101514, + "grad_norm": 0.0039693970757664065, + "kl": 0.09820556640625, + "learning_rate": 2.3441341160611749e-07, + "loss": 9.82606434263289e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3428, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.75, + "completions/mean_length": 65.32291841506958, + "completions/min_length": 21.75, + "epoch": 6.81608339538347, + "grad_norm": 0.003440035934574871, + "kl": 0.0855712890625, + "learning_rate": 2.3414616623113386e-07, + "loss": 8.55953257996589e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3429, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 56.833335399627686, + "completions/min_length": 19.5, + "epoch": 6.8180689997518, + "grad_norm": 0.004359568331895638, + "kl": 0.062957763671875, + "learning_rate": 2.3387902669824e-07, + "loss": 6.287854921538383e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3430, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.125, + "completions/mean_length": 57.656251430511475, + "completions/min_length": 22.25, + "epoch": 6.820054604120129, + "grad_norm": 0.09725043894259304, + "kl": 0.183380126953125, + "learning_rate": 2.3361199311378965e-07, + "loss": 0.00018321051902603358, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3431, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.5, + "completions/mean_length": 62.062501430511475, + "completions/min_length": 23.125, + "epoch": 6.822040208488459, + "grad_norm": 1.58439091359758, + "kl": 0.0914306640625, + "learning_rate": 2.3334506558409473e-07, + "loss": 0.007666006684303284, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.05974817834794521, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.316736813634634, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3432, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 62.05208492279053, + "completions/min_length": 17.875, + "epoch": 6.824025812856789, + "grad_norm": 0.006249547464504909, + "kl": 0.083221435546875, + "learning_rate": 2.3307824421542489e-07, + "loss": 8.323766815010458e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3433, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 69.34375143051147, + "completions/min_length": 19.375, + "epoch": 6.826011417225118, + "grad_norm": 1.917976116834742, + "kl": 0.16632080078125, + "learning_rate": 2.3281152911400742e-07, + "loss": 0.005980097688734531, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3434, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.625, + "completions/mean_length": 64.05208587646484, + "completions/min_length": 26.75, + "epoch": 6.827997021593448, + "grad_norm": 0.00301577880458901, + "kl": 0.077239990234375, + "learning_rate": 2.325449203860273e-07, + "loss": 7.725731120444834e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3435, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 62.13541889190674, + "completions/min_length": 21.5, + "epoch": 6.829982625961777, + "grad_norm": 0.8332981422966501, + "kl": 0.075958251953125, + "learning_rate": 2.3227841813762688e-07, + "loss": -0.01034325361251831, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3436, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 64.14583444595337, + "completions/min_length": 25.0, + "epoch": 6.831968230330107, + "grad_norm": 0.003770732457586283, + "kl": 0.08013916015625, + "learning_rate": 2.3201202247490676e-07, + "loss": 8.001473906915635e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3437, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.75, + "completions/mean_length": 81.0416693687439, + "completions/min_length": 31.625, + "epoch": 6.833953834698436, + "grad_norm": 0.004250726128683241, + "kl": 0.088409423828125, + "learning_rate": 2.317457335039244e-07, + "loss": 8.844550757203251e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3438, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 57.23958444595337, + "completions/min_length": 22.125, + "epoch": 6.835939439066766, + "grad_norm": 0.0031637760279484115, + "kl": 0.060302734375, + "learning_rate": 2.3147955133069537e-07, + "loss": 6.025177935953252e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3439, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 67.40625095367432, + "completions/min_length": 19.875, + "epoch": 6.837925043435096, + "grad_norm": 1.4640450952701392, + "kl": 0.09423828125, + "learning_rate": 2.3121347606119257e-07, + "loss": 0.023468755185604095, + "memory(GiB)": 94.21, + "reward": 1.8125000149011612, + "reward_std": 0.05103103443980217, + "rewards/CineAccuracyORM/mean": 0.8125000027939677, + "rewards/CineAccuracyORM/std": 0.13744790107011795, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3440, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.625, + "completions/mean_length": 53.66666793823242, + "completions/min_length": 20.875, + "epoch": 6.839910647803425, + "grad_norm": 0.003385809205604886, + "kl": 0.088287353515625, + "learning_rate": 2.3094750780134587e-07, + "loss": 8.831812010612339e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3441, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 63.66666841506958, + "completions/min_length": 17.25, + "epoch": 6.841896252171755, + "grad_norm": 0.006235937234724296, + "kl": 0.11151123046875, + "learning_rate": 2.3068164665704336e-07, + "loss": 0.00011143732990603894, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3442, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 66.81250190734863, + "completions/min_length": 26.125, + "epoch": 6.843881856540085, + "grad_norm": 0.0034485157487708735, + "kl": 0.067840576171875, + "learning_rate": 2.304158927341298e-07, + "loss": 6.788223254261538e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3443, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 194.375, + "completions/mean_length": 74.22916793823242, + "completions/min_length": 25.75, + "epoch": 6.845867460908414, + "grad_norm": 0.008456224291448381, + "kl": 0.0745849609375, + "learning_rate": 2.3015024613840738e-07, + "loss": 7.461795030394569e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3444, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.0, + "completions/mean_length": 69.8229193687439, + "completions/min_length": 27.25, + "epoch": 6.847853065276744, + "grad_norm": 0.005448665479385711, + "kl": 0.081878662109375, + "learning_rate": 2.29884706975636e-07, + "loss": 8.166871702997014e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3445, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.5, + "completions/mean_length": 66.75000286102295, + "completions/min_length": 19.125, + "epoch": 6.849838669645074, + "grad_norm": 0.011120099062653334, + "kl": 0.096038818359375, + "learning_rate": 2.2961927535153215e-07, + "loss": 9.604525985196233e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3446, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 65.6666693687439, + "completions/min_length": 23.75, + "epoch": 6.851824274013403, + "grad_norm": 0.02400694885030784, + "kl": 0.13018798828125, + "learning_rate": 2.2935395137176994e-07, + "loss": 0.0001302231685258448, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3447, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 60.645835399627686, + "completions/min_length": 22.75, + "epoch": 6.853809878381733, + "grad_norm": 0.021029487979905625, + "kl": 0.12274169921875, + "learning_rate": 2.2908873514198073e-07, + "loss": 0.00012257686466909945, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3448, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.25, + "completions/mean_length": 53.29166793823242, + "completions/min_length": 20.0, + "epoch": 6.855795482750062, + "grad_norm": 0.008970391235218558, + "kl": 0.086761474609375, + "learning_rate": 2.2882362676775242e-07, + "loss": 8.674825949128717e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3449, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 60.60416841506958, + "completions/min_length": 28.125, + "epoch": 6.857781087118392, + "grad_norm": 1.5560489122166241, + "kl": 0.090179443359375, + "learning_rate": 2.285586263546307e-07, + "loss": 0.002466946840286255, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3450, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 67.76041841506958, + "completions/min_length": 32.125, + "epoch": 6.859766691486721, + "grad_norm": 0.0033272374367833354, + "kl": 0.08209228515625, + "learning_rate": 2.2829373400811763e-07, + "loss": 8.206719940062612e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3451, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.625, + "completions/mean_length": 60.68750190734863, + "completions/min_length": 25.375, + "epoch": 6.861752295855051, + "grad_norm": 0.0037930915863598385, + "kl": 0.080841064453125, + "learning_rate": 2.280289498336724e-07, + "loss": 8.07654723757878e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3452, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 69.62500238418579, + "completions/min_length": 24.75, + "epoch": 6.863737900223381, + "grad_norm": 0.0033053027537494734, + "kl": 0.084625244140625, + "learning_rate": 2.2776427393671143e-07, + "loss": 8.468855958199129e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3453, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 192.25, + "completions/mean_length": 76.04166889190674, + "completions/min_length": 25.875, + "epoch": 6.86572350459171, + "grad_norm": 1.8740541670066135, + "kl": 0.086456298828125, + "learning_rate": 2.2749970642260796e-07, + "loss": 0.0014206450432538986, + "memory(GiB)": 94.21, + "reward": 1.7291666865348816, + "reward_std": 0.06454972177743912, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.3189288526773453, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3454, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 56.69791889190674, + "completions/min_length": 22.125, + "epoch": 6.86770910896004, + "grad_norm": 0.005013546322175355, + "kl": 0.082611083984375, + "learning_rate": 2.272352473966917e-07, + "loss": 8.264806092483923e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3455, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 61.927085399627686, + "completions/min_length": 26.25, + "epoch": 6.86969471332837, + "grad_norm": 0.003636777975726669, + "kl": 0.08807373046875, + "learning_rate": 2.2697089696424976e-07, + "loss": 8.801998774288222e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3456, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.875, + "completions/mean_length": 70.16666889190674, + "completions/min_length": 29.0, + "epoch": 6.871680317696699, + "grad_norm": 0.0049014740364062516, + "kl": 0.08245849609375, + "learning_rate": 2.267066552305253e-07, + "loss": 8.244160562753677e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3457, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 60.78125190734863, + "completions/min_length": 22.25, + "epoch": 6.873665922065029, + "grad_norm": 0.005104505422028492, + "kl": 0.077484130859375, + "learning_rate": 2.2644252230071898e-07, + "loss": 7.759316940791905e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3458, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 66.48958492279053, + "completions/min_length": 24.875, + "epoch": 6.875651526433359, + "grad_norm": 0.0050558672239010985, + "kl": 0.074737548828125, + "learning_rate": 2.2617849827998736e-07, + "loss": 7.47351732570678e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3459, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.625, + "completions/mean_length": 68.00000143051147, + "completions/min_length": 24.25, + "epoch": 6.877637130801688, + "grad_norm": 0.004046435436120138, + "kl": 0.08929443359375, + "learning_rate": 2.259145832734443e-07, + "loss": 8.930674812290817e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3460, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 62.21875190734863, + "completions/min_length": 21.125, + "epoch": 6.879622735170018, + "grad_norm": 0.005786762414426067, + "kl": 0.084991455078125, + "learning_rate": 2.2565077738616023e-07, + "loss": 8.511268970323727e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3461, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 63.68750190734863, + "completions/min_length": 22.875, + "epoch": 6.881608339538347, + "grad_norm": 0.003251246710402099, + "kl": 0.0687255859375, + "learning_rate": 2.2538708072316153e-07, + "loss": 6.87915162416175e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3462, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 71.8229193687439, + "completions/min_length": 26.875, + "epoch": 6.883593943906677, + "grad_norm": 0.005648669361979694, + "kl": 0.0877685546875, + "learning_rate": 2.2512349338943148e-07, + "loss": 8.774442540016025e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3463, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 62.875001430511475, + "completions/min_length": 27.75, + "epoch": 6.885579548275006, + "grad_norm": 0.005085901688629966, + "kl": 0.08203125, + "learning_rate": 2.2486001548991014e-07, + "loss": 8.193984103854746e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3464, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.125, + "completions/mean_length": 62.468750953674316, + "completions/min_length": 20.0, + "epoch": 6.887565152643336, + "grad_norm": 0.0036000547692858933, + "kl": 0.0677490234375, + "learning_rate": 2.2459664712949323e-07, + "loss": 6.767272134311497e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3465, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 62.26041841506958, + "completions/min_length": 19.75, + "epoch": 6.889550757011666, + "grad_norm": 0.005018476686333123, + "kl": 0.078277587890625, + "learning_rate": 2.2433338841303363e-07, + "loss": 7.828741945559159e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3466, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 65.7604193687439, + "completions/min_length": 23.875, + "epoch": 6.891536361379995, + "grad_norm": 0.0054465563280586445, + "kl": 0.0760498046875, + "learning_rate": 2.2407023944534032e-07, + "loss": 7.601312245242298e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3467, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.125, + "completions/mean_length": 57.42708444595337, + "completions/min_length": 27.0, + "epoch": 6.893521965748325, + "grad_norm": 0.003232636385927136, + "kl": 0.061798095703125, + "learning_rate": 2.2380720033117829e-07, + "loss": 6.178372859722003e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3468, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 58.13541793823242, + "completions/min_length": 18.125, + "epoch": 6.895507570116655, + "grad_norm": 1.7874585035156132, + "kl": 0.094512939453125, + "learning_rate": 2.235442711752693e-07, + "loss": -0.011597169563174248, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3469, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.0, + "completions/mean_length": 56.22916793823242, + "completions/min_length": 21.25, + "epoch": 6.897493174484984, + "grad_norm": 0.004961953164904006, + "kl": 0.094879150390625, + "learning_rate": 2.2328145208229094e-07, + "loss": 9.486427006777376e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3470, + "train_speed(iter/s)": 0.022665 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.75, + "completions/mean_length": 59.218751430511475, + "completions/min_length": 17.5, + "epoch": 6.899478778853314, + "grad_norm": 0.00332144047900622, + "kl": 0.0723876953125, + "learning_rate": 2.2301874315687692e-07, + "loss": 7.2396345785819e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3471, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 61.47916889190674, + "completions/min_length": 25.625, + "epoch": 6.901464383221644, + "grad_norm": 0.030713156031456686, + "kl": 0.10504150390625, + "learning_rate": 2.2275614450361758e-07, + "loss": 0.00010503574594622478, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3472, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 63.72916889190674, + "completions/min_length": 22.0, + "epoch": 6.903449987589973, + "grad_norm": 0.0063864484231935265, + "kl": 0.091461181640625, + "learning_rate": 2.2249365622705851e-07, + "loss": 9.151537960860878e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3473, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.125, + "completions/mean_length": 68.54166889190674, + "completions/min_length": 19.875, + "epoch": 6.905435591958303, + "grad_norm": 0.003606612700822254, + "kl": 0.0810546875, + "learning_rate": 2.222312784317027e-07, + "loss": 8.102629362838343e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3474, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.625, + "completions/mean_length": 69.96875238418579, + "completions/min_length": 26.125, + "epoch": 6.907421196326632, + "grad_norm": 0.00931615978106732, + "kl": 0.078948974609375, + "learning_rate": 2.2196901122200795e-07, + "loss": 7.907981489552185e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3475, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.25, + "completions/mean_length": 66.13541841506958, + "completions/min_length": 27.25, + "epoch": 6.909406800694962, + "grad_norm": 0.003297868054470474, + "kl": 0.085235595703125, + "learning_rate": 2.217068547023882e-07, + "loss": 8.517040259903297e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3476, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.125, + "completions/mean_length": 73.78125143051147, + "completions/min_length": 31.75, + "epoch": 6.911392405063291, + "grad_norm": 0.003026304146279524, + "kl": 0.06988525390625, + "learning_rate": 2.2144480897721402e-07, + "loss": 6.988673703745008e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3477, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 71.03125190734863, + "completions/min_length": 26.5, + "epoch": 6.913378009431621, + "grad_norm": 0.005728868994121281, + "kl": 0.0775146484375, + "learning_rate": 2.2118287415081098e-07, + "loss": 7.752554665785283e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3478, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 60.000001430511475, + "completions/min_length": 26.875, + "epoch": 6.915363613799951, + "grad_norm": 0.003766331257942425, + "kl": 0.060638427734375, + "learning_rate": 2.209210503274614e-07, + "loss": 6.05880341026932e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3479, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.75, + "completions/mean_length": 70.92708492279053, + "completions/min_length": 32.625, + "epoch": 6.91734921816828, + "grad_norm": 1.9026025174427164, + "kl": 0.08837890625, + "learning_rate": 2.2065933761140243e-07, + "loss": 0.004589976742863655, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3480, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 62.61458492279053, + "completions/min_length": 24.125, + "epoch": 6.91933482253661, + "grad_norm": 0.003322244130724308, + "kl": 0.085693359375, + "learning_rate": 2.2039773610682772e-07, + "loss": 8.572596561862156e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3481, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.625, + "completions/mean_length": 64.06250095367432, + "completions/min_length": 23.625, + "epoch": 6.9213204269049395, + "grad_norm": 0.005178868877266053, + "kl": 0.088104248046875, + "learning_rate": 2.2013624591788667e-07, + "loss": 8.814716420602053e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3482, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.125, + "completions/mean_length": 68.00000286102295, + "completions/min_length": 28.125, + "epoch": 6.923306031273269, + "grad_norm": 3.061341271111363, + "kl": 0.095367431640625, + "learning_rate": 2.1987486714868382e-07, + "loss": -0.009309722110629082, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3483, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.875, + "completions/mean_length": 58.062501430511475, + "completions/min_length": 21.25, + "epoch": 6.9252916356415986, + "grad_norm": 1.6338134999234435, + "kl": 0.088165283203125, + "learning_rate": 2.1961359990327948e-07, + "loss": 0.0071833315305411816, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3484, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.375, + "completions/mean_length": 63.07291889190674, + "completions/min_length": 30.375, + "epoch": 6.9272772400099285, + "grad_norm": 0.004369369790120132, + "kl": 0.072235107421875, + "learning_rate": 2.1935244428569017e-07, + "loss": 7.217198435682803e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3485, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.125, + "completions/mean_length": 62.645835399627686, + "completions/min_length": 20.0, + "epoch": 6.929262844378258, + "grad_norm": 0.00444703640837691, + "kl": 0.081939697265625, + "learning_rate": 2.190914003998871e-07, + "loss": 8.190786320483312e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3486, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.625, + "completions/mean_length": 59.51041793823242, + "completions/min_length": 22.375, + "epoch": 6.9312484487465875, + "grad_norm": 0.0037834384347517516, + "kl": 0.06689453125, + "learning_rate": 2.1883046834979757e-07, + "loss": 6.698662764392793e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3487, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 78.9791693687439, + "completions/min_length": 30.0, + "epoch": 6.933234053114917, + "grad_norm": 0.0033281949718922317, + "kl": 0.0738525390625, + "learning_rate": 2.1856964823930446e-07, + "loss": 7.383064075838774e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3488, + "train_speed(iter/s)": 0.022666 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 59.895835161209106, + "completions/min_length": 24.375, + "epoch": 6.9352196574832465, + "grad_norm": 0.004177736602361458, + "kl": 0.0599365234375, + "learning_rate": 2.183089401722454e-07, + "loss": 5.99289451201912e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3489, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 63.40625286102295, + "completions/min_length": 22.125, + "epoch": 6.937205261851576, + "grad_norm": 0.00326064519813375, + "kl": 0.071380615234375, + "learning_rate": 2.180483442524142e-07, + "loss": 7.141270907595754e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3490, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.0, + "completions/mean_length": 63.48958492279053, + "completions/min_length": 22.375, + "epoch": 6.9391908662199056, + "grad_norm": 0.005937030871098344, + "kl": 0.07696533203125, + "learning_rate": 2.1778786058355952e-07, + "loss": 7.694762462051585e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3491, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.125, + "completions/mean_length": 55.770835399627686, + "completions/min_length": 20.125, + "epoch": 6.9411764705882355, + "grad_norm": 0.00635906098039419, + "kl": 0.063232421875, + "learning_rate": 2.1752748926938524e-07, + "loss": 6.31595539744012e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3492, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 63.395835399627686, + "completions/min_length": 22.875, + "epoch": 6.943162074956565, + "grad_norm": 0.0035734480581738823, + "kl": 0.075042724609375, + "learning_rate": 2.1726723041355115e-07, + "loss": 7.499953790102154e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3493, + "train_speed(iter/s)": 0.022667 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.125, + "completions/mean_length": 55.92708492279053, + "completions/min_length": 17.75, + "epoch": 6.9451476793248945, + "grad_norm": 0.003386932960175648, + "kl": 0.095184326171875, + "learning_rate": 2.170070841196715e-07, + "loss": 9.50338362599723e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3494, + "train_speed(iter/s)": 0.022668 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 68.77083492279053, + "completions/min_length": 20.375, + "epoch": 6.9471332836932245, + "grad_norm": 0.017462602356197406, + "kl": 0.0860595703125, + "learning_rate": 2.1674705049131624e-07, + "loss": 8.608737698523328e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3495, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 71.64583444595337, + "completions/min_length": 27.25, + "epoch": 6.9491188880615535, + "grad_norm": 0.003342787719674488, + "kl": 0.079345703125, + "learning_rate": 2.1648712963201055e-07, + "loss": 7.933162123663351e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3496, + "train_speed(iter/s)": 0.022668 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 66.93750190734863, + "completions/min_length": 25.125, + "epoch": 6.9511044924298835, + "grad_norm": 0.0047212923758240795, + "kl": 0.0821533203125, + "learning_rate": 2.1622732164523399e-07, + "loss": 8.209255611291155e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3497, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 185.25, + "completions/mean_length": 76.802086353302, + "completions/min_length": 24.375, + "epoch": 6.953090096798213, + "grad_norm": 0.005209700574145823, + "kl": 0.08551025390625, + "learning_rate": 2.1596762663442213e-07, + "loss": 8.537035319022834e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3498, + "train_speed(iter/s)": 0.022669 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 73.84375238418579, + "completions/min_length": 31.125, + "epoch": 6.9550757011665425, + "grad_norm": 0.004903439730164932, + "kl": 0.08502197265625, + "learning_rate": 2.1570804470296494e-07, + "loss": 8.500037802150473e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3499, + "train_speed(iter/s)": 0.022668 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.125, + "completions/mean_length": 62.28125190734863, + "completions/min_length": 24.125, + "epoch": 6.9570613055348725, + "grad_norm": 0.00488732235933871, + "kl": 0.073394775390625, + "learning_rate": 2.154485759542073e-07, + "loss": 7.344199548242614e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3500, + "train_speed(iter/s)": 0.022668 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 65.35416841506958, + "completions/min_length": 18.25, + "epoch": 6.9590469099032015, + "grad_norm": 0.004979974472857077, + "kl": 0.08349609375, + "learning_rate": 2.1518922049144938e-07, + "loss": 8.350598363904282e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3501, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.5, + "completions/mean_length": 74.60416793823242, + "completions/min_length": 24.25, + "epoch": 6.9610325142715315, + "grad_norm": 0.01165229206791968, + "kl": 0.10406494140625, + "learning_rate": 2.1492997841794647e-07, + "loss": 0.00010403131454950199, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3502, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.75, + "completions/mean_length": 69.91666889190674, + "completions/min_length": 25.625, + "epoch": 6.9630181186398605, + "grad_norm": 0.005017248050714145, + "kl": 0.072479248046875, + "learning_rate": 2.1467084983690787e-07, + "loss": 7.241721323225647e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3503, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.625, + "completions/mean_length": 60.28125190734863, + "completions/min_length": 24.0, + "epoch": 6.9650037230081905, + "grad_norm": 0.012709558011590925, + "kl": 0.106689453125, + "learning_rate": 2.1441183485149862e-07, + "loss": 0.00010654113430064172, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3504, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.125, + "completions/mean_length": 71.48958444595337, + "completions/min_length": 27.25, + "epoch": 6.96698932737652, + "grad_norm": 2.257972941853662, + "kl": 0.0977783203125, + "learning_rate": 2.1415293356483777e-07, + "loss": 0.00227789836935699, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.05974818021059036, + "rewards/CineAccuracyORM/mean": 0.6666666669771075, + "rewards/CineAccuracyORM/std": 0.28845512494444847, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3505, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 64.84375095367432, + "completions/min_length": 23.375, + "epoch": 6.9689749317448495, + "grad_norm": 0.7321872964269951, + "kl": 0.09014892578125, + "learning_rate": 2.1389414607999977e-07, + "loss": 0.006467541214078665, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3506, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.25, + "completions/mean_length": 72.82291889190674, + "completions/min_length": 23.5, + "epoch": 6.9709605361131795, + "grad_norm": 0.0035710598634169696, + "kl": 0.068572998046875, + "learning_rate": 2.1363547250001335e-07, + "loss": 6.851096986792982e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3507, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.5, + "completions/mean_length": 63.23958492279053, + "completions/min_length": 27.375, + "epoch": 6.972946140481509, + "grad_norm": 1.7366125750443138, + "kl": 0.090667724609375, + "learning_rate": 2.1337691292786159e-07, + "loss": 0.002294144593179226, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3508, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 60.000001668930054, + "completions/min_length": 19.25, + "epoch": 6.9749317448498385, + "grad_norm": 0.004224070629503523, + "kl": 0.08380126953125, + "learning_rate": 2.1311846746648322e-07, + "loss": 8.378517668461427e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3509, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.875, + "completions/mean_length": 62.69791889190674, + "completions/min_length": 18.875, + "epoch": 6.976917349218168, + "grad_norm": 0.005020541254357614, + "kl": 0.102081298828125, + "learning_rate": 2.128601362187706e-07, + "loss": 0.00010203639976680279, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3510, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.5, + "completions/mean_length": 62.12500190734863, + "completions/min_length": 27.25, + "epoch": 6.978902953586498, + "grad_norm": 1.7979416665445698, + "kl": 0.0810546875, + "learning_rate": 2.1260191928757077e-07, + "loss": -0.0007692854851484299, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3511, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 64.51041793823242, + "completions/min_length": 22.75, + "epoch": 6.980888557954827, + "grad_norm": 0.005936664651624782, + "kl": 0.07293701171875, + "learning_rate": 2.123438167756857e-07, + "loss": 7.291975634871051e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3512, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.125, + "completions/mean_length": 55.750001430511475, + "completions/min_length": 17.75, + "epoch": 6.982874162323157, + "grad_norm": 0.004799489362140963, + "kl": 0.075469970703125, + "learning_rate": 2.1208582878587123e-07, + "loss": 7.53152635297738e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3513, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.0, + "completions/mean_length": 80.37500381469727, + "completions/min_length": 28.875, + "epoch": 6.9848597666914864, + "grad_norm": 0.003107792031337539, + "kl": 0.073760986328125, + "learning_rate": 2.1182795542083813e-07, + "loss": 7.378715963568538e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3514, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 63.13541841506958, + "completions/min_length": 26.125, + "epoch": 6.986845371059816, + "grad_norm": 0.0033922022698946635, + "kl": 0.07904052734375, + "learning_rate": 2.115701967832511e-07, + "loss": 7.897923933342099e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3515, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.0, + "completions/mean_length": 78.1041693687439, + "completions/min_length": 26.875, + "epoch": 6.9888309754281455, + "grad_norm": 1.0260712899067936, + "kl": 0.08111572265625, + "learning_rate": 2.1131255297572936e-07, + "loss": 0.012934553436934948, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3516, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 194.375, + "completions/mean_length": 77.77083587646484, + "completions/min_length": 28.0, + "epoch": 6.990816579796475, + "grad_norm": 0.008097382640221272, + "kl": 0.0845947265625, + "learning_rate": 2.1105502410084675e-07, + "loss": 8.456048090010881e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3517, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 71.83333492279053, + "completions/min_length": 28.0, + "epoch": 6.992802184164805, + "grad_norm": 0.00490636978181566, + "kl": 0.09381103515625, + "learning_rate": 2.1079761026113056e-07, + "loss": 9.380362462252378e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3518, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 66.42708683013916, + "completions/min_length": 23.125, + "epoch": 6.994787788533134, + "grad_norm": 0.005707926258265224, + "kl": 0.070587158203125, + "learning_rate": 2.1054031155906315e-07, + "loss": 7.06023711245507e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3519, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 61.427085876464844, + "completions/min_length": 21.625, + "epoch": 6.996773392901464, + "grad_norm": 0.003393516415305577, + "kl": 0.07763671875, + "learning_rate": 2.1028312809708037e-07, + "loss": 7.768211071379483e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3520, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 60.14583444595337, + "completions/min_length": 22.75, + "epoch": 6.998758997269794, + "grad_norm": 0.003576802054981375, + "kl": 0.0765380859375, + "learning_rate": 2.1002605997757238e-07, + "loss": 7.656447996851057e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3521, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.25, + "completions/mean_length": 77.39583683013916, + "completions/min_length": 24.375, + "epoch": 7.00198560436833, + "grad_norm": 0.0031969466456915722, + "kl": 0.069580078125, + "learning_rate": 2.0976910730288354e-07, + "loss": 6.961668987059966e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3522, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 66.08333492279053, + "completions/min_length": 29.0, + "epoch": 7.003971208736659, + "grad_norm": 0.0037654768409044895, + "kl": 0.09014892578125, + "learning_rate": 2.0951227017531253e-07, + "loss": 9.014253737404943e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3523, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.125, + "completions/mean_length": 64.62500286102295, + "completions/min_length": 26.75, + "epoch": 7.005956813104989, + "grad_norm": 0.003520291615816827, + "kl": 0.0677490234375, + "learning_rate": 2.0925554869711127e-07, + "loss": 6.771342305000871e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3524, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 65.34375095367432, + "completions/min_length": 24.125, + "epoch": 7.007942417473318, + "grad_norm": 0.003084352715649954, + "kl": 0.07745361328125, + "learning_rate": 2.089989429704863e-07, + "loss": 7.758366700727493e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3525, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.0, + "completions/mean_length": 64.02083587646484, + "completions/min_length": 24.75, + "epoch": 7.009928021841648, + "grad_norm": 0.003513686127529461, + "kl": 0.070587158203125, + "learning_rate": 2.0874245309759768e-07, + "loss": 7.050807471387088e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3526, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 65.85416841506958, + "completions/min_length": 23.0, + "epoch": 7.011913626209978, + "grad_norm": 0.7406721199127559, + "kl": 0.18603515625, + "learning_rate": 2.0848607918055976e-07, + "loss": 0.018216852098703384, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3527, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 69.85416793823242, + "completions/min_length": 23.875, + "epoch": 7.013899230578307, + "grad_norm": 0.0032009757200067088, + "kl": 0.075531005859375, + "learning_rate": 2.0822982132144034e-07, + "loss": 7.552739407401532e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3528, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.0, + "completions/mean_length": 68.11458683013916, + "completions/min_length": 26.0, + "epoch": 7.015884834946637, + "grad_norm": 0.004476144446104526, + "kl": 0.089202880859375, + "learning_rate": 2.079736796222607e-07, + "loss": 8.9102795755025e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3529, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 65.07291841506958, + "completions/min_length": 22.875, + "epoch": 7.017870439314967, + "grad_norm": 0.002994012045499423, + "kl": 0.0665283203125, + "learning_rate": 2.0771765418499715e-07, + "loss": 6.65126062813215e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3530, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 65.40625143051147, + "completions/min_length": 20.375, + "epoch": 7.019856043683296, + "grad_norm": 0.00294143413171213, + "kl": 0.0889892578125, + "learning_rate": 2.0746174511157844e-07, + "loss": 8.884790440788493e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3531, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.5, + "completions/mean_length": 55.97916793823242, + "completions/min_length": 21.5, + "epoch": 7.021841648051626, + "grad_norm": 0.005378833640088181, + "kl": 0.085113525390625, + "learning_rate": 2.072059525038873e-07, + "loss": 8.504880679538473e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3532, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 63.17708492279053, + "completions/min_length": 19.125, + "epoch": 7.023827252419955, + "grad_norm": 0.003568418971912661, + "kl": 0.086029052734375, + "learning_rate": 2.0695027646376063e-07, + "loss": 8.603769674664363e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3533, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 71.22916841506958, + "completions/min_length": 17.375, + "epoch": 7.025812856788285, + "grad_norm": 0.0030669190865770107, + "kl": 0.0733642578125, + "learning_rate": 2.0669471709298804e-07, + "loss": 7.332459790632129e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3534, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 71.43750286102295, + "completions/min_length": 26.0, + "epoch": 7.027798461156615, + "grad_norm": 0.0028867647510709827, + "kl": 0.08544921875, + "learning_rate": 2.064392744933135e-07, + "loss": 8.547432662453502e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3535, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 66.89583587646484, + "completions/min_length": 28.0, + "epoch": 7.029784065524944, + "grad_norm": 0.0050638388537128185, + "kl": 0.08343505859375, + "learning_rate": 2.061839487664342e-07, + "loss": 8.334654557984322e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3536, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.5, + "completions/mean_length": 71.32291841506958, + "completions/min_length": 26.5, + "epoch": 7.031769669893274, + "grad_norm": 0.012672089032473681, + "kl": 0.11083984375, + "learning_rate": 2.0592874001400056e-07, + "loss": 0.00011071137123508379, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3537, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.75, + "completions/mean_length": 73.90625143051147, + "completions/min_length": 29.5, + "epoch": 7.033755274261603, + "grad_norm": 0.0031235418965348057, + "kl": 0.0833740234375, + "learning_rate": 2.0567364833761686e-07, + "loss": 8.346197864739224e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3538, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.75, + "completions/mean_length": 71.37500238418579, + "completions/min_length": 21.75, + "epoch": 7.035740878629933, + "grad_norm": 0.0036628494077136425, + "kl": 0.068145751953125, + "learning_rate": 2.0541867383884042e-07, + "loss": 6.81786477798596e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3539, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 63.437500953674316, + "completions/min_length": 22.75, + "epoch": 7.037726482998263, + "grad_norm": 3.148342490849975, + "kl": 0.08062744140625, + "learning_rate": 2.0516381661918192e-07, + "loss": 0.0010254066437482834, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3540, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.875, + "completions/mean_length": 70.2916693687439, + "completions/min_length": 27.125, + "epoch": 7.039712087366592, + "grad_norm": 0.0032683331730226613, + "kl": 0.07977294921875, + "learning_rate": 2.049090767801057e-07, + "loss": 7.983069372130558e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3541, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 66.21875095367432, + "completions/min_length": 26.75, + "epoch": 7.041697691734922, + "grad_norm": 0.0038616391656452896, + "kl": 0.08355712890625, + "learning_rate": 2.0465445442302885e-07, + "loss": 8.349579002242535e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3542, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.375, + "completions/mean_length": 77.90625190734863, + "completions/min_length": 28.25, + "epoch": 7.043683296103252, + "grad_norm": 0.003226334948950208, + "kl": 0.078582763671875, + "learning_rate": 2.0439994964932217e-07, + "loss": 7.855678995838389e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3543, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.125, + "completions/mean_length": 64.80208539962769, + "completions/min_length": 26.0, + "epoch": 7.045668900471581, + "grad_norm": 0.003889540449057347, + "kl": 0.0782470703125, + "learning_rate": 2.0414556256030952e-07, + "loss": 7.817507139407098e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3544, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.5, + "completions/mean_length": 62.83333444595337, + "completions/min_length": 22.25, + "epoch": 7.047654504839911, + "grad_norm": 0.004836073333392053, + "kl": 0.08123779296875, + "learning_rate": 2.0389129325726756e-07, + "loss": 8.123174484353513e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3545, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 71.12500190734863, + "completions/min_length": 28.5, + "epoch": 7.04964010920824, + "grad_norm": 1.9903818126039807, + "kl": 0.0830078125, + "learning_rate": 2.0363714184142667e-07, + "loss": -0.0016092360019683838, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3546, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 234.25, + "completions/mean_length": 73.51041984558105, + "completions/min_length": 23.5, + "epoch": 7.05162571357657, + "grad_norm": 1.0141092596339412, + "kl": 0.113616943359375, + "learning_rate": 2.0338310841396976e-07, + "loss": -0.005444263573735952, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3547, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.875, + "completions/mean_length": 61.833335399627686, + "completions/min_length": 24.375, + "epoch": 7.0536113179449, + "grad_norm": 0.003813418030064665, + "kl": 0.072845458984375, + "learning_rate": 2.0312919307603283e-07, + "loss": 7.275915413629264e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3548, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.0, + "completions/mean_length": 66.67708492279053, + "completions/min_length": 31.0, + "epoch": 7.055596922313229, + "grad_norm": 0.007228511181624242, + "kl": 0.103515625, + "learning_rate": 2.0287539592870519e-07, + "loss": 0.00010354012192692608, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3549, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 64.39583492279053, + "completions/min_length": 21.375, + "epoch": 7.057582526681559, + "grad_norm": 0.0037024744323373437, + "kl": 0.0670166015625, + "learning_rate": 2.0262171707302894e-07, + "loss": 6.693199975416064e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3550, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 71.54166793823242, + "completions/min_length": 27.875, + "epoch": 7.059568131049888, + "grad_norm": 0.0040026235516167534, + "kl": 0.076690673828125, + "learning_rate": 2.0236815660999884e-07, + "loss": 7.67509700381197e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3551, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.0, + "completions/mean_length": 63.08333492279053, + "completions/min_length": 23.75, + "epoch": 7.061553735418218, + "grad_norm": 0.0072198913357872175, + "kl": 0.08221435546875, + "learning_rate": 2.0211471464056306e-07, + "loss": 8.226436330005527e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3552, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 62.739585876464844, + "completions/min_length": 21.125, + "epoch": 7.063539339786548, + "grad_norm": 0.0069164108224131085, + "kl": 0.074493408203125, + "learning_rate": 2.018613912656219e-07, + "loss": 7.459659536834806e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3553, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.875, + "completions/mean_length": 75.52083539962769, + "completions/min_length": 27.25, + "epoch": 7.065524944154877, + "grad_norm": 0.005123172358202033, + "kl": 0.067352294921875, + "learning_rate": 2.0160818658602912e-07, + "loss": 6.738472438883036e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3554, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.5, + "completions/mean_length": 65.06250047683716, + "completions/min_length": 21.75, + "epoch": 7.067510548523207, + "grad_norm": 0.006288664909237334, + "kl": 0.063751220703125, + "learning_rate": 2.013551007025906e-07, + "loss": 6.374895747285336e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3555, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 66.52083444595337, + "completions/min_length": 22.625, + "epoch": 7.069496152891537, + "grad_norm": 0.007216577441415266, + "kl": 0.076904296875, + "learning_rate": 2.0110213371606538e-07, + "loss": 7.687686593271792e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3556, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 62.052085399627686, + "completions/min_length": 20.875, + "epoch": 7.071481757259866, + "grad_norm": 0.007705580361005364, + "kl": 0.077667236328125, + "learning_rate": 2.008492857271652e-07, + "loss": 7.767813804093748e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3557, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 64.31250238418579, + "completions/min_length": 26.75, + "epoch": 7.073467361628196, + "grad_norm": 1.333871949930698, + "kl": 0.09466552734375, + "learning_rate": 2.0059655683655397e-07, + "loss": 0.008863439783453941, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3558, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 62.833335399627686, + "completions/min_length": 22.75, + "epoch": 7.075452965996525, + "grad_norm": 0.006352459488767535, + "kl": 0.0841064453125, + "learning_rate": 2.003439471448487e-07, + "loss": 8.404585241805762e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3559, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.75, + "completions/mean_length": 57.07291793823242, + "completions/min_length": 18.125, + "epoch": 7.077438570364855, + "grad_norm": 0.005142236595226734, + "kl": 0.099578857421875, + "learning_rate": 2.0009145675261858e-07, + "loss": 9.96140151983127e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3560, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 67.18750238418579, + "completions/min_length": 23.125, + "epoch": 7.079424174733185, + "grad_norm": 0.005396894378942624, + "kl": 0.0826416015625, + "learning_rate": 1.9983908576038527e-07, + "loss": 8.264069765573367e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3561, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 61.04166793823242, + "completions/min_length": 20.0, + "epoch": 7.081409779101514, + "grad_norm": 0.0057154503947840244, + "kl": 0.069976806640625, + "learning_rate": 1.9958683426862332e-07, + "loss": 6.998908065725118e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3562, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 71.26041841506958, + "completions/min_length": 22.125, + "epoch": 7.083395383469844, + "grad_norm": 0.006523748080471988, + "kl": 0.0814208984375, + "learning_rate": 1.9933470237775923e-07, + "loss": 8.142885053530335e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3563, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 70.40625190734863, + "completions/min_length": 24.125, + "epoch": 7.085380987838173, + "grad_norm": 0.004770751718460034, + "kl": 0.07879638671875, + "learning_rate": 1.9908269018817215e-07, + "loss": 7.872957939980552e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3564, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 62.19791793823242, + "completions/min_length": 23.625, + "epoch": 7.087366592206503, + "grad_norm": 0.005293463431732053, + "kl": 0.0784912109375, + "learning_rate": 1.9883079780019374e-07, + "loss": 7.849084067856893e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3565, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.0, + "completions/mean_length": 73.81250333786011, + "completions/min_length": 22.0, + "epoch": 7.089352196574833, + "grad_norm": 0.0043115206827554165, + "kl": 0.09637451171875, + "learning_rate": 1.9857902531410735e-07, + "loss": 9.627666440792382e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3566, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 67.93750238418579, + "completions/min_length": 29.25, + "epoch": 7.091337800943162, + "grad_norm": 0.005312950164005668, + "kl": 0.07513427734375, + "learning_rate": 1.9832737283014938e-07, + "loss": 7.514693425036967e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3567, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 61.989585399627686, + "completions/min_length": 21.875, + "epoch": 7.093323405311492, + "grad_norm": 0.003709909475130948, + "kl": 0.07904052734375, + "learning_rate": 1.9807584044850784e-07, + "loss": 7.900722266640514e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3568, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.125, + "completions/mean_length": 70.00000190734863, + "completions/min_length": 21.375, + "epoch": 7.095309009679822, + "grad_norm": 0.004269799730926922, + "kl": 0.0792236328125, + "learning_rate": 1.97824428269323e-07, + "loss": 7.928709237603471e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3569, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.75, + "completions/mean_length": 69.08333444595337, + "completions/min_length": 24.0, + "epoch": 7.097294614048151, + "grad_norm": 0.025801014928430586, + "kl": 0.102874755859375, + "learning_rate": 1.9757313639268763e-07, + "loss": 0.00010284609015798196, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3570, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 60.36458444595337, + "completions/min_length": 17.75, + "epoch": 7.099280218416481, + "grad_norm": 0.006109804196207869, + "kl": 0.06494140625, + "learning_rate": 1.973219649186465e-07, + "loss": 6.498794391518459e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3571, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.25, + "completions/mean_length": 54.03125190734863, + "completions/min_length": 21.5, + "epoch": 7.10126582278481, + "grad_norm": 0.003118831974520875, + "kl": 0.069549560546875, + "learning_rate": 1.97070913947196e-07, + "loss": 6.952178227948025e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3572, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 194.25, + "completions/mean_length": 84.84375333786011, + "completions/min_length": 25.25, + "epoch": 7.10325142715314, + "grad_norm": 0.004979906408050475, + "kl": 0.11053466796875, + "learning_rate": 1.9681998357828522e-07, + "loss": 0.00011044459824915975, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3573, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.25, + "completions/mean_length": 66.333336353302, + "completions/min_length": 30.75, + "epoch": 7.10523703152147, + "grad_norm": 0.0032481137081087972, + "kl": 0.084136962890625, + "learning_rate": 1.965691739118146e-07, + "loss": 8.406926644966006e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3574, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 68.16666889190674, + "completions/min_length": 23.125, + "epoch": 7.107222635889799, + "grad_norm": 0.004992303066333042, + "kl": 0.07928466796875, + "learning_rate": 1.963184850476372e-07, + "loss": 7.927630213089287e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3575, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 62.43750190734863, + "completions/min_length": 24.625, + "epoch": 7.109208240258129, + "grad_norm": 0.013000234501473869, + "kl": 0.0938720703125, + "learning_rate": 1.9606791708555736e-07, + "loss": 9.381659037899226e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3576, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 61.583335399627686, + "completions/min_length": 28.875, + "epoch": 7.111193844626458, + "grad_norm": 0.004466753708183528, + "kl": 0.075164794921875, + "learning_rate": 1.9581747012533117e-07, + "loss": 7.516539335483685e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3577, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 73.95833587646484, + "completions/min_length": 28.5, + "epoch": 7.113179448994788, + "grad_norm": 2.1816427754376555, + "kl": 0.076507568359375, + "learning_rate": 1.9556714426666772e-07, + "loss": -0.014472301118075848, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3578, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.0, + "completions/mean_length": 71.28125095367432, + "completions/min_length": 24.25, + "epoch": 7.115165053363118, + "grad_norm": 0.004389491435964849, + "kl": 0.091796875, + "learning_rate": 1.953169396092267e-07, + "loss": 9.177574975183234e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3579, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.5, + "completions/mean_length": 69.53125286102295, + "completions/min_length": 25.625, + "epoch": 7.117150657731447, + "grad_norm": 1.2283282246893905, + "kl": 0.080108642578125, + "learning_rate": 1.9506685625261965e-07, + "loss": -0.01618211343884468, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3580, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.75, + "completions/mean_length": 79.34375238418579, + "completions/min_length": 30.0, + "epoch": 7.119136262099777, + "grad_norm": 0.0032686577537887223, + "kl": 0.06640625, + "learning_rate": 1.9481689429641058e-07, + "loss": 6.644184031756595e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3581, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.625, + "completions/mean_length": 75.42708587646484, + "completions/min_length": 22.0, + "epoch": 7.121121866468107, + "grad_norm": 0.0036956789255020617, + "kl": 0.09368896484375, + "learning_rate": 1.9456705384011423e-07, + "loss": 9.37871154746972e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3582, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.75, + "completions/mean_length": 83.45833492279053, + "completions/min_length": 30.5, + "epoch": 7.123107470836436, + "grad_norm": 0.003298404515633062, + "kl": 0.081878662109375, + "learning_rate": 1.943173349831978e-07, + "loss": 8.184403122868389e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3583, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 72.58333683013916, + "completions/min_length": 27.625, + "epoch": 7.125093075204766, + "grad_norm": 0.0029432985497142706, + "kl": 0.08953857421875, + "learning_rate": 1.940677378250794e-07, + "loss": 8.96383571671322e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3584, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.75, + "completions/mean_length": 63.375001430511475, + "completions/min_length": 22.25, + "epoch": 7.127078679573095, + "grad_norm": 0.056145638678491885, + "kl": 0.109130859375, + "learning_rate": 1.9381826246512916e-07, + "loss": 0.00010919794294750318, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3585, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 66.72916841506958, + "completions/min_length": 22.875, + "epoch": 7.129064283941425, + "grad_norm": 0.00377531799194084, + "kl": 0.081024169921875, + "learning_rate": 1.935689090026687e-07, + "loss": 8.096140663838014e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3586, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 69.07291889190674, + "completions/min_length": 30.75, + "epoch": 7.131049888309755, + "grad_norm": 0.00352380794184653, + "kl": 0.085845947265625, + "learning_rate": 1.9331967753697077e-07, + "loss": 8.580145367886871e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3587, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.625, + "completions/mean_length": 64.92708492279053, + "completions/min_length": 24.0, + "epoch": 7.133035492678084, + "grad_norm": 0.0032822665562121005, + "kl": 0.08056640625, + "learning_rate": 1.9307056816725954e-07, + "loss": 8.053382771322504e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3588, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.375, + "completions/mean_length": 78.81250286102295, + "completions/min_length": 29.75, + "epoch": 7.135021097046414, + "grad_norm": 0.004317907003769507, + "kl": 0.069122314453125, + "learning_rate": 1.9282158099271117e-07, + "loss": 6.904612382641062e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3589, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.25, + "completions/mean_length": 60.94791889190674, + "completions/min_length": 26.375, + "epoch": 7.137006701414743, + "grad_norm": 0.005277435573608788, + "kl": 0.07647705078125, + "learning_rate": 1.9257271611245245e-07, + "loss": 7.652993372175843e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3590, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 63.666667461395264, + "completions/min_length": 25.125, + "epoch": 7.138992305783073, + "grad_norm": 0.006943862837693607, + "kl": 0.09051513671875, + "learning_rate": 1.9232397362556192e-07, + "loss": 9.048033825820312e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3591, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 68.520836353302, + "completions/min_length": 23.25, + "epoch": 7.140977910151403, + "grad_norm": 0.9024465011530635, + "kl": 0.06005859375, + "learning_rate": 1.9207535363106947e-07, + "loss": 0.012049295008182526, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3592, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 63.18750238418579, + "completions/min_length": 21.25, + "epoch": 7.142963514519732, + "grad_norm": 0.0035971724577689664, + "kl": 0.069122314453125, + "learning_rate": 1.9182685622795565e-07, + "loss": 6.906967610120773e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3593, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 71.89583539962769, + "completions/min_length": 24.875, + "epoch": 7.144949118888062, + "grad_norm": 0.007377335249580746, + "kl": 0.08782958984375, + "learning_rate": 1.91578481515153e-07, + "loss": 8.78995269886218e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3594, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.5, + "completions/mean_length": 78.32291984558105, + "completions/min_length": 25.5, + "epoch": 7.146934723256392, + "grad_norm": 0.0032559502457722178, + "kl": 0.08148193359375, + "learning_rate": 1.9133022959154443e-07, + "loss": 8.146220352500677e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3595, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 62.29166841506958, + "completions/min_length": 17.5, + "epoch": 7.148920327624721, + "grad_norm": 1.029303708454347, + "kl": 0.07916259765625, + "learning_rate": 1.9108210055596464e-07, + "loss": -0.021941017359495163, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.31764985248446465, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3596, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 69.50000143051147, + "completions/min_length": 24.375, + "epoch": 7.150905931993051, + "grad_norm": 0.0036447591427848907, + "kl": 0.0963134765625, + "learning_rate": 1.9083409450719896e-07, + "loss": 9.627988038118929e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3597, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 74.17708539962769, + "completions/min_length": 30.375, + "epoch": 7.15289153636138, + "grad_norm": 0.005512578834195529, + "kl": 0.0830078125, + "learning_rate": 1.9058621154398352e-07, + "loss": 8.303741924464703e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3598, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 74.88541841506958, + "completions/min_length": 30.0, + "epoch": 7.15487714072971, + "grad_norm": 0.004643165784310934, + "kl": 0.07440185546875, + "learning_rate": 1.9033845176500656e-07, + "loss": 7.443320646416396e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3599, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 68.270836353302, + "completions/min_length": 26.5, + "epoch": 7.1568627450980395, + "grad_norm": 0.003744574858599434, + "kl": 0.08074951171875, + "learning_rate": 1.900908152689062e-07, + "loss": 8.071074262261391e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3600, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 70.28125238418579, + "completions/min_length": 29.625, + "epoch": 7.158848349466369, + "grad_norm": 0.0050952511347941805, + "kl": 0.0889892578125, + "learning_rate": 1.898433021542716e-07, + "loss": 8.900444663595408e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3601, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 65.11458492279053, + "completions/min_length": 19.25, + "epoch": 7.160833953834699, + "grad_norm": 2.6681930964879954, + "kl": 0.089599609375, + "learning_rate": 1.895959125196433e-07, + "loss": -0.007380373775959015, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.06650752201676369, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.18335824459791183, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3602, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 67.31250190734863, + "completions/min_length": 23.625, + "epoch": 7.162819558203028, + "grad_norm": 1.160380406495555, + "kl": 0.07415771484375, + "learning_rate": 1.8934864646351223e-07, + "loss": 0.013130133971571922, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3603, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.75, + "completions/mean_length": 63.42708444595337, + "completions/min_length": 24.25, + "epoch": 7.164805162571358, + "grad_norm": 0.6578778375472332, + "kl": 0.067138671875, + "learning_rate": 1.891015040843203e-07, + "loss": 0.0071890633553266525, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3604, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 61.54166841506958, + "completions/min_length": 19.0, + "epoch": 7.1667907669396875, + "grad_norm": 0.005028032023527505, + "kl": 0.0787353515625, + "learning_rate": 1.8885448548046045e-07, + "loss": 7.874410948716104e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3605, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 70.88541793823242, + "completions/min_length": 23.875, + "epoch": 7.168776371308017, + "grad_norm": 0.005638272695269193, + "kl": 0.073272705078125, + "learning_rate": 1.8860759075027567e-07, + "loss": 7.32544285710901e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3606, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.25, + "completions/mean_length": 78.85416841506958, + "completions/min_length": 22.625, + "epoch": 7.1707619756763465, + "grad_norm": 0.0032251042552413293, + "kl": 0.082733154296875, + "learning_rate": 1.8836081999206032e-07, + "loss": 8.284873911179602e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3607, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 66.76041793823242, + "completions/min_length": 23.5, + "epoch": 7.1727475800446765, + "grad_norm": 0.04925859862028904, + "kl": 0.13531494140625, + "learning_rate": 1.8811417330405905e-07, + "loss": 0.00013531502918340266, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3608, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 59.87500190734863, + "completions/min_length": 20.375, + "epoch": 7.174733184413006, + "grad_norm": 1.1658615432916306, + "kl": 0.093475341796875, + "learning_rate": 1.8786765078446686e-07, + "loss": 0.004339361097663641, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3609, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 69.66666841506958, + "completions/min_length": 21.0, + "epoch": 7.1767187887813355, + "grad_norm": 0.0032043902690534155, + "kl": 0.079010009765625, + "learning_rate": 1.8762125253143014e-07, + "loss": 7.893408474046737e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3610, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 73.98958539962769, + "completions/min_length": 29.75, + "epoch": 7.178704393149665, + "grad_norm": 0.003570326702981123, + "kl": 0.074798583984375, + "learning_rate": 1.8737497864304486e-07, + "loss": 7.483335502911359e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3611, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.75, + "completions/mean_length": 66.38541984558105, + "completions/min_length": 24.5, + "epoch": 7.1806899975179945, + "grad_norm": 0.003849065498481363, + "kl": 0.075408935546875, + "learning_rate": 1.8712882921735807e-07, + "loss": 7.548542635049671e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3612, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.0, + "completions/mean_length": 74.04166889190674, + "completions/min_length": 23.375, + "epoch": 7.1826756018863245, + "grad_norm": 0.0032969798299527374, + "kl": 0.09912109375, + "learning_rate": 1.8688280435236732e-07, + "loss": 9.90181797533296e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3613, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 71.05208492279053, + "completions/min_length": 24.625, + "epoch": 7.1846612062546535, + "grad_norm": 3.1178019987571712, + "kl": 0.07476806640625, + "learning_rate": 1.8663690414602e-07, + "loss": 0.006656696554273367, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3614, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.75, + "completions/mean_length": 73.25000286102295, + "completions/min_length": 22.5, + "epoch": 7.1866468106229835, + "grad_norm": 1.1595501212157437, + "kl": 0.07952880859375, + "learning_rate": 1.8639112869621466e-07, + "loss": -0.0056605227291584015, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3615, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 59.34375286102295, + "completions/min_length": 19.875, + "epoch": 7.188632414991313, + "grad_norm": 0.004380555733444782, + "kl": 0.05828857421875, + "learning_rate": 1.8614547810079945e-07, + "loss": 5.830806912854314e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3616, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.75, + "completions/mean_length": 65.88541889190674, + "completions/min_length": 32.25, + "epoch": 7.1906180193596425, + "grad_norm": 1.06476297550661, + "kl": 0.08282470703125, + "learning_rate": 1.85899952457573e-07, + "loss": 0.011797348968684673, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3617, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.375, + "completions/mean_length": 75.71875238418579, + "completions/min_length": 28.75, + "epoch": 7.1926036237279725, + "grad_norm": 0.7337227952823936, + "kl": 0.070068359375, + "learning_rate": 1.8565455186428454e-07, + "loss": 0.0020629758946597576, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3618, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 66.44791793823242, + "completions/min_length": 20.125, + "epoch": 7.1945892280963015, + "grad_norm": 0.0056943358350315994, + "kl": 0.058319091796875, + "learning_rate": 1.8540927641863342e-07, + "loss": 5.823990795761347e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3619, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 77.20833587646484, + "completions/min_length": 27.75, + "epoch": 7.1965748324646315, + "grad_norm": 0.006876746035764185, + "kl": 0.0811767578125, + "learning_rate": 1.8516412621826865e-07, + "loss": 8.115197852021083e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3620, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 69.58333587646484, + "completions/min_length": 28.75, + "epoch": 7.198560436832961, + "grad_norm": 0.004658252605430419, + "kl": 0.083953857421875, + "learning_rate": 1.849191013607902e-07, + "loss": 8.403870015172288e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3621, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 66.36458492279053, + "completions/min_length": 18.25, + "epoch": 7.2005460412012905, + "grad_norm": 1.4665064388892504, + "kl": 0.07342529296875, + "learning_rate": 1.846742019437472e-07, + "loss": -0.017064182087779045, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3622, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 64.22916793823242, + "completions/min_length": 17.625, + "epoch": 7.2025316455696204, + "grad_norm": 0.005372467996328005, + "kl": 0.09332275390625, + "learning_rate": 1.844294280646399e-07, + "loss": 9.324972052127123e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3623, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.0, + "completions/mean_length": 73.14583683013916, + "completions/min_length": 27.75, + "epoch": 7.2045172499379495, + "grad_norm": 0.003615080561329921, + "kl": 0.08953857421875, + "learning_rate": 1.8418477982091767e-07, + "loss": 8.955893281381577e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3624, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 67.70833492279053, + "completions/min_length": 25.25, + "epoch": 7.2065028543062795, + "grad_norm": 0.0034238844445201888, + "kl": 0.089599609375, + "learning_rate": 1.8394025730997986e-07, + "loss": 8.977434481494129e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3625, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.75, + "completions/mean_length": 69.01041841506958, + "completions/min_length": 23.875, + "epoch": 7.208488458674609, + "grad_norm": 0.0032246664842049536, + "kl": 0.084564208984375, + "learning_rate": 1.8369586062917692e-07, + "loss": 8.454352791886777e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3626, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 72.35416746139526, + "completions/min_length": 21.125, + "epoch": 7.2104740630429385, + "grad_norm": 0.0035702864863273835, + "kl": 0.077392578125, + "learning_rate": 1.8345158987580789e-07, + "loss": 7.734754763077945e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3627, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 66.73958444595337, + "completions/min_length": 24.375, + "epoch": 7.212459667411268, + "grad_norm": 0.004527331339223867, + "kl": 0.07568359375, + "learning_rate": 1.832074451471221e-07, + "loss": 7.57636662456207e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3628, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 72.11458492279053, + "completions/min_length": 26.875, + "epoch": 7.2144452717795975, + "grad_norm": 0.003252213307206212, + "kl": 0.072357177734375, + "learning_rate": 1.8296342654031915e-07, + "loss": 7.23116536391899e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3629, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 67.13541841506958, + "completions/min_length": 22.375, + "epoch": 7.216430876147927, + "grad_norm": 0.003263218079245263, + "kl": 0.068878173828125, + "learning_rate": 1.827195341525476e-07, + "loss": 6.885667971801013e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3630, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 66.19791889190674, + "completions/min_length": 22.25, + "epoch": 7.218416480516257, + "grad_norm": 0.005888469859037103, + "kl": 0.0772705078125, + "learning_rate": 1.824757680809067e-07, + "loss": 7.722133887000382e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3631, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.5, + "completions/mean_length": 72.91666793823242, + "completions/min_length": 24.875, + "epoch": 7.2204020848845865, + "grad_norm": 0.0033011679554252817, + "kl": 0.066375732421875, + "learning_rate": 1.8223212842244445e-07, + "loss": 6.642604421358556e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3632, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 68.00000238418579, + "completions/min_length": 24.5, + "epoch": 7.222387689252916, + "grad_norm": 0.0030918208023705614, + "kl": 0.064300537109375, + "learning_rate": 1.8198861527415927e-07, + "loss": 6.431159272324294e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3633, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.375, + "completions/mean_length": 65.44791841506958, + "completions/min_length": 17.375, + "epoch": 7.224373293621246, + "grad_norm": 0.003680694392114215, + "kl": 0.062042236328125, + "learning_rate": 1.8174522873299907e-07, + "loss": 6.202972144819796e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3634, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.875, + "completions/mean_length": 71.65625238418579, + "completions/min_length": 21.0, + "epoch": 7.226358897989575, + "grad_norm": 0.0038020674881131256, + "kl": 0.06915283203125, + "learning_rate": 1.815019688958609e-07, + "loss": 6.918206781847402e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3635, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 67.72916841506958, + "completions/min_length": 20.5, + "epoch": 7.228344502357905, + "grad_norm": 1.85318553184896, + "kl": 0.0838623046875, + "learning_rate": 1.8125883585959207e-07, + "loss": 0.0008341341163031757, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3636, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.625, + "completions/mean_length": 58.85416841506958, + "completions/min_length": 21.375, + "epoch": 7.230330106726234, + "grad_norm": 1.1640243228901448, + "kl": 0.09576416015625, + "learning_rate": 1.8101582972098883e-07, + "loss": -0.0021224557422101498, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3637, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.375, + "completions/mean_length": 59.97916841506958, + "completions/min_length": 25.875, + "epoch": 7.232315711094564, + "grad_norm": 0.008255185801152087, + "kl": 0.066986083984375, + "learning_rate": 1.8077295057679694e-07, + "loss": 6.694767944281921e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3638, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 63.13541889190674, + "completions/min_length": 18.0, + "epoch": 7.234301315462894, + "grad_norm": 1.3654172547692, + "kl": 0.107025146484375, + "learning_rate": 1.8053019852371194e-07, + "loss": 0.01162738911807537, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3639, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 67.54166889190674, + "completions/min_length": 21.0, + "epoch": 7.236286919831223, + "grad_norm": 0.0032878012538459568, + "kl": 0.0692138671875, + "learning_rate": 1.8028757365837882e-07, + "loss": 6.929754454176873e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3640, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.0, + "completions/mean_length": 57.57291889190674, + "completions/min_length": 20.125, + "epoch": 7.238272524199553, + "grad_norm": 0.0035414413192917907, + "kl": 0.05938720703125, + "learning_rate": 1.800450760773914e-07, + "loss": 5.931744090048596e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3641, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 71.13541984558105, + "completions/min_length": 30.75, + "epoch": 7.240258128567882, + "grad_norm": 0.00784555685785729, + "kl": 0.0799560546875, + "learning_rate": 1.7980270587729336e-07, + "loss": 7.995144551387057e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3642, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 68.97916889190674, + "completions/min_length": 22.25, + "epoch": 7.242243732936212, + "grad_norm": 0.003570096861221876, + "kl": 0.075531005859375, + "learning_rate": 1.7956046315457723e-07, + "loss": 7.559488585684448e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3643, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 71.63541841506958, + "completions/min_length": 23.375, + "epoch": 7.244229337304542, + "grad_norm": 0.005837440928017228, + "kl": 0.074310302734375, + "learning_rate": 1.793183480056853e-07, + "loss": 7.432702841470018e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3644, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.375, + "completions/mean_length": 73.31250238418579, + "completions/min_length": 25.875, + "epoch": 7.246214941672871, + "grad_norm": 0.004100565317778416, + "kl": 0.069091796875, + "learning_rate": 1.7907636052700864e-07, + "loss": 6.91145978635177e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3645, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.625, + "completions/mean_length": 71.68750238418579, + "completions/min_length": 22.625, + "epoch": 7.248200546041201, + "grad_norm": 0.004179961518369871, + "kl": 0.06793212890625, + "learning_rate": 1.7883450081488732e-07, + "loss": 6.79224613122642e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3646, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 60.000001430511475, + "completions/min_length": 20.375, + "epoch": 7.250186150409531, + "grad_norm": 0.05588890825765668, + "kl": 0.08642578125, + "learning_rate": 1.785927689656115e-07, + "loss": 8.647267532069236e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3647, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 67.72916889190674, + "completions/min_length": 24.5, + "epoch": 7.25217175477786, + "grad_norm": 0.0038290025056919638, + "kl": 0.085540771484375, + "learning_rate": 1.783511650754194e-07, + "loss": 8.555524982511997e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3648, + "train_speed(iter/s)": 0.022664 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.5, + "completions/mean_length": 75.93750286102295, + "completions/min_length": 17.375, + "epoch": 7.25415735914619, + "grad_norm": 0.0034253709040634895, + "kl": 0.064971923828125, + "learning_rate": 1.7810968924049863e-07, + "loss": 6.494233821285889e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3649, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.375, + "completions/mean_length": 65.02083539962769, + "completions/min_length": 21.125, + "epoch": 7.256142963514519, + "grad_norm": 0.004813522845239572, + "kl": 0.071014404296875, + "learning_rate": 1.778683415569861e-07, + "loss": 7.104119868017733e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3650, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 66.94791889190674, + "completions/min_length": 27.375, + "epoch": 7.258128567882849, + "grad_norm": 0.003657625826420808, + "kl": 0.068511962890625, + "learning_rate": 1.7762712212096726e-07, + "loss": 6.845461757620797e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3651, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 58.781251430511475, + "completions/min_length": 18.0, + "epoch": 7.260114172251179, + "grad_norm": 0.004929471879582024, + "kl": 0.067047119140625, + "learning_rate": 1.7738603102847693e-07, + "loss": 6.699098594253883e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3652, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.375, + "completions/mean_length": 66.70833492279053, + "completions/min_length": 17.875, + "epoch": 7.262099776619508, + "grad_norm": 0.007014026328482691, + "kl": 0.08203125, + "learning_rate": 1.771450683754984e-07, + "loss": 8.207526843762025e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3653, + "train_speed(iter/s)": 0.022663 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.25, + "completions/mean_length": 70.25000286102295, + "completions/min_length": 26.125, + "epoch": 7.264085380987838, + "grad_norm": 0.3831553643252573, + "kl": 0.164306640625, + "learning_rate": 1.7690423425796418e-07, + "loss": 0.00016419807798229158, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3654, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 70.87500190734863, + "completions/min_length": 25.625, + "epoch": 7.266070985356167, + "grad_norm": 0.003026175210895782, + "kl": 0.0753173828125, + "learning_rate": 1.766635287717556e-07, + "loss": 7.538365025538951e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3655, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.125, + "completions/mean_length": 67.94791841506958, + "completions/min_length": 19.125, + "epoch": 7.268056589724497, + "grad_norm": 0.003917943083694712, + "kl": 0.0838623046875, + "learning_rate": 1.7642295201270258e-07, + "loss": 8.375368634006009e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3656, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 67.97916793823242, + "completions/min_length": 25.5, + "epoch": 7.270042194092827, + "grad_norm": 0.004391567091387477, + "kl": 0.0802001953125, + "learning_rate": 1.761825040765836e-07, + "loss": 8.012625039555132e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3657, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 77.44791889190674, + "completions/min_length": 23.125, + "epoch": 7.272027798461156, + "grad_norm": 0.010286213826059558, + "kl": 0.07977294921875, + "learning_rate": 1.759421850591265e-07, + "loss": 7.975217886269093e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3658, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 65.34375190734863, + "completions/min_length": 22.5, + "epoch": 7.274013402829486, + "grad_norm": 1.1376665233089656, + "kl": 0.067047119140625, + "learning_rate": 1.757019950560071e-07, + "loss": 0.008707335218787193, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3659, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 70.75000286102295, + "completions/min_length": 29.625, + "epoch": 7.275999007197816, + "grad_norm": 0.0034317891252947782, + "kl": 0.06640625, + "learning_rate": 1.7546193416285028e-07, + "loss": 6.636339094256982e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3660, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.0, + "completions/mean_length": 64.38541793823242, + "completions/min_length": 21.0, + "epoch": 7.277984611566145, + "grad_norm": 0.003994041936600481, + "kl": 0.079681396484375, + "learning_rate": 1.7522200247522962e-07, + "loss": 7.963718235259876e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3661, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.375, + "completions/mean_length": 73.69791793823242, + "completions/min_length": 21.625, + "epoch": 7.279970215934475, + "grad_norm": 0.004547813220369437, + "kl": 0.068115234375, + "learning_rate": 1.749822000886667e-07, + "loss": 6.820970156695694e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3662, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 65.25000238418579, + "completions/min_length": 18.5, + "epoch": 7.281955820302804, + "grad_norm": 0.003709043515648076, + "kl": 0.071563720703125, + "learning_rate": 1.747425270986323e-07, + "loss": 7.153738988563418e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3663, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.5, + "completions/mean_length": 73.79166889190674, + "completions/min_length": 24.75, + "epoch": 7.283941424671134, + "grad_norm": 0.007759282471757907, + "kl": 0.093658447265625, + "learning_rate": 1.7450298360054522e-07, + "loss": 9.368751489091665e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3664, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 70.60416841506958, + "completions/min_length": 22.5, + "epoch": 7.285927029039464, + "grad_norm": 0.004104329198069711, + "kl": 0.0682373046875, + "learning_rate": 1.7426356968977263e-07, + "loss": 6.822431168984622e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3665, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 64.07291841506958, + "completions/min_length": 20.375, + "epoch": 7.287912633407793, + "grad_norm": 0.006823326499259972, + "kl": 0.08245849609375, + "learning_rate": 1.7402428546163073e-07, + "loss": 8.253773557953537e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3666, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.125, + "completions/mean_length": 61.26041841506958, + "completions/min_length": 16.875, + "epoch": 7.289898237776123, + "grad_norm": 0.0038679289246737803, + "kl": 0.062408447265625, + "learning_rate": 1.7378513101138327e-07, + "loss": 6.239158392418176e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3667, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 60.36458444595337, + "completions/min_length": 15.625, + "epoch": 7.291883842144452, + "grad_norm": 0.004286003661968799, + "kl": 0.070831298828125, + "learning_rate": 1.7354610643424295e-07, + "loss": 7.08344450686127e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3668, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 66.72916793823242, + "completions/min_length": 20.125, + "epoch": 7.293869446512782, + "grad_norm": 0.004885827089730887, + "kl": 0.0672607421875, + "learning_rate": 1.7330721182537072e-07, + "loss": 6.717625365126878e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3669, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.0, + "completions/mean_length": 75.37500238418579, + "completions/min_length": 24.0, + "epoch": 7.295855050881112, + "grad_norm": 0.018599687850253455, + "kl": 0.111846923828125, + "learning_rate": 1.730684472798753e-07, + "loss": 0.00011176713451277465, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3670, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.5, + "completions/mean_length": 60.45833492279053, + "completions/min_length": 22.0, + "epoch": 7.297840655249441, + "grad_norm": 0.003950746716331168, + "kl": 0.073028564453125, + "learning_rate": 1.7282981289281428e-07, + "loss": 7.299071876332164e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3671, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 67.81250095367432, + "completions/min_length": 18.375, + "epoch": 7.299826259617771, + "grad_norm": 0.00506439280920554, + "kl": 0.06732177734375, + "learning_rate": 1.7259130875919292e-07, + "loss": 6.73301619826816e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3672, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 72.42708492279053, + "completions/min_length": 22.25, + "epoch": 7.301811863986101, + "grad_norm": 0.0036809495612570146, + "kl": 0.06866455078125, + "learning_rate": 1.7235293497396463e-07, + "loss": 6.871431833133101e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3673, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.875, + "completions/mean_length": 70.23958539962769, + "completions/min_length": 19.0, + "epoch": 7.30379746835443, + "grad_norm": 0.003067456714673495, + "kl": 0.096466064453125, + "learning_rate": 1.7211469163203142e-07, + "loss": 9.651621803641319e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3674, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 60.66666889190674, + "completions/min_length": 20.75, + "epoch": 7.30578307272276, + "grad_norm": 0.004575498811356828, + "kl": 0.07110595703125, + "learning_rate": 1.7187657882824285e-07, + "loss": 7.108508725650609e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3675, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.5, + "completions/mean_length": 67.41666889190674, + "completions/min_length": 28.0, + "epoch": 7.307768677091089, + "grad_norm": 0.005539836408968533, + "kl": 0.068817138671875, + "learning_rate": 1.71638596657397e-07, + "loss": 6.887719791848212e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3676, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.5, + "completions/mean_length": 74.65625190734863, + "completions/min_length": 27.25, + "epoch": 7.309754281459419, + "grad_norm": 0.21519201724514148, + "kl": 0.322509765625, + "learning_rate": 1.7140074521423942e-07, + "loss": 0.0003220312064513564, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3677, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 66.895836353302, + "completions/min_length": 25.375, + "epoch": 7.311739885827749, + "grad_norm": 0.004288689024352217, + "kl": 0.066070556640625, + "learning_rate": 1.7116302459346378e-07, + "loss": 6.615303573198617e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3678, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 71.37500143051147, + "completions/min_length": 28.625, + "epoch": 7.313725490196078, + "grad_norm": 0.002940887286837973, + "kl": 0.07940673828125, + "learning_rate": 1.7092543488971196e-07, + "loss": 7.931856089271605e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3679, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.0, + "completions/mean_length": 66.83333587646484, + "completions/min_length": 23.0, + "epoch": 7.315711094564408, + "grad_norm": 0.00489847858574876, + "kl": 0.07293701171875, + "learning_rate": 1.7068797619757318e-07, + "loss": 7.286130858119577e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3680, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 70.43750238418579, + "completions/min_length": 25.0, + "epoch": 7.317696698932737, + "grad_norm": 0.7745185552654259, + "kl": 0.096099853515625, + "learning_rate": 1.704506486115851e-07, + "loss": -0.0005042193224653602, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3681, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.0, + "completions/mean_length": 74.50000238418579, + "completions/min_length": 24.0, + "epoch": 7.319682303301067, + "grad_norm": 2.542847507760862, + "kl": 0.386383056640625, + "learning_rate": 1.7021345222623296e-07, + "loss": 0.007674667984247208, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3682, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.75, + "completions/mean_length": 75.01041841506958, + "completions/min_length": 25.875, + "epoch": 7.321667907669397, + "grad_norm": 1.057283856004343, + "kl": 0.073974609375, + "learning_rate": 1.699763871359494e-07, + "loss": -0.0048217372968792915, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3683, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.5, + "completions/mean_length": 69.35416793823242, + "completions/min_length": 22.75, + "epoch": 7.323653512037726, + "grad_norm": 0.004235557869708119, + "kl": 0.08636474609375, + "learning_rate": 1.697394534351154e-07, + "loss": 8.627890929346904e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3684, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.125, + "completions/mean_length": 79.67708539962769, + "completions/min_length": 24.375, + "epoch": 7.325639116406056, + "grad_norm": 0.003045113123197206, + "kl": 0.06195068359375, + "learning_rate": 1.6950265121805925e-07, + "loss": 6.197634502314031e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3685, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.75, + "completions/mean_length": 67.5729193687439, + "completions/min_length": 25.125, + "epoch": 7.327624720774386, + "grad_norm": 0.0036909093213039804, + "kl": 0.0718994140625, + "learning_rate": 1.6926598057905667e-07, + "loss": 7.185227877926081e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3686, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 67.35416841506958, + "completions/min_length": 20.0, + "epoch": 7.329610325142715, + "grad_norm": 2.269020247000388, + "kl": 0.078460693359375, + "learning_rate": 1.6902944161233156e-07, + "loss": 0.008842803537845612, + "memory(GiB)": 94.21, + "reward": 1.6666666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.6666666716337204, + "rewards/CineAccuracyORM/std": 0.375051774084568, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3687, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 72.00000238418579, + "completions/min_length": 19.0, + "epoch": 7.331595929511045, + "grad_norm": 0.00448452043183264, + "kl": 0.070159912109375, + "learning_rate": 1.6879303441205534e-07, + "loss": 7.01812095940113e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3688, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 66.04166841506958, + "completions/min_length": 26.25, + "epoch": 7.333581533879374, + "grad_norm": 0.005052600696867211, + "kl": 0.073028564453125, + "learning_rate": 1.685567590723463e-07, + "loss": 7.30978135834448e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3689, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 68.95833539962769, + "completions/min_length": 24.625, + "epoch": 7.335567138247704, + "grad_norm": 0.0033262025791730046, + "kl": 0.0770263671875, + "learning_rate": 1.6832061568727112e-07, + "loss": 7.711358921369538e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3690, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 75.38541746139526, + "completions/min_length": 26.625, + "epoch": 7.337552742616034, + "grad_norm": 0.004018029285821298, + "kl": 0.063812255859375, + "learning_rate": 1.6808460435084314e-07, + "loss": 6.3762825448066e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3691, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.0, + "completions/mean_length": 64.47916841506958, + "completions/min_length": 26.25, + "epoch": 7.339538346984363, + "grad_norm": 0.937614273501965, + "kl": 0.063751220703125, + "learning_rate": 1.6784872515702397e-07, + "loss": 0.0017140706768259406, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3692, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.75, + "completions/mean_length": 80.00000333786011, + "completions/min_length": 27.0, + "epoch": 7.341523951352693, + "grad_norm": 0.4813402208183356, + "kl": 0.06744384765625, + "learning_rate": 1.6761297819972188e-07, + "loss": -0.02132234536111355, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3693, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 54.38541841506958, + "completions/min_length": 18.125, + "epoch": 7.343509555721022, + "grad_norm": 0.004352288017120049, + "kl": 0.080718994140625, + "learning_rate": 1.6737736357279242e-07, + "loss": 8.059882384259254e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3694, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.625, + "completions/mean_length": 70.40625143051147, + "completions/min_length": 18.625, + "epoch": 7.345495160089352, + "grad_norm": 0.004990534503485936, + "kl": 0.071929931640625, + "learning_rate": 1.671418813700395e-07, + "loss": 7.192611519712955e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3695, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.0, + "completions/mean_length": 70.18750238418579, + "completions/min_length": 18.875, + "epoch": 7.347480764457682, + "grad_norm": 0.005006900854738232, + "kl": 0.065093994140625, + "learning_rate": 1.669065316852133e-07, + "loss": 6.509164086310193e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3696, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.0, + "completions/mean_length": 75.88541793823242, + "completions/min_length": 31.25, + "epoch": 7.349466368826011, + "grad_norm": 0.00550760219413375, + "kl": 0.084442138671875, + "learning_rate": 1.666713146120114e-07, + "loss": 8.445083949482068e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3697, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 68.04166841506958, + "completions/min_length": 24.5, + "epoch": 7.351451973194341, + "grad_norm": 0.0034477512932322004, + "kl": 0.07135009765625, + "learning_rate": 1.6643623024407904e-07, + "loss": 7.129686127882451e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3698, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 69.35416841506958, + "completions/min_length": 27.125, + "epoch": 7.353437577562671, + "grad_norm": 0.00520627033337959, + "kl": 0.069671630859375, + "learning_rate": 1.6620127867500804e-07, + "loss": 6.969399692025036e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3699, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.0, + "completions/mean_length": 70.96875190734863, + "completions/min_length": 17.375, + "epoch": 7.355423181931, + "grad_norm": 0.004511789607719959, + "kl": 0.075042724609375, + "learning_rate": 1.659664599983379e-07, + "loss": 7.499841740354896e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3700, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.0, + "completions/mean_length": 77.08333539962769, + "completions/min_length": 17.875, + "epoch": 7.35740878629933, + "grad_norm": 0.0036551016086177044, + "kl": 0.078765869140625, + "learning_rate": 1.657317743075547e-07, + "loss": 7.884902151999995e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3701, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 67.41666841506958, + "completions/min_length": 20.5, + "epoch": 7.359394390667659, + "grad_norm": 0.0038184194672485614, + "kl": 0.067535400390625, + "learning_rate": 1.6549722169609194e-07, + "loss": 6.749337626388296e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3702, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.0, + "completions/mean_length": 77.6041693687439, + "completions/min_length": 32.75, + "epoch": 7.361379995035989, + "grad_norm": 0.003729448272134716, + "kl": 0.086639404296875, + "learning_rate": 1.6526280225733018e-07, + "loss": 8.666288340464234e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3703, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 64.04166841506958, + "completions/min_length": 19.5, + "epoch": 7.363365599404319, + "grad_norm": 0.0035874148845389973, + "kl": 0.086578369140625, + "learning_rate": 1.6502851608459668e-07, + "loss": 8.64756730152294e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3704, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 58.27083492279053, + "completions/min_length": 17.875, + "epoch": 7.365351203772648, + "grad_norm": 0.003226149159225007, + "kl": 0.06427001953125, + "learning_rate": 1.647943632711656e-07, + "loss": 6.430871144402772e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3705, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 62.11458444595337, + "completions/min_length": 18.0, + "epoch": 7.367336808140978, + "grad_norm": 0.004341457548288576, + "kl": 0.09503173828125, + "learning_rate": 1.6456034391025846e-07, + "loss": 9.500091982772574e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3706, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.125, + "completions/mean_length": 66.35416841506958, + "completions/min_length": 30.0, + "epoch": 7.369322412509307, + "grad_norm": 1.2009485138725684, + "kl": 0.0792236328125, + "learning_rate": 1.6432645809504308e-07, + "loss": -0.011171567253768444, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3707, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 62.72916841506958, + "completions/min_length": 16.75, + "epoch": 7.371308016877637, + "grad_norm": 0.005311722189602332, + "kl": 0.089691162109375, + "learning_rate": 1.6409270591863455e-07, + "loss": 8.972895739134401e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3708, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 62.625001430511475, + "completions/min_length": 22.5, + "epoch": 7.373293621245967, + "grad_norm": 1.6268324295172136, + "kl": 0.074859619140625, + "learning_rate": 1.6385908747409483e-07, + "loss": -0.010995155200362206, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.06436607986688614, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3709, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.375, + "completions/mean_length": 60.9791693687439, + "completions/min_length": 20.0, + "epoch": 7.375279225614296, + "grad_norm": 1.4815832501801613, + "kl": 0.086883544921875, + "learning_rate": 1.6362560285443194e-07, + "loss": -0.00015392526984214783, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3710, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 68.41666841506958, + "completions/min_length": 20.625, + "epoch": 7.377264829982626, + "grad_norm": 1.684582227167695, + "kl": 0.078704833984375, + "learning_rate": 1.6339225215260156e-07, + "loss": 0.01579081267118454, + "memory(GiB)": 94.21, + "reward": 1.9062500149011612, + "reward_std": 0.057790378108620644, + "rewards/CineAccuracyORM/mean": 0.9062500074505806, + "rewards/CineAccuracyORM/std": 0.15001969039440155, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3711, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 68.59375333786011, + "completions/min_length": 26.625, + "epoch": 7.379250434350956, + "grad_norm": 0.003311680993112983, + "kl": 0.079254150390625, + "learning_rate": 1.6315903546150533e-07, + "loss": 7.929686398711056e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3712, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.875, + "completions/mean_length": 74.57291984558105, + "completions/min_length": 25.5, + "epoch": 7.381236038719285, + "grad_norm": 0.006924024548109724, + "kl": 0.0894775390625, + "learning_rate": 1.6292595287399175e-07, + "loss": 8.934707148000598e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3713, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 63.7604193687439, + "completions/min_length": 17.25, + "epoch": 7.383221643087615, + "grad_norm": 0.003552167184073572, + "kl": 0.070220947265625, + "learning_rate": 1.6269300448285616e-07, + "loss": 7.019557961029932e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3714, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 61.44791889190674, + "completions/min_length": 22.375, + "epoch": 7.385207247455944, + "grad_norm": 0.0032023829340718335, + "kl": 0.0833740234375, + "learning_rate": 1.6246019038084008e-07, + "loss": 8.337446342920884e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3715, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 72.01041889190674, + "completions/min_length": 29.0, + "epoch": 7.387192851824274, + "grad_norm": 0.039242887684721936, + "kl": 0.08941650390625, + "learning_rate": 1.6222751066063184e-07, + "loss": 8.92629032023251e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3716, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 64.25000095367432, + "completions/min_length": 19.125, + "epoch": 7.389178456192604, + "grad_norm": 0.010162560220756218, + "kl": 0.081787109375, + "learning_rate": 1.6199496541486646e-07, + "loss": 8.174665708793327e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3717, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 69.833336353302, + "completions/min_length": 29.125, + "epoch": 7.391164060560933, + "grad_norm": 0.008968494846799457, + "kl": 0.081756591796875, + "learning_rate": 1.6176255473612477e-07, + "loss": 8.176537085091695e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3718, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 67.27083587646484, + "completions/min_length": 21.75, + "epoch": 7.393149664929263, + "grad_norm": 0.009010814340404596, + "kl": 0.085052490234375, + "learning_rate": 1.6153027871693482e-07, + "loss": 8.500387048115954e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3719, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 71.81250238418579, + "completions/min_length": 26.25, + "epoch": 7.395135269297592, + "grad_norm": 0.00867112265807232, + "kl": 0.085784912109375, + "learning_rate": 1.6129813744977027e-07, + "loss": 8.597113628638908e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3720, + "train_speed(iter/s)": 0.022662 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.0, + "completions/mean_length": 74.75000381469727, + "completions/min_length": 27.5, + "epoch": 7.397120873665922, + "grad_norm": 0.0033974172272662295, + "kl": 0.0770263671875, + "learning_rate": 1.6106613102705192e-07, + "loss": 7.699093839619309e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3721, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 79.06250381469727, + "completions/min_length": 24.75, + "epoch": 7.399106478034252, + "grad_norm": 0.006914635592586072, + "kl": 0.080047607421875, + "learning_rate": 1.6083425954114604e-07, + "loss": 8.005928248167038e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3722, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 73.5416693687439, + "completions/min_length": 24.5, + "epoch": 7.401092082402581, + "grad_norm": 0.009539204097367379, + "kl": 0.082061767578125, + "learning_rate": 1.606025230843659e-07, + "loss": 8.195355621865019e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3723, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.125, + "completions/mean_length": 62.69791793823242, + "completions/min_length": 21.75, + "epoch": 7.403077686770911, + "grad_norm": 0.017346996036748843, + "kl": 0.079925537109375, + "learning_rate": 1.603709217489708e-07, + "loss": 8.00511843408458e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3724, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.875, + "completions/mean_length": 82.77083587646484, + "completions/min_length": 32.375, + "epoch": 7.405063291139241, + "grad_norm": 0.0036209832409043437, + "kl": 0.08355712890625, + "learning_rate": 1.6013945562716613e-07, + "loss": 8.352326403837651e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3725, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 71.41666746139526, + "completions/min_length": 25.0, + "epoch": 7.40704889550757, + "grad_norm": 0.0030248817930064007, + "kl": 0.067626953125, + "learning_rate": 1.5990812481110322e-07, + "loss": 6.757134542567655e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3726, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 72.82291841506958, + "completions/min_length": 29.125, + "epoch": 7.4090344998759, + "grad_norm": 0.013381122097346324, + "kl": 0.092742919921875, + "learning_rate": 1.5967692939288018e-07, + "loss": 9.265523112844676e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3727, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.75, + "completions/mean_length": 63.15625190734863, + "completions/min_length": 23.75, + "epoch": 7.411020104244229, + "grad_norm": 1.0832269839662216, + "kl": 0.071044921875, + "learning_rate": 1.5944586946454054e-07, + "loss": 0.008739238604903221, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3728, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.0, + "completions/mean_length": 72.47916793823242, + "completions/min_length": 25.25, + "epoch": 7.413005708612559, + "grad_norm": 0.007544861598679511, + "kl": 0.092529296875, + "learning_rate": 1.5921494511807427e-07, + "loss": 9.254638280253857e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3729, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 66.34375286102295, + "completions/min_length": 23.375, + "epoch": 7.414991312980889, + "grad_norm": 0.00966382524771158, + "kl": 0.071197509765625, + "learning_rate": 1.5898415644541757e-07, + "loss": 7.125074625946581e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3730, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.125, + "completions/mean_length": 80.802086353302, + "completions/min_length": 18.875, + "epoch": 7.416976917349218, + "grad_norm": 0.0030872172996127987, + "kl": 0.073822021484375, + "learning_rate": 1.587535035384519e-07, + "loss": 7.389920938294381e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3731, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 73.12500143051147, + "completions/min_length": 32.0, + "epoch": 7.418962521717548, + "grad_norm": 0.0038140118721076686, + "kl": 0.06573486328125, + "learning_rate": 1.585229864890056e-07, + "loss": 6.571651465492323e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3732, + "train_speed(iter/s)": 0.022661 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 214.0, + "completions/mean_length": 81.68750190734863, + "completions/min_length": 21.875, + "epoch": 7.420948126085877, + "grad_norm": 0.004606751552241449, + "kl": 0.074798583984375, + "learning_rate": 1.5829260538885202e-07, + "loss": 7.473808364011347e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3733, + "train_speed(iter/s)": 0.02266 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 63.80208444595337, + "completions/min_length": 24.75, + "epoch": 7.422933730454207, + "grad_norm": 0.005899212493461777, + "kl": 0.060760498046875, + "learning_rate": 1.5806236032971087e-07, + "loss": 6.0760499764001e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3734, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 68.17708492279053, + "completions/min_length": 21.375, + "epoch": 7.424919334822537, + "grad_norm": 0.0038211351096829015, + "kl": 0.0880126953125, + "learning_rate": 1.5783225140324784e-07, + "loss": 8.808104757918045e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3735, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.25, + "completions/mean_length": 57.125001192092896, + "completions/min_length": 20.375, + "epoch": 7.426904939190866, + "grad_norm": 0.0051748181219927, + "kl": 0.07061767578125, + "learning_rate": 1.576022787010739e-07, + "loss": 7.057129550958052e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3736, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.0, + "completions/mean_length": 70.14583587646484, + "completions/min_length": 27.75, + "epoch": 7.428890543559196, + "grad_norm": 0.006471687925077589, + "kl": 0.076416015625, + "learning_rate": 1.5737244231474622e-07, + "loss": 7.638930401299149e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3737, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 64.83333539962769, + "completions/min_length": 15.625, + "epoch": 7.430876147927526, + "grad_norm": 0.011829371247196185, + "kl": 0.08349609375, + "learning_rate": 1.571427423357678e-07, + "loss": 8.343130321009085e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3738, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.5, + "completions/mean_length": 71.75000143051147, + "completions/min_length": 25.125, + "epoch": 7.432861752295855, + "grad_norm": 0.0050189514726394074, + "kl": 0.075408935546875, + "learning_rate": 1.5691317885558674e-07, + "loss": 7.537516648881137e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3739, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 71.75000143051147, + "completions/min_length": 26.75, + "epoch": 7.434847356664185, + "grad_norm": 0.0049279509134033915, + "kl": 0.089813232421875, + "learning_rate": 1.5668375196559752e-07, + "loss": 8.974697266239673e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3740, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 68.83333539962769, + "completions/min_length": 26.75, + "epoch": 7.436832961032514, + "grad_norm": 0.004362219475897078, + "kl": 0.065704345703125, + "learning_rate": 1.5645446175713965e-07, + "loss": 6.572126585524529e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3741, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.75, + "completions/mean_length": 76.12500286102295, + "completions/min_length": 21.5, + "epoch": 7.438818565400844, + "grad_norm": 0.003470998081431405, + "kl": 0.06817626953125, + "learning_rate": 1.5622530832149844e-07, + "loss": 6.820004637120292e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3742, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 227.375, + "completions/mean_length": 72.19791841506958, + "completions/min_length": 20.125, + "epoch": 7.440804169769174, + "grad_norm": 1.1073680879673646, + "kl": 0.069976806640625, + "learning_rate": 1.559962917499048e-07, + "loss": 0.006491821259260178, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3743, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 61.15625190734863, + "completions/min_length": 17.375, + "epoch": 7.442789774137503, + "grad_norm": 0.004105450294446705, + "kl": 0.075225830078125, + "learning_rate": 1.5576741213353533e-07, + "loss": 7.524635293520987e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3744, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.125, + "completions/mean_length": 65.29166841506958, + "completions/min_length": 20.25, + "epoch": 7.444775378505833, + "grad_norm": 0.8790661554746932, + "kl": 0.077423095703125, + "learning_rate": 1.5553866956351158e-07, + "loss": 0.006379756145179272, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3745, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 68.88541889190674, + "completions/min_length": 22.125, + "epoch": 7.446760982874162, + "grad_norm": 0.005639493796178616, + "kl": 0.08843994140625, + "learning_rate": 1.5531006413090113e-07, + "loss": 8.833927859086543e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3746, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.0, + "completions/mean_length": 59.531250953674316, + "completions/min_length": 22.875, + "epoch": 7.448746587242492, + "grad_norm": 0.0029492879741779853, + "kl": 0.069976806640625, + "learning_rate": 1.5508159592671643e-07, + "loss": 6.995679723331705e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3747, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.125, + "completions/mean_length": 71.17708587646484, + "completions/min_length": 19.375, + "epoch": 7.450732191610822, + "grad_norm": 0.00480114962165369, + "kl": 0.0621337890625, + "learning_rate": 1.5485326504191582e-07, + "loss": 6.207319529494271e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3748, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.125, + "completions/mean_length": 68.364586353302, + "completions/min_length": 24.0, + "epoch": 7.452717795979151, + "grad_norm": 0.0033737137917122075, + "kl": 0.073333740234375, + "learning_rate": 1.546250715674024e-07, + "loss": 7.329837535507977e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3749, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.375, + "completions/mean_length": 68.61458683013916, + "completions/min_length": 23.5, + "epoch": 7.454703400347481, + "grad_norm": 0.005075451648058721, + "kl": 0.075836181640625, + "learning_rate": 1.54397015594025e-07, + "loss": 7.57932139094919e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3750, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 101.75, + "completions/mean_length": 54.38541793823242, + "completions/min_length": 19.375, + "epoch": 7.456689004715811, + "grad_norm": 0.010110975905962378, + "kl": 0.07672119140625, + "learning_rate": 1.541690972125778e-07, + "loss": 7.67758465372026e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3751, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 67.64583492279053, + "completions/min_length": 27.75, + "epoch": 7.45867460908414, + "grad_norm": 0.002907574246184972, + "kl": 0.063629150390625, + "learning_rate": 1.5394131651379978e-07, + "loss": 6.357365055009723e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3752, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 64.30208492279053, + "completions/min_length": 21.0, + "epoch": 7.46066021345247, + "grad_norm": 0.0043593015739744586, + "kl": 0.067626953125, + "learning_rate": 1.537136735883751e-07, + "loss": 6.76568306516856e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3753, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 69.14583492279053, + "completions/min_length": 21.625, + "epoch": 7.462645817820799, + "grad_norm": 0.005498400713452654, + "kl": 0.065155029296875, + "learning_rate": 1.534861685269337e-07, + "loss": 6.51189693599008e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3754, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.625, + "completions/mean_length": 69.33333539962769, + "completions/min_length": 24.875, + "epoch": 7.464631422189129, + "grad_norm": 0.005280976080447809, + "kl": 0.065216064453125, + "learning_rate": 1.5325880142004976e-07, + "loss": 6.517578003695235e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3755, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 70.91666889190674, + "completions/min_length": 27.625, + "epoch": 7.466617026557459, + "grad_norm": 0.0051732542778264575, + "kl": 0.074066162109375, + "learning_rate": 1.5303157235824321e-07, + "loss": 7.413503772113472e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3756, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.625, + "completions/mean_length": 66.57291889190674, + "completions/min_length": 20.5, + "epoch": 7.468602630925788, + "grad_norm": 0.005910914817176071, + "kl": 0.076995849609375, + "learning_rate": 1.5280448143197888e-07, + "loss": 7.704936433583498e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3757, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 68.00000095367432, + "completions/min_length": 24.25, + "epoch": 7.470588235294118, + "grad_norm": 0.006723453251228797, + "kl": 0.078125, + "learning_rate": 1.5257752873166636e-07, + "loss": 7.806930807419121e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3758, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 72.05208492279053, + "completions/min_length": 20.125, + "epoch": 7.472573839662447, + "grad_norm": 0.004978863733786664, + "kl": 0.072784423828125, + "learning_rate": 1.523507143476605e-07, + "loss": 7.274825475178659e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3759, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.125, + "completions/mean_length": 56.562501430511475, + "completions/min_length": 21.125, + "epoch": 7.474559444030777, + "grad_norm": 0.008352283349744437, + "kl": 0.08782958984375, + "learning_rate": 1.5212403837026073e-07, + "loss": 8.781059295870364e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3760, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 68.32291746139526, + "completions/min_length": 27.875, + "epoch": 7.476545048399107, + "grad_norm": 0.0049554829647225065, + "kl": 0.066436767578125, + "learning_rate": 1.5189750088971193e-07, + "loss": 6.647556438110769e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3761, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 66.31250143051147, + "completions/min_length": 25.75, + "epoch": 7.478530652767436, + "grad_norm": 0.004140960885884446, + "kl": 0.065521240234375, + "learning_rate": 1.5167110199620332e-07, + "loss": 6.554085848620161e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3762, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 69.91666841506958, + "completions/min_length": 30.125, + "epoch": 7.480516257135766, + "grad_norm": 0.004859213633604374, + "kl": 0.081146240234375, + "learning_rate": 1.5144484177986882e-07, + "loss": 8.106774475891143e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3763, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.75, + "completions/mean_length": 66.97916889190674, + "completions/min_length": 24.375, + "epoch": 7.482501861504096, + "grad_norm": 1.9024908033060055, + "kl": 0.090301513671875, + "learning_rate": 1.512187203307881e-07, + "loss": -0.008817656897008419, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3764, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 66.46875190734863, + "completions/min_length": 21.75, + "epoch": 7.484487465872425, + "grad_norm": 0.003921188377169107, + "kl": 0.07464599609375, + "learning_rate": 1.5099273773898458e-07, + "loss": 7.463831570930779e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3765, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 62.94791793823242, + "completions/min_length": 24.5, + "epoch": 7.486473070240755, + "grad_norm": 0.0034034509333290977, + "kl": 0.07257080078125, + "learning_rate": 1.507668940944266e-07, + "loss": 7.255197851918638e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3766, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.875, + "completions/mean_length": 64.10416841506958, + "completions/min_length": 21.875, + "epoch": 7.488458674609084, + "grad_norm": 0.004638652438963976, + "kl": 0.063385009765625, + "learning_rate": 1.5054118948702777e-07, + "loss": 6.33524323347956e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3767, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 62.177085399627686, + "completions/min_length": 24.375, + "epoch": 7.490444278977414, + "grad_norm": 0.005349412643887444, + "kl": 0.087188720703125, + "learning_rate": 1.5031562400664544e-07, + "loss": 8.722725760890171e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3768, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 66.17708492279053, + "completions/min_length": 23.5, + "epoch": 7.492429883345744, + "grad_norm": 1.1079419482299055, + "kl": 0.078125, + "learning_rate": 1.5009019774308246e-07, + "loss": -0.005216313526034355, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3769, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 60.35416841506958, + "completions/min_length": 22.375, + "epoch": 7.494415487714073, + "grad_norm": 0.004640896821779925, + "kl": 0.0684814453125, + "learning_rate": 1.4986491078608553e-07, + "loss": 6.846869655419141e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3770, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.375, + "completions/mean_length": 81.31250333786011, + "completions/min_length": 31.875, + "epoch": 7.496401092082403, + "grad_norm": 0.004028672403164803, + "kl": 0.0826416015625, + "learning_rate": 1.4963976322534634e-07, + "loss": 8.25505267130211e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3771, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.625, + "completions/mean_length": 71.57291984558105, + "completions/min_length": 24.0, + "epoch": 7.498386696450732, + "grad_norm": 0.004321633643688434, + "kl": 0.09393310546875, + "learning_rate": 1.49414755150501e-07, + "loss": 9.384322765981779e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3772, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 66.12500190734863, + "completions/min_length": 22.375, + "epoch": 7.500372300819062, + "grad_norm": 0.0042623108101929276, + "kl": 0.077178955078125, + "learning_rate": 1.4918988665113001e-07, + "loss": 7.707101758569479e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3773, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 63.23958683013916, + "completions/min_length": 20.75, + "epoch": 7.502357905187392, + "grad_norm": 0.005855976134385495, + "kl": 0.05926513671875, + "learning_rate": 1.4896515781675816e-07, + "loss": 5.921687261434272e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3774, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.75, + "completions/mean_length": 72.32291889190674, + "completions/min_length": 30.125, + "epoch": 7.504343509555721, + "grad_norm": 1.2358669762558965, + "kl": 0.089691162109375, + "learning_rate": 1.4874056873685502e-07, + "loss": -0.01331346295773983, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3775, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.875, + "completions/mean_length": 68.34375333786011, + "completions/min_length": 29.25, + "epoch": 7.506329113924051, + "grad_norm": 0.0054817869126398905, + "kl": 0.075653076171875, + "learning_rate": 1.485161195008341e-07, + "loss": 7.557076605735347e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3776, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 66.86458444595337, + "completions/min_length": 22.75, + "epoch": 7.508314718292381, + "grad_norm": 0.005857472286654964, + "kl": 0.08251953125, + "learning_rate": 1.4829181019805347e-07, + "loss": 8.240701572503895e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3777, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 61.375001430511475, + "completions/min_length": 22.0, + "epoch": 7.51030032266071, + "grad_norm": 0.0037511826756653297, + "kl": 0.077484130859375, + "learning_rate": 1.4806764091781564e-07, + "loss": 7.743419701000676e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3778, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.125, + "completions/mean_length": 61.76041793823242, + "completions/min_length": 27.25, + "epoch": 7.51228592702904, + "grad_norm": 0.005221846055470195, + "kl": 0.06646728515625, + "learning_rate": 1.4784361174936698e-07, + "loss": 6.639228377025574e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3779, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 69.75000286102295, + "completions/min_length": 26.875, + "epoch": 7.514271531397369, + "grad_norm": 0.006329034328525194, + "kl": 0.09356689453125, + "learning_rate": 1.476197227818985e-07, + "loss": 9.355320798931643e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3780, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 64.66666889190674, + "completions/min_length": 23.75, + "epoch": 7.516257135765699, + "grad_norm": 0.0032341825381367443, + "kl": 0.08154296875, + "learning_rate": 1.47395974104545e-07, + "loss": 8.149001223500818e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3781, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 73.20833492279053, + "completions/min_length": 27.375, + "epoch": 7.518242740134029, + "grad_norm": 0.005130550171671116, + "kl": 0.0706787109375, + "learning_rate": 1.471723658063856e-07, + "loss": 7.061640644678846e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3782, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 66.53125190734863, + "completions/min_length": 27.5, + "epoch": 7.520228344502358, + "grad_norm": 0.0035068428068767256, + "kl": 0.0836181640625, + "learning_rate": 1.4694889797644367e-07, + "loss": 8.361946674995124e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3783, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 58.97916841506958, + "completions/min_length": 24.5, + "epoch": 7.522213948870688, + "grad_norm": 0.006485952362945099, + "kl": 0.06097412109375, + "learning_rate": 1.467255707036863e-07, + "loss": 6.101143662817776e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3784, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 60.32291841506958, + "completions/min_length": 21.5, + "epoch": 7.524199553239017, + "grad_norm": 0.006433364720810747, + "kl": 0.07720947265625, + "learning_rate": 1.4650238407702503e-07, + "loss": 7.71501800045371e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3785, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.375, + "completions/mean_length": 67.04166889190674, + "completions/min_length": 27.625, + "epoch": 7.526185157607347, + "grad_norm": 0.003986761223215965, + "kl": 0.080596923828125, + "learning_rate": 1.4627933818531534e-07, + "loss": 8.061522385105491e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3786, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 55.37500190734863, + "completions/min_length": 18.375, + "epoch": 7.528170761975677, + "grad_norm": 0.004357447648336865, + "kl": 0.070159912109375, + "learning_rate": 1.4605643311735626e-07, + "loss": 7.016626477707177e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3787, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 65.6979193687439, + "completions/min_length": 19.25, + "epoch": 7.530156366344006, + "grad_norm": 1.9299555046978079, + "kl": 0.07037353515625, + "learning_rate": 1.4583366896189138e-07, + "loss": -0.003820901270955801, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3788, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 64.97916841506958, + "completions/min_length": 21.0, + "epoch": 7.532141970712336, + "grad_norm": 1.0584800564007786, + "kl": 0.085906982421875, + "learning_rate": 1.456110458076077e-07, + "loss": 0.010107404552400112, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3789, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 68.81250143051147, + "completions/min_length": 24.75, + "epoch": 7.5341275750806656, + "grad_norm": 0.0032157425030490605, + "kl": 0.069183349609375, + "learning_rate": 1.4538856374313608e-07, + "loss": 6.924809713382274e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3790, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.375, + "completions/mean_length": 69.95833492279053, + "completions/min_length": 20.625, + "epoch": 7.536113179448995, + "grad_norm": 0.003533533666412546, + "kl": 0.065826416015625, + "learning_rate": 1.4516622285705155e-07, + "loss": 6.572699203388765e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3791, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.25, + "completions/mean_length": 68.89583587646484, + "completions/min_length": 27.5, + "epoch": 7.538098783817325, + "grad_norm": 2.004831805286954, + "kl": 0.097900390625, + "learning_rate": 1.4494402323787296e-07, + "loss": 0.005995898507535458, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3792, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 68.7604193687439, + "completions/min_length": 27.75, + "epoch": 7.540084388185654, + "grad_norm": 0.004367124048634768, + "kl": 0.089019775390625, + "learning_rate": 1.4472196497406236e-07, + "loss": 8.89451039256528e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3793, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 67.33333444595337, + "completions/min_length": 25.375, + "epoch": 7.542069992553984, + "grad_norm": 0.0031753886345738573, + "kl": 0.0809326171875, + "learning_rate": 1.445000481540263e-07, + "loss": 8.095223165582865e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3794, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.375, + "completions/mean_length": 70.59375190734863, + "completions/min_length": 28.5, + "epoch": 7.5440555969223135, + "grad_norm": 0.006079296620358689, + "kl": 0.08233642578125, + "learning_rate": 1.442782728661141e-07, + "loss": 8.227508806157857e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3795, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.625, + "completions/mean_length": 69.37500190734863, + "completions/min_length": 24.125, + "epoch": 7.546041201290643, + "grad_norm": 0.004001955070932801, + "kl": 0.066253662109375, + "learning_rate": 1.4405663919861977e-07, + "loss": 6.627905531786382e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3796, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.0, + "completions/mean_length": 61.57291793823242, + "completions/min_length": 23.0, + "epoch": 7.5480268056589725, + "grad_norm": 0.0050473173425620655, + "kl": 0.07659912109375, + "learning_rate": 1.438351472397799e-07, + "loss": 7.653064676560462e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3797, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 72.42708539962769, + "completions/min_length": 32.75, + "epoch": 7.550012410027302, + "grad_norm": 0.004899609328287532, + "kl": 0.080535888671875, + "learning_rate": 1.436137970777755e-07, + "loss": 8.046612492762506e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3798, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 59.22916793823242, + "completions/min_length": 28.0, + "epoch": 7.551998014395632, + "grad_norm": 1.7891476329441975, + "kl": 0.076080322265625, + "learning_rate": 1.433925888007308e-07, + "loss": -0.0001973348407773301, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.05103103630244732, + "rewards/CineAccuracyORM/mean": 0.7083333432674408, + "rewards/CineAccuracyORM/std": 0.3245695158839226, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3799, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 68.6979193687439, + "completions/min_length": 26.25, + "epoch": 7.5539836187639615, + "grad_norm": 0.004071081516049575, + "kl": 0.076019287109375, + "learning_rate": 1.4317152249671337e-07, + "loss": 7.594324415549636e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3800, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 65.51041841506958, + "completions/min_length": 22.625, + "epoch": 7.555969223132291, + "grad_norm": 0.00372150144917829, + "kl": 0.06512451171875, + "learning_rate": 1.4295059825373461e-07, + "loss": 6.519451562780887e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3801, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 65.67708539962769, + "completions/min_length": 23.5, + "epoch": 7.5579548275006205, + "grad_norm": 0.0031629980312355395, + "kl": 0.082366943359375, + "learning_rate": 1.427298161597491e-07, + "loss": 8.251075632870197e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3802, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 66.20833539962769, + "completions/min_length": 27.25, + "epoch": 7.5599404318689505, + "grad_norm": 0.020388673417674308, + "kl": 0.102783203125, + "learning_rate": 1.425091763026548e-07, + "loss": 0.00010280066635459661, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3803, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 67.30208539962769, + "completions/min_length": 23.875, + "epoch": 7.5619260362372795, + "grad_norm": 0.004594253566820516, + "kl": 0.072967529296875, + "learning_rate": 1.4228867877029333e-07, + "loss": 7.303150778170675e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3804, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.25, + "completions/mean_length": 74.65625286102295, + "completions/min_length": 23.75, + "epoch": 7.5639116406056095, + "grad_norm": 0.00311300994489222, + "kl": 0.074127197265625, + "learning_rate": 1.4206832365044923e-07, + "loss": 7.414726132992655e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3805, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 64.03125238418579, + "completions/min_length": 20.5, + "epoch": 7.565897244973939, + "grad_norm": 0.005210254097619597, + "kl": 0.083526611328125, + "learning_rate": 1.418481110308507e-07, + "loss": 8.356572652701288e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3806, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 63.145835399627686, + "completions/min_length": 21.0, + "epoch": 7.5678828493422685, + "grad_norm": 0.005606869038669202, + "kl": 0.0777587890625, + "learning_rate": 1.4162804099916932e-07, + "loss": 7.770962110953405e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3807, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 62.510417461395264, + "completions/min_length": 17.5, + "epoch": 7.5698684537105985, + "grad_norm": 0.005429184242739169, + "kl": 0.073699951171875, + "learning_rate": 1.414081136430193e-07, + "loss": 7.376716530416161e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3808, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.125, + "completions/mean_length": 61.38541793823242, + "completions/min_length": 23.875, + "epoch": 7.5718540580789275, + "grad_norm": 0.006276110970431092, + "kl": 0.067138671875, + "learning_rate": 1.4118832904995875e-07, + "loss": 6.710530578857288e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3809, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.75, + "completions/mean_length": 67.7604193687439, + "completions/min_length": 20.875, + "epoch": 7.5738396624472575, + "grad_norm": 0.0029336926068383026, + "kl": 0.0693359375, + "learning_rate": 1.409686873074884e-07, + "loss": 6.93323599989526e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3810, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.375, + "completions/mean_length": 70.63541865348816, + "completions/min_length": 22.75, + "epoch": 7.5758252668155865, + "grad_norm": 0.004899217007670302, + "kl": 0.1007080078125, + "learning_rate": 1.407491885030523e-07, + "loss": 0.00010074060264742002, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3811, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 68.80208492279053, + "completions/min_length": 24.25, + "epoch": 7.5778108711839165, + "grad_norm": 0.0038766021489333147, + "kl": 0.08477783203125, + "learning_rate": 1.4052983272403757e-07, + "loss": 8.473803609376773e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3812, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 69.54166841506958, + "completions/min_length": 25.75, + "epoch": 7.5797964755522464, + "grad_norm": 0.004622770475997417, + "kl": 0.088134765625, + "learning_rate": 1.4031062005777473e-07, + "loss": 8.805944526102394e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3813, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 66.72916841506958, + "completions/min_length": 25.75, + "epoch": 7.5817820799205755, + "grad_norm": 0.0034354835599022635, + "kl": 0.07568359375, + "learning_rate": 1.400915505915367e-07, + "loss": 7.565581472590566e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3814, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 61.114585399627686, + "completions/min_length": 17.375, + "epoch": 7.5837676842889055, + "grad_norm": 0.005817830902787357, + "kl": 0.069793701171875, + "learning_rate": 1.3987262441254e-07, + "loss": 6.983404455240816e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3815, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 68.16666793823242, + "completions/min_length": 28.125, + "epoch": 7.585753288657235, + "grad_norm": 0.005157108721092594, + "kl": 0.0640869140625, + "learning_rate": 1.3965384160794347e-07, + "loss": 6.403190491255373e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3816, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.75, + "completions/mean_length": 74.78125190734863, + "completions/min_length": 20.625, + "epoch": 7.5877388930255645, + "grad_norm": 0.004824453982976149, + "kl": 0.08428955078125, + "learning_rate": 1.3943520226484962e-07, + "loss": 8.422048267675564e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3817, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 62.30208444595337, + "completions/min_length": 22.5, + "epoch": 7.589724497393894, + "grad_norm": 0.0038294148197715572, + "kl": 0.077789306640625, + "learning_rate": 1.392167064703032e-07, + "loss": 7.776329584885389e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3818, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.875, + "completions/mean_length": 72.90625095367432, + "completions/min_length": 25.25, + "epoch": 7.5917101017622235, + "grad_norm": 0.006284562017998628, + "kl": 0.099090576171875, + "learning_rate": 1.3899835431129175e-07, + "loss": 9.91778215393424e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3819, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 69.96875190734863, + "completions/min_length": 26.5, + "epoch": 7.5936957061305534, + "grad_norm": 0.0035761528511995206, + "kl": 0.07391357421875, + "learning_rate": 1.3878014587474662e-07, + "loss": 7.381969771813601e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3820, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 69.32291841506958, + "completions/min_length": 24.625, + "epoch": 7.595681310498883, + "grad_norm": 0.0036286533714774724, + "kl": 0.10113525390625, + "learning_rate": 1.3856208124754088e-07, + "loss": 0.00010125023982254788, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3821, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 62.31250190734863, + "completions/min_length": 19.25, + "epoch": 7.5976669148672125, + "grad_norm": 0.0034877308395075577, + "kl": 0.072998046875, + "learning_rate": 1.3834416051649055e-07, + "loss": 7.299243588931859e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3822, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.0, + "completions/mean_length": 54.86458492279053, + "completions/min_length": 18.625, + "epoch": 7.599652519235542, + "grad_norm": 0.003132542223039554, + "kl": 0.073883056640625, + "learning_rate": 1.381263837683549e-07, + "loss": 7.385817298199981e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3823, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 71.76041984558105, + "completions/min_length": 23.75, + "epoch": 7.6016381236038715, + "grad_norm": 0.0039784297043140345, + "kl": 0.089874267578125, + "learning_rate": 1.379087510898352e-07, + "loss": 8.992602670332417e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3824, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 58.16666793823242, + "completions/min_length": 17.625, + "epoch": 7.603623727972201, + "grad_norm": 0.00713559702926011, + "kl": 0.068206787109375, + "learning_rate": 1.376912625675757e-07, + "loss": 6.826203025411814e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3825, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 72.03125286102295, + "completions/min_length": 20.0, + "epoch": 7.605609332340531, + "grad_norm": 0.0031252772054555663, + "kl": 0.0643310546875, + "learning_rate": 1.3747391828816347e-07, + "loss": 6.431882502511144e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3826, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 64.28125143051147, + "completions/min_length": 22.5, + "epoch": 7.6075949367088604, + "grad_norm": 0.00351842678753326, + "kl": 0.06585693359375, + "learning_rate": 1.3725671833812764e-07, + "loss": 6.579048931598663e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3827, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 197.375, + "completions/mean_length": 78.53125095367432, + "completions/min_length": 25.25, + "epoch": 7.60958054107719, + "grad_norm": 0.0037101368806568813, + "kl": 0.086029052734375, + "learning_rate": 1.3703966280394036e-07, + "loss": 8.598676504334435e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3828, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.125, + "completions/mean_length": 71.10416889190674, + "completions/min_length": 25.0, + "epoch": 7.61156614544552, + "grad_norm": 0.004323057091798282, + "kl": 0.072601318359375, + "learning_rate": 1.3682275177201603e-07, + "loss": 7.258218829520047e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3829, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 64.96875143051147, + "completions/min_length": 20.875, + "epoch": 7.613551749813849, + "grad_norm": 0.0057692096428140225, + "kl": 0.074859619140625, + "learning_rate": 1.366059853287113e-07, + "loss": 7.488192932214588e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3830, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 63.84375190734863, + "completions/min_length": 21.75, + "epoch": 7.615537354182179, + "grad_norm": 0.9623374980197493, + "kl": 0.0753631591796875, + "learning_rate": 1.3638936356032588e-07, + "loss": 0.008153287693858147, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166669771075, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3831, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 68.86458539962769, + "completions/min_length": 23.625, + "epoch": 7.617522958550508, + "grad_norm": 0.005837272487351229, + "kl": 0.0802001953125, + "learning_rate": 1.361728865531012e-07, + "loss": 8.019919914659113e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3832, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 69.25000286102295, + "completions/min_length": 20.5, + "epoch": 7.619508562918838, + "grad_norm": 0.0034887088475826413, + "kl": 0.08001708984375, + "learning_rate": 1.3595655439322163e-07, + "loss": 7.99636691226624e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3833, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.875, + "completions/mean_length": 57.364586353302, + "completions/min_length": 16.875, + "epoch": 7.621494167287168, + "grad_norm": 0.004570205789125989, + "kl": 0.069915771484375, + "learning_rate": 1.3574036716681364e-07, + "loss": 6.994244176894426e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3834, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 74.8229193687439, + "completions/min_length": 28.25, + "epoch": 7.623479771655497, + "grad_norm": 1.0640701791162204, + "kl": 0.08599853515625, + "learning_rate": 1.3552432495994575e-07, + "loss": 0.0036701548378914595, + "memory(GiB)": 94.21, + "reward": 1.71875, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.71875, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3835, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.625, + "completions/mean_length": 73.0104193687439, + "completions/min_length": 30.25, + "epoch": 7.625465376023827, + "grad_norm": 0.007398065378854317, + "kl": 0.0765380859375, + "learning_rate": 1.3530842785862928e-07, + "loss": 7.650951738469303e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3836, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.875, + "completions/mean_length": 61.145835399627686, + "completions/min_length": 16.375, + "epoch": 7.627450980392156, + "grad_norm": 0.002851991828770374, + "kl": 0.065643310546875, + "learning_rate": 1.3509267594881713e-07, + "loss": 6.563091301359236e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3837, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 66.70833492279053, + "completions/min_length": 25.5, + "epoch": 7.629436584760486, + "grad_norm": 0.004248191399794432, + "kl": 0.065399169921875, + "learning_rate": 1.348770693164051e-07, + "loss": 6.544891220983118e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3838, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.75, + "completions/mean_length": 59.53125238418579, + "completions/min_length": 20.75, + "epoch": 7.631422189128816, + "grad_norm": 0.003373364449470411, + "kl": 0.063232421875, + "learning_rate": 1.3466160804723042e-07, + "loss": 6.322674744296819e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3839, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 61.14583492279053, + "completions/min_length": 20.25, + "epoch": 7.633407793497145, + "grad_norm": 0.005341285629886416, + "kl": 0.069915771484375, + "learning_rate": 1.3444629222707305e-07, + "loss": 6.989236135268584e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3840, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 68.18750238418579, + "completions/min_length": 26.75, + "epoch": 7.635393397865475, + "grad_norm": 0.003544579968788282, + "kl": 0.0733642578125, + "learning_rate": 1.3423112194165497e-07, + "loss": 7.330399967031553e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3841, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 64.59375190734863, + "completions/min_length": 18.25, + "epoch": 7.637379002233805, + "grad_norm": 1.3279956347277142, + "kl": 0.08892822265625, + "learning_rate": 1.3401609727663988e-07, + "loss": -0.01843891851603985, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3842, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.25, + "completions/mean_length": 61.427085399627686, + "completions/min_length": 22.75, + "epoch": 7.639364606602134, + "grad_norm": 1.4531341156494462, + "kl": 0.226776123046875, + "learning_rate": 1.3380121831763354e-07, + "loss": -0.015078927390277386, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3843, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.0, + "completions/mean_length": 58.60416889190674, + "completions/min_length": 17.625, + "epoch": 7.641350210970464, + "grad_norm": 0.009695697133040825, + "kl": 0.0684814453125, + "learning_rate": 1.335864851501841e-07, + "loss": 6.850939098512754e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3844, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.75, + "completions/mean_length": 66.09375190734863, + "completions/min_length": 22.875, + "epoch": 7.643335815338793, + "grad_norm": 0.9979786384564562, + "kl": 0.08343505859375, + "learning_rate": 1.3337189785978125e-07, + "loss": 0.0007822262123227119, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3845, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.125, + "completions/mean_length": 59.19791889190674, + "completions/min_length": 24.25, + "epoch": 7.645321419707123, + "grad_norm": 0.0038477150137922337, + "kl": 0.087158203125, + "learning_rate": 1.331574565318569e-07, + "loss": 8.724215877009556e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3846, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 61.07291841506958, + "completions/min_length": 21.125, + "epoch": 7.647307024075453, + "grad_norm": 0.005068134747830288, + "kl": 0.06317138671875, + "learning_rate": 1.3294316125178473e-07, + "loss": 6.313101766863838e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3847, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 63.562500953674316, + "completions/min_length": 27.25, + "epoch": 7.649292628443782, + "grad_norm": 0.0037538206632814564, + "kl": 0.07318115234375, + "learning_rate": 1.3272901210488014e-07, + "loss": 7.314438698813319e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3848, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 67.4166693687439, + "completions/min_length": 26.125, + "epoch": 7.651278232812112, + "grad_norm": 0.005920629853202438, + "kl": 0.083740234375, + "learning_rate": 1.3251500917640067e-07, + "loss": 8.384125248994678e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3849, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 63.739585876464844, + "completions/min_length": 20.25, + "epoch": 7.653263837180441, + "grad_norm": 0.0032517789972344125, + "kl": 0.070159912109375, + "learning_rate": 1.3230115255154538e-07, + "loss": 7.010986155364662e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3850, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 69.42708492279053, + "completions/min_length": 24.625, + "epoch": 7.655249441548771, + "grad_norm": 0.0035803549911116884, + "kl": 0.079986572265625, + "learning_rate": 1.3208744231545492e-07, + "loss": 7.997571083251387e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3851, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.625, + "completions/mean_length": 64.45833492279053, + "completions/min_length": 24.5, + "epoch": 7.657235045917101, + "grad_norm": 0.011159006428603476, + "kl": 0.094818115234375, + "learning_rate": 1.318738785532123e-07, + "loss": 9.478756692260504e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3852, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.25, + "completions/mean_length": 56.39583444595337, + "completions/min_length": 17.375, + "epoch": 7.65922065028543, + "grad_norm": 0.002846713363513895, + "kl": 0.073455810546875, + "learning_rate": 1.3166046134984142e-07, + "loss": 7.338711293414235e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3853, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.125, + "completions/mean_length": 62.604167461395264, + "completions/min_length": 22.125, + "epoch": 7.66120625465376, + "grad_norm": 0.0038446117106336595, + "kl": 0.069427490234375, + "learning_rate": 1.3144719079030853e-07, + "loss": 6.934361590538174e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3854, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.875, + "completions/mean_length": 62.333335399627686, + "completions/min_length": 28.125, + "epoch": 7.66319185902209, + "grad_norm": 0.004201565164909353, + "kl": 0.076568603515625, + "learning_rate": 1.3123406695952117e-07, + "loss": 7.650322368135676e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3855, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.25, + "completions/mean_length": 62.364585399627686, + "completions/min_length": 23.25, + "epoch": 7.665177463390419, + "grad_norm": 0.0069294008205900495, + "kl": 0.078399658203125, + "learning_rate": 1.3102108994232825e-07, + "loss": 7.845751679269597e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3856, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 64.95833492279053, + "completions/min_length": 18.875, + "epoch": 7.667163067758749, + "grad_norm": 1.187931763149915, + "kl": 0.097198486328125, + "learning_rate": 1.3080825982352077e-07, + "loss": -0.002422519028186798, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3857, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 69.52083492279053, + "completions/min_length": 28.875, + "epoch": 7.669148672127078, + "grad_norm": 0.002938643074916235, + "kl": 0.082244873046875, + "learning_rate": 1.3059557668783084e-07, + "loss": 8.231324318330735e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3858, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 58.56250190734863, + "completions/min_length": 22.75, + "epoch": 7.671134276495408, + "grad_norm": 0.003799052043380333, + "kl": 0.06842041015625, + "learning_rate": 1.30383040619932e-07, + "loss": 6.852422666270286e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3859, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 62.98958492279053, + "completions/min_length": 26.625, + "epoch": 7.673119880863738, + "grad_norm": 0.003458717684124392, + "kl": 0.079071044921875, + "learning_rate": 1.3017065170443946e-07, + "loss": 7.898827607277781e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3860, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 69.07291841506958, + "completions/min_length": 24.625, + "epoch": 7.675105485232067, + "grad_norm": 0.003952200137858557, + "kl": 0.0587158203125, + "learning_rate": 1.2995841002591006e-07, + "loss": 5.869668530067429e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3861, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 67.36458587646484, + "completions/min_length": 25.5, + "epoch": 7.677091089600397, + "grad_norm": 0.003941403506617953, + "kl": 0.0537109375, + "learning_rate": 1.2974631566884136e-07, + "loss": 5.367131598177366e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3862, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 63.46875238418579, + "completions/min_length": 28.0, + "epoch": 7.679076693968726, + "grad_norm": 0.009386621949527022, + "kl": 0.081939697265625, + "learning_rate": 1.2953436871767298e-07, + "loss": 8.189590880647302e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3863, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 60.062501430511475, + "completions/min_length": 21.5, + "epoch": 7.681062298337056, + "grad_norm": 2.6844684037516444, + "kl": 0.21484375, + "learning_rate": 1.293225692567852e-07, + "loss": -0.0160828884691, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3864, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 66.13541793823242, + "completions/min_length": 24.875, + "epoch": 7.683047902705386, + "grad_norm": 1.0338487841698705, + "kl": 0.063751220703125, + "learning_rate": 1.2911091737050027e-07, + "loss": 0.010955302976071835, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3865, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 66.65625190734863, + "completions/min_length": 23.0, + "epoch": 7.685033507073715, + "grad_norm": 0.008101933829857383, + "kl": 0.085479736328125, + "learning_rate": 1.288994131430811e-07, + "loss": 8.540777344023809e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3866, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.0, + "completions/mean_length": 56.46875190734863, + "completions/min_length": 19.75, + "epoch": 7.687019111442045, + "grad_norm": 0.00580529201571199, + "kl": 0.062530517578125, + "learning_rate": 1.2868805665873184e-07, + "loss": 6.254997424548492e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3867, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 66.14583492279053, + "completions/min_length": 27.875, + "epoch": 7.689004715810375, + "grad_norm": 0.005377720616617876, + "kl": 0.083740234375, + "learning_rate": 1.2847684800159853e-07, + "loss": 8.378714846912771e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3868, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 65.00000095367432, + "completions/min_length": 29.5, + "epoch": 7.690990320178704, + "grad_norm": 0.006377999742467398, + "kl": 0.082977294921875, + "learning_rate": 1.282657872557676e-07, + "loss": 8.286055526696146e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3869, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.25, + "completions/mean_length": 58.437501430511475, + "completions/min_length": 22.5, + "epoch": 7.692975924547034, + "grad_norm": 0.004440370370414892, + "kl": 0.097930908203125, + "learning_rate": 1.2805487450526665e-07, + "loss": 9.791778575163335e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3870, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.625, + "completions/mean_length": 63.87500190734863, + "completions/min_length": 23.0, + "epoch": 7.694961528915364, + "grad_norm": 1.3893921491849417, + "kl": 0.0911865234375, + "learning_rate": 1.2784410983406486e-07, + "loss": -0.0011434592306613922, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3871, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 67.91666889190674, + "completions/min_length": 29.875, + "epoch": 7.696947133283693, + "grad_norm": 0.003432952514643224, + "kl": 0.058441162109375, + "learning_rate": 1.2763349332607187e-07, + "loss": 5.8417776017449796e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3872, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.5, + "completions/mean_length": 60.52083492279053, + "completions/min_length": 19.125, + "epoch": 7.698932737652023, + "grad_norm": 0.007740923115435708, + "kl": 0.095245361328125, + "learning_rate": 1.274230250651389e-07, + "loss": 9.521457104710862e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3873, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 63.65625238418579, + "completions/min_length": 17.0, + "epoch": 7.700918342020352, + "grad_norm": 0.004679144919841729, + "kl": 0.068572998046875, + "learning_rate": 1.272127051350576e-07, + "loss": 6.858836422907189e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3874, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 60.6666693687439, + "completions/min_length": 22.875, + "epoch": 7.702903946388682, + "grad_norm": 0.0033703393272158026, + "kl": 0.06671142578125, + "learning_rate": 1.2700253361956091e-07, + "loss": 6.672116433037445e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3875, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 68.81250095367432, + "completions/min_length": 23.75, + "epoch": 7.704889550757011, + "grad_norm": 0.00444089625798143, + "kl": 0.065704345703125, + "learning_rate": 1.2679251060232276e-07, + "loss": 6.565947842318565e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3876, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.5, + "completions/mean_length": 63.77083492279053, + "completions/min_length": 22.625, + "epoch": 7.706875155125341, + "grad_norm": 0.0037948797975675265, + "kl": 0.08343505859375, + "learning_rate": 1.2658263616695752e-07, + "loss": 8.33876256365329e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3877, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.125, + "completions/mean_length": 55.68750190734863, + "completions/min_length": 20.0, + "epoch": 7.708860759493671, + "grad_norm": 0.0032764098465749053, + "kl": 0.067962646484375, + "learning_rate": 1.2637291039702103e-07, + "loss": 6.792052590753883e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3878, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 64.25000238418579, + "completions/min_length": 23.5, + "epoch": 7.710846363862, + "grad_norm": 2.0970881609837786, + "kl": 0.078155517578125, + "learning_rate": 1.2616333337600937e-07, + "loss": -0.011503200978040695, + "memory(GiB)": 94.21, + "reward": 1.8437500149011612, + "reward_std": 0.05779037997126579, + "rewards/CineAccuracyORM/mean": 0.8437500037252903, + "rewards/CineAccuracyORM/std": 0.16290925815701485, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3879, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 70.38541841506958, + "completions/min_length": 28.875, + "epoch": 7.71283196823033, + "grad_norm": 0.003226638792558709, + "kl": 0.07977294921875, + "learning_rate": 1.259539051873595e-07, + "loss": 7.976028427947313e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3880, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.625, + "completions/mean_length": 79.10416889190674, + "completions/min_length": 28.0, + "epoch": 7.71481757259866, + "grad_norm": 0.0035974545990974903, + "kl": 0.07464599609375, + "learning_rate": 1.257446259144494e-07, + "loss": 7.466791430488229e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3881, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.375, + "completions/mean_length": 57.23958492279053, + "completions/min_length": 20.375, + "epoch": 7.716803176966989, + "grad_norm": 0.005518855760240403, + "kl": 0.07415771484375, + "learning_rate": 1.2553549564059785e-07, + "loss": 7.423026545438915e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3882, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 66.10416889190674, + "completions/min_length": 20.25, + "epoch": 7.718788781335319, + "grad_norm": 0.005431646810206131, + "kl": 0.07177734375, + "learning_rate": 1.253265144490636e-07, + "loss": 7.168115553213283e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3883, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 69.23958539962769, + "completions/min_length": 25.75, + "epoch": 7.720774385703649, + "grad_norm": 0.0062842108858253976, + "kl": 0.082977294921875, + "learning_rate": 1.2511768242304704e-07, + "loss": 8.305140363518149e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3884, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 69.05208587646484, + "completions/min_length": 26.75, + "epoch": 7.722759990071978, + "grad_norm": 0.005109899437266735, + "kl": 0.068939208984375, + "learning_rate": 1.2490899964568823e-07, + "loss": 6.900308653712273e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3885, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.375, + "completions/mean_length": 58.35416841506958, + "completions/min_length": 19.125, + "epoch": 7.724745594440308, + "grad_norm": 0.003215219035282089, + "kl": 0.090911865234375, + "learning_rate": 1.2470046620006857e-07, + "loss": 9.096015128307045e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3886, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.625, + "completions/mean_length": 72.31250286102295, + "completions/min_length": 26.0, + "epoch": 7.726731198808637, + "grad_norm": 0.005316643862158534, + "kl": 0.08538818359375, + "learning_rate": 1.2449208216920948e-07, + "loss": 8.545012678951025e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3887, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.5, + "completions/mean_length": 67.35416889190674, + "completions/min_length": 27.5, + "epoch": 7.728716803176967, + "grad_norm": 0.0056797898705136405, + "kl": 0.0538330078125, + "learning_rate": 1.242838476360729e-07, + "loss": 5.389576472225599e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3888, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 66.25000238418579, + "completions/min_length": 25.875, + "epoch": 7.730702407545296, + "grad_norm": 0.005387402377753929, + "kl": 0.07366943359375, + "learning_rate": 1.2407576268356196e-07, + "loss": 7.36262445570901e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3889, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.125, + "completions/mean_length": 65.50000190734863, + "completions/min_length": 28.0, + "epoch": 7.732688011913626, + "grad_norm": 0.003991402989264238, + "kl": 0.069580078125, + "learning_rate": 1.2386782739451945e-07, + "loss": 6.965005013626069e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3890, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.5, + "completions/mean_length": 69.38541841506958, + "completions/min_length": 24.5, + "epoch": 7.734673616281956, + "grad_norm": 0.003969477911141373, + "kl": 0.07080078125, + "learning_rate": 1.2366004185172874e-07, + "loss": 7.072483276715502e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3891, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 69.8541693687439, + "completions/min_length": 26.75, + "epoch": 7.736659220650285, + "grad_norm": 0.003786106620533908, + "kl": 0.0819091796875, + "learning_rate": 1.234524061379139e-07, + "loss": 8.186566265067086e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3892, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 63.85416841506958, + "completions/min_length": 27.625, + "epoch": 7.738644825018615, + "grad_norm": 0.004941575415389164, + "kl": 0.07891845703125, + "learning_rate": 1.2324492033573892e-07, + "loss": 7.896412716945633e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3893, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.5, + "completions/mean_length": 62.708335399627686, + "completions/min_length": 23.625, + "epoch": 7.740630429386945, + "grad_norm": 0.011346158168316975, + "kl": 0.0738525390625, + "learning_rate": 1.230375845278086e-07, + "loss": 7.386261131614447e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3894, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.875, + "completions/mean_length": 78.71875333786011, + "completions/min_length": 30.75, + "epoch": 7.742616033755274, + "grad_norm": 0.0036536957602840083, + "kl": 0.079437255859375, + "learning_rate": 1.228303987966675e-07, + "loss": 7.951415318530053e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3895, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.25, + "completions/mean_length": 85.39583587646484, + "completions/min_length": 30.25, + "epoch": 7.744601638123604, + "grad_norm": 0.00280663164142008, + "kl": 0.059967041015625, + "learning_rate": 1.2262336322480076e-07, + "loss": 6.000800567562692e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3896, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 65.12500190734863, + "completions/min_length": 19.75, + "epoch": 7.746587242491934, + "grad_norm": 0.008002030435721228, + "kl": 0.090118408203125, + "learning_rate": 1.2241647789463383e-07, + "loss": 9.01563762454316e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3897, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 68.52083683013916, + "completions/min_length": 22.125, + "epoch": 7.748572846860263, + "grad_norm": 0.003710607147933289, + "kl": 0.07269287109375, + "learning_rate": 1.2220974288853208e-07, + "loss": 7.264433952514082e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3898, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.5, + "completions/mean_length": 70.84375286102295, + "completions/min_length": 25.625, + "epoch": 7.750558451228593, + "grad_norm": 0.0046607345701737074, + "kl": 0.073089599609375, + "learning_rate": 1.2200315828880091e-07, + "loss": 7.309476495720446e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3899, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.125, + "completions/mean_length": 67.51041889190674, + "completions/min_length": 26.0, + "epoch": 7.752544055596922, + "grad_norm": 0.004811622878731625, + "kl": 0.06817626953125, + "learning_rate": 1.2179672417768644e-07, + "loss": 6.826211756560951e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3900, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 61.96875238418579, + "completions/min_length": 21.5, + "epoch": 7.754529659965252, + "grad_norm": 0.008958614384968833, + "kl": 0.06927490234375, + "learning_rate": 1.2159044063737416e-07, + "loss": 6.927098729647696e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3901, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 66.90625238418579, + "completions/min_length": 23.75, + "epoch": 7.756515264333581, + "grad_norm": 0.002930563414245782, + "kl": 0.070953369140625, + "learning_rate": 1.213843077499901e-07, + "loss": 7.09326850483194e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3902, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.25, + "completions/mean_length": 75.47916984558105, + "completions/min_length": 26.75, + "epoch": 7.758500868701911, + "grad_norm": 0.002937133231444342, + "kl": 0.07049560546875, + "learning_rate": 1.2117832559760032e-07, + "loss": 7.053184526739642e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3903, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 64.53125095367432, + "completions/min_length": 22.875, + "epoch": 7.760486473070241, + "grad_norm": 0.0030746675067557672, + "kl": 0.079132080078125, + "learning_rate": 1.209724942622104e-07, + "loss": 7.920600182842463e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3904, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.375, + "completions/mean_length": 73.28125238418579, + "completions/min_length": 27.5, + "epoch": 7.76247207743857, + "grad_norm": 0.0030448895607094566, + "kl": 0.079376220703125, + "learning_rate": 1.2076681382576649e-07, + "loss": 7.947770063765347e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3905, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 71.21875238418579, + "completions/min_length": 28.25, + "epoch": 7.7644576818069, + "grad_norm": 0.003104678129102219, + "kl": 0.082977294921875, + "learning_rate": 1.2056128437015423e-07, + "loss": 8.300077024614438e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3906, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 108.75, + "completions/mean_length": 59.88541841506958, + "completions/min_length": 25.25, + "epoch": 7.76644328617523, + "grad_norm": 0.004007677571024156, + "kl": 0.071533203125, + "learning_rate": 1.203559059771992e-07, + "loss": 7.14878406142816e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3907, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.125, + "completions/mean_length": 76.43750333786011, + "completions/min_length": 30.5, + "epoch": 7.768428890543559, + "grad_norm": 0.0031034467729061906, + "kl": 0.075775146484375, + "learning_rate": 1.2015067872866692e-07, + "loss": 7.574223855044693e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3908, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 69.25000143051147, + "completions/min_length": 23.375, + "epoch": 7.770414494911889, + "grad_norm": 0.002958063125281366, + "kl": 0.061553955078125, + "learning_rate": 1.1994560270626303e-07, + "loss": 6.148389365989715e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3909, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.25, + "completions/mean_length": 57.80208492279053, + "completions/min_length": 21.75, + "epoch": 7.772400099280219, + "grad_norm": 0.004323160213968585, + "kl": 0.068328857421875, + "learning_rate": 1.1974067799163236e-07, + "loss": 6.843900337116793e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3910, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 62.59375238418579, + "completions/min_length": 20.75, + "epoch": 7.774385703648548, + "grad_norm": 0.002687032254564319, + "kl": 0.089019775390625, + "learning_rate": 1.1953590466636e-07, + "loss": 8.915412763599306e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3911, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 68.77083492279053, + "completions/min_length": 28.125, + "epoch": 7.776371308016878, + "grad_norm": 0.0091714866763257, + "kl": 0.06671142578125, + "learning_rate": 1.193312828119704e-07, + "loss": 6.665413093287498e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3912, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 65.35416889190674, + "completions/min_length": 26.5, + "epoch": 7.778356912385207, + "grad_norm": 0.005737679892904283, + "kl": 0.0882568359375, + "learning_rate": 1.1912681250992818e-07, + "loss": 8.836119377519935e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3913, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.25, + "completions/mean_length": 67.28125143051147, + "completions/min_length": 24.75, + "epoch": 7.780342516753537, + "grad_norm": 0.004287156588465882, + "kl": 0.062164306640625, + "learning_rate": 1.1892249384163716e-07, + "loss": 6.219823262654245e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3914, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.25, + "completions/mean_length": 79.52083539962769, + "completions/min_length": 27.5, + "epoch": 7.782328121121866, + "grad_norm": 0.003097808161389412, + "kl": 0.08026123046875, + "learning_rate": 1.1871832688844064e-07, + "loss": 8.024100679904222e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3915, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 72.50000047683716, + "completions/min_length": 26.875, + "epoch": 7.784313725490196, + "grad_norm": 0.004664612230150489, + "kl": 0.07159423828125, + "learning_rate": 1.1851431173162246e-07, + "loss": 7.156870560720563e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3916, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 63.30208396911621, + "completions/min_length": 26.25, + "epoch": 7.786299329858526, + "grad_norm": 0.003746678758558942, + "kl": 0.07366943359375, + "learning_rate": 1.1831044845240517e-07, + "loss": 7.370024832198396e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3917, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 65.02083587646484, + "completions/min_length": 21.375, + "epoch": 7.788284934226855, + "grad_norm": 0.006024809792970926, + "kl": 0.08074951171875, + "learning_rate": 1.1810673713195091e-07, + "loss": 8.068706665653735e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3918, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 66.14583587646484, + "completions/min_length": 24.125, + "epoch": 7.790270538595185, + "grad_norm": 0.004079514111671265, + "kl": 0.07763671875, + "learning_rate": 1.1790317785136178e-07, + "loss": 7.769724470563233e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3919, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 65.37500143051147, + "completions/min_length": 29.125, + "epoch": 7.792256142963515, + "grad_norm": 0.0035315499964259163, + "kl": 0.069915771484375, + "learning_rate": 1.1769977069167881e-07, + "loss": 6.988467066548765e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3920, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 69.32291984558105, + "completions/min_length": 32.625, + "epoch": 7.794241747331844, + "grad_norm": 1.8762943647906174, + "kl": 0.08843994140625, + "learning_rate": 1.1749651573388297e-07, + "loss": 0.00012748813605867326, + "memory(GiB)": 94.21, + "reward": 1.84375, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.84375, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3921, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 69.40625190734863, + "completions/min_length": 22.75, + "epoch": 7.796227351700174, + "grad_norm": 0.03621445969327345, + "kl": 0.1529541015625, + "learning_rate": 1.1729341305889418e-07, + "loss": 0.0001528703432995826, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3922, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.375, + "completions/mean_length": 76.79166793823242, + "completions/min_length": 23.75, + "epoch": 7.798212956068504, + "grad_norm": 0.0039241723109373905, + "kl": 0.079925537109375, + "learning_rate": 1.1709046274757206e-07, + "loss": 7.990228914422914e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3923, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 62.125001430511475, + "completions/min_length": 22.625, + "epoch": 7.800198560436833, + "grad_norm": 0.008012809373704676, + "kl": 0.062469482421875, + "learning_rate": 1.1688766488071567e-07, + "loss": 6.249164289329201e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3924, + "train_speed(iter/s)": 0.022653 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 70.52083539962769, + "completions/min_length": 27.25, + "epoch": 7.802184164805163, + "grad_norm": 0.08130746158088281, + "kl": 0.13604736328125, + "learning_rate": 1.1668501953906278e-07, + "loss": 0.00013631889305543154, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3925, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 71.03125190734863, + "completions/min_length": 22.125, + "epoch": 7.804169769173492, + "grad_norm": 0.0038527334217906624, + "kl": 0.0703125, + "learning_rate": 1.1648252680329124e-07, + "loss": 7.025589002296329e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3926, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.125, + "completions/mean_length": 59.79166889190674, + "completions/min_length": 21.0, + "epoch": 7.806155373541822, + "grad_norm": 0.0041760832134088936, + "kl": 0.0712890625, + "learning_rate": 1.1628018675401746e-07, + "loss": 7.130164885893464e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3927, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 67.79166889190674, + "completions/min_length": 21.125, + "epoch": 7.808140977910151, + "grad_norm": 0.005805128359271577, + "kl": 0.07257080078125, + "learning_rate": 1.1607799947179731e-07, + "loss": 7.261057908181101e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3928, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.875, + "completions/mean_length": 55.718751430511475, + "completions/min_length": 19.125, + "epoch": 7.810126582278481, + "grad_norm": 0.004760596682025078, + "kl": 0.06549072265625, + "learning_rate": 1.1587596503712593e-07, + "loss": 6.548453529831022e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3929, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.125, + "completions/mean_length": 62.66666889190674, + "completions/min_length": 24.375, + "epoch": 7.812112186646811, + "grad_norm": 0.0029035237032259955, + "kl": 0.0738525390625, + "learning_rate": 1.1567408353043772e-07, + "loss": 7.38902308512479e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3930, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.25, + "completions/mean_length": 74.04166793823242, + "completions/min_length": 30.625, + "epoch": 7.81409779101514, + "grad_norm": 0.0030996362781128047, + "kl": 0.07464599609375, + "learning_rate": 1.1547235503210567e-07, + "loss": 7.468024705303833e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3931, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.25, + "completions/mean_length": 61.29166889190674, + "completions/min_length": 21.875, + "epoch": 7.81608339538347, + "grad_norm": 0.003118357433967189, + "kl": 0.0721435546875, + "learning_rate": 1.1527077962244264e-07, + "loss": 7.216303492896259e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3932, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.0, + "completions/mean_length": 70.12500143051147, + "completions/min_length": 31.75, + "epoch": 7.8180689997518, + "grad_norm": 1.0024017915716292, + "kl": 0.06280517578125, + "learning_rate": 1.1506935738169966e-07, + "loss": 0.012461773119866848, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3933, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 65.62500238418579, + "completions/min_length": 24.625, + "epoch": 7.820054604120129, + "grad_norm": 0.004677012561452153, + "kl": 0.07257080078125, + "learning_rate": 1.1486808839006756e-07, + "loss": 7.2706192440819e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3934, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 70.93750143051147, + "completions/min_length": 21.5, + "epoch": 7.822040208488459, + "grad_norm": 0.00356557350716112, + "kl": 0.06951904296875, + "learning_rate": 1.1466697272767573e-07, + "loss": 6.948283407837152e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3935, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.0, + "completions/mean_length": 72.28125143051147, + "completions/min_length": 26.0, + "epoch": 7.824025812856789, + "grad_norm": 0.004005422780099686, + "kl": 0.0704345703125, + "learning_rate": 1.144660104745923e-07, + "loss": 7.047977123875171e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3936, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 71.10416889190674, + "completions/min_length": 26.75, + "epoch": 7.826011417225118, + "grad_norm": 0.004455311189684372, + "kl": 0.06365966796875, + "learning_rate": 1.142652017108252e-07, + "loss": 6.372777716023847e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3937, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 71.78125190734863, + "completions/min_length": 28.875, + "epoch": 7.827997021593448, + "grad_norm": 1.2196324186255922, + "kl": 0.06695556640625, + "learning_rate": 1.1406454651632041e-07, + "loss": -0.001656563370488584, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3938, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 71.83333444595337, + "completions/min_length": 23.25, + "epoch": 7.829982625961777, + "grad_norm": 0.004990552170123915, + "kl": 0.086395263671875, + "learning_rate": 1.1386404497096286e-07, + "loss": 8.630442607682198e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3939, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.875, + "completions/mean_length": 78.23958492279053, + "completions/min_length": 19.375, + "epoch": 7.831968230330107, + "grad_norm": 0.003390239272871487, + "kl": 0.07720947265625, + "learning_rate": 1.1366369715457686e-07, + "loss": 7.72233324823901e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3940, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 112.25, + "completions/mean_length": 57.81250190734863, + "completions/min_length": 17.75, + "epoch": 7.833953834698436, + "grad_norm": 0.017397658830204277, + "kl": 0.0635986328125, + "learning_rate": 1.1346350314692483e-07, + "loss": 6.358828250085935e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3941, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 64.89583539962769, + "completions/min_length": 16.0, + "epoch": 7.835939439066766, + "grad_norm": 0.003349649973414028, + "kl": 0.07232666015625, + "learning_rate": 1.1326346302770856e-07, + "loss": 7.224710134323686e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3942, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 63.63541841506958, + "completions/min_length": 22.375, + "epoch": 7.837925043435096, + "grad_norm": 1.131790717561578, + "kl": 0.08013916015625, + "learning_rate": 1.1306357687656803e-07, + "loss": 0.00048503652215003967, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3943, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.75, + "completions/mean_length": 65.60416793823242, + "completions/min_length": 28.5, + "epoch": 7.839910647803425, + "grad_norm": 0.003771207554082733, + "kl": 0.075836181640625, + "learning_rate": 1.128638447730823e-07, + "loss": 7.567742432001978e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3944, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.25, + "completions/mean_length": 68.90625143051147, + "completions/min_length": 27.875, + "epoch": 7.841896252171755, + "grad_norm": 0.006001467102062294, + "kl": 0.068817138671875, + "learning_rate": 1.1266426679676916e-07, + "loss": 6.874018436064944e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3945, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.625, + "completions/mean_length": 68.27083587646484, + "completions/min_length": 26.125, + "epoch": 7.843881856540085, + "grad_norm": 0.007147402732750008, + "kl": 0.080718994140625, + "learning_rate": 1.1246484302708464e-07, + "loss": 8.062220149440691e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3946, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.5, + "completions/mean_length": 63.395836353302, + "completions/min_length": 24.5, + "epoch": 7.845867460908414, + "grad_norm": 0.004209083040122269, + "kl": 0.06805419921875, + "learning_rate": 1.1226557354342359e-07, + "loss": 6.810553168179467e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3947, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 69.55208587646484, + "completions/min_length": 30.75, + "epoch": 7.847853065276744, + "grad_norm": 0.00288344454114979, + "kl": 0.06158447265625, + "learning_rate": 1.120664584251197e-07, + "loss": 6.161877536214888e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3948, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.625, + "completions/mean_length": 70.75000143051147, + "completions/min_length": 26.75, + "epoch": 7.849838669645074, + "grad_norm": 0.003549570276149742, + "kl": 0.0755615234375, + "learning_rate": 1.1186749775144461e-07, + "loss": 7.552633178420365e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3949, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 82.44791889190674, + "completions/min_length": 30.375, + "epoch": 7.851824274013403, + "grad_norm": 0.004586165674970795, + "kl": 0.082794189453125, + "learning_rate": 1.1166869160160897e-07, + "loss": 8.27049880172126e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3950, + "train_speed(iter/s)": 0.022659 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 202.125, + "completions/mean_length": 70.36458587646484, + "completions/min_length": 26.125, + "epoch": 7.853809878381733, + "grad_norm": 0.005966937217203743, + "kl": 0.075042724609375, + "learning_rate": 1.114700400547619e-07, + "loss": 7.49956670915708e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3951, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 72.65625143051147, + "completions/min_length": 30.5, + "epoch": 7.855795482750062, + "grad_norm": 0.0044505339446736444, + "kl": 0.07171630859375, + "learning_rate": 1.1127154318999055e-07, + "loss": 7.179038948379457e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3952, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.875, + "completions/mean_length": 65.35416889190674, + "completions/min_length": 23.75, + "epoch": 7.857781087118392, + "grad_norm": 0.00562006406060834, + "kl": 0.06060791015625, + "learning_rate": 1.1107320108632107e-07, + "loss": 6.057949940441176e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3953, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 70.40625238418579, + "completions/min_length": 33.625, + "epoch": 7.859766691486721, + "grad_norm": 0.004724624752407142, + "kl": 0.06048583984375, + "learning_rate": 1.1087501382271752e-07, + "loss": 6.0438782384153455e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3954, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 62.52083492279053, + "completions/min_length": 16.75, + "epoch": 7.861752295855051, + "grad_norm": 0.005456370823632248, + "kl": 0.065673828125, + "learning_rate": 1.106769814780823e-07, + "loss": 6.565966759808362e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3955, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 66.84375095367432, + "completions/min_length": 19.625, + "epoch": 7.863737900223381, + "grad_norm": 2.084006326342192, + "kl": 0.078125, + "learning_rate": 1.1047910413125666e-07, + "loss": 0.001870767562650144, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3956, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.75, + "completions/mean_length": 66.05208492279053, + "completions/min_length": 24.375, + "epoch": 7.86572350459171, + "grad_norm": 0.003694985994740653, + "kl": 0.07672119140625, + "learning_rate": 1.1028138186101954e-07, + "loss": 7.670488412259147e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3957, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 61.72916841506958, + "completions/min_length": 22.875, + "epoch": 7.86770910896004, + "grad_norm": 0.0036277151361194175, + "kl": 0.0550537109375, + "learning_rate": 1.1008381474608847e-07, + "loss": 5.509276161319576e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3958, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 74.23958587646484, + "completions/min_length": 27.75, + "epoch": 7.86969471332837, + "grad_norm": 0.6972247215945936, + "kl": 0.068634033203125, + "learning_rate": 1.098864028651193e-07, + "loss": -0.007476377300918102, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166669771075, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3959, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.375, + "completions/mean_length": 73.34375238418579, + "completions/min_length": 23.375, + "epoch": 7.871680317696699, + "grad_norm": 0.9724131018010981, + "kl": 0.07489013671875, + "learning_rate": 1.0968914629670567e-07, + "loss": -0.006799768656492233, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3960, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.75, + "completions/mean_length": 60.500001430511475, + "completions/min_length": 22.125, + "epoch": 7.873665922065029, + "grad_norm": 0.0033869019037312174, + "kl": 0.06768798828125, + "learning_rate": 1.0949204511937987e-07, + "loss": 6.76974595990032e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3961, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.0, + "completions/mean_length": 64.87500238418579, + "completions/min_length": 19.375, + "epoch": 7.875651526433359, + "grad_norm": 0.0052025305653062804, + "kl": 0.069122314453125, + "learning_rate": 1.092950994116118e-07, + "loss": 6.916293932590634e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3962, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.0, + "completions/mean_length": 66.52083492279053, + "completions/min_length": 23.125, + "epoch": 7.877637130801688, + "grad_norm": 0.003574107424793834, + "kl": 0.076171875, + "learning_rate": 1.0909830925181007e-07, + "loss": 7.619446114404127e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3963, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 66.82291841506958, + "completions/min_length": 25.375, + "epoch": 7.879622735170018, + "grad_norm": 0.0046079734417892, + "kl": 0.064056396484375, + "learning_rate": 1.0890167471832079e-07, + "loss": 6.407794717233628e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3964, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 61.89583492279053, + "completions/min_length": 21.625, + "epoch": 7.881608339538347, + "grad_norm": 0.004339507910135874, + "kl": 0.05731201171875, + "learning_rate": 1.0870519588942839e-07, + "loss": 5.7298144383821636e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3965, + "train_speed(iter/s)": 0.022658 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 68.08333587646484, + "completions/min_length": 30.625, + "epoch": 7.883593943906677, + "grad_norm": 0.0060038038960135, + "kl": 0.0738525390625, + "learning_rate": 1.0850887284335557e-07, + "loss": 7.388897938653827e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3966, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 69.57291889190674, + "completions/min_length": 25.875, + "epoch": 7.885579548275006, + "grad_norm": 0.003203117876970192, + "kl": 0.064453125, + "learning_rate": 1.0831270565826256e-07, + "loss": 6.445555482059717e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3967, + "train_speed(iter/s)": 0.022657 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.0, + "completions/mean_length": 65.40625286102295, + "completions/min_length": 26.0, + "epoch": 7.887565152643336, + "grad_norm": 0.003903757523988194, + "kl": 0.0623779296875, + "learning_rate": 1.0811669441224747e-07, + "loss": 6.240440416149795e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3968, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 61.187501430511475, + "completions/min_length": 21.5, + "epoch": 7.889550757011666, + "grad_norm": 0.005555410627060006, + "kl": 0.064208984375, + "learning_rate": 1.0792083918334694e-07, + "loss": 6.419791316147894e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3969, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.25, + "completions/mean_length": 69.69791746139526, + "completions/min_length": 26.125, + "epoch": 7.891536361379995, + "grad_norm": 0.003928206364544882, + "kl": 0.0650634765625, + "learning_rate": 1.0772514004953482e-07, + "loss": 6.50549991405569e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3970, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 71.00000238418579, + "completions/min_length": 24.5, + "epoch": 7.893521965748325, + "grad_norm": 0.0034110524675974307, + "kl": 0.099365234375, + "learning_rate": 1.0752959708872323e-07, + "loss": 9.939714800566435e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3971, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 68.41666793823242, + "completions/min_length": 23.25, + "epoch": 7.895507570116655, + "grad_norm": 0.0037294759588588767, + "kl": 0.082122802734375, + "learning_rate": 1.0733421037876212e-07, + "loss": 8.214589615818113e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3972, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.375, + "completions/mean_length": 71.38541889190674, + "completions/min_length": 22.625, + "epoch": 7.897493174484984, + "grad_norm": 0.0034435531961676076, + "kl": 0.08465576171875, + "learning_rate": 1.0713897999743887e-07, + "loss": 8.470690227113664e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3973, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.375, + "completions/mean_length": 64.18750143051147, + "completions/min_length": 22.0, + "epoch": 7.899478778853314, + "grad_norm": 0.005370907145198794, + "kl": 0.069732666015625, + "learning_rate": 1.0694390602247915e-07, + "loss": 6.971009133849293e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3974, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.375, + "completions/mean_length": 56.98958444595337, + "completions/min_length": 22.625, + "epoch": 7.901464383221644, + "grad_norm": 0.0035612176504184437, + "kl": 0.071868896484375, + "learning_rate": 1.0674898853154595e-07, + "loss": 7.19110103091225e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3975, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 67.11458587646484, + "completions/min_length": 21.875, + "epoch": 7.903449987589973, + "grad_norm": 1.5403271126599327, + "kl": 0.06585693359375, + "learning_rate": 1.0655422760223991e-07, + "loss": -0.0016677286475896835, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3976, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 71.08333539962769, + "completions/min_length": 27.875, + "epoch": 7.905435591958303, + "grad_norm": 0.005724520886709921, + "kl": 0.07281494140625, + "learning_rate": 1.063596233120997e-07, + "loss": 7.272971561178565e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3977, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.875, + "completions/mean_length": 59.85416889190674, + "completions/min_length": 16.375, + "epoch": 7.907421196326632, + "grad_norm": 0.004306675059180274, + "kl": 0.058319091796875, + "learning_rate": 1.061651757386015e-07, + "loss": 5.830315058119595e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3978, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.5, + "completions/mean_length": 76.71875333786011, + "completions/min_length": 24.5, + "epoch": 7.909406800694962, + "grad_norm": 2.4623701375594593, + "kl": 0.09613037109375, + "learning_rate": 1.0597088495915885e-07, + "loss": 0.0013852929696440697, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3979, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.625, + "completions/mean_length": 80.46875286102295, + "completions/min_length": 35.5, + "epoch": 7.911392405063291, + "grad_norm": 1.0930304552102974, + "kl": 0.087646484375, + "learning_rate": 1.0577675105112327e-07, + "loss": 0.0016900020418688655, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3980, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.375, + "completions/mean_length": 69.97916913032532, + "completions/min_length": 22.375, + "epoch": 7.913378009431621, + "grad_norm": 0.0045694434191028335, + "kl": 0.079986572265625, + "learning_rate": 1.055827740917834e-07, + "loss": 8.007009455468506e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3981, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 69.35416841506958, + "completions/min_length": 22.625, + "epoch": 7.915363613799951, + "grad_norm": 0.003578187146288709, + "kl": 0.063201904296875, + "learning_rate": 1.0538895415836586e-07, + "loss": 6.32756418781355e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3982, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 70.60416841506958, + "completions/min_length": 28.375, + "epoch": 7.91734921816828, + "grad_norm": 0.005479163859475252, + "kl": 0.075531005859375, + "learning_rate": 1.0519529132803435e-07, + "loss": 7.554507465101779e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3983, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 60.73958492279053, + "completions/min_length": 19.875, + "epoch": 7.91933482253661, + "grad_norm": 0.05137156518902156, + "kl": 0.075531005859375, + "learning_rate": 1.0500178567788992e-07, + "loss": 7.551905582658947e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3984, + "train_speed(iter/s)": 0.022656 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 67.26041889190674, + "completions/min_length": 29.5, + "epoch": 7.9213204269049395, + "grad_norm": 0.0035733051544297022, + "kl": 0.06304931640625, + "learning_rate": 1.0480843728497185e-07, + "loss": 6.30665963399224e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3985, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.0, + "completions/mean_length": 64.04166841506958, + "completions/min_length": 23.5, + "epoch": 7.923306031273269, + "grad_norm": 1.2324147131910126, + "kl": 0.08349609375, + "learning_rate": 1.04615246226256e-07, + "loss": -0.004237992223352194, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.1870916150510311, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3986, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.875, + "completions/mean_length": 81.65625190734863, + "completions/min_length": 25.375, + "epoch": 7.9252916356415986, + "grad_norm": 0.0049523810292853, + "kl": 0.069549560546875, + "learning_rate": 1.0442221257865569e-07, + "loss": 6.958723679417744e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3987, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 66.8229193687439, + "completions/min_length": 25.375, + "epoch": 7.9272772400099285, + "grad_norm": 2.1543636817651994, + "kl": 0.07806396484375, + "learning_rate": 1.0422933641902209e-07, + "loss": -0.00914867501705885, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3988, + "train_speed(iter/s)": 0.022655 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.25, + "completions/mean_length": 65.10416793823242, + "completions/min_length": 27.0, + "epoch": 7.929262844378258, + "grad_norm": 0.003563084520555722, + "kl": 0.068206787109375, + "learning_rate": 1.0403661782414297e-07, + "loss": 6.818232213845477e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3989, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.625, + "completions/mean_length": 67.31250190734863, + "completions/min_length": 27.0, + "epoch": 7.9312484487465875, + "grad_norm": 0.006284192524869154, + "kl": 0.09234619140625, + "learning_rate": 1.0384405687074398e-07, + "loss": 9.232441516360268e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3990, + "train_speed(iter/s)": 0.022654 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.5, + "completions/mean_length": 65.96875238418579, + "completions/min_length": 27.125, + "epoch": 7.933234053114917, + "grad_norm": 0.004095798525843277, + "kl": 0.064422607421875, + "learning_rate": 1.0365165363548756e-07, + "loss": 6.44034007564187e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3991, + "train_speed(iter/s)": 0.022652 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 67.58333396911621, + "completions/min_length": 21.25, + "epoch": 7.9352196574832465, + "grad_norm": 0.003729799932089468, + "kl": 0.069854736328125, + "learning_rate": 1.0345940819497356e-07, + "loss": 6.987794040469453e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3992, + "train_speed(iter/s)": 0.022651 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.625, + "completions/mean_length": 61.69791793823242, + "completions/min_length": 26.25, + "epoch": 7.937205261851576, + "grad_norm": 0.0031253009101299216, + "kl": 0.06756591796875, + "learning_rate": 1.0326732062573928e-07, + "loss": 6.76056879456155e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3993, + "train_speed(iter/s)": 0.02265 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 69.72916841506958, + "completions/min_length": 23.25, + "epoch": 7.9391908662199056, + "grad_norm": 0.003471343815855204, + "kl": 0.069488525390625, + "learning_rate": 1.0307539100425861e-07, + "loss": 6.961461622267962e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3994, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 65.21875286102295, + "completions/min_length": 25.75, + "epoch": 7.9411764705882355, + "grad_norm": 0.0030315112973599463, + "kl": 0.0889892578125, + "learning_rate": 1.028836194069428e-07, + "loss": 8.894351776689291e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3995, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 63.85416793823242, + "completions/min_length": 24.625, + "epoch": 7.943162074956565, + "grad_norm": 0.0039174260477899975, + "kl": 0.0589599609375, + "learning_rate": 1.0269200591014043e-07, + "loss": 5.8904166508000344e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3996, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.625, + "completions/mean_length": 63.18750190734863, + "completions/min_length": 19.125, + "epoch": 7.9451476793248945, + "grad_norm": 0.0034270196269446072, + "kl": 0.06951904296875, + "learning_rate": 1.0250055059013668e-07, + "loss": 6.958996527828276e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3997, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.75, + "completions/mean_length": 63.94791841506958, + "completions/min_length": 18.125, + "epoch": 7.9471332836932245, + "grad_norm": 0.005162829840412207, + "kl": 0.082916259765625, + "learning_rate": 1.0230925352315417e-07, + "loss": 8.288287790492177e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3998, + "train_speed(iter/s)": 0.022649 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 60.65625238418579, + "completions/min_length": 22.0, + "epoch": 7.9491188880615535, + "grad_norm": 0.0032668409913887402, + "kl": 0.0660400390625, + "learning_rate": 1.0211811478535237e-07, + "loss": 6.602262874366716e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 3999, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 65.46875190734863, + "completions/min_length": 16.375, + "epoch": 7.9511044924298835, + "grad_norm": 0.0032284745387936202, + "kl": 0.0870361328125, + "learning_rate": 1.0192713445282758e-07, + "loss": 8.700194302946329e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4000, + "train_speed(iter/s)": 0.022648 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 67.76041889190674, + "completions/min_length": 22.75, + "epoch": 7.953090096798213, + "grad_norm": 0.003204916427357215, + "kl": 0.07672119140625, + "learning_rate": 1.0173631260161325e-07, + "loss": 7.67066449043341e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4001, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.25, + "completions/mean_length": 69.27083539962769, + "completions/min_length": 24.25, + "epoch": 7.9550757011665425, + "grad_norm": 1.6446934124304393, + "kl": 0.07208251953125, + "learning_rate": 1.015456493076795e-07, + "loss": -0.015286803245544434, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4002, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 64.739586353302, + "completions/min_length": 19.625, + "epoch": 7.9570613055348725, + "grad_norm": 0.003191960120300276, + "kl": 0.089691162109375, + "learning_rate": 1.0135514464693367e-07, + "loss": 8.963925938587636e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4003, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 98.375, + "completions/mean_length": 51.14583396911621, + "completions/min_length": 20.0, + "epoch": 7.9590469099032015, + "grad_norm": 0.0052657993842108175, + "kl": 0.06829833984375, + "learning_rate": 1.0116479869521966e-07, + "loss": 6.825456512160599e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4004, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.5, + "completions/mean_length": 58.50000190734863, + "completions/min_length": 20.0, + "epoch": 7.9610325142715315, + "grad_norm": 0.0049532569589100595, + "kl": 0.071990966796875, + "learning_rate": 1.0097461152831788e-07, + "loss": 7.203160203061998e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4005, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 63.70833444595337, + "completions/min_length": 25.875, + "epoch": 7.9630181186398605, + "grad_norm": 0.0027871873703943484, + "kl": 0.073394775390625, + "learning_rate": 1.0078458322194656e-07, + "loss": 7.327820640057325e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4006, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 71.052086353302, + "completions/min_length": 22.875, + "epoch": 7.9650037230081905, + "grad_norm": 0.003864486813525583, + "kl": 0.070159912109375, + "learning_rate": 1.0059471385175966e-07, + "loss": 7.01984972693026e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4007, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 69.13541841506958, + "completions/min_length": 22.25, + "epoch": 7.96698932737652, + "grad_norm": 0.00489322407329731, + "kl": 0.06585693359375, + "learning_rate": 1.0040500349334819e-07, + "loss": 6.583495996892452e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4008, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.625, + "completions/mean_length": 61.73958396911621, + "completions/min_length": 21.5, + "epoch": 7.9689749317448495, + "grad_norm": 0.00307659212544642, + "kl": 0.077667236328125, + "learning_rate": 1.0021545222224004e-07, + "loss": 7.769867079332471e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4009, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.0, + "completions/mean_length": 70.75000095367432, + "completions/min_length": 26.125, + "epoch": 7.9709605361131795, + "grad_norm": 0.0031728972314285543, + "kl": 0.060546875, + "learning_rate": 1.0002606011389947e-07, + "loss": 6.048924842616543e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4010, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.0, + "completions/mean_length": 74.09375190734863, + "completions/min_length": 21.875, + "epoch": 7.972946140481509, + "grad_norm": 0.0023315589751248846, + "kl": 0.063995361328125, + "learning_rate": 9.983682724372778e-08, + "loss": 6.404868327081203e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4011, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.5, + "completions/mean_length": 72.34375143051147, + "completions/min_length": 20.875, + "epoch": 7.9749317448498385, + "grad_norm": 0.003709643454558521, + "kl": 0.066192626953125, + "learning_rate": 9.964775368706224e-08, + "loss": 6.613855657633394e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4012, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 70.5729193687439, + "completions/min_length": 25.75, + "epoch": 7.976917349218168, + "grad_norm": 0.0028197719205080457, + "kl": 0.06787109375, + "learning_rate": 9.945883951917733e-08, + "loss": 6.784204015275463e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4013, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 70.70833539962769, + "completions/min_length": 22.25, + "epoch": 7.978902953586498, + "grad_norm": 0.0028342742453965997, + "kl": 0.063629150390625, + "learning_rate": 9.927008481528393e-08, + "loss": 6.357110396493226e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4014, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 69.28125286102295, + "completions/min_length": 23.5, + "epoch": 7.980888557954827, + "grad_norm": 0.0030782556058395706, + "kl": 0.06329345703125, + "learning_rate": 9.90814896505291e-08, + "loss": 6.321975524770096e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4015, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.0, + "completions/mean_length": 72.58333587646484, + "completions/min_length": 26.75, + "epoch": 7.982874162323157, + "grad_norm": 0.0033123396647415223, + "kl": 0.07421875, + "learning_rate": 9.889305409999655e-08, + "loss": 7.412924605887383e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4016, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 63.29166793823242, + "completions/min_length": 27.125, + "epoch": 7.9848597666914864, + "grad_norm": 0.0029834375885894747, + "kl": 0.0618896484375, + "learning_rate": 9.870477823870676e-08, + "loss": 6.177874456625432e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4017, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 61.00000190734863, + "completions/min_length": 26.125, + "epoch": 7.986845371059816, + "grad_norm": 3.940874805059957, + "kl": 0.097564697265625, + "learning_rate": 9.851666214161609e-08, + "loss": -0.007386643439531326, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4018, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 68.56250190734863, + "completions/min_length": 25.0, + "epoch": 7.9888309754281455, + "grad_norm": 0.009082513953037507, + "kl": 0.0775146484375, + "learning_rate": 9.832870588361769e-08, + "loss": 7.744548929622397e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4019, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 65.3854193687439, + "completions/min_length": 29.25, + "epoch": 7.990816579796475, + "grad_norm": 0.006155403665751887, + "kl": 0.077850341796875, + "learning_rate": 9.814090953954107e-08, + "loss": 7.795381679898128e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4020, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 71.09375238418579, + "completions/min_length": 26.875, + "epoch": 7.992802184164805, + "grad_norm": 0.002904945499749162, + "kl": 0.06842041015625, + "learning_rate": 9.79532731841518e-08, + "loss": 6.833539373474196e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4021, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.0, + "completions/mean_length": 66.57291841506958, + "completions/min_length": 22.75, + "epoch": 7.994787788533134, + "grad_norm": 0.0028966892993152315, + "kl": 0.055084228515625, + "learning_rate": 9.776579689215208e-08, + "loss": 5.5054631957318634e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4022, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 74.11458587646484, + "completions/min_length": 24.375, + "epoch": 7.996773392901464, + "grad_norm": 0.002860955525261995, + "kl": 0.0621337890625, + "learning_rate": 9.75784807381802e-08, + "loss": 6.212804146343842e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4023, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 63.58333396911621, + "completions/min_length": 26.25, + "epoch": 7.998758997269794, + "grad_norm": 1.7040984097229772, + "kl": 0.071533203125, + "learning_rate": 9.73913247968105e-08, + "loss": -0.0030699982307851315, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4024, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.0, + "completions/mean_length": 61.06250190734863, + "completions/min_length": 21.875, + "epoch": 8.001985604368329, + "grad_norm": 0.00317775556564026, + "kl": 0.064727783203125, + "learning_rate": 9.720432914255405e-08, + "loss": 6.46327025606297e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4025, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 73.7604193687439, + "completions/min_length": 25.125, + "epoch": 8.00397120873666, + "grad_norm": 0.003873065301164897, + "kl": 0.061187744140625, + "learning_rate": 9.701749384985753e-08, + "loss": 6.116794247645885e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4026, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.75, + "completions/mean_length": 71.71875190734863, + "completions/min_length": 20.875, + "epoch": 8.005956813104989, + "grad_norm": 0.0031089829157041247, + "kl": 0.1026611328125, + "learning_rate": 9.683081899310425e-08, + "loss": 0.00010262842988595366, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4027, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 67.40625190734863, + "completions/min_length": 23.375, + "epoch": 8.007942417473318, + "grad_norm": 0.00313588910603342, + "kl": 0.07672119140625, + "learning_rate": 9.664430464661355e-08, + "loss": 7.669499609619379e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4028, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 70.27083444595337, + "completions/min_length": 22.625, + "epoch": 8.009928021841649, + "grad_norm": 0.0029826969212667326, + "kl": 0.079864501953125, + "learning_rate": 9.645795088464049e-08, + "loss": 7.972571620484814e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4029, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 71.21875286102295, + "completions/min_length": 22.625, + "epoch": 8.011913626209978, + "grad_norm": 0.0037555325614692476, + "kl": 0.085418701171875, + "learning_rate": 9.627175778137681e-08, + "loss": 8.544397132936865e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4030, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 63.64583492279053, + "completions/min_length": 20.75, + "epoch": 8.013899230578307, + "grad_norm": 0.0029392915911424402, + "kl": 0.054046630859375, + "learning_rate": 9.608572541094979e-08, + "loss": 5.404389230534434e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4031, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 68.39583587646484, + "completions/min_length": 19.625, + "epoch": 8.015884834946636, + "grad_norm": 0.0032770770532584326, + "kl": 0.0848388671875, + "learning_rate": 9.589985384742272e-08, + "loss": 8.481745317112654e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4032, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 57.63541793823242, + "completions/min_length": 23.25, + "epoch": 8.017870439314967, + "grad_norm": 0.03650768508430622, + "kl": 0.13665771484375, + "learning_rate": 9.571414316479526e-08, + "loss": 0.00013677947572432458, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4033, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 68.85416841506958, + "completions/min_length": 22.0, + "epoch": 8.019856043683296, + "grad_norm": 0.002998717115668105, + "kl": 0.054656982421875, + "learning_rate": 9.552859343700287e-08, + "loss": 5.469063762575388e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4034, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.375, + "completions/mean_length": 62.156251430511475, + "completions/min_length": 23.25, + "epoch": 8.021841648051625, + "grad_norm": 2.107848497134729, + "kl": 0.073486328125, + "learning_rate": 9.534320473791657e-08, + "loss": -0.007579359225928783, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4035, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 74.17708492279053, + "completions/min_length": 27.75, + "epoch": 8.023827252419956, + "grad_norm": 0.00395081026216689, + "kl": 0.0684814453125, + "learning_rate": 9.515797714134388e-08, + "loss": 6.839950219728053e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4036, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 66.01041841506958, + "completions/min_length": 21.5, + "epoch": 8.025812856788285, + "grad_norm": 0.0026239163575432797, + "kl": 0.055023193359375, + "learning_rate": 9.497291072102764e-08, + "loss": 5.510847404366359e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4037, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.0, + "completions/mean_length": 75.11458587646484, + "completions/min_length": 30.375, + "epoch": 8.027798461156614, + "grad_norm": 0.003687340491071612, + "kl": 0.066009521484375, + "learning_rate": 9.478800555064693e-08, + "loss": 6.606719398405403e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4038, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.0, + "completions/mean_length": 55.46875190734863, + "completions/min_length": 24.5, + "epoch": 8.029784065524945, + "grad_norm": 0.005124023417827816, + "kl": 0.096099853515625, + "learning_rate": 9.460326170381616e-08, + "loss": 9.604774822946638e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4039, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.625, + "completions/mean_length": 50.312501668930054, + "completions/min_length": 20.5, + "epoch": 8.031769669893274, + "grad_norm": 0.004241864820696868, + "kl": 0.072723388671875, + "learning_rate": 9.441867925408603e-08, + "loss": 7.272530638147146e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4040, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.5, + "completions/mean_length": 72.81250095367432, + "completions/min_length": 30.75, + "epoch": 8.033755274261603, + "grad_norm": 0.003914396390815221, + "kl": 0.087921142578125, + "learning_rate": 9.42342582749428e-08, + "loss": 8.797112241154537e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4041, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.375, + "completions/mean_length": 79.66666984558105, + "completions/min_length": 32.25, + "epoch": 8.035740878629934, + "grad_norm": 0.0027636424029366035, + "kl": 0.07147216796875, + "learning_rate": 9.404999883980818e-08, + "loss": 7.146518328227103e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4042, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.75, + "completions/mean_length": 65.76041889190674, + "completions/min_length": 21.5, + "epoch": 8.037726482998263, + "grad_norm": 0.0034206465232461828, + "kl": 0.0628662109375, + "learning_rate": 9.386590102204006e-08, + "loss": 6.285082054091617e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4043, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.5, + "completions/mean_length": 62.67708492279053, + "completions/min_length": 21.75, + "epoch": 8.039712087366592, + "grad_norm": 0.0030202910660872668, + "kl": 0.062347412109375, + "learning_rate": 9.368196489493158e-08, + "loss": 6.229724385775626e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4044, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 65.21875190734863, + "completions/min_length": 29.625, + "epoch": 8.041697691734921, + "grad_norm": 0.004281372973099747, + "kl": 0.061248779296875, + "learning_rate": 9.349819053171143e-08, + "loss": 6.11817158642225e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4045, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 65.57291889190674, + "completions/min_length": 22.375, + "epoch": 8.043683296103252, + "grad_norm": 0.004369602290796568, + "kl": 0.073211669921875, + "learning_rate": 9.331457800554438e-08, + "loss": 7.32242115191184e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4046, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 69.47916793823242, + "completions/min_length": 24.25, + "epoch": 8.045668900471581, + "grad_norm": 0.003263658173740175, + "kl": 0.071929931640625, + "learning_rate": 9.31311273895305e-08, + "loss": 7.192780321929604e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4047, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 60.05208444595337, + "completions/min_length": 21.75, + "epoch": 8.04765450483991, + "grad_norm": 0.0026106636178048507, + "kl": 0.056884765625, + "learning_rate": 9.294783875670525e-08, + "loss": 5.689482713933103e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4048, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 71.78125143051147, + "completions/min_length": 26.875, + "epoch": 8.04964010920824, + "grad_norm": 0.003861870313570463, + "kl": 0.069244384765625, + "learning_rate": 9.27647121800399e-08, + "loss": 6.936269346624613e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4049, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 60.33333492279053, + "completions/min_length": 25.375, + "epoch": 8.05162571357657, + "grad_norm": 0.006238776573855147, + "kl": 0.071929931640625, + "learning_rate": 9.258174773244087e-08, + "loss": 7.195457874331623e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4050, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.0, + "completions/mean_length": 72.94791841506958, + "completions/min_length": 24.875, + "epoch": 8.053611317944899, + "grad_norm": 0.6090824620184125, + "kl": 0.067230224609375, + "learning_rate": 9.239894548675048e-08, + "loss": 0.0017213996034115553, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4051, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 60.39583492279053, + "completions/min_length": 26.625, + "epoch": 8.05559692231323, + "grad_norm": 0.005101422124545626, + "kl": 0.10040283203125, + "learning_rate": 9.221630551574599e-08, + "loss": 0.00010034696606453508, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4052, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 73.37500143051147, + "completions/min_length": 24.375, + "epoch": 8.057582526681559, + "grad_norm": 0.002804186407576077, + "kl": 0.069610595703125, + "learning_rate": 9.20338278921401e-08, + "loss": 6.949788075871766e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4053, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.5, + "completions/mean_length": 59.437501430511475, + "completions/min_length": 23.875, + "epoch": 8.059568131049888, + "grad_norm": 0.00411066377930944, + "kl": 0.0697021484375, + "learning_rate": 9.185151268858155e-08, + "loss": 6.966612272663042e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4054, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.5, + "completions/mean_length": 72.73958492279053, + "completions/min_length": 26.75, + "epoch": 8.061553735418219, + "grad_norm": 0.004542473033607802, + "kl": 0.074981689453125, + "learning_rate": 9.166935997765362e-08, + "loss": 7.499718776671216e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4055, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 75.08333492279053, + "completions/min_length": 27.0, + "epoch": 8.063539339786548, + "grad_norm": 1.1801106984366552, + "kl": 0.093902587890625, + "learning_rate": 9.148736983187517e-08, + "loss": 0.014648554846644402, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4056, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 68.11458492279053, + "completions/min_length": 35.0, + "epoch": 8.065524944154877, + "grad_norm": 0.00396779478542917, + "kl": 0.060150146484375, + "learning_rate": 9.130554232370047e-08, + "loss": 6.0100439441157505e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4057, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 65.14583444595337, + "completions/min_length": 25.5, + "epoch": 8.067510548523206, + "grad_norm": 1.2509881699243472, + "kl": 0.0645751953125, + "learning_rate": 9.112387752551876e-08, + "loss": 0.02153899148106575, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4058, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 62.97916793823242, + "completions/min_length": 24.875, + "epoch": 8.069496152891537, + "grad_norm": 0.003362197664843273, + "kl": 0.072052001953125, + "learning_rate": 9.094237550965494e-08, + "loss": 7.204393477877602e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4059, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.75, + "completions/mean_length": 58.13541841506958, + "completions/min_length": 18.0, + "epoch": 8.071481757259866, + "grad_norm": 0.003201434858193056, + "kl": 0.0660400390625, + "learning_rate": 9.076103634836857e-08, + "loss": 6.600699271075428e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4060, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.375, + "completions/mean_length": 75.54166841506958, + "completions/min_length": 29.0, + "epoch": 8.073467361628195, + "grad_norm": 0.002389214077048698, + "kl": 0.066497802734375, + "learning_rate": 9.057986011385477e-08, + "loss": 6.653812306467444e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4061, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.5, + "completions/mean_length": 64.88541793823242, + "completions/min_length": 27.0, + "epoch": 8.075452965996526, + "grad_norm": 0.004664337703180663, + "kl": 0.053863525390625, + "learning_rate": 9.039884687824383e-08, + "loss": 5.383374082157388e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4062, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 68.28125286102295, + "completions/min_length": 21.875, + "epoch": 8.077438570364855, + "grad_norm": 0.0037005413346408054, + "kl": 0.082000732421875, + "learning_rate": 9.02179967136008e-08, + "loss": 8.186304330592975e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4063, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 71.80208492279053, + "completions/min_length": 25.5, + "epoch": 8.079424174733184, + "grad_norm": 0.003275447530899798, + "kl": 0.0560302734375, + "learning_rate": 9.003730969192586e-08, + "loss": 5.599199721473269e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4064, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 60.09375190734863, + "completions/min_length": 19.375, + "epoch": 8.081409779101515, + "grad_norm": 0.005609424848859443, + "kl": 0.068939208984375, + "learning_rate": 8.985678588515472e-08, + "loss": 6.901115557411686e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4065, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.125, + "completions/mean_length": 69.03125095367432, + "completions/min_length": 24.75, + "epoch": 8.083395383469844, + "grad_norm": 0.0027454080180372925, + "kl": 0.06927490234375, + "learning_rate": 8.967642536515741e-08, + "loss": 6.925908382982016e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4066, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 64.20833444595337, + "completions/min_length": 18.375, + "epoch": 8.085380987838173, + "grad_norm": 0.004700629423002741, + "kl": 0.0675048828125, + "learning_rate": 8.949622820373947e-08, + "loss": 6.75107876304537e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4067, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 74.00000333786011, + "completions/min_length": 27.0, + "epoch": 8.087366592206504, + "grad_norm": 0.003273976470075609, + "kl": 0.071014404296875, + "learning_rate": 8.931619447264139e-08, + "loss": 7.101658411556855e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4068, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 60.59375190734863, + "completions/min_length": 22.375, + "epoch": 8.089352196574833, + "grad_norm": 0.004178098590463913, + "kl": 0.072509765625, + "learning_rate": 8.913632424353811e-08, + "loss": 7.248263864312321e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4069, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 66.14583539962769, + "completions/min_length": 30.25, + "epoch": 8.091337800943162, + "grad_norm": 0.002955209261232975, + "kl": 0.05487060546875, + "learning_rate": 8.895661758804019e-08, + "loss": 5.485026849783026e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4070, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.0, + "completions/mean_length": 70.47916889190674, + "completions/min_length": 27.375, + "epoch": 8.09332340531149, + "grad_norm": 0.0036654771903251167, + "kl": 0.061492919921875, + "learning_rate": 8.877707457769246e-08, + "loss": 6.141657650005072e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4071, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.5, + "completions/mean_length": 58.61458444595337, + "completions/min_length": 18.625, + "epoch": 8.095309009679822, + "grad_norm": 0.003092474652028288, + "kl": 0.069610595703125, + "learning_rate": 8.85976952839747e-08, + "loss": 6.964871863601729e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4072, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.875, + "completions/mean_length": 62.7291693687439, + "completions/min_length": 24.5, + "epoch": 8.09729461404815, + "grad_norm": 0.0031028527747228654, + "kl": 0.073638916015625, + "learning_rate": 8.841847977830196e-08, + "loss": 7.363753684330732e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4073, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 64.21875095367432, + "completions/min_length": 28.25, + "epoch": 8.09928021841648, + "grad_norm": 0.0027384040201808833, + "kl": 0.05816650390625, + "learning_rate": 8.823942813202351e-08, + "loss": 5.811927258037031e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4074, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 68.12500190734863, + "completions/min_length": 21.875, + "epoch": 8.10126582278481, + "grad_norm": 0.0029689076591138033, + "kl": 0.08026123046875, + "learning_rate": 8.806054041642364e-08, + "loss": 8.026678551686928e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4075, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 76.92708587646484, + "completions/min_length": 22.625, + "epoch": 8.10325142715314, + "grad_norm": 0.004230721993249369, + "kl": 0.07611083984375, + "learning_rate": 8.788181670272165e-08, + "loss": 7.606188592035323e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4076, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.75, + "completions/mean_length": 58.177085876464844, + "completions/min_length": 17.375, + "epoch": 8.105237031521469, + "grad_norm": 0.820822794801226, + "kl": 0.0841064453125, + "learning_rate": 8.770325706207087e-08, + "loss": 0.0140147116035223, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4077, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 63.27083396911621, + "completions/min_length": 26.25, + "epoch": 8.1072226358898, + "grad_norm": 0.006041376137169389, + "kl": 0.081451416015625, + "learning_rate": 8.752486156556011e-08, + "loss": 8.147205517161638e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4078, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 64.76041889190674, + "completions/min_length": 19.875, + "epoch": 8.109208240258129, + "grad_norm": 0.0035371449929289733, + "kl": 0.05908203125, + "learning_rate": 8.734663028421207e-08, + "loss": 5.902050179429352e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4079, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.875, + "completions/mean_length": 71.91666889190674, + "completions/min_length": 28.5, + "epoch": 8.111193844626458, + "grad_norm": 0.002545608884861, + "kl": 0.07098388671875, + "learning_rate": 8.71685632889847e-08, + "loss": 7.094950706232339e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4080, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 65.61458492279053, + "completions/min_length": 24.0, + "epoch": 8.113179448994789, + "grad_norm": 0.004816686634404716, + "kl": 0.069732666015625, + "learning_rate": 8.699066065077004e-08, + "loss": 6.979263707762584e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4081, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 76.25000286102295, + "completions/min_length": 29.25, + "epoch": 8.115165053363118, + "grad_norm": 0.0034794009648493143, + "kl": 0.0733642578125, + "learning_rate": 8.681292244039507e-08, + "loss": 7.324795296881348e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4082, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 68.59375286102295, + "completions/min_length": 25.0, + "epoch": 8.117150657731447, + "grad_norm": 0.0025915006659035476, + "kl": 0.05963134765625, + "learning_rate": 8.663534872862127e-08, + "loss": 5.96054524066858e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4083, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 65.46875238418579, + "completions/min_length": 25.25, + "epoch": 8.119136262099776, + "grad_norm": 0.0036968277020846033, + "kl": 0.073486328125, + "learning_rate": 8.645793958614439e-08, + "loss": 7.34370551072061e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4084, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.25, + "completions/mean_length": 73.56250190734863, + "completions/min_length": 24.625, + "epoch": 8.121121866468107, + "grad_norm": 1.0190478048282317, + "kl": 0.06744384765625, + "learning_rate": 8.62806950835947e-08, + "loss": -0.006059292703866959, + "memory(GiB)": 94.21, + "reward": 1.7083333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7083333358168602, + "rewards/CineAccuracyORM/std": 0.32266222313046455, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4085, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.0, + "completions/mean_length": 55.70833492279053, + "completions/min_length": 16.375, + "epoch": 8.123107470836436, + "grad_norm": 0.005860968019304921, + "kl": 0.064727783203125, + "learning_rate": 8.610361529153721e-08, + "loss": 6.472819222835824e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4086, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.125, + "completions/mean_length": 66.60416793823242, + "completions/min_length": 20.625, + "epoch": 8.125093075204765, + "grad_norm": 0.0038769452834917424, + "kl": 0.076568603515625, + "learning_rate": 8.592670028047105e-08, + "loss": 7.65736767789349e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4087, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.375, + "completions/mean_length": 66.17708539962769, + "completions/min_length": 20.5, + "epoch": 8.127078679573096, + "grad_norm": 0.0037981058310298777, + "kl": 0.072662353515625, + "learning_rate": 8.574995012082986e-08, + "loss": 7.259578705998138e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4088, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 65.36458587646484, + "completions/min_length": 19.25, + "epoch": 8.129064283941425, + "grad_norm": 0.0032027048390117097, + "kl": 0.0709228515625, + "learning_rate": 8.557336488298184e-08, + "loss": 7.099266804289073e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4089, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.625, + "completions/mean_length": 58.63541793823242, + "completions/min_length": 19.375, + "epoch": 8.131049888309754, + "grad_norm": 0.0035778668017920966, + "kl": 0.08074951171875, + "learning_rate": 8.539694463722907e-08, + "loss": 8.079313556663692e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4090, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 74.66666984558105, + "completions/min_length": 29.375, + "epoch": 8.133035492678085, + "grad_norm": 0.0037775964220259457, + "kl": 0.06561279296875, + "learning_rate": 8.52206894538085e-08, + "loss": 6.557812594110146e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4091, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.5, + "completions/mean_length": 70.48958444595337, + "completions/min_length": 21.625, + "epoch": 8.135021097046414, + "grad_norm": 0.006361624664001505, + "kl": 0.0615234375, + "learning_rate": 8.504459940289094e-08, + "loss": 6.153984577395022e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4092, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 64.67708539962769, + "completions/min_length": 20.875, + "epoch": 8.137006701414743, + "grad_norm": 1.3408310523189166, + "kl": 0.133636474609375, + "learning_rate": 8.486867455458147e-08, + "loss": -0.003510394599288702, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4093, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.625, + "completions/mean_length": 76.08333587646484, + "completions/min_length": 21.75, + "epoch": 8.138992305783074, + "grad_norm": 0.0038748184402661455, + "kl": 0.07562255859375, + "learning_rate": 8.469291497891978e-08, + "loss": 7.560574886156246e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4094, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.25, + "completions/mean_length": 62.083335399627686, + "completions/min_length": 21.625, + "epoch": 8.140977910151403, + "grad_norm": 0.012435054507674568, + "kl": 0.076202392578125, + "learning_rate": 8.451732074587925e-08, + "loss": 7.620621181558818e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4095, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 60.375000953674316, + "completions/min_length": 19.625, + "epoch": 8.142963514519732, + "grad_norm": 0.01043506512653113, + "kl": 0.09088134765625, + "learning_rate": 8.434189192536784e-08, + "loss": 9.082769247470424e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4096, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 63.06250286102295, + "completions/min_length": 22.625, + "epoch": 8.14494911888806, + "grad_norm": 0.0039158193857139045, + "kl": 0.0780029296875, + "learning_rate": 8.416662858722767e-08, + "loss": 7.798105070833117e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4097, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 71.55208587646484, + "completions/min_length": 31.125, + "epoch": 8.146934723256392, + "grad_norm": 0.008277865993363477, + "kl": 0.081756591796875, + "learning_rate": 8.39915308012345e-08, + "loss": 8.176160918083042e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4098, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.125, + "completions/mean_length": 73.51041793823242, + "completions/min_length": 32.375, + "epoch": 8.14892032762472, + "grad_norm": 1.6377928806517925, + "kl": 0.091583251953125, + "learning_rate": 8.381659863709878e-08, + "loss": 0.002245645970106125, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166669771075, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4099, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 66.05208444595337, + "completions/min_length": 21.75, + "epoch": 8.15090593199305, + "grad_norm": 0.0029685574784494456, + "kl": 0.073638916015625, + "learning_rate": 8.364183216446463e-08, + "loss": 7.353271939791739e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4100, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 65.01041889190674, + "completions/min_length": 22.625, + "epoch": 8.15289153636138, + "grad_norm": 0.003499743460911753, + "kl": 0.094970703125, + "learning_rate": 8.346723145291012e-08, + "loss": 9.492230310570449e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4101, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 63.51041793823242, + "completions/min_length": 23.875, + "epoch": 8.15487714072971, + "grad_norm": 0.006329133950969244, + "kl": 0.084197998046875, + "learning_rate": 8.329279657194777e-08, + "loss": 8.425174746662378e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4102, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 67.13541889190674, + "completions/min_length": 21.0, + "epoch": 8.156862745098039, + "grad_norm": 0.007857997723493278, + "kl": 0.071685791015625, + "learning_rate": 8.311852759102384e-08, + "loss": 7.163742702687159e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4103, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 66.25000286102295, + "completions/min_length": 22.75, + "epoch": 8.15884834946637, + "grad_norm": 0.006423835602411644, + "kl": 0.08184814453125, + "learning_rate": 8.294442457951839e-08, + "loss": 8.184979378711432e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4104, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 67.34375143051147, + "completions/min_length": 22.25, + "epoch": 8.160833953834699, + "grad_norm": 0.004043087551216295, + "kl": 0.0616455078125, + "learning_rate": 8.277048760674571e-08, + "loss": 6.158360338304192e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4105, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 70.33333587646484, + "completions/min_length": 26.25, + "epoch": 8.162819558203028, + "grad_norm": 0.0027480908783146405, + "kl": 0.07806396484375, + "learning_rate": 8.259671674195357e-08, + "loss": 7.805389032000676e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4106, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.625, + "completions/mean_length": 75.4166693687439, + "completions/min_length": 23.25, + "epoch": 8.164805162571358, + "grad_norm": 0.003731988327493677, + "kl": 0.078857421875, + "learning_rate": 8.242311205432417e-08, + "loss": 7.878868200350553e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4107, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 62.89583492279053, + "completions/min_length": 20.375, + "epoch": 8.166790766939688, + "grad_norm": 0.02846207143304965, + "kl": 0.099334716796875, + "learning_rate": 8.224967361297313e-08, + "loss": 9.936468995874748e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4108, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.0, + "completions/mean_length": 69.96875190734863, + "completions/min_length": 24.125, + "epoch": 8.168776371308017, + "grad_norm": 0.011059952324884666, + "kl": 0.0650634765625, + "learning_rate": 8.207640148694966e-08, + "loss": 6.504629709525034e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4109, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.75, + "completions/mean_length": 69.1979193687439, + "completions/min_length": 28.5, + "epoch": 8.170761975676346, + "grad_norm": 0.003633069098050467, + "kl": 0.059112548828125, + "learning_rate": 8.190329574523769e-08, + "loss": 5.9063844673801214e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4110, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.75, + "completions/mean_length": 71.98958444595337, + "completions/min_length": 22.25, + "epoch": 8.172747580044676, + "grad_norm": 0.005319457264759278, + "kl": 0.08221435546875, + "learning_rate": 8.173035645675402e-08, + "loss": 8.227470971178263e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4111, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.5, + "completions/mean_length": 73.16666793823242, + "completions/min_length": 23.5, + "epoch": 8.174733184413006, + "grad_norm": 0.0031280795768653755, + "kl": 0.07427978515625, + "learning_rate": 8.155758369034931e-08, + "loss": 7.426535012200475e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4112, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 66.41666793823242, + "completions/min_length": 16.375, + "epoch": 8.176718788781335, + "grad_norm": 1.5545793203686245, + "kl": 0.075897216796875, + "learning_rate": 8.138497751480843e-08, + "loss": -0.00784082617610693, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4113, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 69.73958492279053, + "completions/min_length": 23.25, + "epoch": 8.178704393149665, + "grad_norm": 0.0032630133977261198, + "kl": 0.0810546875, + "learning_rate": 8.121253799884925e-08, + "loss": 8.110299677355215e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4114, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 65.89583587646484, + "completions/min_length": 27.25, + "epoch": 8.180689997517995, + "grad_norm": 0.008743879651911703, + "kl": 0.075897216796875, + "learning_rate": 8.104026521112383e-08, + "loss": 7.593727787025273e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4115, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 74.89583492279053, + "completions/min_length": 29.625, + "epoch": 8.182675601886324, + "grad_norm": 0.0029907263768583067, + "kl": 0.065582275390625, + "learning_rate": 8.086815922021773e-08, + "loss": 6.562257476616651e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4116, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 75.56250238418579, + "completions/min_length": 33.25, + "epoch": 8.184661206254654, + "grad_norm": 0.0028489223577943357, + "kl": 0.060882568359375, + "learning_rate": 8.069622009464971e-08, + "loss": 6.0922317061340436e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4117, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.875, + "completions/mean_length": 63.937501430511475, + "completions/min_length": 20.875, + "epoch": 8.186646810622983, + "grad_norm": 0.0030751870741928377, + "kl": 0.074798583984375, + "learning_rate": 8.052444790287277e-08, + "loss": 7.484326488338411e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4118, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.75, + "completions/mean_length": 64.85416746139526, + "completions/min_length": 25.5, + "epoch": 8.188632414991313, + "grad_norm": 1.1219008008134526, + "kl": 0.068817138671875, + "learning_rate": 8.035284271327275e-08, + "loss": -0.0020768556278198957, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4119, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 67.40625286102295, + "completions/min_length": 26.375, + "epoch": 8.190618019359643, + "grad_norm": 0.0034370151587727283, + "kl": 0.0684814453125, + "learning_rate": 8.018140459416961e-08, + "loss": 6.846078031230718e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4120, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.875, + "completions/mean_length": 62.57291793823242, + "completions/min_length": 19.625, + "epoch": 8.192603623727972, + "grad_norm": 0.004910802271437788, + "kl": 0.06805419921875, + "learning_rate": 8.001013361381647e-08, + "loss": 6.797845708206296e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4121, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 64.06250190734863, + "completions/min_length": 23.875, + "epoch": 8.194589228096302, + "grad_norm": 0.0030304360408127838, + "kl": 0.069976806640625, + "learning_rate": 7.983902984039964e-08, + "loss": 6.999982724664733e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4122, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 74.27083587646484, + "completions/min_length": 23.0, + "epoch": 8.19657483246463, + "grad_norm": 0.0029410458394492347, + "kl": 0.064727783203125, + "learning_rate": 7.966809334203973e-08, + "loss": 6.478787690866739e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4123, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 71.06250190734863, + "completions/min_length": 28.625, + "epoch": 8.198560436832961, + "grad_norm": 1.0115103998029777, + "kl": 0.0679931640625, + "learning_rate": 7.949732418678989e-08, + "loss": -0.004359547048807144, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4124, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.625, + "completions/mean_length": 71.25000095367432, + "completions/min_length": 26.75, + "epoch": 8.20054604120129, + "grad_norm": 0.006876665635144105, + "kl": 0.06024169921875, + "learning_rate": 7.932672244263694e-08, + "loss": 6.0246908105909824e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4125, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.625, + "completions/mean_length": 57.38541841506958, + "completions/min_length": 20.625, + "epoch": 8.20253164556962, + "grad_norm": 0.0025990072756236693, + "kl": 0.07000732421875, + "learning_rate": 7.915628817750126e-08, + "loss": 7.009755790932104e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4126, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 64.22916841506958, + "completions/min_length": 18.375, + "epoch": 8.20451724993795, + "grad_norm": 0.003151522712492612, + "kl": 0.06591796875, + "learning_rate": 7.898602145923616e-08, + "loss": 6.577809836016968e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4127, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 66.19791793823242, + "completions/min_length": 27.375, + "epoch": 8.20650285430628, + "grad_norm": 0.00721878819101889, + "kl": 0.08380126953125, + "learning_rate": 7.881592235562867e-08, + "loss": 8.370440627913922e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4128, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.375, + "completions/mean_length": 69.39583492279053, + "completions/min_length": 27.625, + "epoch": 8.208488458674609, + "grad_norm": 0.0027359970759121316, + "kl": 0.087371826171875, + "learning_rate": 7.864599093439867e-08, + "loss": 8.735234587220475e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4129, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.875, + "completions/mean_length": 70.46875190734863, + "completions/min_length": 23.0, + "epoch": 8.21047406304294, + "grad_norm": 0.003015899280429733, + "kl": 0.068267822265625, + "learning_rate": 7.847622726319963e-08, + "loss": 6.83972320985049e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4130, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.625, + "completions/mean_length": 53.61458444595337, + "completions/min_length": 19.5, + "epoch": 8.212459667411268, + "grad_norm": 0.002594072173009858, + "kl": 0.05609130859375, + "learning_rate": 7.830663140961813e-08, + "loss": 5.607612183666788e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4131, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.25, + "completions/mean_length": 61.60416793823242, + "completions/min_length": 22.875, + "epoch": 8.214445271779597, + "grad_norm": 0.0031819551447294482, + "kl": 0.050537109375, + "learning_rate": 7.81372034411738e-08, + "loss": 5.0561644457047805e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4132, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 66.36458587646484, + "completions/min_length": 22.625, + "epoch": 8.216430876147928, + "grad_norm": 0.0031428361418349773, + "kl": 0.075103759765625, + "learning_rate": 7.796794342531948e-08, + "loss": 7.505275425501168e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4133, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 64.83333587646484, + "completions/min_length": 20.875, + "epoch": 8.218416480516257, + "grad_norm": 0.003842512834826663, + "kl": 0.069549560546875, + "learning_rate": 7.779885142944143e-08, + "loss": 6.952918192837387e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4134, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 64.05208492279053, + "completions/min_length": 22.125, + "epoch": 8.220402084884586, + "grad_norm": 0.003592286326326378, + "kl": 0.05181884765625, + "learning_rate": 7.762992752085845e-08, + "loss": 5.182556196814403e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4135, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.125, + "completions/mean_length": 73.30208539962769, + "completions/min_length": 28.125, + "epoch": 8.222387689252916, + "grad_norm": 0.004092355370945299, + "kl": 0.096466064453125, + "learning_rate": 7.746117176682298e-08, + "loss": 9.637869516154751e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4136, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.875, + "completions/mean_length": 73.91666889190674, + "completions/min_length": 21.0, + "epoch": 8.224373293621246, + "grad_norm": 0.004276666411209176, + "kl": 0.06622314453125, + "learning_rate": 7.729258423452034e-08, + "loss": 6.61782396491617e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4137, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 70.46875238418579, + "completions/min_length": 23.875, + "epoch": 8.226358897989575, + "grad_norm": 0.0034572716650727886, + "kl": 0.059356689453125, + "learning_rate": 7.712416499106866e-08, + "loss": 5.928528480581008e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4138, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 113.0, + "completions/mean_length": 58.593751430511475, + "completions/min_length": 17.75, + "epoch": 8.228344502357904, + "grad_norm": 0.005875647132462992, + "kl": 0.073455810546875, + "learning_rate": 7.695591410351937e-08, + "loss": 7.339852163568139e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4139, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 74.95833587646484, + "completions/min_length": 26.25, + "epoch": 8.230330106726235, + "grad_norm": 0.005229200179778609, + "kl": 0.07354736328125, + "learning_rate": 7.678783163885677e-08, + "loss": 7.35757130314596e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4140, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 69.36458587646484, + "completions/min_length": 22.5, + "epoch": 8.232315711094564, + "grad_norm": 0.011158680805305942, + "kl": 0.0693359375, + "learning_rate": 7.661991766399783e-08, + "loss": 6.93343099555932e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4141, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.75, + "completions/mean_length": 79.71875286102295, + "completions/min_length": 27.375, + "epoch": 8.234301315462893, + "grad_norm": 0.0032093801197590932, + "kl": 0.072296142578125, + "learning_rate": 7.645217224579298e-08, + "loss": 7.238502439577132e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4142, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.25, + "completions/mean_length": 79.03125190734863, + "completions/min_length": 33.625, + "epoch": 8.236286919831224, + "grad_norm": 0.0027834144916026836, + "kl": 0.0782470703125, + "learning_rate": 7.6284595451025e-08, + "loss": 7.819591701263562e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4143, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 68.54166984558105, + "completions/min_length": 31.75, + "epoch": 8.238272524199553, + "grad_norm": 0.0054449676195110755, + "kl": 0.0589599609375, + "learning_rate": 7.611718734640992e-08, + "loss": 5.8841393183683977e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4144, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 66.94791841506958, + "completions/min_length": 18.625, + "epoch": 8.240258128567882, + "grad_norm": 0.0028408136476142138, + "kl": 0.051544189453125, + "learning_rate": 7.594994799859661e-08, + "loss": 5.152905941940844e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4145, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.875, + "completions/mean_length": 77.87500190734863, + "completions/min_length": 22.25, + "epoch": 8.242243732936213, + "grad_norm": 0.003269737092685624, + "kl": 0.063323974609375, + "learning_rate": 7.57828774741664e-08, + "loss": 6.335931539069861e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4146, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.125, + "completions/mean_length": 63.625001430511475, + "completions/min_length": 20.75, + "epoch": 8.244229337304542, + "grad_norm": 0.0034545693166952575, + "kl": 0.0953369140625, + "learning_rate": 7.561597583963386e-08, + "loss": 9.530247189104557e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4147, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 71.53125143051147, + "completions/min_length": 27.875, + "epoch": 8.246214941672871, + "grad_norm": 1.8560897717679077, + "kl": 0.069244384765625, + "learning_rate": 7.544924316144596e-08, + "loss": 0.001715024933218956, + "memory(GiB)": 94.21, + "reward": 1.6979166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.6979166716337204, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4148, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 65.36458444595337, + "completions/min_length": 23.5, + "epoch": 8.2482005460412, + "grad_norm": 0.0058954658707857325, + "kl": 0.074554443359375, + "learning_rate": 7.528267950598244e-08, + "loss": 7.45235156500712e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4149, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 71.08333539962769, + "completions/min_length": 26.25, + "epoch": 8.250186150409531, + "grad_norm": 0.004067599576072198, + "kl": 0.069915771484375, + "learning_rate": 7.511628493955591e-08, + "loss": 6.992083945078775e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4150, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.0, + "completions/mean_length": 66.27083539962769, + "completions/min_length": 22.75, + "epoch": 8.25217175477786, + "grad_norm": 0.002812674172602009, + "kl": 0.059722900390625, + "learning_rate": 7.495005952841182e-08, + "loss": 5.9765385231003165e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4151, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 64.71875190734863, + "completions/min_length": 17.75, + "epoch": 8.25415735914619, + "grad_norm": 2.132789467048576, + "kl": 0.078033447265625, + "learning_rate": 7.478400333872775e-08, + "loss": 0.00574676226824522, + "memory(GiB)": 94.21, + "reward": 1.5520833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.5520833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4152, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 63.208335876464844, + "completions/min_length": 19.625, + "epoch": 8.25614296351452, + "grad_norm": 0.005439234240909005, + "kl": 0.0738525390625, + "learning_rate": 7.461811643661447e-08, + "loss": 7.384568016277626e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4153, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.125, + "completions/mean_length": 68.11458492279053, + "completions/min_length": 29.0, + "epoch": 8.25812856788285, + "grad_norm": 0.005615012995516823, + "kl": 0.069091796875, + "learning_rate": 7.445239888811489e-08, + "loss": 6.908044451847672e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4154, + "train_speed(iter/s)": 0.022646 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.5, + "completions/mean_length": 69.80208587646484, + "completions/min_length": 26.375, + "epoch": 8.260114172251178, + "grad_norm": 0.00862034448060035, + "kl": 0.0672607421875, + "learning_rate": 7.428685075920488e-08, + "loss": 6.722434773109853e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4155, + "train_speed(iter/s)": 0.022645 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.5, + "completions/mean_length": 72.09375286102295, + "completions/min_length": 21.125, + "epoch": 8.26209977661951, + "grad_norm": 0.003062503697419942, + "kl": 0.061309814453125, + "learning_rate": 7.412147211579267e-08, + "loss": 6.11907453276217e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4156, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.75, + "completions/mean_length": 71.70833539962769, + "completions/min_length": 26.5, + "epoch": 8.264085380987838, + "grad_norm": 0.002940944783097714, + "kl": 0.075286865234375, + "learning_rate": 7.395626302371866e-08, + "loss": 7.530323637183756e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4157, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.375, + "completions/mean_length": 69.98958587646484, + "completions/min_length": 22.5, + "epoch": 8.266070985356167, + "grad_norm": 0.004050780055793523, + "kl": 0.05810546875, + "learning_rate": 7.379122354875672e-08, + "loss": 5.808388232253492e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4158, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 71.44791889190674, + "completions/min_length": 21.625, + "epoch": 8.268056589724498, + "grad_norm": 0.0041788549885734315, + "kl": 0.061431884765625, + "learning_rate": 7.362635375661224e-08, + "loss": 6.149184628156945e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4159, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.125, + "completions/mean_length": 72.68750095367432, + "completions/min_length": 24.375, + "epoch": 8.270042194092827, + "grad_norm": 0.0035975429601007762, + "kl": 0.071258544921875, + "learning_rate": 7.346165371292334e-08, + "loss": 7.12537657818757e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4160, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.0, + "completions/mean_length": 60.69791841506958, + "completions/min_length": 19.625, + "epoch": 8.272027798461156, + "grad_norm": 0.003689711535103805, + "kl": 0.067352294921875, + "learning_rate": 7.329712348326089e-08, + "loss": 6.73617105348967e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4161, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 189.875, + "completions/mean_length": 69.48958444595337, + "completions/min_length": 19.5, + "epoch": 8.274013402829485, + "grad_norm": 0.013905783244096769, + "kl": 0.080718994140625, + "learning_rate": 7.313276313312761e-08, + "loss": 8.067675662459806e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4162, + "train_speed(iter/s)": 0.022644 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 200.375, + "completions/mean_length": 76.47916889190674, + "completions/min_length": 26.375, + "epoch": 8.275999007197816, + "grad_norm": 0.0037742084416650286, + "kl": 0.057647705078125, + "learning_rate": 7.296857272795914e-08, + "loss": 5.7666573411552235e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4163, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 61.10416841506958, + "completions/min_length": 26.125, + "epoch": 8.277984611566145, + "grad_norm": 0.0031587650698535743, + "kl": 0.05743408203125, + "learning_rate": 7.280455233312294e-08, + "loss": 5.738392792409286e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4164, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.875, + "completions/mean_length": 63.73958492279053, + "completions/min_length": 16.75, + "epoch": 8.279970215934474, + "grad_norm": 0.008640423519536786, + "kl": 0.073883056640625, + "learning_rate": 7.264070201391908e-08, + "loss": 7.377046858891845e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4165, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 65.20833587646484, + "completions/min_length": 24.0, + "epoch": 8.281955820302805, + "grad_norm": 0.005045420398543357, + "kl": 0.0760498046875, + "learning_rate": 7.247702183558007e-08, + "loss": 7.604577695019543e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4166, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 69.895836353302, + "completions/min_length": 23.0, + "epoch": 8.283941424671134, + "grad_norm": 0.0047622319186527845, + "kl": 0.066497802734375, + "learning_rate": 7.231351186327029e-08, + "loss": 6.651488365605474e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4167, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 71.20833587646484, + "completions/min_length": 26.75, + "epoch": 8.285927029039463, + "grad_norm": 0.0036402493787030593, + "kl": 0.073089599609375, + "learning_rate": 7.215017216208663e-08, + "loss": 7.308584463316947e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4168, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 67.30208539962769, + "completions/min_length": 19.875, + "epoch": 8.287912633407794, + "grad_norm": 0.002346681151183932, + "kl": 0.0552978515625, + "learning_rate": 7.198700279705816e-08, + "loss": 5.5294203775702044e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4169, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 72.56250286102295, + "completions/min_length": 27.375, + "epoch": 8.289898237776123, + "grad_norm": 0.0024313301656414834, + "kl": 0.064056396484375, + "learning_rate": 7.18240038331459e-08, + "loss": 6.406335887731984e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4170, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 77.44791984558105, + "completions/min_length": 26.5, + "epoch": 8.291883842144452, + "grad_norm": 0.0033700621789313095, + "kl": 0.072906494140625, + "learning_rate": 7.166117533524335e-08, + "loss": 7.281712896656245e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4171, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.0, + "completions/mean_length": 73.81250095367432, + "completions/min_length": 23.125, + "epoch": 8.293869446512783, + "grad_norm": 0.004418134707223982, + "kl": 0.06329345703125, + "learning_rate": 7.149851736817608e-08, + "loss": 6.32929295534268e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4172, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 61.395835876464844, + "completions/min_length": 21.625, + "epoch": 8.295855050881112, + "grad_norm": 0.007909765588515852, + "kl": 0.0648193359375, + "learning_rate": 7.133602999670152e-08, + "loss": 6.484874757006764e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4173, + "train_speed(iter/s)": 0.022643 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.75, + "completions/mean_length": 71.16666889190674, + "completions/min_length": 24.125, + "epoch": 8.297840655249441, + "grad_norm": 0.0032113258236920804, + "kl": 0.062652587890625, + "learning_rate": 7.117371328550958e-08, + "loss": 6.254656909732148e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4174, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.5, + "completions/mean_length": 69.09375238418579, + "completions/min_length": 22.5, + "epoch": 8.29982625961777, + "grad_norm": 0.04362664365925082, + "kl": 0.146697998046875, + "learning_rate": 7.10115672992218e-08, + "loss": 0.00014679976447951049, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4175, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.125, + "completions/mean_length": 74.73958683013916, + "completions/min_length": 26.5, + "epoch": 8.301811863986101, + "grad_norm": 0.003522197880223704, + "kl": 0.0709228515625, + "learning_rate": 7.084959210239217e-08, + "loss": 7.094758620951325e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4176, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.375, + "completions/mean_length": 60.187500953674316, + "completions/min_length": 22.875, + "epoch": 8.30379746835443, + "grad_norm": 0.0075772484225363835, + "kl": 0.063812255859375, + "learning_rate": 7.068778775950635e-08, + "loss": 6.386919994838536e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4177, + "train_speed(iter/s)": 0.022642 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.625, + "completions/mean_length": 72.52083587646484, + "completions/min_length": 27.625, + "epoch": 8.30578307272276, + "grad_norm": 0.0038363602636287708, + "kl": 0.063751220703125, + "learning_rate": 7.052615433498194e-08, + "loss": 6.378068792400882e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4178, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 217.75, + "completions/mean_length": 83.78125190734863, + "completions/min_length": 23.375, + "epoch": 8.30776867709109, + "grad_norm": 0.0028843949095304793, + "kl": 0.0670166015625, + "learning_rate": 7.036469189316902e-08, + "loss": 6.692897295579314e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4179, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 106.625, + "completions/mean_length": 56.927085876464844, + "completions/min_length": 21.75, + "epoch": 8.30975428145942, + "grad_norm": 0.0035091268879498387, + "kl": 0.0538330078125, + "learning_rate": 7.020340049834905e-08, + "loss": 5.382742892834358e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4180, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 66.25000143051147, + "completions/min_length": 20.25, + "epoch": 8.311739885827748, + "grad_norm": 0.00444426978875113, + "kl": 0.0625, + "learning_rate": 7.004228021473551e-08, + "loss": 6.246841803658754e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4181, + "train_speed(iter/s)": 0.022641 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.5, + "completions/mean_length": 71.40625190734863, + "completions/min_length": 27.5, + "epoch": 8.313725490196079, + "grad_norm": 0.0027014348671752595, + "kl": 0.06146240234375, + "learning_rate": 6.988133110647399e-08, + "loss": 6.152319838292897e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4182, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 76.14583492279053, + "completions/min_length": 27.0, + "epoch": 8.315711094564408, + "grad_norm": 1.0067082371838725, + "kl": 0.062164306640625, + "learning_rate": 6.972055323764154e-08, + "loss": 0.004236373584717512, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.3624799847602844, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4183, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 194.625, + "completions/mean_length": 82.458336353302, + "completions/min_length": 30.375, + "epoch": 8.317696698932737, + "grad_norm": 0.0067966967303409774, + "kl": 0.0849609375, + "learning_rate": 6.955994667224758e-08, + "loss": 8.506099402438849e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4184, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 68.85416841506958, + "completions/min_length": 21.875, + "epoch": 8.319682303301068, + "grad_norm": 0.0024507451794612708, + "kl": 0.05487060546875, + "learning_rate": 6.939951147423268e-08, + "loss": 5.4899119277251884e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4185, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 72.22916793823242, + "completions/min_length": 26.75, + "epoch": 8.321667907669397, + "grad_norm": 0.0030961854293762737, + "kl": 0.067657470703125, + "learning_rate": 6.923924770746964e-08, + "loss": 6.76825875416398e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4186, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.375, + "completions/mean_length": 80.26041984558105, + "completions/min_length": 21.5, + "epoch": 8.323653512037726, + "grad_norm": 0.0023012681409521223, + "kl": 0.056182861328125, + "learning_rate": 6.907915543576309e-08, + "loss": 5.6165015848819166e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4187, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 66.06250143051147, + "completions/min_length": 20.125, + "epoch": 8.325639116406055, + "grad_norm": 0.0031776690081823794, + "kl": 0.0855712890625, + "learning_rate": 6.8919234722849e-08, + "loss": 8.568624616600573e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4188, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 72.53125190734863, + "completions/min_length": 26.875, + "epoch": 8.327624720774386, + "grad_norm": 0.002911179027491114, + "kl": 0.05902099609375, + "learning_rate": 6.875948563239514e-08, + "loss": 5.8942554460372776e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4189, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 63.38541793823242, + "completions/min_length": 20.75, + "epoch": 8.329610325142715, + "grad_norm": 0.003549150843362108, + "kl": 0.06512451171875, + "learning_rate": 6.859990822800121e-08, + "loss": 6.507930083898827e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4190, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 70.25000238418579, + "completions/min_length": 21.125, + "epoch": 8.331595929511044, + "grad_norm": 0.003814766423729236, + "kl": 0.07135009765625, + "learning_rate": 6.844050257319822e-08, + "loss": 7.128540892153978e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4191, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.375, + "completions/mean_length": 70.88541889190674, + "completions/min_length": 24.625, + "epoch": 8.333581533879375, + "grad_norm": 0.0029312588171825104, + "kl": 0.071319580078125, + "learning_rate": 6.828126873144908e-08, + "loss": 7.132625614758581e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4192, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.125, + "completions/mean_length": 61.3541693687439, + "completions/min_length": 23.125, + "epoch": 8.335567138247704, + "grad_norm": 0.006832784007863792, + "kl": 0.0645751953125, + "learning_rate": 6.812220676614822e-08, + "loss": 6.461890006903559e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4193, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.625, + "completions/mean_length": 67.75000286102295, + "completions/min_length": 16.375, + "epoch": 8.337552742616033, + "grad_norm": 0.003571284680253693, + "kl": 0.066802978515625, + "learning_rate": 6.796331674062145e-08, + "loss": 6.683095853077248e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4194, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 86.34375286102295, + "completions/min_length": 35.5, + "epoch": 8.339538346984364, + "grad_norm": 0.003652215954812687, + "kl": 0.0672607421875, + "learning_rate": 6.78045987181265e-08, + "loss": 6.717565702274442e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4195, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.125, + "completions/mean_length": 71.47916889190674, + "completions/min_length": 28.625, + "epoch": 8.341523951352693, + "grad_norm": 0.0028766820526552237, + "kl": 0.068206787109375, + "learning_rate": 6.764605276185226e-08, + "loss": 6.815900997025892e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4196, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.625, + "completions/mean_length": 64.84375190734863, + "completions/min_length": 23.375, + "epoch": 8.343509555721022, + "grad_norm": 0.006820664564001713, + "kl": 0.075592041015625, + "learning_rate": 6.748767893491919e-08, + "loss": 7.563058898085728e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4197, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.0, + "completions/mean_length": 72.18750095367432, + "completions/min_length": 23.25, + "epoch": 8.345495160089353, + "grad_norm": 1.6536671859206011, + "kl": 0.082611083984375, + "learning_rate": 6.732947730037935e-08, + "loss": -0.007601022720336914, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4198, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 62.86458492279053, + "completions/min_length": 19.75, + "epoch": 8.347480764457682, + "grad_norm": 0.0038511280632069367, + "kl": 0.075103759765625, + "learning_rate": 6.717144792121638e-08, + "loss": 7.513690798077732e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4199, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.75, + "completions/mean_length": 77.86458587646484, + "completions/min_length": 32.125, + "epoch": 8.349466368826011, + "grad_norm": 1.5334265370103473, + "kl": 0.0750732421875, + "learning_rate": 6.701359086034487e-08, + "loss": 0.01664251834154129, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.2573831044137478, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4200, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 78.520836353302, + "completions/min_length": 29.25, + "epoch": 8.35145197319434, + "grad_norm": 0.5257373404217704, + "kl": 0.244232177734375, + "learning_rate": 6.685590618061132e-08, + "loss": -0.020722713321447372, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4201, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 64.5416693687439, + "completions/min_length": 22.0, + "epoch": 8.353437577562671, + "grad_norm": 0.004414779735111849, + "kl": 0.0689697265625, + "learning_rate": 6.669839394479315e-08, + "loss": 6.89075532136485e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4202, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.0, + "completions/mean_length": 71.01041841506958, + "completions/min_length": 21.875, + "epoch": 8.355423181931, + "grad_norm": 0.002537753573189819, + "kl": 0.07489013671875, + "learning_rate": 6.654105421559958e-08, + "loss": 7.493824523407966e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4203, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.5, + "completions/mean_length": 72.63541889190674, + "completions/min_length": 24.875, + "epoch": 8.35740878629933, + "grad_norm": 0.006485065913781162, + "kl": 0.072998046875, + "learning_rate": 6.638388705567067e-08, + "loss": 7.296283729374409e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4204, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 61.01041793823242, + "completions/min_length": 22.625, + "epoch": 8.35939439066766, + "grad_norm": 0.0027873423026066172, + "kl": 0.0648193359375, + "learning_rate": 6.622689252757813e-08, + "loss": 6.491924432339147e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4205, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 62.687501430511475, + "completions/min_length": 26.875, + "epoch": 8.361379995035989, + "grad_norm": 0.0035625609199343247, + "kl": 0.06781005859375, + "learning_rate": 6.607007069382497e-08, + "loss": 6.777978705940768e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4206, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 68.802086353302, + "completions/min_length": 25.0, + "epoch": 8.363365599404318, + "grad_norm": 0.002636273941322249, + "kl": 0.052398681640625, + "learning_rate": 6.59134216168451e-08, + "loss": 5.239554957370274e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4207, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.25, + "completions/mean_length": 66.77083492279053, + "completions/min_length": 21.125, + "epoch": 8.365351203772649, + "grad_norm": 0.003237833987991112, + "kl": 0.069549560546875, + "learning_rate": 6.575694535900411e-08, + "loss": 6.955623393878341e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4208, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 68.83333539962769, + "completions/min_length": 29.875, + "epoch": 8.367336808140978, + "grad_norm": 0.0030580252218723194, + "kl": 0.06549072265625, + "learning_rate": 6.560064198259835e-08, + "loss": 6.559217581525445e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4209, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 192.25, + "completions/mean_length": 85.88541984558105, + "completions/min_length": 34.875, + "epoch": 8.369322412509307, + "grad_norm": 0.00428882858298063, + "kl": 0.06048583984375, + "learning_rate": 6.544451154985548e-08, + "loss": 6.042775567038916e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4210, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.625, + "completions/mean_length": 68.27083539962769, + "completions/min_length": 26.375, + "epoch": 8.371308016877638, + "grad_norm": 0.007642428985250278, + "kl": 0.08489990234375, + "learning_rate": 6.528855412293449e-08, + "loss": 8.489205356454477e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4211, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.75, + "completions/mean_length": 67.23958539962769, + "completions/min_length": 27.875, + "epoch": 8.373293621245967, + "grad_norm": 0.004110090739025641, + "kl": 0.053070068359375, + "learning_rate": 6.513276976392529e-08, + "loss": 5.307815081323497e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4212, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.25, + "completions/mean_length": 71.66666889190674, + "completions/min_length": 23.125, + "epoch": 8.375279225614296, + "grad_norm": 0.003948887329439252, + "kl": 0.069793701171875, + "learning_rate": 6.497715853484898e-08, + "loss": 6.986410880926996e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4213, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 84.72916889190674, + "completions/min_length": 33.125, + "epoch": 8.377264829982625, + "grad_norm": 0.0036518461866186305, + "kl": 0.07989501953125, + "learning_rate": 6.482172049765782e-08, + "loss": 7.978919893503189e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4214, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 70.48958587646484, + "completions/min_length": 22.0, + "epoch": 8.379250434350956, + "grad_norm": 0.004215292981769877, + "kl": 0.065887451171875, + "learning_rate": 6.466645571423484e-08, + "loss": 6.589628901565447e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4215, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 70.66666889190674, + "completions/min_length": 20.375, + "epoch": 8.381236038719285, + "grad_norm": 0.005037587241198357, + "kl": 0.079742431640625, + "learning_rate": 6.451136424639447e-08, + "loss": 7.974650361575186e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4216, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.375, + "completions/mean_length": 62.60416889190674, + "completions/min_length": 21.625, + "epoch": 8.383221643087614, + "grad_norm": 0.002494850930944186, + "kl": 0.0552978515625, + "learning_rate": 6.435644615588176e-08, + "loss": 5.533179501071572e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4217, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 67.95833492279053, + "completions/min_length": 19.625, + "epoch": 8.385207247455945, + "grad_norm": 0.0029556599577618987, + "kl": 0.057647705078125, + "learning_rate": 6.420170150437292e-08, + "loss": 5.755452366429381e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4218, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.625, + "completions/mean_length": 70.85416841506958, + "completions/min_length": 20.125, + "epoch": 8.387192851824274, + "grad_norm": 0.003131981637420512, + "kl": 0.06744384765625, + "learning_rate": 6.40471303534751e-08, + "loss": 6.743191624991596e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4219, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 76.28125333786011, + "completions/min_length": 22.125, + "epoch": 8.389178456192603, + "grad_norm": 0.0030258545100966144, + "kl": 0.081634521484375, + "learning_rate": 6.389273276472657e-08, + "loss": 8.156164403771982e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4220, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 67.17708492279053, + "completions/min_length": 22.5, + "epoch": 8.391164060560934, + "grad_norm": 1.1906277765303357, + "kl": 0.07061767578125, + "learning_rate": 6.373850879959602e-08, + "loss": 0.008760648779571056, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4221, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.875, + "completions/mean_length": 74.92708683013916, + "completions/min_length": 30.0, + "epoch": 8.393149664929263, + "grad_norm": 0.004700337332481203, + "kl": 0.079437255859375, + "learning_rate": 6.358445851948358e-08, + "loss": 7.939561328385025e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4222, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.25, + "completions/mean_length": 73.81250143051147, + "completions/min_length": 21.0, + "epoch": 8.395135269297592, + "grad_norm": 0.003181765384117037, + "kl": 0.073760986328125, + "learning_rate": 6.343058198571966e-08, + "loss": 7.376407302217558e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4223, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.125, + "completions/mean_length": 65.13541841506958, + "completions/min_length": 26.5, + "epoch": 8.397120873665923, + "grad_norm": 0.002593709281589465, + "kl": 0.053741455078125, + "learning_rate": 6.327687925956616e-08, + "loss": 5.3710275096818805e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4224, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 71.37500095367432, + "completions/min_length": 27.75, + "epoch": 8.399106478034252, + "grad_norm": 0.0029549552511358066, + "kl": 0.061309814453125, + "learning_rate": 6.312335040221512e-08, + "loss": 6.12829317105934e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4225, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 69.93750238418579, + "completions/min_length": 34.625, + "epoch": 8.401092082402581, + "grad_norm": 0.0040503309644984555, + "kl": 0.089111328125, + "learning_rate": 6.29699954747896e-08, + "loss": 8.924187568482012e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4226, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.75, + "completions/mean_length": 72.50000286102295, + "completions/min_length": 22.75, + "epoch": 8.40307768677091, + "grad_norm": 0.002764478413730987, + "kl": 0.058563232421875, + "learning_rate": 6.28168145383438e-08, + "loss": 5.852927279192954e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4227, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 66.65625095367432, + "completions/min_length": 22.0, + "epoch": 8.405063291139241, + "grad_norm": 0.7286812119471732, + "kl": 0.061614990234375, + "learning_rate": 6.26638076538622e-08, + "loss": 0.007325816433876753, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4228, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.625, + "completions/mean_length": 63.12500190734863, + "completions/min_length": 24.25, + "epoch": 8.40704889550757, + "grad_norm": 0.0031088792543383084, + "kl": 0.073577880859375, + "learning_rate": 6.251097488225993e-08, + "loss": 7.355278648901731e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4229, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 67.10416889190674, + "completions/min_length": 32.125, + "epoch": 8.409034499875899, + "grad_norm": 1.5544014210439892, + "kl": 0.07586669921875, + "learning_rate": 6.23583162843832e-08, + "loss": 0.010579612106084824, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4230, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 58.687501430511475, + "completions/min_length": 21.875, + "epoch": 8.41102010424423, + "grad_norm": 0.7775305181757273, + "kl": 0.09600830078125, + "learning_rate": 6.220583192100848e-08, + "loss": -0.007324880920350552, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4231, + "train_speed(iter/s)": 0.02264 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 66.28125190734863, + "completions/min_length": 26.875, + "epoch": 8.413005708612559, + "grad_norm": 0.0030960546925664955, + "kl": 0.056488037109375, + "learning_rate": 6.205352185284319e-08, + "loss": 5.657611109199934e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4232, + "train_speed(iter/s)": 0.022639 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.875, + "completions/mean_length": 72.77083492279053, + "completions/min_length": 22.625, + "epoch": 8.414991312980888, + "grad_norm": 0.0027934893166460246, + "kl": 0.063079833984375, + "learning_rate": 6.190138614052515e-08, + "loss": 6.310495518846437e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4233, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 72.63542032241821, + "completions/min_length": 28.5, + "epoch": 8.416976917349219, + "grad_norm": 0.004146431643709181, + "kl": 0.072967529296875, + "learning_rate": 6.174942484462282e-08, + "loss": 7.292776717804372e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4234, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 69.58333587646484, + "completions/min_length": 24.75, + "epoch": 8.418962521717548, + "grad_norm": 1.6997985722253017, + "kl": 0.073577880859375, + "learning_rate": 6.159763802563534e-08, + "loss": -0.0006705643609166145, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4235, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 64.72916841506958, + "completions/min_length": 27.875, + "epoch": 8.420948126085877, + "grad_norm": 0.0039164728072086956, + "kl": 0.06524658203125, + "learning_rate": 6.144602574399227e-08, + "loss": 6.52932794764638e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4236, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 82.56250286102295, + "completions/min_length": 35.125, + "epoch": 8.422933730454208, + "grad_norm": 0.003622016252429963, + "kl": 0.085418701171875, + "learning_rate": 6.129458806005349e-08, + "loss": 8.547968172933906e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4237, + "train_speed(iter/s)": 0.022638 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.625, + "completions/mean_length": 81.63541793823242, + "completions/min_length": 31.875, + "epoch": 8.424919334822537, + "grad_norm": 0.003517787247546516, + "kl": 0.088775634765625, + "learning_rate": 6.11433250341099e-08, + "loss": 8.87192363734357e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4238, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 198.625, + "completions/mean_length": 76.4166693687439, + "completions/min_length": 25.75, + "epoch": 8.426904939190866, + "grad_norm": 0.005314102891701261, + "kl": 0.065643310546875, + "learning_rate": 6.099223672638227e-08, + "loss": 6.562695489265025e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4239, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 76.91666841506958, + "completions/min_length": 27.0, + "epoch": 8.428890543559195, + "grad_norm": 0.0025162435901807533, + "kl": 0.06201171875, + "learning_rate": 6.084132319702212e-08, + "loss": 6.199840572662652e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4240, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 66.59375143051147, + "completions/min_length": 22.625, + "epoch": 8.430876147927526, + "grad_norm": 0.0037691266586001877, + "kl": 0.054931640625, + "learning_rate": 6.069058450611158e-08, + "loss": 5.492793206940405e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4241, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 67.23958492279053, + "completions/min_length": 20.75, + "epoch": 8.432861752295855, + "grad_norm": 0.003074204847039791, + "kl": 0.075714111328125, + "learning_rate": 6.054002071366265e-08, + "loss": 7.568797445856035e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4242, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.625, + "completions/mean_length": 65.73958539962769, + "completions/min_length": 27.25, + "epoch": 8.434847356664184, + "grad_norm": 0.0027713608369422664, + "kl": 0.056732177734375, + "learning_rate": 6.038963187961826e-08, + "loss": 5.6736327678663656e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4243, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.0, + "completions/mean_length": 74.48958587646484, + "completions/min_length": 27.0, + "epoch": 8.436832961032515, + "grad_norm": 0.0027990209739054196, + "kl": 0.07598876953125, + "learning_rate": 6.023941806385114e-08, + "loss": 7.607359293615445e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4244, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 80.41666841506958, + "completions/min_length": 25.375, + "epoch": 8.438818565400844, + "grad_norm": 0.00470007314401911, + "kl": 0.0718994140625, + "learning_rate": 6.008937932616487e-08, + "loss": 7.186474249465391e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4245, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 67.02083492279053, + "completions/min_length": 22.375, + "epoch": 8.440804169769173, + "grad_norm": 0.0034211241649920285, + "kl": 0.052581787109375, + "learning_rate": 5.993951572629296e-08, + "loss": 5.2560772019205615e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4246, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.375, + "completions/mean_length": 65.58333539962769, + "completions/min_length": 26.125, + "epoch": 8.442789774137504, + "grad_norm": 0.007679420822045764, + "kl": 0.072967529296875, + "learning_rate": 5.978982732389914e-08, + "loss": 7.304772589122877e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4247, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 63.864586353302, + "completions/min_length": 24.0, + "epoch": 8.444775378505833, + "grad_norm": 0.002941298141620421, + "kl": 0.072021484375, + "learning_rate": 5.96403141785779e-08, + "loss": 7.194951467681676e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4248, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.75, + "completions/mean_length": 62.260417461395264, + "completions/min_length": 20.125, + "epoch": 8.446760982874162, + "grad_norm": 0.002982447261665682, + "kl": 0.062408447265625, + "learning_rate": 5.949097634985345e-08, + "loss": 6.244637916097417e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4249, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.0, + "completions/mean_length": 65.72916841506958, + "completions/min_length": 24.5, + "epoch": 8.448746587242493, + "grad_norm": 0.004061167575328291, + "kl": 0.0643310546875, + "learning_rate": 5.9341813897180295e-08, + "loss": 6.43485618638806e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4250, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.75, + "completions/mean_length": 75.78125190734863, + "completions/min_length": 30.75, + "epoch": 8.450732191610822, + "grad_norm": 0.0036886980081035923, + "kl": 0.074249267578125, + "learning_rate": 5.919282687994337e-08, + "loss": 7.428896060446277e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4251, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.75, + "completions/mean_length": 71.39583492279053, + "completions/min_length": 26.75, + "epoch": 8.45271779597915, + "grad_norm": 0.003754215023226811, + "kl": 0.06500244140625, + "learning_rate": 5.9044015357457344e-08, + "loss": 6.497999129351228e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4252, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 70.60416841506958, + "completions/min_length": 28.375, + "epoch": 8.45470340034748, + "grad_norm": 0.004753403156328406, + "kl": 0.061370849609375, + "learning_rate": 5.8895379388967546e-08, + "loss": 6.135796866146848e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4253, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.125, + "completions/mean_length": 60.812500953674316, + "completions/min_length": 25.75, + "epoch": 8.45668900471581, + "grad_norm": 0.0029484180583949303, + "kl": 0.05322265625, + "learning_rate": 5.874691903364887e-08, + "loss": 5.318777039065026e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4254, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 63.802085161209106, + "completions/min_length": 22.625, + "epoch": 8.45867460908414, + "grad_norm": 0.006974291887355687, + "kl": 0.072845458984375, + "learning_rate": 5.859863435060669e-08, + "loss": 7.275898678926751e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4255, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.625, + "completions/mean_length": 62.61458444595337, + "completions/min_length": 22.0, + "epoch": 8.460660213452469, + "grad_norm": 0.004393716802451101, + "kl": 0.06036376953125, + "learning_rate": 5.845052539887635e-08, + "loss": 6.040423613740131e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4256, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.625, + "completions/mean_length": 64.4791693687439, + "completions/min_length": 20.0, + "epoch": 8.4626458178208, + "grad_norm": 0.0028247826577669256, + "kl": 0.064788818359375, + "learning_rate": 5.8302592237423175e-08, + "loss": 6.487766950158402e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4257, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 71.47917032241821, + "completions/min_length": 28.5, + "epoch": 8.464631422189129, + "grad_norm": 0.003860351102665582, + "kl": 0.065704345703125, + "learning_rate": 5.8154834925142336e-08, + "loss": 6.57498458167538e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4258, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 69.27083492279053, + "completions/min_length": 30.875, + "epoch": 8.466617026557458, + "grad_norm": 0.004167788357369793, + "kl": 0.075347900390625, + "learning_rate": 5.800725352085945e-08, + "loss": 7.528180140070617e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4259, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 74.39583539962769, + "completions/min_length": 23.5, + "epoch": 8.468602630925789, + "grad_norm": 0.002724167928408849, + "kl": 0.07220458984375, + "learning_rate": 5.7859848083329554e-08, + "loss": 7.223599823191762e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4260, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 65.72916841506958, + "completions/min_length": 23.75, + "epoch": 8.470588235294118, + "grad_norm": 0.00408320712736982, + "kl": 0.068389892578125, + "learning_rate": 5.7712618671238e-08, + "loss": 6.84212427586317e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4261, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 72.84375190734863, + "completions/min_length": 26.375, + "epoch": 8.472573839662447, + "grad_norm": 0.004481179002260378, + "kl": 0.058319091796875, + "learning_rate": 5.756556534320012e-08, + "loss": 5.8295088820159435e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4262, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 71.37500286102295, + "completions/min_length": 29.375, + "epoch": 8.474559444030778, + "grad_norm": 0.0031078136993953763, + "kl": 0.06671142578125, + "learning_rate": 5.74186881577608e-08, + "loss": 6.670726725133136e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4263, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 71.04166841506958, + "completions/min_length": 24.5, + "epoch": 8.476545048399107, + "grad_norm": 0.002399091317865348, + "kl": 0.0645751953125, + "learning_rate": 5.72719871733951e-08, + "loss": 6.458575080614537e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4264, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.0, + "completions/mean_length": 69.63541889190674, + "completions/min_length": 26.0, + "epoch": 8.478530652767436, + "grad_norm": 0.0030160972524807513, + "kl": 0.054412841796875, + "learning_rate": 5.712546244850774e-08, + "loss": 5.437999789137393e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.3916747123003006, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4265, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.625, + "completions/mean_length": 62.6979193687439, + "completions/min_length": 19.875, + "epoch": 8.480516257135765, + "grad_norm": 0.0039402541023382314, + "kl": 0.0728759765625, + "learning_rate": 5.69791140414333e-08, + "loss": 7.2953145718202e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4266, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.0, + "completions/mean_length": 79.16666889190674, + "completions/min_length": 29.375, + "epoch": 8.482501861504096, + "grad_norm": 0.0027388598511840965, + "kl": 0.0777587890625, + "learning_rate": 5.683294201043626e-08, + "loss": 7.77281093178317e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4267, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 70.27083587646484, + "completions/min_length": 22.625, + "epoch": 8.484487465872425, + "grad_norm": 0.0027472378620899854, + "kl": 0.061431884765625, + "learning_rate": 5.6686946413710915e-08, + "loss": 6.136958836577833e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4268, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 78.81250190734863, + "completions/min_length": 25.75, + "epoch": 8.486473070240754, + "grad_norm": 0.005557592653558524, + "kl": 0.064544677734375, + "learning_rate": 5.654112730938104e-08, + "loss": 6.451752415159717e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4269, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.625, + "completions/mean_length": 57.84375190734863, + "completions/min_length": 18.75, + "epoch": 8.488458674609085, + "grad_norm": 0.004690608841158578, + "kl": 0.059417724609375, + "learning_rate": 5.639548475550055e-08, + "loss": 5.9457765019033104e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4270, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.875, + "completions/mean_length": 68.70833539962769, + "completions/min_length": 27.625, + "epoch": 8.490444278977414, + "grad_norm": 0.005158530661617823, + "kl": 0.080047607421875, + "learning_rate": 5.62500188100527e-08, + "loss": 7.993084727786481e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4271, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 64.50000238418579, + "completions/min_length": 23.0, + "epoch": 8.492429883345743, + "grad_norm": 0.004765370413656595, + "kl": 0.07568359375, + "learning_rate": 5.6104729530950686e-08, + "loss": 7.56825611460954e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4272, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.875, + "completions/mean_length": 61.187500953674316, + "completions/min_length": 28.0, + "epoch": 8.494415487714074, + "grad_norm": 0.0033980122903548007, + "kl": 0.071319580078125, + "learning_rate": 5.595961697603724e-08, + "loss": 7.129830919438973e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4273, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 61.98958444595337, + "completions/min_length": 23.125, + "epoch": 8.496401092082403, + "grad_norm": 0.005157100041246674, + "kl": 0.066162109375, + "learning_rate": 5.581468120308458e-08, + "loss": 6.612966535612941e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4274, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 59.947916984558105, + "completions/min_length": 20.5, + "epoch": 8.498386696450732, + "grad_norm": 1.202237066003347, + "kl": 0.063873291015625, + "learning_rate": 5.566992226979511e-08, + "loss": 6.392722571035847e-05, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4275, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.0, + "completions/mean_length": 71.96875286102295, + "completions/min_length": 26.75, + "epoch": 8.500372300819063, + "grad_norm": 0.003245217642608059, + "kl": 0.05828857421875, + "learning_rate": 5.5525340233800236e-08, + "loss": 5.830578083987348e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4276, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 63.72916889190674, + "completions/min_length": 19.75, + "epoch": 8.502357905187392, + "grad_norm": 0.0025823708561869088, + "kl": 0.068878173828125, + "learning_rate": 5.5380935152661066e-08, + "loss": 6.880475848447531e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4277, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.125, + "completions/mean_length": 71.40625286102295, + "completions/min_length": 23.375, + "epoch": 8.50434350955572, + "grad_norm": 0.0025413177228581544, + "kl": 0.0621337890625, + "learning_rate": 5.523670708386857e-08, + "loss": 6.217691407073289e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4278, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 78.0416693687439, + "completions/min_length": 28.25, + "epoch": 8.50632911392405, + "grad_norm": 0.010576234588147514, + "kl": 0.102294921875, + "learning_rate": 5.5092656084842816e-08, + "loss": 0.0001023239383357577, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4279, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 72.44791793823242, + "completions/min_length": 31.25, + "epoch": 8.50831471829238, + "grad_norm": 0.0030030364125630004, + "kl": 0.06475830078125, + "learning_rate": 5.4948782212933754e-08, + "loss": 6.477470742538571e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4280, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 58.52083444595337, + "completions/min_length": 25.0, + "epoch": 8.51030032266071, + "grad_norm": 0.002976566570308528, + "kl": 0.0640411376953125, + "learning_rate": 5.480508552542052e-08, + "loss": 6.40340440440923e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4281, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 64.64583587646484, + "completions/min_length": 24.375, + "epoch": 8.512285927029039, + "grad_norm": 2.855996228761785, + "kl": 0.065338134765625, + "learning_rate": 5.4661566079511834e-08, + "loss": 0.018806789070367813, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4282, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 69.56250286102295, + "completions/min_length": 28.5, + "epoch": 8.51427153139737, + "grad_norm": 0.002981304820025445, + "kl": 0.060394287109375, + "learning_rate": 5.451822393234601e-08, + "loss": 6.039762229193002e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4283, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 67.2916693687439, + "completions/min_length": 26.0, + "epoch": 8.516257135765699, + "grad_norm": 0.003040293098356286, + "kl": 0.078399658203125, + "learning_rate": 5.4375059140990386e-08, + "loss": 7.839395402697846e-05, + "memory(GiB)": 94.21, + "reward": 1.625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.625, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4284, + "train_speed(iter/s)": 0.022637 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.875, + "completions/mean_length": 63.4791693687439, + "completions/min_length": 19.25, + "epoch": 8.518242740134028, + "grad_norm": 0.00347720028417795, + "kl": 0.0587158203125, + "learning_rate": 5.4232071762442154e-08, + "loss": 5.863062688149512e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4285, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.25, + "completions/mean_length": 66.22916841506958, + "completions/min_length": 25.125, + "epoch": 8.520228344502359, + "grad_norm": 0.0034003435344987436, + "kl": 0.070953369140625, + "learning_rate": 5.408926185362756e-08, + "loss": 7.095082401065156e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4286, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 206.625, + "completions/mean_length": 79.79166793823242, + "completions/min_length": 32.375, + "epoch": 8.522213948870688, + "grad_norm": 0.003123158970219177, + "kl": 0.060943603515625, + "learning_rate": 5.394662947140216e-08, + "loss": 6.092020703363232e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4287, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 71.41666793823242, + "completions/min_length": 32.0, + "epoch": 8.524199553239017, + "grad_norm": 0.003812726552957413, + "kl": 0.07879638671875, + "learning_rate": 5.3804174672550995e-08, + "loss": 7.875763549236581e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4288, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 70.39583587646484, + "completions/min_length": 22.875, + "epoch": 8.526185157607348, + "grad_norm": 0.003215315309765408, + "kl": 0.05633544921875, + "learning_rate": 5.366189751378858e-08, + "loss": 5.6320404837606475e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4289, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 69.57291889190674, + "completions/min_length": 28.125, + "epoch": 8.528170761975677, + "grad_norm": 0.00284477513539326, + "kl": 0.071624755859375, + "learning_rate": 5.351979805175816e-08, + "loss": 7.154759805416688e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4290, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 69.30208587646484, + "completions/min_length": 20.375, + "epoch": 8.530156366344006, + "grad_norm": 0.003522690712312541, + "kl": 0.063812255859375, + "learning_rate": 5.337787634303287e-08, + "loss": 6.390751514118165e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4291, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 71.84375190734863, + "completions/min_length": 28.5, + "epoch": 8.532141970712335, + "grad_norm": 0.0038711904303571753, + "kl": 0.072418212890625, + "learning_rate": 5.3236132444114565e-08, + "loss": 7.230650226119906e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4292, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 71.90625190734863, + "completions/min_length": 21.5, + "epoch": 8.534127575080666, + "grad_norm": 0.006730149456461537, + "kl": 0.06573486328125, + "learning_rate": 5.3094566411434674e-08, + "loss": 6.571359699591994e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4293, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.625, + "completions/mean_length": 75.21875238418579, + "completions/min_length": 26.75, + "epoch": 8.536113179448995, + "grad_norm": 0.004244741918720068, + "kl": 0.06988525390625, + "learning_rate": 5.295317830135354e-08, + "loss": 6.992699491092935e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4294, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.5, + "completions/mean_length": 75.52083683013916, + "completions/min_length": 34.125, + "epoch": 8.538098783817324, + "grad_norm": 0.0032200952559136263, + "kl": 0.0736083984375, + "learning_rate": 5.281196817016065e-08, + "loss": 7.35716603230685e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4295, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.0, + "completions/mean_length": 62.03125238418579, + "completions/min_length": 22.5, + "epoch": 8.540084388185655, + "grad_norm": 0.005503542730934274, + "kl": 0.061126708984375, + "learning_rate": 5.267093607407514e-08, + "loss": 6.12151634413749e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4296, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 69.94791793823242, + "completions/min_length": 28.25, + "epoch": 8.542069992553984, + "grad_norm": 0.004664872690549267, + "kl": 0.0621337890625, + "learning_rate": 5.25300820692447e-08, + "loss": 6.211151776369661e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4297, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.5, + "completions/mean_length": 64.56250238418579, + "completions/min_length": 26.625, + "epoch": 8.544055596922313, + "grad_norm": 0.0029709481370372307, + "kl": 0.06231689453125, + "learning_rate": 5.2389406211746204e-08, + "loss": 6.231469888007268e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4298, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 215.25, + "completions/mean_length": 81.16666984558105, + "completions/min_length": 19.875, + "epoch": 8.546041201290643, + "grad_norm": 0.0028812112026739703, + "kl": 0.08538818359375, + "learning_rate": 5.224890855758596e-08, + "loss": 8.541432180209085e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4299, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.25, + "completions/mean_length": 64.86458492279053, + "completions/min_length": 24.0, + "epoch": 8.548026805658973, + "grad_norm": 0.003408443506572342, + "kl": 0.063995361328125, + "learning_rate": 5.2108589162698835e-08, + "loss": 6.398290861397982e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4300, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 73.26041841506958, + "completions/min_length": 23.875, + "epoch": 8.550012410027302, + "grad_norm": 0.0031046325957754576, + "kl": 0.058563232421875, + "learning_rate": 5.196844808294926e-08, + "loss": 5.855833296664059e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4301, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 68.50000238418579, + "completions/min_length": 16.375, + "epoch": 8.551998014395632, + "grad_norm": 1.7719131384050684, + "kl": 0.07171630859375, + "learning_rate": 5.182848537413009e-08, + "loss": -0.006205078214406967, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.2444935366511345, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4302, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 107.75, + "completions/mean_length": 54.97916841506958, + "completions/min_length": 20.375, + "epoch": 8.553983618763962, + "grad_norm": 0.0054062772106716, + "kl": 0.07305908203125, + "learning_rate": 5.1688701091963606e-08, + "loss": 7.297085539903492e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4303, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 66.94791889190674, + "completions/min_length": 23.875, + "epoch": 8.55596922313229, + "grad_norm": 0.0028579897064800945, + "kl": 0.062713623046875, + "learning_rate": 5.1549095292101053e-08, + "loss": 6.26999099040404e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4304, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 67.14583539962769, + "completions/min_length": 21.5, + "epoch": 8.55795482750062, + "grad_norm": 1.0395606041340089, + "kl": 0.0679931640625, + "learning_rate": 5.1409668030122366e-08, + "loss": 0.003949841484427452, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4305, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 63.812501430511475, + "completions/min_length": 19.875, + "epoch": 8.55994043186895, + "grad_norm": 0.004336387656050706, + "kl": 0.10955810546875, + "learning_rate": 5.1270419361536366e-08, + "loss": 0.00010955502511933446, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4306, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.125, + "completions/mean_length": 63.94791889190674, + "completions/min_length": 26.0, + "epoch": 8.56192603623728, + "grad_norm": 0.004381164084689803, + "kl": 0.069122314453125, + "learning_rate": 5.113134934178121e-08, + "loss": 6.911862146807835e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4307, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.625, + "completions/mean_length": 65.09375095367432, + "completions/min_length": 22.125, + "epoch": 8.563911640605609, + "grad_norm": 0.002698544257996323, + "kl": 0.061187744140625, + "learning_rate": 5.099245802622332e-08, + "loss": 6.119812314864248e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4308, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.375, + "completions/mean_length": 61.58333492279053, + "completions/min_length": 27.5, + "epoch": 8.56589724497394, + "grad_norm": 0.003842949749127718, + "kl": 0.052703857421875, + "learning_rate": 5.085374547015853e-08, + "loss": 5.272622365737334e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4309, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.5, + "completions/mean_length": 62.02083492279053, + "completions/min_length": 27.5, + "epoch": 8.567882849342269, + "grad_norm": 0.0037307035861505818, + "kl": 0.056610107421875, + "learning_rate": 5.071521172881127e-08, + "loss": 5.6617318477947265e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4310, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.875, + "completions/mean_length": 69.40625238418579, + "completions/min_length": 24.75, + "epoch": 8.569868453710598, + "grad_norm": 0.003585948129568317, + "kl": 0.06494140625, + "learning_rate": 5.057685685733465e-08, + "loss": 6.493809632956982e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4311, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.375, + "completions/mean_length": 65.12500143051147, + "completions/min_length": 25.375, + "epoch": 8.571854058078928, + "grad_norm": 0.7021310653246982, + "kl": 0.0634765625, + "learning_rate": 5.0438680910810885e-08, + "loss": -0.007124175317585468, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4312, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 78.69791889190674, + "completions/min_length": 30.25, + "epoch": 8.573839662447257, + "grad_norm": 0.003846357900154206, + "kl": 0.076080322265625, + "learning_rate": 5.0300683944250634e-08, + "loss": 7.605641440022737e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4313, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.5, + "completions/mean_length": 74.645836353302, + "completions/min_length": 29.5, + "epoch": 8.575825266815587, + "grad_norm": 0.00285625080560995, + "kl": 0.06475830078125, + "learning_rate": 5.016286601259334e-08, + "loss": 6.47250417387113e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4314, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 67.40625286102295, + "completions/min_length": 21.75, + "epoch": 8.577810871183917, + "grad_norm": 0.0031269006023052137, + "kl": 0.062835693359375, + "learning_rate": 5.002522717070751e-08, + "loss": 6.28582711215131e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4315, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 68.90625238418579, + "completions/min_length": 21.5, + "epoch": 8.579796475552246, + "grad_norm": 0.004004684717315213, + "kl": 0.075958251953125, + "learning_rate": 4.988776747338985e-08, + "loss": 7.59678368922323e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4316, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 67.1354193687439, + "completions/min_length": 22.25, + "epoch": 8.581782079920576, + "grad_norm": 0.0031676358037586554, + "kl": 0.083221435546875, + "learning_rate": 4.9750486975366156e-08, + "loss": 8.315112791024148e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4317, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.5, + "completions/mean_length": 60.51041889190674, + "completions/min_length": 23.0, + "epoch": 8.583767684288905, + "grad_norm": 0.0032964343830204725, + "kl": 0.067047119140625, + "learning_rate": 4.9613385731290814e-08, + "loss": 6.703739200020209e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4318, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.375, + "completions/mean_length": 66.78125238418579, + "completions/min_length": 20.375, + "epoch": 8.585753288657235, + "grad_norm": 0.005429413123891288, + "kl": 0.06689453125, + "learning_rate": 4.947646379574655e-08, + "loss": 6.688515713904053e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4319, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.125, + "completions/mean_length": 67.12500143051147, + "completions/min_length": 17.375, + "epoch": 8.587738893025564, + "grad_norm": 0.003958134800928919, + "kl": 0.07861328125, + "learning_rate": 4.933972122324509e-08, + "loss": 7.857757009333e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4320, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.75, + "completions/mean_length": 82.52083539962769, + "completions/min_length": 34.0, + "epoch": 8.589724497393894, + "grad_norm": 0.0025624410794586657, + "kl": 0.067108154296875, + "learning_rate": 4.920315806822639e-08, + "loss": 6.712192407576367e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4321, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.0, + "completions/mean_length": 73.54166889190674, + "completions/min_length": 26.5, + "epoch": 8.591710101762224, + "grad_norm": 0.0023238310276203107, + "kl": 0.067291259765625, + "learning_rate": 4.9066774385059404e-08, + "loss": 6.729447341058403e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4322, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 68.78125143051147, + "completions/min_length": 22.0, + "epoch": 8.593695706130553, + "grad_norm": 0.003239949744782852, + "kl": 0.068389892578125, + "learning_rate": 4.893057022804109e-08, + "loss": 6.837428372818977e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4323, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.875, + "completions/mean_length": 62.79166793823242, + "completions/min_length": 21.0, + "epoch": 8.595681310498883, + "grad_norm": 0.005528437410291154, + "kl": 0.079925537109375, + "learning_rate": 4.87945456513974e-08, + "loss": 7.989796722540632e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4324, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.875, + "completions/mean_length": 57.97916841506958, + "completions/min_length": 17.875, + "epoch": 8.597666914867213, + "grad_norm": 0.00394475641244904, + "kl": 0.0464019775390625, + "learning_rate": 4.865870070928274e-08, + "loss": 4.6373490476980805e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4325, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 64.36458539962769, + "completions/min_length": 20.125, + "epoch": 8.599652519235542, + "grad_norm": 0.0029878306576865505, + "kl": 0.056640625, + "learning_rate": 4.852303545577974e-08, + "loss": 5.660884198732674e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4326, + "train_speed(iter/s)": 0.022636 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.75, + "completions/mean_length": 71.520836353302, + "completions/min_length": 26.5, + "epoch": 8.601638123603871, + "grad_norm": 1.022228262628643, + "kl": 0.093536376953125, + "learning_rate": 4.838754994489952e-08, + "loss": 0.01451178640127182, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4327, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 74.50000190734863, + "completions/min_length": 27.0, + "epoch": 8.603623727972202, + "grad_norm": 0.003898653765595462, + "kl": 0.066802978515625, + "learning_rate": 4.8252244230581995e-08, + "loss": 6.686054985038936e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4328, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.625, + "completions/mean_length": 78.68750286102295, + "completions/min_length": 24.875, + "epoch": 8.605609332340531, + "grad_norm": 0.0042089250653228666, + "kl": 0.069244384765625, + "learning_rate": 4.811711836669507e-08, + "loss": 6.914118421263993e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4329, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 70.02083444595337, + "completions/min_length": 22.375, + "epoch": 8.60759493670886, + "grad_norm": 0.0026959948443466596, + "kl": 0.069061279296875, + "learning_rate": 4.798217240703534e-08, + "loss": 6.907604984007776e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4330, + "train_speed(iter/s)": 0.022635 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 69.10416793823242, + "completions/min_length": 27.25, + "epoch": 8.60958054107719, + "grad_norm": 0.003001994985852151, + "kl": 0.059478759765625, + "learning_rate": 4.7847406405327694e-08, + "loss": 5.946995224803686e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4331, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.375, + "completions/mean_length": 59.44791841506958, + "completions/min_length": 21.25, + "epoch": 8.61156614544552, + "grad_norm": 0.002710076595829114, + "kl": 0.0653533935546875, + "learning_rate": 4.7712820415225286e-08, + "loss": 6.538275192724541e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4332, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.75, + "completions/mean_length": 76.47916984558105, + "completions/min_length": 23.5, + "epoch": 8.61355174981385, + "grad_norm": 0.003148918017560327, + "kl": 0.06231689453125, + "learning_rate": 4.757841449030975e-08, + "loss": 6.237607885850593e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4333, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 71.03125143051147, + "completions/min_length": 21.875, + "epoch": 8.615537354182178, + "grad_norm": 0.02866596850944244, + "kl": 0.099945068359375, + "learning_rate": 4.744418868409089e-08, + "loss": 9.997825691243634e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4334, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 78.16666841506958, + "completions/min_length": 29.375, + "epoch": 8.61752295855051, + "grad_norm": 0.004532200988931007, + "kl": 0.090301513671875, + "learning_rate": 4.7310143050006854e-08, + "loss": 9.02952961041592e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4335, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.125, + "completions/mean_length": 68.63541841506958, + "completions/min_length": 25.125, + "epoch": 8.619508562918838, + "grad_norm": 0.002568692727568496, + "kl": 0.0552978515625, + "learning_rate": 4.7176277641424015e-08, + "loss": 5.525143205886707e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4336, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.375, + "completions/mean_length": 61.781251430511475, + "completions/min_length": 30.125, + "epoch": 8.621494167287167, + "grad_norm": 0.006796090742259831, + "kl": 0.06402587890625, + "learning_rate": 4.704259251163728e-08, + "loss": 6.401139398803934e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4337, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.25, + "completions/mean_length": 71.70833587646484, + "completions/min_length": 24.75, + "epoch": 8.623479771655498, + "grad_norm": 1.0790322340250689, + "kl": 0.0604248046875, + "learning_rate": 4.6909087713869314e-08, + "loss": -0.0013025652151554823, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4338, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 65.87500190734863, + "completions/min_length": 17.5, + "epoch": 8.625465376023827, + "grad_norm": 0.003158150752199311, + "kl": 0.062652587890625, + "learning_rate": 4.6775763301271423e-08, + "loss": 6.260805093916133e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4339, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 70.4166693687439, + "completions/min_length": 26.375, + "epoch": 8.627450980392156, + "grad_norm": 0.0038178650820808453, + "kl": 0.07220458984375, + "learning_rate": 4.6642619326922706e-08, + "loss": 7.214218931039795e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4340, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 68.91666889190674, + "completions/min_length": 21.25, + "epoch": 8.629436584760487, + "grad_norm": 0.0030149792009534957, + "kl": 0.06640625, + "learning_rate": 4.650965584383082e-08, + "loss": 6.636662146775052e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4341, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.5, + "completions/mean_length": 78.81250286102295, + "completions/min_length": 28.875, + "epoch": 8.631422189128816, + "grad_norm": 0.0028463456571123126, + "kl": 0.063232421875, + "learning_rate": 4.6376872904931307e-08, + "loss": 6.317744555417448e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4342, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 70.45833683013916, + "completions/min_length": 27.125, + "epoch": 8.633407793497145, + "grad_norm": 0.007217393926369768, + "kl": 0.072662353515625, + "learning_rate": 4.6244270563087605e-08, + "loss": 7.258918776642531e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4343, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 71.9791693687439, + "completions/min_length": 28.75, + "epoch": 8.635393397865474, + "grad_norm": 0.0030741068151509065, + "kl": 0.069244384765625, + "learning_rate": 4.611184887109204e-08, + "loss": 6.927357026143e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4344, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 62.07291793823242, + "completions/min_length": 18.125, + "epoch": 8.637379002233805, + "grad_norm": 0.006693699779112772, + "kl": 0.0615234375, + "learning_rate": 4.5979607881664216e-08, + "loss": 6.15669705439359e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4345, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 66.29166841506958, + "completions/min_length": 23.125, + "epoch": 8.639364606602134, + "grad_norm": 0.0034996730766743527, + "kl": 0.068206787109375, + "learning_rate": 4.584754764745208e-08, + "loss": 6.822988507337868e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4346, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 68.25000143051147, + "completions/min_length": 25.75, + "epoch": 8.641350210970463, + "grad_norm": 0.0050779400643106995, + "kl": 0.073822021484375, + "learning_rate": 4.571566822103179e-08, + "loss": 7.377025031019002e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4347, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.125, + "completions/mean_length": 66.47916889190674, + "completions/min_length": 23.625, + "epoch": 8.643335815338794, + "grad_norm": 0.0028356437793893314, + "kl": 0.05548095703125, + "learning_rate": 4.558396965490713e-08, + "loss": 5.548873741645366e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4348, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 73.82291841506958, + "completions/min_length": 23.875, + "epoch": 8.645321419707123, + "grad_norm": 0.002857309706917698, + "kl": 0.06439208984375, + "learning_rate": 4.545245200151038e-08, + "loss": 6.439985008910298e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4349, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.375, + "completions/mean_length": 76.80208539962769, + "completions/min_length": 30.375, + "epoch": 8.647307024075452, + "grad_norm": 0.0034409392505946284, + "kl": 0.0726318359375, + "learning_rate": 4.532111531320132e-08, + "loss": 7.249845657497644e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4350, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.375, + "completions/mean_length": 63.01041793823242, + "completions/min_length": 21.0, + "epoch": 8.649292628443783, + "grad_norm": 0.9981713138931096, + "kl": 0.06121826171875, + "learning_rate": 4.518995964226796e-08, + "loss": 0.0010815877467393875, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4351, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 64.89583492279053, + "completions/min_length": 21.375, + "epoch": 8.651278232812112, + "grad_norm": 0.0025715259489696835, + "kl": 0.063018798828125, + "learning_rate": 4.5058985040926255e-08, + "loss": 6.299058441072702e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4352, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 63.052086353302, + "completions/min_length": 25.5, + "epoch": 8.653263837180441, + "grad_norm": 0.004046280471654746, + "kl": 0.078399658203125, + "learning_rate": 4.492819156131994e-08, + "loss": 7.83963332651183e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4353, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 65.76041889190674, + "completions/min_length": 25.0, + "epoch": 8.655249441548772, + "grad_norm": 0.002532960040820793, + "kl": 0.066070556640625, + "learning_rate": 4.479757925552058e-08, + "loss": 6.610581476707011e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4354, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.25, + "completions/mean_length": 70.32291841506958, + "completions/min_length": 22.375, + "epoch": 8.657235045917101, + "grad_norm": 0.0026719322266955986, + "kl": 0.077911376953125, + "learning_rate": 4.466714817552791e-08, + "loss": 7.784062472637743e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4355, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 67.37500143051147, + "completions/min_length": 25.125, + "epoch": 8.65922065028543, + "grad_norm": 0.005673084626474441, + "kl": 0.064605712890625, + "learning_rate": 4.453689837326918e-08, + "loss": 6.464245961979032e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4356, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 60.48958468437195, + "completions/min_length": 22.25, + "epoch": 8.66120625465376, + "grad_norm": 0.007509430135032619, + "kl": 0.067657470703125, + "learning_rate": 4.440682990059963e-08, + "loss": 6.757605297025293e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4357, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.75, + "completions/mean_length": 72.31250143051147, + "completions/min_length": 21.5, + "epoch": 8.66319185902209, + "grad_norm": 0.002362393488109078, + "kl": 0.05548095703125, + "learning_rate": 4.427694280930244e-08, + "loss": 5.552250877371989e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4358, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 70.27083683013916, + "completions/min_length": 25.375, + "epoch": 8.66517746339042, + "grad_norm": 0.006079095180469086, + "kl": 0.06689453125, + "learning_rate": 4.41472371510882e-08, + "loss": 6.685058178845793e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4359, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 70.2604193687439, + "completions/min_length": 20.75, + "epoch": 8.667163067758748, + "grad_norm": 0.003153394170152237, + "kl": 0.060211181640625, + "learning_rate": 4.401771297759582e-08, + "loss": 6.01303436269518e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4360, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 77.76041889190674, + "completions/min_length": 29.625, + "epoch": 8.66914867212708, + "grad_norm": 0.0033560459080369497, + "kl": 0.07391357421875, + "learning_rate": 4.388837034039139e-08, + "loss": 7.383030606433749e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4361, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 199.5, + "completions/mean_length": 76.96875238418579, + "completions/min_length": 20.75, + "epoch": 8.671134276495408, + "grad_norm": 0.0030432030582164377, + "kl": 0.058807373046875, + "learning_rate": 4.375920929096899e-08, + "loss": 5.881188189960085e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4362, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.875, + "completions/mean_length": 64.76041984558105, + "completions/min_length": 26.875, + "epoch": 8.673119880863737, + "grad_norm": 0.002653118266666515, + "kl": 0.066436767578125, + "learning_rate": 4.363022988075049e-08, + "loss": 6.651584408245981e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4363, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 74.16666746139526, + "completions/min_length": 30.375, + "epoch": 8.675105485232068, + "grad_norm": 0.0028939883958346816, + "kl": 0.073974609375, + "learning_rate": 4.3501432161085204e-08, + "loss": 7.400155300274491e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4364, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.375, + "completions/mean_length": 66.34375095367432, + "completions/min_length": 27.625, + "epoch": 8.677091089600397, + "grad_norm": 0.005420757829492257, + "kl": 0.065948486328125, + "learning_rate": 4.3372816183250504e-08, + "loss": 6.587877578567713e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4365, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.125, + "completions/mean_length": 61.22916889190674, + "completions/min_length": 25.5, + "epoch": 8.679076693968726, + "grad_norm": 0.004118572699474102, + "kl": 0.0518798828125, + "learning_rate": 4.3244381998450985e-08, + "loss": 5.1863105909433216e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4366, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 62.187500953674316, + "completions/min_length": 23.375, + "epoch": 8.681062298337057, + "grad_norm": 0.01360768609035256, + "kl": 0.076934814453125, + "learning_rate": 4.311612965781902e-08, + "loss": 7.697496039327234e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4367, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.125, + "completions/mean_length": 80.40625190734863, + "completions/min_length": 23.375, + "epoch": 8.683047902705386, + "grad_norm": 0.0030538540885225564, + "kl": 0.0726318359375, + "learning_rate": 4.298805921241472e-08, + "loss": 7.262681174324825e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4368, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 70.23958396911621, + "completions/min_length": 26.25, + "epoch": 8.685033507073715, + "grad_norm": 0.004360091104944317, + "kl": 0.067169189453125, + "learning_rate": 4.286017071322551e-08, + "loss": 6.708032742608339e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4369, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.625, + "completions/mean_length": 65.17708587646484, + "completions/min_length": 18.625, + "epoch": 8.687019111442044, + "grad_norm": 0.0062072339655240664, + "kl": 0.063751220703125, + "learning_rate": 4.273246421116666e-08, + "loss": 6.380338891176507e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4370, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 67.27083492279053, + "completions/min_length": 21.75, + "epoch": 8.689004715810375, + "grad_norm": 2.065877159633669, + "kl": 0.065643310546875, + "learning_rate": 4.2604939757080795e-08, + "loss": 0.00811000820249319, + "memory(GiB)": 94.21, + "reward": 1.78125, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.78125, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4371, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.375, + "completions/mean_length": 75.91666889190674, + "completions/min_length": 27.5, + "epoch": 8.690990320178704, + "grad_norm": 0.003151320419609507, + "kl": 0.06903076171875, + "learning_rate": 4.247759740173812e-08, + "loss": 6.90828965161927e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4372, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.125, + "completions/mean_length": 70.06250190734863, + "completions/min_length": 21.875, + "epoch": 8.692975924547033, + "grad_norm": 0.002664019921460677, + "kl": 0.08465576171875, + "learning_rate": 4.2350437195836475e-08, + "loss": 8.462080586468801e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4373, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 100.375, + "completions/mean_length": 51.04166841506958, + "completions/min_length": 19.5, + "epoch": 8.694961528915364, + "grad_norm": 0.006446605491018157, + "kl": 0.057098388671875, + "learning_rate": 4.222345919000092e-08, + "loss": 5.714341386919841e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4374, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 75.73958539962769, + "completions/min_length": 34.25, + "epoch": 8.696947133283693, + "grad_norm": 0.005169266313044711, + "kl": 0.067169189453125, + "learning_rate": 4.2096663434784075e-08, + "loss": 6.709879380650818e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4375, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.375, + "completions/mean_length": 77.30208492279053, + "completions/min_length": 24.625, + "epoch": 8.698932737652022, + "grad_norm": 0.002661207865047449, + "kl": 0.06329345703125, + "learning_rate": 4.197004998066617e-08, + "loss": 6.326305447146297e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4376, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.375, + "completions/mean_length": 69.18750190734863, + "completions/min_length": 27.875, + "epoch": 8.700918342020353, + "grad_norm": 0.004350187649417303, + "kl": 0.0780029296875, + "learning_rate": 4.18436188780546e-08, + "loss": 7.804400229360908e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4377, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.25, + "completions/mean_length": 71.90625190734863, + "completions/min_length": 25.0, + "epoch": 8.702903946388682, + "grad_norm": 0.0026534479251968178, + "kl": 0.063751220703125, + "learning_rate": 4.171737017728433e-08, + "loss": 6.37941702734679e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4378, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 73.72916841506958, + "completions/min_length": 27.25, + "epoch": 8.704889550757011, + "grad_norm": 0.003167417594718814, + "kl": 0.073577880859375, + "learning_rate": 4.1591303928617816e-08, + "loss": 7.360319432336837e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4379, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.375, + "completions/mean_length": 59.93750190734863, + "completions/min_length": 22.0, + "epoch": 8.706875155125342, + "grad_norm": 0.0029642928326458155, + "kl": 0.069183349609375, + "learning_rate": 4.146542018224447e-08, + "loss": 6.91590248607099e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4380, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.25, + "completions/mean_length": 70.87500143051147, + "completions/min_length": 26.875, + "epoch": 8.708860759493671, + "grad_norm": 0.003076930544420089, + "kl": 0.0693359375, + "learning_rate": 4.133971898828148e-08, + "loss": 6.934001430636272e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4381, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.0, + "completions/mean_length": 73.26041841506958, + "completions/min_length": 28.25, + "epoch": 8.710846363862, + "grad_norm": 0.0026343066816678657, + "kl": 0.076690673828125, + "learning_rate": 4.121420039677315e-08, + "loss": 7.668466423638165e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4382, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 69.39583587646484, + "completions/min_length": 24.625, + "epoch": 8.71283196823033, + "grad_norm": 0.0025134000011850003, + "kl": 0.0633544921875, + "learning_rate": 4.1088864457691e-08, + "loss": 6.340233085211366e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4383, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 71.11458492279053, + "completions/min_length": 24.25, + "epoch": 8.71481757259866, + "grad_norm": 0.5866298080370995, + "kl": 0.060516357421875, + "learning_rate": 4.096371122093406e-08, + "loss": -0.0036966167390346527, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4384, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 71.36458539962769, + "completions/min_length": 29.125, + "epoch": 8.71680317696699, + "grad_norm": 0.005658364354300024, + "kl": 0.075836181640625, + "learning_rate": 4.0838740736328424e-08, + "loss": 7.569265289930627e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4385, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 71.23958492279053, + "completions/min_length": 20.625, + "epoch": 8.718788781335318, + "grad_norm": 0.004745203322434925, + "kl": 0.060455322265625, + "learning_rate": 4.071395305362757e-08, + "loss": 6.045115151209757e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4386, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 71.3229193687439, + "completions/min_length": 28.125, + "epoch": 8.720774385703649, + "grad_norm": 0.0030081968566983337, + "kl": 0.0743408203125, + "learning_rate": 4.05893482225122e-08, + "loss": 7.430857658619061e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4387, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 68.8854193687439, + "completions/min_length": 22.125, + "epoch": 8.722759990071978, + "grad_norm": 0.0032003860535846514, + "kl": 0.073699951171875, + "learning_rate": 4.046492629259002e-08, + "loss": 7.387294317595661e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4388, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 66.57291841506958, + "completions/min_length": 27.375, + "epoch": 8.724745594440307, + "grad_norm": 0.010367991392090112, + "kl": 0.0819091796875, + "learning_rate": 4.034068731339618e-08, + "loss": 8.194800466299057e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4389, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 74.59375095367432, + "completions/min_length": 21.75, + "epoch": 8.726731198808638, + "grad_norm": 0.5760345758111557, + "kl": 0.524017333984375, + "learning_rate": 4.021663133439279e-08, + "loss": 0.014094110578298569, + "memory(GiB)": 94.21, + "reward": 1.9583333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9583333358168602, + "rewards/CineAccuracyORM/std": 0.06154574826359749, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4390, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.25, + "completions/mean_length": 78.82291841506958, + "completions/min_length": 27.375, + "epoch": 8.728716803176967, + "grad_norm": 0.0044237752692234365, + "kl": 0.08050537109375, + "learning_rate": 4.0092758404969175e-08, + "loss": 8.051890472415835e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4391, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 67.22916841506958, + "completions/min_length": 33.0, + "epoch": 8.730702407545296, + "grad_norm": 0.0038700111106996136, + "kl": 0.056549072265625, + "learning_rate": 3.9969068574441824e-08, + "loss": 5.657468500430696e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4392, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.75, + "completions/mean_length": 74.37500286102295, + "completions/min_length": 23.5, + "epoch": 8.732688011913627, + "grad_norm": 0.002807277627931084, + "kl": 0.0853271484375, + "learning_rate": 3.9845561892054403e-08, + "loss": 8.529365004505962e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4393, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 73.4166693687439, + "completions/min_length": 23.75, + "epoch": 8.734673616281956, + "grad_norm": 0.0029709371056979585, + "kl": 0.07586669921875, + "learning_rate": 3.972223840697736e-08, + "loss": 7.577847281936556e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4394, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 62.10416793823242, + "completions/min_length": 22.375, + "epoch": 8.736659220650285, + "grad_norm": 0.005011853020338243, + "kl": 0.068206787109375, + "learning_rate": 3.9599098168308584e-08, + "loss": 6.820028647780418e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4395, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 68.53125190734863, + "completions/min_length": 25.5, + "epoch": 8.738644825018614, + "grad_norm": 1.1299773243689113, + "kl": 0.057586669921875, + "learning_rate": 3.94761412250727e-08, + "loss": 5.751972639700398e-05, + "memory(GiB)": 94.21, + "reward": 1.9791666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9791666716337204, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4396, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.625, + "completions/mean_length": 69.00000286102295, + "completions/min_length": 22.625, + "epoch": 8.740630429386945, + "grad_norm": 0.003192592629962907, + "kl": 0.0638427734375, + "learning_rate": 3.93533676262216e-08, + "loss": 6.379103433573619e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4397, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 65.76041793823242, + "completions/min_length": 21.375, + "epoch": 8.742616033755274, + "grad_norm": 0.006871814102693593, + "kl": 0.0655517578125, + "learning_rate": 3.9230777420634074e-08, + "loss": 6.54618997941725e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4398, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 64.92708539962769, + "completions/min_length": 25.75, + "epoch": 8.744601638123603, + "grad_norm": 1.151784311920379, + "kl": 0.0804443359375, + "learning_rate": 3.9108370657115694e-08, + "loss": 0.007262674625962973, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4399, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 69.07291889190674, + "completions/min_length": 25.625, + "epoch": 8.746587242491934, + "grad_norm": 0.0027087210847725274, + "kl": 0.0643310546875, + "learning_rate": 3.898614738439954e-08, + "loss": 6.434237002395093e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4400, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 71.98958492279053, + "completions/min_length": 24.0, + "epoch": 8.748572846860263, + "grad_norm": 2.4739733532689145, + "kl": 0.07244873046875, + "learning_rate": 3.886410765114512e-08, + "loss": -0.0008494257926940918, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4401, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 81.46875286102295, + "completions/min_length": 26.375, + "epoch": 8.750558451228592, + "grad_norm": 0.0027276815159795664, + "kl": 0.059112548828125, + "learning_rate": 3.874225150593896e-08, + "loss": 5.905494253966026e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4402, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 63.96875238418579, + "completions/min_length": 24.25, + "epoch": 8.752544055596923, + "grad_norm": 0.00242544359179178, + "kl": 0.053558349609375, + "learning_rate": 3.8620578997294875e-08, + "loss": 5.351314030122012e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4403, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.0, + "completions/mean_length": 69.58333587646484, + "completions/min_length": 22.875, + "epoch": 8.754529659965252, + "grad_norm": 0.0026694014466304614, + "kl": 0.081207275390625, + "learning_rate": 3.849909017365299e-08, + "loss": 8.125405292958021e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4404, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.5, + "completions/mean_length": 71.13541841506958, + "completions/min_length": 28.375, + "epoch": 8.756515264333581, + "grad_norm": 0.004636396487296402, + "kl": 0.063873291015625, + "learning_rate": 3.8377785083380854e-08, + "loss": 6.384911830537021e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4405, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 65.00000190734863, + "completions/min_length": 18.875, + "epoch": 8.758500868701912, + "grad_norm": 0.0036103020447160886, + "kl": 0.060943603515625, + "learning_rate": 3.825666377477238e-08, + "loss": 6.0983551520621404e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4406, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 197.25, + "completions/mean_length": 80.3541693687439, + "completions/min_length": 20.25, + "epoch": 8.760486473070241, + "grad_norm": 0.004808304235861437, + "kl": 0.06878662109375, + "learning_rate": 3.8135726296048666e-08, + "loss": 6.880710134282708e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4407, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 74.50000238418579, + "completions/min_length": 26.375, + "epoch": 8.76247207743857, + "grad_norm": 0.005372567667871199, + "kl": 0.067779541015625, + "learning_rate": 3.801497269535764e-08, + "loss": 6.781538104405627e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4408, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.0, + "completions/mean_length": 80.19791841506958, + "completions/min_length": 26.375, + "epoch": 8.764457681806899, + "grad_norm": 0.003009130406111027, + "kl": 0.075469970703125, + "learning_rate": 3.789440302077362e-08, + "loss": 7.546728011220694e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4409, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 57.843751430511475, + "completions/min_length": 19.375, + "epoch": 8.76644328617523, + "grad_norm": 0.004252710038085795, + "kl": 0.056884765625, + "learning_rate": 3.777401732029822e-08, + "loss": 5.687782686436549e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4410, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 74.333336353302, + "completions/min_length": 30.125, + "epoch": 8.768428890543559, + "grad_norm": 0.0025588154312017426, + "kl": 0.070098876953125, + "learning_rate": 3.765381564185943e-08, + "loss": 7.016959716565907e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4411, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 55.89583492279053, + "completions/min_length": 15.375, + "epoch": 8.770414494911888, + "grad_norm": 0.0069810790830688845, + "kl": 0.056427001953125, + "learning_rate": 3.753379803331197e-08, + "loss": 5.644853808917105e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4412, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.375, + "completions/mean_length": 62.833335876464844, + "completions/min_length": 23.125, + "epoch": 8.772400099280219, + "grad_norm": 0.0048134086733442275, + "kl": 0.058502197265625, + "learning_rate": 3.741396454243767e-08, + "loss": 5.844461702508852e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4413, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 63.11458492279053, + "completions/min_length": 22.5, + "epoch": 8.774385703648548, + "grad_norm": 0.0037792404684749395, + "kl": 0.097259521484375, + "learning_rate": 3.729431521694476e-08, + "loss": 9.723665425553918e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4414, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.375, + "completions/mean_length": 60.14583492279053, + "completions/min_length": 19.25, + "epoch": 8.776371308016877, + "grad_norm": 0.02407754325142591, + "kl": 0.059295654296875, + "learning_rate": 3.7174850104468027e-08, + "loss": 5.933472857577726e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4415, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 65.01041889190674, + "completions/min_length": 27.125, + "epoch": 8.778356912385208, + "grad_norm": 0.004422849079507957, + "kl": 0.057342529296875, + "learning_rate": 3.705556925256925e-08, + "loss": 5.734210935770534e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4416, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.25, + "completions/mean_length": 70.52083444595337, + "completions/min_length": 26.375, + "epoch": 8.780342516753537, + "grad_norm": 1.0285375341232266, + "kl": 0.061981201171875, + "learning_rate": 3.6936472708736567e-08, + "loss": 0.018292339518666267, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4417, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.25, + "completions/mean_length": 66.67708492279053, + "completions/min_length": 22.375, + "epoch": 8.782328121121866, + "grad_norm": 0.00464340737462757, + "kl": 0.08123779296875, + "learning_rate": 3.6817560520384926e-08, + "loss": 8.124877786030993e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4418, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 67.72916841506958, + "completions/min_length": 26.125, + "epoch": 8.784313725490197, + "grad_norm": 0.0026755899493867154, + "kl": 0.05316162109375, + "learning_rate": 3.6698832734855744e-08, + "loss": 5.3143041441217065e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4419, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 72.12500190734863, + "completions/min_length": 30.125, + "epoch": 8.786299329858526, + "grad_norm": 0.0037001209784178115, + "kl": 0.0810546875, + "learning_rate": 3.658028939941715e-08, + "loss": 8.116249227896333e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4420, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 64.01041984558105, + "completions/min_length": 19.125, + "epoch": 8.788284934226855, + "grad_norm": 0.005853399075497287, + "kl": 0.0728302001953125, + "learning_rate": 3.646193056126384e-08, + "loss": 7.287230982910842e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4421, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 72.98958587646484, + "completions/min_length": 22.0, + "epoch": 8.790270538595184, + "grad_norm": 0.0028696479993686763, + "kl": 0.074371337890625, + "learning_rate": 3.63437562675169e-08, + "loss": 7.443155482178554e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4422, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 62.802085399627686, + "completions/min_length": 20.125, + "epoch": 8.792256142963515, + "grad_norm": 0.003184243215509938, + "kl": 0.064910888671875, + "learning_rate": 3.622576656522397e-08, + "loss": 6.490507803391665e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4423, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 70.16666841506958, + "completions/min_length": 27.0, + "epoch": 8.794241747331844, + "grad_norm": 0.004282120609053288, + "kl": 0.077484130859375, + "learning_rate": 3.6107961501359475e-08, + "loss": 7.750788063276559e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4424, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 65.53125143051147, + "completions/min_length": 26.25, + "epoch": 8.796227351700173, + "grad_norm": 0.004137419031221627, + "kl": 0.063446044921875, + "learning_rate": 3.59903411228239e-08, + "loss": 6.346201553242281e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4425, + "train_speed(iter/s)": 0.022634 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 64.32291841506958, + "completions/min_length": 19.5, + "epoch": 8.798212956068504, + "grad_norm": 0.0024874070700725667, + "kl": 0.062042236328125, + "learning_rate": 3.587290547644456e-08, + "loss": 6.203790690051392e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4426, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.25, + "completions/mean_length": 74.083336353302, + "completions/min_length": 24.5, + "epoch": 8.800198560436833, + "grad_norm": 0.0026815744212083303, + "kl": 0.07843017578125, + "learning_rate": 3.575565460897511e-08, + "loss": 7.8457836934831e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4427, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 68.78125286102295, + "completions/min_length": 27.375, + "epoch": 8.802184164805162, + "grad_norm": 0.003388962211476853, + "kl": 0.059234619140625, + "learning_rate": 3.563858856709556e-08, + "loss": 5.917256203247234e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4428, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 66.80208492279053, + "completions/min_length": 21.125, + "epoch": 8.804169769173493, + "grad_norm": 1.3006128245096253, + "kl": 0.0682373046875, + "learning_rate": 3.55217073974125e-08, + "loss": -0.004407038446515799, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4429, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.625, + "completions/mean_length": 73.71875381469727, + "completions/min_length": 22.75, + "epoch": 8.806155373541822, + "grad_norm": 0.0028610550770152713, + "kl": 0.06719970703125, + "learning_rate": 3.540501114645872e-08, + "loss": 6.713801121804863e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4430, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 67.81250190734863, + "completions/min_length": 21.75, + "epoch": 8.808140977910151, + "grad_norm": 0.0035833374803448483, + "kl": 0.071502685546875, + "learning_rate": 3.5288499860693486e-08, + "loss": 7.149644079618156e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4431, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.375, + "completions/mean_length": 69.75000143051147, + "completions/min_length": 27.875, + "epoch": 8.810126582278482, + "grad_norm": 0.003997033114191241, + "kl": 0.06939697265625, + "learning_rate": 3.517217358650254e-08, + "loss": 6.936653517186642e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4432, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.875, + "completions/mean_length": 69.45833492279053, + "completions/min_length": 23.875, + "epoch": 8.81211218664681, + "grad_norm": 0.004025274146046803, + "kl": 0.0753173828125, + "learning_rate": 3.5056032370197665e-08, + "loss": 7.519741484429687e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4433, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 65.29166889190674, + "completions/min_length": 21.375, + "epoch": 8.81409779101514, + "grad_norm": 0.005281899129190168, + "kl": 0.092071533203125, + "learning_rate": 3.494007625801731e-08, + "loss": 9.209560084855184e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4434, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 68.13541889190674, + "completions/min_length": 21.875, + "epoch": 8.816083395383469, + "grad_norm": 0.002804457619560133, + "kl": 0.05792236328125, + "learning_rate": 3.482430529612612e-08, + "loss": 5.7928751630242914e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4435, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.125, + "completions/mean_length": 68.02083444595337, + "completions/min_length": 27.0, + "epoch": 8.8180689997518, + "grad_norm": 0.0032410795444556086, + "kl": 0.064422607421875, + "learning_rate": 3.4708719530614826e-08, + "loss": 6.43706662231125e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4436, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.75, + "completions/mean_length": 62.958335876464844, + "completions/min_length": 22.0, + "epoch": 8.820054604120129, + "grad_norm": 2.485340066728084, + "kl": 0.082275390625, + "learning_rate": 3.459331900750073e-08, + "loss": -0.006537230685353279, + "memory(GiB)": 94.21, + "reward": 1.9166666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9166666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4437, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 67.81250095367432, + "completions/min_length": 22.0, + "epoch": 8.822040208488458, + "grad_norm": 0.0031650147717010935, + "kl": 0.061614990234375, + "learning_rate": 3.447810377272725e-08, + "loss": 6.164857768453658e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4438, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 66.01041841506958, + "completions/min_length": 24.25, + "epoch": 8.824025812856789, + "grad_norm": 0.004814892592620895, + "kl": 0.06744384765625, + "learning_rate": 3.436307387216386e-08, + "loss": 6.746799772372469e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4439, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 71.22916793823242, + "completions/min_length": 38.375, + "epoch": 8.826011417225118, + "grad_norm": 0.0022152839167825073, + "kl": 0.05316162109375, + "learning_rate": 3.424822935160654e-08, + "loss": 5.3128544095670804e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4440, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 71.45833492279053, + "completions/min_length": 24.0, + "epoch": 8.827997021593447, + "grad_norm": 0.003946494427743627, + "kl": 0.08221435546875, + "learning_rate": 3.413357025677743e-08, + "loss": 8.225941564887762e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4441, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.25, + "completions/mean_length": 64.50000238418579, + "completions/min_length": 22.875, + "epoch": 8.829982625961778, + "grad_norm": 0.0025197729664963964, + "kl": 0.064361572265625, + "learning_rate": 3.401909663332464e-08, + "loss": 6.435364775825292e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4442, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.5, + "completions/mean_length": 68.42708587646484, + "completions/min_length": 27.25, + "epoch": 8.831968230330107, + "grad_norm": 0.0038238566717355284, + "kl": 0.057952880859375, + "learning_rate": 3.3904808526822594e-08, + "loss": 5.7944835134549066e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4443, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.125, + "completions/mean_length": 66.22916889190674, + "completions/min_length": 22.5, + "epoch": 8.833953834698436, + "grad_norm": 0.0058701856290664265, + "kl": 0.0704345703125, + "learning_rate": 3.379070598277184e-08, + "loss": 7.049596024444327e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4444, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 70.01041889190674, + "completions/min_length": 30.375, + "epoch": 8.835939439066767, + "grad_norm": 0.005089433792711441, + "kl": 0.069305419921875, + "learning_rate": 3.367678904659904e-08, + "loss": 6.925516936462373e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4445, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 72.94791793823242, + "completions/min_length": 28.0, + "epoch": 8.837925043435096, + "grad_norm": 0.004667625571964998, + "kl": 0.075897216796875, + "learning_rate": 3.356305776365692e-08, + "loss": 7.590887253172696e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4446, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.75, + "completions/mean_length": 68.47916841506958, + "completions/min_length": 26.625, + "epoch": 8.839910647803425, + "grad_norm": 0.006422566536172555, + "kl": 0.078399658203125, + "learning_rate": 3.344951217922437e-08, + "loss": 7.840842590667307e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4447, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.75, + "completions/mean_length": 63.30208420753479, + "completions/min_length": 20.375, + "epoch": 8.841896252171754, + "grad_norm": 0.00516401138414613, + "kl": 0.061767578125, + "learning_rate": 3.333615233850634e-08, + "loss": 6.173625297378749e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4448, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.25, + "completions/mean_length": 72.44791889190674, + "completions/min_length": 21.625, + "epoch": 8.843881856540085, + "grad_norm": 0.006124480711216913, + "kl": 0.063812255859375, + "learning_rate": 3.322297828663373e-08, + "loss": 6.38872297713533e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4449, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 66.70833539962769, + "completions/min_length": 27.125, + "epoch": 8.845867460908414, + "grad_norm": 0.004467890066077925, + "kl": 0.068695068359375, + "learning_rate": 3.310999006866366e-08, + "loss": 6.871936784591526e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4450, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.5, + "completions/mean_length": 63.92708444595337, + "completions/min_length": 25.0, + "epoch": 8.847853065276743, + "grad_norm": 0.002714490151785596, + "kl": 0.078887939453125, + "learning_rate": 3.29971877295791e-08, + "loss": 7.883248326834291e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4451, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.375, + "completions/mean_length": 58.90625238418579, + "completions/min_length": 20.5, + "epoch": 8.849838669645074, + "grad_norm": 2.2109730144131143, + "kl": 2.998443603515625, + "learning_rate": 3.288457131428895e-08, + "loss": 0.0029898881912231445, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4452, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 75.65625238418579, + "completions/min_length": 24.0, + "epoch": 8.851824274013403, + "grad_norm": 0.0038133840964780495, + "kl": 0.064971923828125, + "learning_rate": 3.2772140867628417e-08, + "loss": 6.495663546957076e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4453, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 58.968751430511475, + "completions/min_length": 24.125, + "epoch": 8.853809878381732, + "grad_norm": 0.0029025345100421912, + "kl": 0.053070068359375, + "learning_rate": 3.265989643435829e-08, + "loss": 5.30916076968424e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4454, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 72.22916889190674, + "completions/min_length": 25.625, + "epoch": 8.855795482750063, + "grad_norm": 0.0026259061090380424, + "kl": 0.054290771484375, + "learning_rate": 3.25478380591655e-08, + "loss": 5.428859367384575e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4455, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 70.14583444595337, + "completions/min_length": 18.375, + "epoch": 8.857781087118392, + "grad_norm": 0.7393255715903666, + "kl": 0.0880126953125, + "learning_rate": 3.243596578666302e-08, + "loss": -0.0036449681501835585, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4456, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 66.65625190734863, + "completions/min_length": 24.25, + "epoch": 8.85976669148672, + "grad_norm": 0.006839111909673298, + "kl": 0.080902099609375, + "learning_rate": 3.2324279661389456e-08, + "loss": 8.082977728918195e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4457, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 63.91666841506958, + "completions/min_length": 23.625, + "epoch": 8.861752295855052, + "grad_norm": 0.00429611916964378, + "kl": 0.059844970703125, + "learning_rate": 3.2212779727809504e-08, + "loss": 5.985811003483832e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4458, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 68.33333587646484, + "completions/min_length": 24.5, + "epoch": 8.86373790022338, + "grad_norm": 0.003965196732980484, + "kl": 0.057403564453125, + "learning_rate": 3.2101466030313715e-08, + "loss": 5.743458677898161e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4459, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 67.33333539962769, + "completions/min_length": 24.875, + "epoch": 8.86572350459171, + "grad_norm": 0.004674087290420187, + "kl": 0.076202392578125, + "learning_rate": 3.199033861321826e-08, + "loss": 7.612827175762504e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4460, + "train_speed(iter/s)": 0.022633 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 64.38541841506958, + "completions/min_length": 23.5, + "epoch": 8.867709108960039, + "grad_norm": 0.003531753849818053, + "kl": 0.06182861328125, + "learning_rate": 3.187939752076546e-08, + "loss": 6.178292096592486e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4461, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.125, + "completions/mean_length": 76.15625143051147, + "completions/min_length": 25.75, + "epoch": 8.86969471332837, + "grad_norm": 0.0034799343435747095, + "kl": 0.066497802734375, + "learning_rate": 3.176864279712338e-08, + "loss": 6.646820838795975e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4462, + "train_speed(iter/s)": 0.022632 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 205.25, + "completions/mean_length": 74.10416793823242, + "completions/min_length": 25.75, + "epoch": 8.871680317696699, + "grad_norm": 0.003168066818873465, + "kl": 0.063873291015625, + "learning_rate": 3.165807448638574e-08, + "loss": 6.370982009684667e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4463, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 191.0, + "completions/mean_length": 71.46875238418579, + "completions/min_length": 30.875, + "epoch": 8.873665922065028, + "grad_norm": 1.2047363273355765, + "kl": 0.07232666015625, + "learning_rate": 3.1547692632572285e-08, + "loss": -0.013890378177165985, + "memory(GiB)": 94.21, + "reward": 1.7291666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7291666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4464, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.5, + "completions/mean_length": 69.16666889190674, + "completions/min_length": 25.125, + "epoch": 8.875651526433359, + "grad_norm": 0.0030995843958764964, + "kl": 0.084930419921875, + "learning_rate": 3.143749727962824e-08, + "loss": 8.48943818709813e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4465, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.375, + "completions/mean_length": 76.41666889190674, + "completions/min_length": 26.375, + "epoch": 8.877637130801688, + "grad_norm": 0.0028964845835545857, + "kl": 0.060882568359375, + "learning_rate": 3.13274884714248e-08, + "loss": 6.088173540774733e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4466, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.125, + "completions/mean_length": 78.45833492279053, + "completions/min_length": 27.75, + "epoch": 8.879622735170017, + "grad_norm": 0.002830668593501242, + "kl": 0.05718994140625, + "learning_rate": 3.1217666251758866e-08, + "loss": 5.7176857808372006e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4467, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.125, + "completions/mean_length": 65.16666889190674, + "completions/min_length": 24.75, + "epoch": 8.881608339538348, + "grad_norm": 0.003762163116174367, + "kl": 0.06549072265625, + "learning_rate": 3.11080306643528e-08, + "loss": 6.54999166727066e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4468, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 62.218751430511475, + "completions/min_length": 19.875, + "epoch": 8.883593943906677, + "grad_norm": 0.0038819703398893365, + "kl": 0.0613250732421875, + "learning_rate": 3.099858175285519e-08, + "loss": 6.128475069999695e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4469, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.375, + "completions/mean_length": 79.77083444595337, + "completions/min_length": 27.25, + "epoch": 8.885579548275006, + "grad_norm": 0.00418831111783306, + "kl": 0.062774658203125, + "learning_rate": 3.088931956083979e-08, + "loss": 6.281497917370871e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4470, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 65.27083492279053, + "completions/min_length": 24.25, + "epoch": 8.887565152643337, + "grad_norm": 0.0036727808494226638, + "kl": 0.07403564453125, + "learning_rate": 3.078024413180619e-08, + "loss": 7.40958348615095e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4471, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 79.72916889190674, + "completions/min_length": 30.375, + "epoch": 8.889550757011666, + "grad_norm": 0.0037651245301109047, + "kl": 0.066009521484375, + "learning_rate": 3.067135550917976e-08, + "loss": 6.60418882034719e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4472, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.0, + "completions/mean_length": 76.17708587646484, + "completions/min_length": 26.5, + "epoch": 8.891536361379995, + "grad_norm": 0.003927564094897929, + "kl": 0.0704345703125, + "learning_rate": 3.056265373631128e-08, + "loss": 7.03707555658184e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4473, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.25, + "completions/mean_length": 73.54166889190674, + "completions/min_length": 23.25, + "epoch": 8.893521965748324, + "grad_norm": 0.0050882071580019795, + "kl": 0.07208251953125, + "learning_rate": 3.045413885647735e-08, + "loss": 7.215599907794967e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4474, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 69.84375238418579, + "completions/min_length": 22.375, + "epoch": 8.895507570116655, + "grad_norm": 0.0038093511005711463, + "kl": 0.070556640625, + "learning_rate": 3.034581091287996e-08, + "loss": 7.051750435493886e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4475, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.125, + "completions/mean_length": 66.43750143051147, + "completions/min_length": 20.125, + "epoch": 8.897493174484984, + "grad_norm": 0.0022130293660831413, + "kl": 0.06146240234375, + "learning_rate": 3.023766994864679e-08, + "loss": 6.145128281787038e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4476, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 65.31250238418579, + "completions/min_length": 24.375, + "epoch": 8.899478778853313, + "grad_norm": 0.0022639850738775543, + "kl": 0.0540771484375, + "learning_rate": 3.012971600683123e-08, + "loss": 5.4102794820209965e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4477, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 75.77083396911621, + "completions/min_length": 24.25, + "epoch": 8.901464383221644, + "grad_norm": 0.003339713302168338, + "kl": 0.06396484375, + "learning_rate": 3.0021949130411895e-08, + "loss": 6.39510981272906e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4478, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 67.50000238418579, + "completions/min_length": 29.625, + "epoch": 8.903449987589973, + "grad_norm": 0.0033473584408936904, + "kl": 0.064300537109375, + "learning_rate": 2.9914369362293026e-08, + "loss": 6.428691995097324e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4479, + "train_speed(iter/s)": 0.022631 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 237.875, + "completions/mean_length": 80.13541793823242, + "completions/min_length": 26.375, + "epoch": 8.905435591958302, + "grad_norm": 0.002741732426725497, + "kl": 0.06915283203125, + "learning_rate": 2.980697674530458e-08, + "loss": 6.910170486662537e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4480, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.0, + "completions/mean_length": 68.87500238418579, + "completions/min_length": 25.75, + "epoch": 8.907421196326633, + "grad_norm": 0.002350342760807624, + "kl": 0.054473876953125, + "learning_rate": 2.969977132220175e-08, + "loss": 5.447406874736771e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4481, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 81.06250238418579, + "completions/min_length": 37.125, + "epoch": 8.909406800694962, + "grad_norm": 0.006171679161739611, + "kl": 0.077606201171875, + "learning_rate": 2.9592753135665283e-08, + "loss": 7.752048259135336e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4482, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 73.19791984558105, + "completions/min_length": 29.875, + "epoch": 8.91139240506329, + "grad_norm": 0.0024595631069271534, + "kl": 0.060394287109375, + "learning_rate": 2.9485922228301596e-08, + "loss": 6.0439033404691145e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4483, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.0, + "completions/mean_length": 64.30208492279053, + "completions/min_length": 26.125, + "epoch": 8.913378009431622, + "grad_norm": 0.003235444483632454, + "kl": 0.06634521484375, + "learning_rate": 2.9379278642642058e-08, + "loss": 6.62503152852878e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4484, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.125, + "completions/mean_length": 62.83333492279053, + "completions/min_length": 23.625, + "epoch": 8.91536361379995, + "grad_norm": 0.004710183969948051, + "kl": 0.064178466796875, + "learning_rate": 2.927282242114404e-08, + "loss": 6.42067680018954e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4485, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 61.87500190734863, + "completions/min_length": 24.0, + "epoch": 8.91734921816828, + "grad_norm": 0.0032660739398445615, + "kl": 0.06805419921875, + "learning_rate": 2.9166553606189858e-08, + "loss": 6.806520104873925e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4486, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 62.66666841506958, + "completions/min_length": 19.75, + "epoch": 8.919334822536609, + "grad_norm": 0.0033170650922295063, + "kl": 0.05517578125, + "learning_rate": 2.9060472240087507e-08, + "loss": 5.518686884897761e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4487, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.5, + "completions/mean_length": 73.43750238418579, + "completions/min_length": 28.625, + "epoch": 8.92132042690494, + "grad_norm": 0.004296993126054609, + "kl": 0.0579833984375, + "learning_rate": 2.895457836507015e-08, + "loss": 5.796835102955811e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4488, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 70.08333539962769, + "completions/min_length": 27.125, + "epoch": 8.923306031273269, + "grad_norm": 0.003963143165516153, + "kl": 0.06988525390625, + "learning_rate": 2.884887202329639e-08, + "loss": 6.986509106354788e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4489, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.125, + "completions/mean_length": 54.69791841506958, + "completions/min_length": 21.125, + "epoch": 8.925291635641598, + "grad_norm": 0.0043471785608314994, + "kl": 0.056396484375, + "learning_rate": 2.8743353256850345e-08, + "loss": 5.6419750762870535e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4490, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.75, + "completions/mean_length": 66.95833539962769, + "completions/min_length": 17.375, + "epoch": 8.927277240009929, + "grad_norm": 0.0036562730798006997, + "kl": 0.057525634765625, + "learning_rate": 2.8638022107741134e-08, + "loss": 5.74771074752789e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4491, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 71.39583539962769, + "completions/min_length": 18.625, + "epoch": 8.929262844378258, + "grad_norm": 0.005919954304850223, + "kl": 0.0604248046875, + "learning_rate": 2.8532878617903377e-08, + "loss": 6.0426111303968355e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4492, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 68.73958444595337, + "completions/min_length": 25.375, + "epoch": 8.931248448746587, + "grad_norm": 0.002599222969608864, + "kl": 0.063446044921875, + "learning_rate": 2.842792282919698e-08, + "loss": 6.343086715787649e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4493, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.125, + "completions/mean_length": 69.89583539962769, + "completions/min_length": 27.625, + "epoch": 8.933234053114917, + "grad_norm": 0.0037290223287503323, + "kl": 0.065521240234375, + "learning_rate": 2.8323154783406967e-08, + "loss": 6.557120650541037e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4494, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.0, + "completions/mean_length": 59.760417461395264, + "completions/min_length": 26.0, + "epoch": 8.935219657483247, + "grad_norm": 0.002729830477485634, + "kl": 0.05963134765625, + "learning_rate": 2.8218574522243798e-08, + "loss": 5.9629186580423266e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4495, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.875, + "completions/mean_length": 69.35416841506958, + "completions/min_length": 19.0, + "epoch": 8.937205261851576, + "grad_norm": 2.2930029865566146, + "kl": 0.08111572265625, + "learning_rate": 2.811418208734323e-08, + "loss": 0.0035009586717933416, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4496, + "train_speed(iter/s)": 0.02263 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.25, + "completions/mean_length": 71.10416793823242, + "completions/min_length": 25.5, + "epoch": 8.939190866219906, + "grad_norm": 0.002977519574416739, + "kl": 0.06390380859375, + "learning_rate": 2.800997752026596e-08, + "loss": 6.391222996171564e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4497, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 69.19791841506958, + "completions/min_length": 21.125, + "epoch": 8.941176470588236, + "grad_norm": 0.0028814438434514775, + "kl": 0.069244384765625, + "learning_rate": 2.790596086249819e-08, + "loss": 6.917622522450984e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4498, + "train_speed(iter/s)": 0.022629 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 56.520835399627686, + "completions/min_length": 17.125, + "epoch": 8.943162074956565, + "grad_norm": 0.007042173266484335, + "kl": 0.08233642578125, + "learning_rate": 2.7802132155451075e-08, + "loss": 8.241958857979625e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4499, + "train_speed(iter/s)": 0.022628 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.125, + "completions/mean_length": 62.250001430511475, + "completions/min_length": 24.125, + "epoch": 8.945147679324894, + "grad_norm": 0.0038935001105113195, + "kl": 0.05487060546875, + "learning_rate": 2.7698491440460992e-08, + "loss": 5.484827488544397e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4500, + "train_speed(iter/s)": 0.022627 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.0, + "completions/mean_length": 70.73958587646484, + "completions/min_length": 19.625, + "epoch": 8.947133283693224, + "grad_norm": 0.9812121683987876, + "kl": 0.055023193359375, + "learning_rate": 2.7595038758789656e-08, + "loss": -0.008326666429638863, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4501, + "train_speed(iter/s)": 0.022624 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 65.03125190734863, + "completions/min_length": 16.0, + "epoch": 8.949118888061554, + "grad_norm": 0.005085516034028818, + "kl": 0.069305419921875, + "learning_rate": 2.7491774151623682e-08, + "loss": 6.928383663762361e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4502, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.625, + "completions/mean_length": 67.38541793823242, + "completions/min_length": 16.75, + "epoch": 8.951104492429883, + "grad_norm": 0.0028909097368808533, + "kl": 0.055267333984375, + "learning_rate": 2.7388697660074956e-08, + "loss": 5.5298994993790984e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4503, + "train_speed(iter/s)": 0.022622 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 77.00000286102295, + "completions/min_length": 28.25, + "epoch": 8.953090096798213, + "grad_norm": 0.003004109526001115, + "kl": 0.065277099609375, + "learning_rate": 2.7285809325180487e-08, + "loss": 6.529617530759424e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4504, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.25, + "completions/mean_length": 56.458335876464844, + "completions/min_length": 19.375, + "epoch": 8.955075701166543, + "grad_norm": 0.00517998306879406, + "kl": 0.055572509765625, + "learning_rate": 2.718310918790223e-08, + "loss": 5.5568874813616276e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4505, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 66.03125238418579, + "completions/min_length": 25.625, + "epoch": 8.957061305534872, + "grad_norm": 0.002693555649533178, + "kl": 0.0514068603515625, + "learning_rate": 2.708059728912737e-08, + "loss": 5.1350198191357777e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4506, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 80.77083587646484, + "completions/min_length": 32.25, + "epoch": 8.959046909903202, + "grad_norm": 0.005137775433693113, + "kl": 0.080780029296875, + "learning_rate": 2.697827366966804e-08, + "loss": 8.076422091107816e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4507, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 75.22916793823242, + "completions/min_length": 24.375, + "epoch": 8.961032514271531, + "grad_norm": 0.0027825772492929735, + "kl": 0.066741943359375, + "learning_rate": 2.6876138370261424e-08, + "loss": 6.667750130873173e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4508, + "train_speed(iter/s)": 0.022623 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 193.0, + "completions/mean_length": 76.41666984558105, + "completions/min_length": 26.625, + "epoch": 8.96301811863986, + "grad_norm": 0.006234419382710262, + "kl": 0.087188720703125, + "learning_rate": 2.6774191431569838e-08, + "loss": 8.721857739146799e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4509, + "train_speed(iter/s)": 0.022622 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.75, + "completions/mean_length": 74.01041889190674, + "completions/min_length": 26.375, + "epoch": 8.965003723008191, + "grad_norm": 0.005492073554030496, + "kl": 0.062591552734375, + "learning_rate": 2.6672432894180585e-08, + "loss": 6.2644096033182e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4510, + "train_speed(iter/s)": 0.022621 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.625, + "completions/mean_length": 77.55208396911621, + "completions/min_length": 26.0, + "epoch": 8.96698932737652, + "grad_norm": 0.0028167848714367267, + "kl": 0.069549560546875, + "learning_rate": 2.657086279860571e-08, + "loss": 6.952951662242413e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4511, + "train_speed(iter/s)": 0.022621 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.5, + "completions/mean_length": 60.60416889190674, + "completions/min_length": 27.875, + "epoch": 8.96897493174485, + "grad_norm": 0.006133395738297823, + "kl": 0.055084228515625, + "learning_rate": 2.6469481185282695e-08, + "loss": 5.5125594371929765e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4512, + "train_speed(iter/s)": 0.02262 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.375, + "completions/mean_length": 73.89583587646484, + "completions/min_length": 27.125, + "epoch": 8.970960536113179, + "grad_norm": 0.0024352518387654575, + "kl": 0.06829833984375, + "learning_rate": 2.6368288094573532e-08, + "loss": 6.826894241385162e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4513, + "train_speed(iter/s)": 0.02262 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 70.19791889190674, + "completions/min_length": 19.875, + "epoch": 8.97294614048151, + "grad_norm": 1.050667784594347, + "kl": 0.076568603515625, + "learning_rate": 2.6267283566765442e-08, + "loss": 0.004599431063979864, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4514, + "train_speed(iter/s)": 0.022619 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 75.37500143051147, + "completions/min_length": 28.625, + "epoch": 8.974931744849838, + "grad_norm": 0.0026029328144589535, + "kl": 0.06512451171875, + "learning_rate": 2.6166467642070476e-08, + "loss": 6.50845468044281e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4515, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 66.89583539962769, + "completions/min_length": 20.75, + "epoch": 8.976917349218168, + "grad_norm": 0.8841244484512857, + "kl": 0.0635986328125, + "learning_rate": 2.606584036062548e-08, + "loss": 0.004274980630725622, + "memory(GiB)": 94.21, + "reward": 1.8958333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8958333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4516, + "train_speed(iter/s)": 0.022619 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 68.75000190734863, + "completions/min_length": 27.5, + "epoch": 8.978902953586498, + "grad_norm": 0.004487045753204406, + "kl": 0.076202392578125, + "learning_rate": 2.5965401762492567e-08, + "loss": 7.627331069670618e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4517, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.875, + "completions/mean_length": 66.02083492279053, + "completions/min_length": 23.875, + "epoch": 8.980888557954827, + "grad_norm": 0.008405509807377455, + "kl": 0.069122314453125, + "learning_rate": 2.586515188765831e-08, + "loss": 6.913597462698817e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4518, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.125, + "completions/mean_length": 70.86458539962769, + "completions/min_length": 25.0, + "epoch": 8.982874162323156, + "grad_norm": 0.0024611780120682943, + "kl": 0.061370849609375, + "learning_rate": 2.5765090776034336e-08, + "loss": 6.138101889519021e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4519, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.625, + "completions/mean_length": 64.90625238418579, + "completions/min_length": 26.875, + "epoch": 8.984859766691487, + "grad_norm": 0.003284181310148787, + "kl": 0.063568115234375, + "learning_rate": 2.566521846745723e-08, + "loss": 6.351979391183704e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4520, + "train_speed(iter/s)": 0.022617 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.25, + "completions/mean_length": 66.97916841506958, + "completions/min_length": 19.375, + "epoch": 8.986845371059816, + "grad_norm": 0.9651339550355862, + "kl": 0.105560302734375, + "learning_rate": 2.556553500168812e-08, + "loss": -0.0008393513853661716, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4521, + "train_speed(iter/s)": 0.022617 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 67.56250286102295, + "completions/min_length": 28.0, + "epoch": 8.988830975428145, + "grad_norm": 0.051794966933195435, + "kl": 0.0892333984375, + "learning_rate": 2.546604041841327e-08, + "loss": 8.941303531173617e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4522, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.375, + "completions/mean_length": 58.96875190734863, + "completions/min_length": 25.625, + "epoch": 8.990816579796476, + "grad_norm": 0.00306461668849533, + "kl": 0.0633697509765625, + "learning_rate": 2.536673475724349e-08, + "loss": 6.335699436021969e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4523, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 67.08333539962769, + "completions/min_length": 25.5, + "epoch": 8.992802184164805, + "grad_norm": 1.0298309989948347, + "kl": 0.097442626953125, + "learning_rate": 2.5267618057714544e-08, + "loss": 0.0046271877363324165, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.26020343601703644, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4524, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 68.53125190734863, + "completions/min_length": 22.375, + "epoch": 8.994787788533134, + "grad_norm": 0.021011481703648357, + "kl": 0.08575439453125, + "learning_rate": 2.5168690359286927e-08, + "loss": 8.559702837374061e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4525, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.0, + "completions/mean_length": 63.42708683013916, + "completions/min_length": 22.0, + "epoch": 8.996773392901463, + "grad_norm": 0.005276637889362204, + "kl": 0.074127197265625, + "learning_rate": 2.506995170134579e-08, + "loss": 7.416962762363255e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4526, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 73.27083587646484, + "completions/min_length": 21.5, + "epoch": 8.998758997269794, + "grad_norm": 0.005081345464096999, + "kl": 0.07293701171875, + "learning_rate": 2.4971402123201258e-08, + "loss": 7.289180939551443e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4527, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.375, + "completions/mean_length": 68.45833587646484, + "completions/min_length": 27.125, + "epoch": 9.001985604368329, + "grad_norm": 0.0032218905720488335, + "kl": 0.08721923828125, + "learning_rate": 2.487304166408788e-08, + "loss": 8.7178879766725e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4528, + "train_speed(iter/s)": 0.022618 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.75, + "completions/mean_length": 69.28125190734863, + "completions/min_length": 19.5, + "epoch": 9.00397120873666, + "grad_norm": 0.004029272536043304, + "kl": 0.05450439453125, + "learning_rate": 2.4774870363165002e-08, + "loss": 5.447335934150033e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4529, + "train_speed(iter/s)": 0.022617 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.875, + "completions/mean_length": 78.53125286102295, + "completions/min_length": 22.75, + "epoch": 9.005956813104989, + "grad_norm": 0.002614961219304078, + "kl": 0.05950927734375, + "learning_rate": 2.467688825951686e-08, + "loss": 5.948838224867359e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4530, + "train_speed(iter/s)": 0.022617 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 74.67708587646484, + "completions/min_length": 24.625, + "epoch": 9.007942417473318, + "grad_norm": 0.003909280033284893, + "kl": 0.077056884765625, + "learning_rate": 2.4579095392152195e-08, + "loss": 7.701186405029148e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4531, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 65.03125238418579, + "completions/min_length": 19.0, + "epoch": 9.009928021841649, + "grad_norm": 0.0028361100477399576, + "kl": 0.076446533203125, + "learning_rate": 2.448149180000436e-08, + "loss": 7.63984426157549e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4532, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 77.37500143051147, + "completions/min_length": 26.5, + "epoch": 9.011913626209978, + "grad_norm": 0.002492242390435187, + "kl": 0.063995361328125, + "learning_rate": 2.4384077521931557e-08, + "loss": 6.40070647932589e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4533, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 70.1354193687439, + "completions/min_length": 19.625, + "epoch": 9.013899230578307, + "grad_norm": 0.003963207931070446, + "kl": 0.059417724609375, + "learning_rate": 2.4286852596716312e-08, + "loss": 5.939879338257015e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4534, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 66.51041889190674, + "completions/min_length": 30.375, + "epoch": 9.015884834946636, + "grad_norm": 0.034503494588418344, + "kl": 0.11724853515625, + "learning_rate": 2.4189817063066164e-08, + "loss": 0.000117438830784522, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4535, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 60.68750190734863, + "completions/min_length": 19.875, + "epoch": 9.017870439314967, + "grad_norm": 0.022775426908068233, + "kl": 0.113494873046875, + "learning_rate": 2.409297095961288e-08, + "loss": 0.00011347609688527882, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4536, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.875, + "completions/mean_length": 61.031250953674316, + "completions/min_length": 23.5, + "epoch": 9.019856043683296, + "grad_norm": 0.004040718725406422, + "kl": 0.0609130859375, + "learning_rate": 2.3996314324912835e-08, + "loss": 6.0948863392695785e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4537, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.875, + "completions/mean_length": 66.94791746139526, + "completions/min_length": 26.375, + "epoch": 9.021841648051625, + "grad_norm": 0.0037970360092925777, + "kl": 0.072357177734375, + "learning_rate": 2.389984719744742e-08, + "loss": 7.227309106383473e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4538, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 63.281251668930054, + "completions/min_length": 20.875, + "epoch": 9.023827252419956, + "grad_norm": 0.010496840266945652, + "kl": 0.072174072265625, + "learning_rate": 2.380356961562213e-08, + "loss": 7.216067024273798e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4539, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.625, + "completions/mean_length": 66.90625143051147, + "completions/min_length": 22.75, + "epoch": 9.025812856788285, + "grad_norm": 0.007005044207731525, + "kl": 0.0645751953125, + "learning_rate": 2.370748161776698e-08, + "loss": 6.449769716709852e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4540, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 69.08333539962769, + "completions/min_length": 19.375, + "epoch": 9.027798461156614, + "grad_norm": 0.0036370782122761977, + "kl": 0.06817626953125, + "learning_rate": 2.3611583242136858e-08, + "loss": 6.825003947596997e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4541, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 66.01041889190674, + "completions/min_length": 21.5, + "epoch": 9.029784065524945, + "grad_norm": 0.0028755037142842513, + "kl": 0.053009033203125, + "learning_rate": 2.351587452691084e-08, + "loss": 5.2990955737186596e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4542, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.5, + "completions/mean_length": 62.802085876464844, + "completions/min_length": 15.5, + "epoch": 9.031769669893274, + "grad_norm": 0.004861215987040813, + "kl": 0.06512451171875, + "learning_rate": 2.3420355510192612e-08, + "loss": 6.518846930703148e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4543, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 70.31250190734863, + "completions/min_length": 26.375, + "epoch": 9.033755274261603, + "grad_norm": 0.0035362016591708087, + "kl": 0.065338134765625, + "learning_rate": 2.3325026230010368e-08, + "loss": 6.531730468850583e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4544, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 62.18750238418579, + "completions/min_length": 14.375, + "epoch": 9.035740878629934, + "grad_norm": 0.030740655365082796, + "kl": 0.1055908203125, + "learning_rate": 2.3229886724316693e-08, + "loss": 0.00010559442307567224, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4545, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 64.90625143051147, + "completions/min_length": 20.625, + "epoch": 9.037726482998263, + "grad_norm": 0.00522289505410599, + "kl": 0.06939697265625, + "learning_rate": 2.3134937030988788e-08, + "loss": 6.940049206605181e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4546, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.25, + "completions/mean_length": 66.85416841506958, + "completions/min_length": 18.75, + "epoch": 9.039712087366592, + "grad_norm": 0.005965280774300982, + "kl": 0.05792236328125, + "learning_rate": 2.3040177187828024e-08, + "loss": 5.787128611700609e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4547, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 62.17708492279053, + "completions/min_length": 18.25, + "epoch": 9.041697691734921, + "grad_norm": 0.003355668945559222, + "kl": 0.05377197265625, + "learning_rate": 2.2945607232560284e-08, + "loss": 5.381919618230313e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4548, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 60.72916865348816, + "completions/min_length": 24.0, + "epoch": 9.043683296103252, + "grad_norm": 0.002504701579232985, + "kl": 0.070037841796875, + "learning_rate": 2.2851227202836e-08, + "loss": 6.995261355768889e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4549, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.125, + "completions/mean_length": 60.96875190734863, + "completions/min_length": 18.0, + "epoch": 9.045668900471581, + "grad_norm": 0.0036162041069074204, + "kl": 0.079010009765625, + "learning_rate": 2.2757037136229783e-08, + "loss": 7.90992853580974e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4550, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 63.9791693687439, + "completions/min_length": 25.375, + "epoch": 9.04765450483991, + "grad_norm": 0.005387595030086053, + "kl": 0.056793212890625, + "learning_rate": 2.2663037070240754e-08, + "loss": 5.6754892284516245e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4551, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.0, + "completions/mean_length": 68.64583492279053, + "completions/min_length": 22.5, + "epoch": 9.04964010920824, + "grad_norm": 0.0035011935211644927, + "kl": 0.090850830078125, + "learning_rate": 2.2569227042292416e-08, + "loss": 9.09223745111376e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4552, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 68.03125190734863, + "completions/min_length": 24.0, + "epoch": 9.05162571357657, + "grad_norm": 0.0028053485352475043, + "kl": 0.07098388671875, + "learning_rate": 2.2475607089732397e-08, + "loss": 7.098112837411463e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4553, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.75, + "completions/mean_length": 79.70833396911621, + "completions/min_length": 26.5, + "epoch": 9.053611317944899, + "grad_norm": 0.0033546600809278354, + "kl": 0.066619873046875, + "learning_rate": 2.2382177249832988e-08, + "loss": 6.663070962531492e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4554, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.5, + "completions/mean_length": 67.28125143051147, + "completions/min_length": 20.875, + "epoch": 9.05559692231323, + "grad_norm": 0.0028327416967405777, + "kl": 0.063201904296875, + "learning_rate": 2.2288937559790445e-08, + "loss": 6.311328615993261e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4555, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.5, + "completions/mean_length": 70.1666693687439, + "completions/min_length": 25.125, + "epoch": 9.057582526681559, + "grad_norm": 0.002684138567880789, + "kl": 0.0650634765625, + "learning_rate": 2.219588805672551e-08, + "loss": 6.499581650132313e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4556, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 63.78125190734863, + "completions/min_length": 22.25, + "epoch": 9.059568131049888, + "grad_norm": 0.004482321213332049, + "kl": 0.0640869140625, + "learning_rate": 2.2103028777683162e-08, + "loss": 6.410179048543796e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4557, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 63.13541793823242, + "completions/min_length": 20.75, + "epoch": 9.061553735418219, + "grad_norm": 0.00469748423120213, + "kl": 0.073455810546875, + "learning_rate": 2.201035975963278e-08, + "loss": 7.346504571614787e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4558, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.0, + "completions/mean_length": 79.40625238418579, + "completions/min_length": 26.25, + "epoch": 9.063539339786548, + "grad_norm": 0.0025776055027771166, + "kl": 0.064300537109375, + "learning_rate": 2.1917881039467735e-08, + "loss": 6.438494892790914e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4559, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.0, + "completions/mean_length": 60.41666889190674, + "completions/min_length": 25.875, + "epoch": 9.065524944154877, + "grad_norm": 0.003989193192029717, + "kl": 0.064361572265625, + "learning_rate": 2.182559265400591e-08, + "loss": 6.436978583224118e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4560, + "train_speed(iter/s)": 0.022617 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.25, + "completions/mean_length": 69.82291889190674, + "completions/min_length": 21.5, + "epoch": 9.067510548523206, + "grad_norm": 0.004735728389694131, + "kl": 0.060546875, + "learning_rate": 2.1733494639989136e-08, + "loss": 6.0589329223148525e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4561, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.375, + "completions/mean_length": 72.40625286102295, + "completions/min_length": 27.25, + "epoch": 9.069496152891537, + "grad_norm": 1.4864792675427287, + "kl": 0.07177734375, + "learning_rate": 2.1641587034083752e-08, + "loss": -0.019910480827093124, + "memory(GiB)": 94.21, + "reward": 1.8854166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8854166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4562, + "train_speed(iter/s)": 0.022616 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.0, + "completions/mean_length": 68.67708492279053, + "completions/min_length": 22.875, + "epoch": 9.071481757259866, + "grad_norm": 0.003481732394948342, + "kl": 0.062957763671875, + "learning_rate": 2.15498698728801e-08, + "loss": 6.299871893133968e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4563, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 64.96875238418579, + "completions/min_length": 27.625, + "epoch": 9.073467361628195, + "grad_norm": 0.005030429797118001, + "kl": 0.056182861328125, + "learning_rate": 2.145834319289258e-08, + "loss": 5.615845293505117e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4564, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 68.15625190734863, + "completions/min_length": 22.625, + "epoch": 9.075452965996526, + "grad_norm": 0.0037198045930339624, + "kl": 0.06719970703125, + "learning_rate": 2.136700703056016e-08, + "loss": 6.719467637594789e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4565, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 70.39583492279053, + "completions/min_length": 24.875, + "epoch": 9.077438570364855, + "grad_norm": 0.002239190889134339, + "kl": 0.066680908203125, + "learning_rate": 2.1275861422245524e-08, + "loss": 6.668046262348071e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4566, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 63.708335876464844, + "completions/min_length": 21.125, + "epoch": 9.079424174733184, + "grad_norm": 0.008713956080790999, + "kl": 0.058929443359375, + "learning_rate": 2.1184906404235814e-08, + "loss": 5.887925726710819e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4567, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.0, + "completions/mean_length": 74.53125238418579, + "completions/min_length": 22.875, + "epoch": 9.081409779101515, + "grad_norm": 0.0045706902185289715, + "kl": 0.075164794921875, + "learning_rate": 2.1094142012742177e-08, + "loss": 7.511264266213402e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4568, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 74.54166841506958, + "completions/min_length": 25.25, + "epoch": 9.083395383469844, + "grad_norm": 0.00254277419603636, + "kl": 0.057098388671875, + "learning_rate": 2.1003568283899707e-08, + "loss": 5.7095250667771325e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4569, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.25, + "completions/mean_length": 74.84375190734863, + "completions/min_length": 22.875, + "epoch": 9.085380987838173, + "grad_norm": 0.002910854512896116, + "kl": 0.073883056640625, + "learning_rate": 2.0913185253767894e-08, + "loss": 7.384567288681865e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4570, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.625, + "completions/mean_length": 76.58333587646484, + "completions/min_length": 25.625, + "epoch": 9.087366592206504, + "grad_norm": 0.0032179189228860507, + "kl": 0.069671630859375, + "learning_rate": 2.082299295833001e-08, + "loss": 6.966745422687382e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4571, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.125, + "completions/mean_length": 67.70833587646484, + "completions/min_length": 21.875, + "epoch": 9.089352196574833, + "grad_norm": 0.0028057939434665505, + "kl": 0.055572509765625, + "learning_rate": 2.0732991433493607e-08, + "loss": 5.556229007197544e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4572, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 64.96875143051147, + "completions/min_length": 19.125, + "epoch": 9.091337800943162, + "grad_norm": 0.0029340588696122193, + "kl": 0.052886962890625, + "learning_rate": 2.0643180715090304e-08, + "loss": 5.2954317652620375e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4573, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 81.68750286102295, + "completions/min_length": 32.375, + "epoch": 9.09332340531149, + "grad_norm": 0.0027812154371746247, + "kl": 0.07904052734375, + "learning_rate": 2.0553560838875496e-08, + "loss": 7.895859016571194e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4574, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 69.16666889190674, + "completions/min_length": 23.375, + "epoch": 9.095309009679822, + "grad_norm": 0.005964695224091516, + "kl": 0.0714111328125, + "learning_rate": 2.0464131840528974e-08, + "loss": 7.138065120670944e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4575, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 72.30208539962769, + "completions/min_length": 24.375, + "epoch": 9.09729461404815, + "grad_norm": 0.0029827520822214813, + "kl": 0.05621337890625, + "learning_rate": 2.0374893755654154e-08, + "loss": 5.621215677820146e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4576, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 63.739585876464844, + "completions/min_length": 24.75, + "epoch": 9.09928021841648, + "grad_norm": 0.0024626586627420613, + "kl": 0.07623291015625, + "learning_rate": 2.0285846619778667e-08, + "loss": 7.622718840138987e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4577, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.0, + "completions/mean_length": 81.55208587646484, + "completions/min_length": 27.625, + "epoch": 9.10126582278481, + "grad_norm": 0.005056008869765523, + "kl": 0.0562744140625, + "learning_rate": 2.0196990468354102e-08, + "loss": 5.622143726213835e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4578, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.25, + "completions/mean_length": 75.55208444595337, + "completions/min_length": 25.5, + "epoch": 9.10325142715314, + "grad_norm": 0.9418934316246953, + "kl": 0.0668792724609375, + "learning_rate": 2.0108325336755992e-08, + "loss": 0.0036358728539198637, + "memory(GiB)": 94.21, + "reward": 1.7916666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7916666716337204, + "rewards/CineAccuracyORM/std": 0.11393529921770096, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4579, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.0, + "completions/mean_length": 74.58333587646484, + "completions/min_length": 31.5, + "epoch": 9.105237031521469, + "grad_norm": 0.0033061453495803967, + "kl": 0.06890869140625, + "learning_rate": 2.001985126028377e-08, + "loss": 6.883487367304042e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4580, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 63.343750953674316, + "completions/min_length": 24.625, + "epoch": 9.1072226358898, + "grad_norm": 0.00383220039253361, + "kl": 0.0650634765625, + "learning_rate": 1.993156827416098e-08, + "loss": 6.502013275166973e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4581, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.625, + "completions/mean_length": 62.47916841506958, + "completions/min_length": 21.25, + "epoch": 9.109208240258129, + "grad_norm": 0.0028641601289803938, + "kl": 0.06146240234375, + "learning_rate": 1.9843476413534843e-08, + "loss": 6.148928514448926e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4582, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.125, + "completions/mean_length": 84.06250286102295, + "completions/min_length": 34.0, + "epoch": 9.111193844626458, + "grad_norm": 0.0027977702187432056, + "kl": 0.066436767578125, + "learning_rate": 1.9755575713476692e-08, + "loss": 6.640343053732067e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4583, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 73.78125238418579, + "completions/min_length": 26.0, + "epoch": 9.113179448994789, + "grad_norm": 0.0044494915823661335, + "kl": 0.0797119140625, + "learning_rate": 1.966786620898164e-08, + "loss": 7.963976531755179e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4584, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 66.8854193687439, + "completions/min_length": 23.5, + "epoch": 9.115165053363118, + "grad_norm": 0.0027245810923055007, + "kl": 0.0587615966796875, + "learning_rate": 1.9580347934968545e-08, + "loss": 5.874328053323552e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4585, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 68.93750143051147, + "completions/min_length": 32.375, + "epoch": 9.117150657731447, + "grad_norm": 0.00782548537045295, + "kl": 0.09356689453125, + "learning_rate": 1.9493020926280633e-08, + "loss": 9.359908290207386e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4586, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 73.15625190734863, + "completions/min_length": 30.25, + "epoch": 9.119136262099776, + "grad_norm": 0.0026276625630214006, + "kl": 0.0736083984375, + "learning_rate": 1.940588521768449e-08, + "loss": 7.360795279964805e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4587, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 66.91666793823242, + "completions/min_length": 22.375, + "epoch": 9.121121866468107, + "grad_norm": 0.0025107384852920897, + "kl": 0.053192138671875, + "learning_rate": 1.931894084387059e-08, + "loss": 5.319520641933195e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4588, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.5, + "completions/mean_length": 65.88541889190674, + "completions/min_length": 19.5, + "epoch": 9.123107470836436, + "grad_norm": 0.003262802645193201, + "kl": 0.067138671875, + "learning_rate": 1.9232187839453518e-08, + "loss": 6.71864181640558e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4589, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 65.87500286102295, + "completions/min_length": 20.25, + "epoch": 9.125093075204765, + "grad_norm": 0.002502658612662069, + "kl": 0.0660400390625, + "learning_rate": 1.914562623897137e-08, + "loss": 6.607950490433723e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4590, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 66.31250190734863, + "completions/min_length": 20.25, + "epoch": 9.127078679573096, + "grad_norm": 0.003643917542610691, + "kl": 0.0711669921875, + "learning_rate": 1.90592560768863e-08, + "loss": 7.114700565580279e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4591, + "train_speed(iter/s)": 0.022615 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 66.62500047683716, + "completions/min_length": 20.875, + "epoch": 9.129064283941425, + "grad_norm": 0.004700313693096275, + "kl": 0.061798095703125, + "learning_rate": 1.8973077387583968e-08, + "loss": 6.183073128340766e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4592, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.0, + "completions/mean_length": 58.68750190734863, + "completions/min_length": 20.5, + "epoch": 9.131049888309754, + "grad_norm": 0.0035000341850366266, + "kl": 0.063629150390625, + "learning_rate": 1.8887090205374045e-08, + "loss": 6.364900036714971e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4593, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 69.04166841506958, + "completions/min_length": 24.5, + "epoch": 9.133035492678085, + "grad_norm": 0.0025512166587473156, + "kl": 0.06805419921875, + "learning_rate": 1.8801294564489922e-08, + "loss": 6.807439785916358e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4594, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 63.81250190734863, + "completions/min_length": 23.125, + "epoch": 9.135021097046414, + "grad_norm": 0.002736071233659325, + "kl": 0.056488037109375, + "learning_rate": 1.8715690499088555e-08, + "loss": 5.649589002132416e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4595, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 67.73958492279053, + "completions/min_length": 24.75, + "epoch": 9.137006701414743, + "grad_norm": 0.7635883136435511, + "kl": 0.08392333984375, + "learning_rate": 1.8630278043250734e-08, + "loss": 0.008837152272462845, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 0.9895833358168602, + "rewards/Format/std": 0.03608439117670059, + "step": 4596, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 178.25, + "completions/mean_length": 77.68750238418579, + "completions/min_length": 29.875, + "epoch": 9.138992305783074, + "grad_norm": 0.006265500595996801, + "kl": 0.07061767578125, + "learning_rate": 1.8545057230981153e-08, + "loss": 7.076094334479421e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4597, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 211.5, + "completions/mean_length": 79.59375286102295, + "completions/min_length": 25.25, + "epoch": 9.140977910151403, + "grad_norm": 0.0044094143422299245, + "kl": 0.06573486328125, + "learning_rate": 1.8460028096207835e-08, + "loss": 6.579120235983282e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4598, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 78.91666889190674, + "completions/min_length": 27.875, + "epoch": 9.142963514519732, + "grad_norm": 0.0033495335612856187, + "kl": 0.06658935546875, + "learning_rate": 1.8375190672782757e-08, + "loss": 6.65129191474989e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4599, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.875, + "completions/mean_length": 73.14583539962769, + "completions/min_length": 28.875, + "epoch": 9.14494911888806, + "grad_norm": 0.004946087528936944, + "kl": 0.056060791015625, + "learning_rate": 1.829054499448163e-08, + "loss": 5.602114833891392e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4600, + "train_speed(iter/s)": 0.022614 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 206.625, + "completions/mean_length": 77.22916793823242, + "completions/min_length": 25.5, + "epoch": 9.146934723256392, + "grad_norm": 0.004219057498605359, + "kl": 0.06793212890625, + "learning_rate": 1.820609109500354e-08, + "loss": 6.788845348637551e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4601, + "train_speed(iter/s)": 0.022613 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.5, + "completions/mean_length": 67.96875095367432, + "completions/min_length": 20.875, + "epoch": 9.14892032762472, + "grad_norm": 0.005520091075769352, + "kl": 0.065032958984375, + "learning_rate": 1.8121829007971546e-08, + "loss": 6.501689495053142e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4602, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 72.22916841506958, + "completions/min_length": 26.5, + "epoch": 9.15090593199305, + "grad_norm": 0.028207177369508406, + "kl": 0.10748291015625, + "learning_rate": 1.8037758766932143e-08, + "loss": 0.00010735404066508636, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4603, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.625, + "completions/mean_length": 67.43750190734863, + "completions/min_length": 24.5, + "epoch": 9.15289153636138, + "grad_norm": 0.0024923026180727827, + "kl": 0.05438232421875, + "learning_rate": 1.7953880405355337e-08, + "loss": 5.4392890888266265e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4604, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.125, + "completions/mean_length": 65.98958683013916, + "completions/min_length": 21.125, + "epoch": 9.15487714072971, + "grad_norm": 1.6541820062026134, + "kl": 0.073333740234375, + "learning_rate": 1.7870193956635082e-08, + "loss": -0.006934754084795713, + "memory(GiB)": 94.21, + "reward": 1.7395833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.7395833358168602, + "rewards/CineAccuracyORM/std": 0.29720086604356766, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4605, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 68.93750190734863, + "completions/min_length": 20.5, + "epoch": 9.156862745098039, + "grad_norm": 0.007253816748055403, + "kl": 0.08563232421875, + "learning_rate": 1.7786699454088562e-08, + "loss": 8.563135634176433e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4606, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.375, + "completions/mean_length": 71.47916841506958, + "completions/min_length": 21.75, + "epoch": 9.15884834946637, + "grad_norm": 0.003069411617542107, + "kl": 0.095855712890625, + "learning_rate": 1.7703396930956906e-08, + "loss": 9.584397776052356e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4607, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 71.35416841506958, + "completions/min_length": 23.375, + "epoch": 9.160833953834699, + "grad_norm": 0.0035596101484501263, + "kl": 0.07928466796875, + "learning_rate": 1.7620286420404528e-08, + "loss": 7.924398960312828e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4608, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.625, + "completions/mean_length": 67.04166841506958, + "completions/min_length": 22.5, + "epoch": 9.162819558203028, + "grad_norm": 0.0026477269280414115, + "kl": 0.065948486328125, + "learning_rate": 1.7537367955519522e-08, + "loss": 6.592989666387439e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4609, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.875, + "completions/mean_length": 70.78125143051147, + "completions/min_length": 18.0, + "epoch": 9.164805162571358, + "grad_norm": 0.003292495796972032, + "kl": 0.069549560546875, + "learning_rate": 1.7454641569313533e-08, + "loss": 6.944018241483718e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4610, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 71.68750238418579, + "completions/min_length": 25.75, + "epoch": 9.166790766939688, + "grad_norm": 0.002757706026183243, + "kl": 0.06024169921875, + "learning_rate": 1.7372107294721606e-08, + "loss": 6.030036456650123e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4611, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 180.125, + "completions/mean_length": 72.35416746139526, + "completions/min_length": 26.5, + "epoch": 9.168776371308017, + "grad_norm": 0.0034196619416398152, + "kl": 0.064605712890625, + "learning_rate": 1.728976516460251e-08, + "loss": 6.461337034124881e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4612, + "train_speed(iter/s)": 0.022612 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.625, + "completions/mean_length": 66.46875143051147, + "completions/min_length": 23.375, + "epoch": 9.170761975676346, + "grad_norm": 0.0035347421915786874, + "kl": 0.054351806640625, + "learning_rate": 1.7207615211738302e-08, + "loss": 5.439599044620991e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4613, + "train_speed(iter/s)": 0.022611 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.0, + "completions/mean_length": 66.34375190734863, + "completions/min_length": 23.125, + "epoch": 9.172747580044676, + "grad_norm": 0.0024673838866274005, + "kl": 0.054290771484375, + "learning_rate": 1.7125657468834653e-08, + "loss": 5.4242696933215484e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4614, + "train_speed(iter/s)": 0.022611 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.75, + "completions/mean_length": 72.77083539962769, + "completions/min_length": 24.875, + "epoch": 9.174733184413006, + "grad_norm": 0.005636467698132973, + "kl": 0.06390380859375, + "learning_rate": 1.704389196852085e-08, + "loss": 6.389366171788424e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4615, + "train_speed(iter/s)": 0.022611 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 187.75, + "completions/mean_length": 71.35416841506958, + "completions/min_length": 22.25, + "epoch": 9.176718788781335, + "grad_norm": 1.2825463305934373, + "kl": 0.109039306640625, + "learning_rate": 1.69623187433493e-08, + "loss": 0.012987978756427765, + "memory(GiB)": 94.21, + "reward": 1.9791666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.9791666716337204, + "rewards/CineAccuracyORM/std": 0.04865618050098419, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4616, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 63.94791865348816, + "completions/min_length": 21.875, + "epoch": 9.178704393149665, + "grad_norm": 0.8109076823176605, + "kl": 0.06829833984375, + "learning_rate": 1.688093782579608e-08, + "loss": -0.005175780039280653, + "memory(GiB)": 94.21, + "reward": 1.7604166716337204, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.7604166716337204, + "rewards/CineAccuracyORM/std": 0.1296451985836029, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4617, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 198.875, + "completions/mean_length": 87.63541889190674, + "completions/min_length": 29.0, + "epoch": 9.180689997517995, + "grad_norm": 1.77821381117017, + "kl": 0.08428955078125, + "learning_rate": 1.679974924826072e-08, + "loss": 0.00458592688664794, + "memory(GiB)": 94.21, + "reward": 1.6354166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6354166669771075, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4618, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 73.16666889190674, + "completions/min_length": 31.625, + "epoch": 9.182675601886324, + "grad_norm": 0.0035441475216441733, + "kl": 0.056671142578125, + "learning_rate": 1.671875304306608e-08, + "loss": 5.667291407007724e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4619, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 75.39583492279053, + "completions/min_length": 23.25, + "epoch": 9.184661206254654, + "grad_norm": 0.002458063614154341, + "kl": 0.0523681640625, + "learning_rate": 1.6637949242458483e-08, + "loss": 5.237808363744989e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4620, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 68.81250238418579, + "completions/min_length": 21.0, + "epoch": 9.186646810622983, + "grad_norm": 0.0030955618031601397, + "kl": 0.06298828125, + "learning_rate": 1.655733787860769e-08, + "loss": 6.294525519479066e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4621, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.5, + "completions/mean_length": 70.37500190734863, + "completions/min_length": 22.375, + "epoch": 9.188632414991313, + "grad_norm": 0.002781929755595527, + "kl": 0.05462646484375, + "learning_rate": 1.647691898360676e-08, + "loss": 5.4624215408694e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4622, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.25, + "completions/mean_length": 67.44791793823242, + "completions/min_length": 20.25, + "epoch": 9.190618019359643, + "grad_norm": 0.09012914312666065, + "kl": 0.14984130859375, + "learning_rate": 1.6396692589472293e-08, + "loss": 0.00015010683273430914, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4623, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.5, + "completions/mean_length": 69.50000286102295, + "completions/min_length": 28.5, + "epoch": 9.192603623727972, + "grad_norm": 0.0028628400578205614, + "kl": 0.066558837890625, + "learning_rate": 1.6316658728143972e-08, + "loss": 6.65376428514719e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4624, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 69.34375143051147, + "completions/min_length": 23.0, + "epoch": 9.194589228096302, + "grad_norm": 0.004908068458422123, + "kl": 0.062835693359375, + "learning_rate": 1.623681743148503e-08, + "loss": 6.280232628341764e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4625, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.0, + "completions/mean_length": 70.48958492279053, + "completions/min_length": 21.125, + "epoch": 9.19657483246463, + "grad_norm": 0.003296981886737747, + "kl": 0.056427001953125, + "learning_rate": 1.6157168731282035e-08, + "loss": 5.6483047956135124e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4626, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.625, + "completions/mean_length": 69.97916889190674, + "completions/min_length": 22.375, + "epoch": 9.198560436832961, + "grad_norm": 0.0037947461749276686, + "kl": 0.062896728515625, + "learning_rate": 1.607771265924479e-08, + "loss": 6.281337846303359e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4627, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.5, + "completions/mean_length": 61.48958444595337, + "completions/min_length": 20.0, + "epoch": 9.20054604120129, + "grad_norm": 1.800353267363252, + "kl": 0.06561279296875, + "learning_rate": 1.599844924700644e-08, + "loss": -0.007479371037334204, + "memory(GiB)": 94.21, + "reward": 1.9895833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.9895833358168602, + "rewards/CineAccuracyORM/std": 0.03608439117670059, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4628, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 186.25, + "completions/mean_length": 71.85416841506958, + "completions/min_length": 22.0, + "epoch": 9.20253164556962, + "grad_norm": 0.0030381041800342404, + "kl": 0.05810546875, + "learning_rate": 1.5919378526123573e-08, + "loss": 5.815225813421421e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4629, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 69.53125190734863, + "completions/min_length": 28.875, + "epoch": 9.20451724993795, + "grad_norm": 0.004775111908317754, + "kl": 0.092376708984375, + "learning_rate": 1.5840500528075728e-08, + "loss": 9.240680810762569e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4630, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.75, + "completions/mean_length": 70.88541793823242, + "completions/min_length": 22.875, + "epoch": 9.20650285430628, + "grad_norm": 0.004090347353094927, + "kl": 0.086456298828125, + "learning_rate": 1.5761815284266123e-08, + "loss": 8.648833318147808e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4631, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.875, + "completions/mean_length": 66.4791693687439, + "completions/min_length": 21.875, + "epoch": 9.208488458674609, + "grad_norm": 0.005509423466539693, + "kl": 0.064788818359375, + "learning_rate": 1.5683322826020974e-08, + "loss": 6.483864854089916e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4632, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 64.41666841506958, + "completions/min_length": 22.5, + "epoch": 9.21047406304294, + "grad_norm": 0.005990543129378365, + "kl": 0.0611572265625, + "learning_rate": 1.5605023184589616e-08, + "loss": 6.118452438386157e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4633, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.25, + "completions/mean_length": 72.30208444595337, + "completions/min_length": 24.875, + "epoch": 9.212459667411268, + "grad_norm": 0.0028097457505736594, + "kl": 0.060455322265625, + "learning_rate": 1.5526916391145062e-08, + "loss": 6.036656486685388e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4634, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.125, + "completions/mean_length": 57.86458444595337, + "completions/min_length": 20.125, + "epoch": 9.214445271779597, + "grad_norm": 0.0028717730378125853, + "kl": 0.073699951171875, + "learning_rate": 1.544900247678321e-08, + "loss": 7.37295049475506e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4635, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 73.97916841506958, + "completions/min_length": 18.875, + "epoch": 9.216430876147928, + "grad_norm": 0.004402854505414873, + "kl": 0.073516845703125, + "learning_rate": 1.5371281472523202e-08, + "loss": 7.35412904759869e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4636, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.75, + "completions/mean_length": 73.48958492279053, + "completions/min_length": 28.625, + "epoch": 9.218416480516257, + "grad_norm": 0.0044671089767675245, + "kl": 0.06768798828125, + "learning_rate": 1.5293753409307554e-08, + "loss": 6.768741150153801e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4637, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 63.489585399627686, + "completions/min_length": 19.5, + "epoch": 9.220402084884586, + "grad_norm": 0.00530022185195528, + "kl": 0.097259521484375, + "learning_rate": 1.5216418318001635e-08, + "loss": 9.731283353175968e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4638, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.75, + "completions/mean_length": 76.37500190734863, + "completions/min_length": 29.0, + "epoch": 9.222387689252916, + "grad_norm": 2.4100379078859566, + "kl": 0.078887939453125, + "learning_rate": 1.5139276229394425e-08, + "loss": -0.01215621642768383, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.2523707337677479, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4639, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.25, + "completions/mean_length": 63.43750238418579, + "completions/min_length": 26.75, + "epoch": 9.224373293621246, + "grad_norm": 0.0028845302873262706, + "kl": 0.06353759765625, + "learning_rate": 1.506232717419764e-08, + "loss": 6.358255632221699e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4640, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 70.15625190734863, + "completions/min_length": 21.5, + "epoch": 9.226358897989575, + "grad_norm": 0.004400338843049586, + "kl": 0.061492919921875, + "learning_rate": 1.4985571183046498e-08, + "loss": 6.15089520579204e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4641, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 68.4479193687439, + "completions/min_length": 25.0, + "epoch": 9.228344502357904, + "grad_norm": 0.00286768406648142, + "kl": 0.094024658203125, + "learning_rate": 1.4909008286499104e-08, + "loss": 9.388741455040872e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4642, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 69.89583587646484, + "completions/min_length": 21.375, + "epoch": 9.230330106726235, + "grad_norm": 0.0025233465019816447, + "kl": 0.05328369140625, + "learning_rate": 1.4832638515036855e-08, + "loss": 5.32776684849523e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4643, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 119.375, + "completions/mean_length": 56.375001430511475, + "completions/min_length": 20.875, + "epoch": 9.232315711094564, + "grad_norm": 0.0041635362582778356, + "kl": 0.05438232421875, + "learning_rate": 1.4756461899064098e-08, + "loss": 5.441024404717609e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4644, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.125, + "completions/mean_length": 61.968751430511475, + "completions/min_length": 20.875, + "epoch": 9.234301315462893, + "grad_norm": 0.0033742331395539425, + "kl": 0.0572509765625, + "learning_rate": 1.468047846890852e-08, + "loss": 5.7179709983756766e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4645, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.0, + "completions/mean_length": 75.94791889190674, + "completions/min_length": 25.5, + "epoch": 9.236286919831224, + "grad_norm": 0.002747866799211444, + "kl": 0.072021484375, + "learning_rate": 1.460468825482053e-08, + "loss": 7.202931010397151e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4646, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.5, + "completions/mean_length": 64.62500095367432, + "completions/min_length": 27.625, + "epoch": 9.238272524199553, + "grad_norm": 0.003328410056861108, + "kl": 0.068939208984375, + "learning_rate": 1.4529091286973993e-08, + "loss": 6.895697151776403e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4647, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.875, + "completions/mean_length": 72.48958587646484, + "completions/min_length": 22.375, + "epoch": 9.240258128567882, + "grad_norm": 0.003374029888985708, + "kl": 0.074249267578125, + "learning_rate": 1.4453687595465668e-08, + "loss": 7.420840120175853e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4648, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 71.89583587646484, + "completions/min_length": 21.75, + "epoch": 9.242243732936213, + "grad_norm": 0.0026353953920421553, + "kl": 0.058197021484375, + "learning_rate": 1.4378477210315265e-08, + "loss": 5.8207337133353576e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4649, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 71.62500238418579, + "completions/min_length": 28.125, + "epoch": 9.244229337304542, + "grad_norm": 0.0036660295666054695, + "kl": 0.069610595703125, + "learning_rate": 1.4303460161465775e-08, + "loss": 6.963525083847344e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4650, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 74.520836353302, + "completions/min_length": 25.125, + "epoch": 9.246214941672871, + "grad_norm": 0.006198906693427003, + "kl": 0.083587646484375, + "learning_rate": 1.4228636478783029e-08, + "loss": 8.350690768565983e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4651, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.625, + "completions/mean_length": 76.56250238418579, + "completions/min_length": 26.0, + "epoch": 9.2482005460412, + "grad_norm": 0.004739991098556912, + "kl": 0.07421875, + "learning_rate": 1.4154006192055922e-08, + "loss": 7.420215115416795e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4652, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 68.26041889190674, + "completions/min_length": 24.125, + "epoch": 9.250186150409531, + "grad_norm": 0.0034464099780213927, + "kl": 0.055450439453125, + "learning_rate": 1.407956933099641e-08, + "loss": 5.5444335885113105e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4653, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.5, + "completions/mean_length": 59.781251430511475, + "completions/min_length": 20.625, + "epoch": 9.25217175477786, + "grad_norm": 0.005455917102142847, + "kl": 0.05889892578125, + "learning_rate": 1.4005325925239286e-08, + "loss": 5.8897461713058874e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4654, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 76.25000190734863, + "completions/min_length": 29.5, + "epoch": 9.25415735914619, + "grad_norm": 0.0023763978636056633, + "kl": 0.060638427734375, + "learning_rate": 1.3931276004342574e-08, + "loss": 6.063852197257802e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4655, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.625, + "completions/mean_length": 72.76041841506958, + "completions/min_length": 25.5, + "epoch": 9.25614296351452, + "grad_norm": 0.0032395535386128657, + "kl": 0.07879638671875, + "learning_rate": 1.3857419597787134e-08, + "loss": 7.876742165535688e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4656, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.5, + "completions/mean_length": 74.94791889190674, + "completions/min_length": 26.625, + "epoch": 9.25812856788285, + "grad_norm": 0.004504762660977818, + "kl": 0.07012939453125, + "learning_rate": 1.3783756734976616e-08, + "loss": 7.008872489677742e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4657, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 76.39583587646484, + "completions/min_length": 27.0, + "epoch": 9.260114172251178, + "grad_norm": 0.0028642774654113433, + "kl": 0.07293701171875, + "learning_rate": 1.3710287445237944e-08, + "loss": 7.288622873602435e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4658, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.625, + "completions/mean_length": 68.12500190734863, + "completions/min_length": 23.625, + "epoch": 9.26209977661951, + "grad_norm": 0.0026484065149032314, + "kl": 0.0621337890625, + "learning_rate": 1.3637011757820726e-08, + "loss": 6.21078215772286e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4659, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.375, + "completions/mean_length": 61.208335399627686, + "completions/min_length": 23.625, + "epoch": 9.264085380987838, + "grad_norm": 0.005715638637676868, + "kl": 0.062255859375, + "learning_rate": 1.3563929701897626e-08, + "loss": 6.226979894563556e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4660, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 68.01041746139526, + "completions/min_length": 24.875, + "epoch": 9.266070985356167, + "grad_norm": 0.011858685130890681, + "kl": 0.056884765625, + "learning_rate": 1.3491041306564088e-08, + "loss": 5.6855817092582583e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4661, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.125, + "completions/mean_length": 67.82291889190674, + "completions/min_length": 19.625, + "epoch": 9.268056589724498, + "grad_norm": 0.0038126816445703485, + "kl": 0.066375732421875, + "learning_rate": 1.341834660083857e-08, + "loss": 6.646734982496127e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4662, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 66.01041889190674, + "completions/min_length": 24.5, + "epoch": 9.270042194092827, + "grad_norm": 0.00291322523257202, + "kl": 0.061370849609375, + "learning_rate": 1.3345845613662476e-08, + "loss": 6.139225297374651e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4663, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 66.87500190734863, + "completions/min_length": 20.625, + "epoch": 9.272027798461156, + "grad_norm": 0.006682560022615333, + "kl": 0.067626953125, + "learning_rate": 1.3273538373899995e-08, + "loss": 6.763551209587604e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4664, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.0, + "completions/mean_length": 63.520835399627686, + "completions/min_length": 25.25, + "epoch": 9.274013402829485, + "grad_norm": 0.004315624528626272, + "kl": 0.06414794921875, + "learning_rate": 1.3201424910337987e-08, + "loss": 6.41354126855731e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4665, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.875, + "completions/mean_length": 65.90625238418579, + "completions/min_length": 20.75, + "epoch": 9.275999007197816, + "grad_norm": 0.002979036685849788, + "kl": 0.064239501953125, + "learning_rate": 1.31295052516866e-08, + "loss": 6.42929517198354e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4666, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 68.46875190734863, + "completions/min_length": 23.375, + "epoch": 9.277984611566145, + "grad_norm": 0.0030776501434818166, + "kl": 0.064971923828125, + "learning_rate": 1.305777942657843e-08, + "loss": 6.501008465420455e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4667, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 68.9791693687439, + "completions/min_length": 23.875, + "epoch": 9.279970215934474, + "grad_norm": 0.004760245503712212, + "kl": 0.06268310546875, + "learning_rate": 1.2986247463569134e-08, + "loss": 6.27110421191901e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4668, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.0, + "completions/mean_length": 71.87500238418579, + "completions/min_length": 24.875, + "epoch": 9.281955820302805, + "grad_norm": 0.019004660454931054, + "kl": 0.059356689453125, + "learning_rate": 1.2914909391137097e-08, + "loss": 5.9363570471759886e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4669, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.25, + "completions/mean_length": 70.40625095367432, + "completions/min_length": 34.0, + "epoch": 9.283941424671134, + "grad_norm": 1.658693067486625, + "kl": 0.063323974609375, + "learning_rate": 1.2843765237683547e-08, + "loss": 0.0020861129742115736, + "memory(GiB)": 94.21, + "reward": 1.65625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.65625, + "rewards/CineAccuracyORM/std": 0.3829289712011814, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4670, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.75, + "completions/mean_length": 72.93750095367432, + "completions/min_length": 26.125, + "epoch": 9.285927029039463, + "grad_norm": 0.004543723462675412, + "kl": 0.0657958984375, + "learning_rate": 1.2772815031532436e-08, + "loss": 6.585180381080136e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4671, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.5, + "completions/mean_length": 61.000000953674316, + "completions/min_length": 23.125, + "epoch": 9.287912633407794, + "grad_norm": 0.0036002204952407684, + "kl": 0.061431884765625, + "learning_rate": 1.270205880093067e-08, + "loss": 6.150422268547118e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4672, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 63.37500190734863, + "completions/min_length": 23.875, + "epoch": 9.289898237776123, + "grad_norm": 0.004161064341715211, + "kl": 0.075347900390625, + "learning_rate": 1.2631496574047662e-08, + "loss": 7.526666740886867e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4673, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 69.29166841506958, + "completions/min_length": 28.625, + "epoch": 9.291883842144452, + "grad_norm": 2.1218900262433533, + "kl": 0.0576171875, + "learning_rate": 1.2561128378975827e-08, + "loss": 0.013383294455707073, + "memory(GiB)": 94.21, + "reward": 1.9270833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.9270833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4674, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.25, + "completions/mean_length": 62.60416841506958, + "completions/min_length": 23.0, + "epoch": 9.293869446512783, + "grad_norm": 0.004089983648510965, + "kl": 0.053009033203125, + "learning_rate": 1.2490954243730256e-08, + "loss": 5.3028357797302306e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4675, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.375, + "completions/mean_length": 67.07291746139526, + "completions/min_length": 20.25, + "epoch": 9.295855050881112, + "grad_norm": 0.004408340773121167, + "kl": 0.073699951171875, + "learning_rate": 1.2420974196248712e-08, + "loss": 7.364851626334712e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4676, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 70.47916841506958, + "completions/min_length": 19.125, + "epoch": 9.297840655249441, + "grad_norm": 0.0027681584412542403, + "kl": 0.061981201171875, + "learning_rate": 1.2351188264391855e-08, + "loss": 6.200503412401304e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4677, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 73.92708444595337, + "completions/min_length": 21.75, + "epoch": 9.29982625961777, + "grad_norm": 0.0022999499715569303, + "kl": 0.063201904296875, + "learning_rate": 1.2281596475942791e-08, + "loss": 6.317153747659177e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4678, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.375, + "completions/mean_length": 66.06250238418579, + "completions/min_length": 25.125, + "epoch": 9.301811863986101, + "grad_norm": 0.002809121300505957, + "kl": 0.057891845703125, + "learning_rate": 1.2212198858607691e-08, + "loss": 5.780795618193224e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4679, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.625, + "completions/mean_length": 77.85416889190674, + "completions/min_length": 27.0, + "epoch": 9.30379746835443, + "grad_norm": 0.006530546420004666, + "kl": 0.11407470703125, + "learning_rate": 1.2142995440015125e-08, + "loss": 0.00011410249135224149, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4680, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 64.07291841506958, + "completions/min_length": 21.125, + "epoch": 9.30578307272276, + "grad_norm": 0.0026898165411776745, + "kl": 0.05908203125, + "learning_rate": 1.2073986247716383e-08, + "loss": 5.902966222492978e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4681, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.875, + "completions/mean_length": 68.34375190734863, + "completions/min_length": 26.375, + "epoch": 9.30776867709109, + "grad_norm": 0.005286213403348124, + "kl": 0.09185791015625, + "learning_rate": 1.2005171309185601e-08, + "loss": 9.193408186547458e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4682, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.25, + "completions/mean_length": 73.03125238418579, + "completions/min_length": 26.5, + "epoch": 9.30975428145942, + "grad_norm": 0.0027852237749129358, + "kl": 0.06634521484375, + "learning_rate": 1.1936550651819478e-08, + "loss": 6.622633372899145e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4683, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 76.44791984558105, + "completions/min_length": 27.0, + "epoch": 9.311739885827748, + "grad_norm": 0.003326787790127927, + "kl": 0.068359375, + "learning_rate": 1.186812430293732e-08, + "loss": 6.83020189171657e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4684, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 69.42708539962769, + "completions/min_length": 32.25, + "epoch": 9.313725490196079, + "grad_norm": 0.003682943475043186, + "kl": 0.062713623046875, + "learning_rate": 1.1799892289781122e-08, + "loss": 6.274000043049455e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4685, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 56.37500238418579, + "completions/min_length": 20.875, + "epoch": 9.315711094564408, + "grad_norm": 2.3259162376705733, + "kl": 0.0579833984375, + "learning_rate": 1.1731854639515537e-08, + "loss": -0.00476849963888526, + "memory(GiB)": 94.21, + "reward": 1.8333333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8333333358168602, + "rewards/CineAccuracyORM/std": 0.19210398569703102, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4686, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.75, + "completions/mean_length": 71.57291746139526, + "completions/min_length": 24.25, + "epoch": 9.317696698932737, + "grad_norm": 0.0030422988140760532, + "kl": 0.055511474609375, + "learning_rate": 1.1664011379227789e-08, + "loss": 5.5543983762618154e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4687, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 66.04166841506958, + "completions/min_length": 22.0, + "epoch": 9.319682303301068, + "grad_norm": 2.1199713266306306, + "kl": 0.057281494140625, + "learning_rate": 1.1596362535927718e-08, + "loss": 5.731731653213501e-05, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4688, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.25, + "completions/mean_length": 62.57291889190674, + "completions/min_length": 21.25, + "epoch": 9.321667907669397, + "grad_norm": 0.004521773177240197, + "kl": 0.055633544921875, + "learning_rate": 1.1528908136547722e-08, + "loss": 5.5581323977094144e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4689, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.375, + "completions/mean_length": 60.614585399627686, + "completions/min_length": 20.875, + "epoch": 9.323653512037726, + "grad_norm": 0.002860042648146483, + "kl": 0.06781005859375, + "learning_rate": 1.1461648207942986e-08, + "loss": 6.774544453946874e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4690, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 85.19791889190674, + "completions/min_length": 28.75, + "epoch": 9.325639116406055, + "grad_norm": 0.002962467463999657, + "kl": 0.072509765625, + "learning_rate": 1.1394582776890982e-08, + "loss": 7.261200516950339e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4691, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.25, + "completions/mean_length": 60.80208444595337, + "completions/min_length": 22.25, + "epoch": 9.327624720774386, + "grad_norm": 0.0029881200130602494, + "kl": 0.052825927734375, + "learning_rate": 1.1327711870091961e-08, + "loss": 5.276673982734792e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4692, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.5, + "completions/mean_length": 73.84375286102295, + "completions/min_length": 28.875, + "epoch": 9.329610325142715, + "grad_norm": 0.0025675677347992082, + "kl": 0.063995361328125, + "learning_rate": 1.1261035514168681e-08, + "loss": 6.402004510164261e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4693, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 69.81250095367432, + "completions/min_length": 26.25, + "epoch": 9.331595929511044, + "grad_norm": 0.012555309886568123, + "kl": 0.08013916015625, + "learning_rate": 1.1194553735666357e-08, + "loss": 8.008559234440327e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4694, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 117.75, + "completions/mean_length": 57.875001430511475, + "completions/min_length": 21.375, + "epoch": 9.333581533879375, + "grad_norm": 0.006292320686275326, + "kl": 0.065582275390625, + "learning_rate": 1.1128266561052812e-08, + "loss": 6.566046795342118e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4695, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.0, + "completions/mean_length": 61.76041889190674, + "completions/min_length": 25.125, + "epoch": 9.335567138247704, + "grad_norm": 0.006171530791326735, + "kl": 0.069854736328125, + "learning_rate": 1.1062174016718385e-08, + "loss": 6.988817767705768e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4696, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.875, + "completions/mean_length": 69.70833539962769, + "completions/min_length": 18.75, + "epoch": 9.337552742616033, + "grad_norm": 0.0025086037698560403, + "kl": 0.0615234375, + "learning_rate": 1.0996276128975968e-08, + "loss": 6.139362812973559e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4697, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 196.375, + "completions/mean_length": 76.54166889190674, + "completions/min_length": 25.375, + "epoch": 9.339538346984364, + "grad_norm": 0.02737432467249086, + "kl": 0.077301025390625, + "learning_rate": 1.0930572924060965e-08, + "loss": 7.734097016509622e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4698, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.625, + "completions/mean_length": 63.468751430511475, + "completions/min_length": 21.625, + "epoch": 9.341523951352693, + "grad_norm": 0.006241313812205991, + "kl": 0.05694580078125, + "learning_rate": 1.0865064428131121e-08, + "loss": 5.693789717042819e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4699, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.125, + "completions/mean_length": 61.91666793823242, + "completions/min_length": 19.625, + "epoch": 9.343509555721022, + "grad_norm": 0.0036902125402077783, + "kl": 0.0611572265625, + "learning_rate": 1.0799750667266793e-08, + "loss": 6.114657298894599e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4700, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.75, + "completions/mean_length": 77.114586353302, + "completions/min_length": 29.5, + "epoch": 9.345495160089353, + "grad_norm": 0.0038632889387378755, + "kl": 0.061309814453125, + "learning_rate": 1.0734631667470851e-08, + "loss": 6.125131767475978e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4701, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.25, + "completions/mean_length": 68.614586353302, + "completions/min_length": 24.25, + "epoch": 9.347480764457682, + "grad_norm": 0.010004792351162892, + "kl": 0.075225830078125, + "learning_rate": 1.0669707454668386e-08, + "loss": 7.525092223659158e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4702, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 72.66666889190674, + "completions/min_length": 23.375, + "epoch": 9.349466368826011, + "grad_norm": 0.00234345875696059, + "kl": 0.0521240234375, + "learning_rate": 1.0604978054707336e-08, + "loss": 5.212753239902668e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4703, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.375, + "completions/mean_length": 72.03125238418579, + "completions/min_length": 19.5, + "epoch": 9.35145197319434, + "grad_norm": 0.005823573025601861, + "kl": 0.063385009765625, + "learning_rate": 1.0540443493357809e-08, + "loss": 6.330433097900823e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4704, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 115.375, + "completions/mean_length": 57.218751430511475, + "completions/min_length": 21.125, + "epoch": 9.353437577562671, + "grad_norm": 0.004899766590165031, + "kl": 0.077667236328125, + "learning_rate": 1.0476103796312252e-08, + "loss": 7.764453766867518e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4705, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.125, + "completions/mean_length": 73.44791841506958, + "completions/min_length": 26.0, + "epoch": 9.355423181931, + "grad_norm": 0.0041840631502931555, + "kl": 0.05816650390625, + "learning_rate": 1.0411958989185787e-08, + "loss": 5.819627403980121e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4706, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.75, + "completions/mean_length": 61.34375190734863, + "completions/min_length": 19.25, + "epoch": 9.35740878629933, + "grad_norm": 0.01497782313239926, + "kl": 0.0701904296875, + "learning_rate": 1.0348009097515764e-08, + "loss": 7.023427315289155e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4707, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 60.88541865348816, + "completions/min_length": 25.0, + "epoch": 9.35939439066766, + "grad_norm": 0.004822248479687538, + "kl": 0.057281494140625, + "learning_rate": 1.0284254146762095e-08, + "loss": 5.732703721150756e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4708, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.75, + "completions/mean_length": 63.864586353302, + "completions/min_length": 20.625, + "epoch": 9.361379995035989, + "grad_norm": 0.003641523648466179, + "kl": 0.069671630859375, + "learning_rate": 1.0220694162306919e-08, + "loss": 6.972481787670404e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4709, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 72.84375190734863, + "completions/min_length": 30.625, + "epoch": 9.363365599404318, + "grad_norm": 0.003934393481644837, + "kl": 0.061309814453125, + "learning_rate": 1.0157329169454831e-08, + "loss": 6.131265399744734e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4710, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.625, + "completions/mean_length": 65.489586353302, + "completions/min_length": 19.375, + "epoch": 9.365351203772649, + "grad_norm": 0.0027480278037472407, + "kl": 0.056884765625, + "learning_rate": 1.0094159193432817e-08, + "loss": 5.6855002185329795e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4711, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.25, + "completions/mean_length": 62.937501430511475, + "completions/min_length": 24.5, + "epoch": 9.367336808140978, + "grad_norm": 0.005520510805968863, + "kl": 0.06121826171875, + "learning_rate": 1.0031184259390201e-08, + "loss": 6.12553849350661e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4712, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 68.89583492279053, + "completions/min_length": 25.0, + "epoch": 9.369322412509307, + "grad_norm": 0.003988632374351008, + "kl": 0.076995849609375, + "learning_rate": 9.968404392398545e-09, + "loss": 7.705154712311924e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4713, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.625, + "completions/mean_length": 65.03125143051147, + "completions/min_length": 22.0, + "epoch": 9.371308016877638, + "grad_norm": 0.0042690457094859075, + "kl": 0.063934326171875, + "learning_rate": 9.905819617452015e-09, + "loss": 6.390989437932149e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4714, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.25, + "completions/mean_length": 70.98958539962769, + "completions/min_length": 29.5, + "epoch": 9.373293621245967, + "grad_norm": 0.018463913800104363, + "kl": 0.087982177734375, + "learning_rate": 9.843429959466797e-09, + "loss": 8.794294262770563e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4715, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 68.28125190734863, + "completions/min_length": 23.0, + "epoch": 9.375279225614296, + "grad_norm": 0.0023519606164086864, + "kl": 0.0545654296875, + "learning_rate": 9.781235443281632e-09, + "loss": 5.451842298498377e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4716, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 69.01041793823242, + "completions/min_length": 26.625, + "epoch": 9.377264829982625, + "grad_norm": 0.002444943005011755, + "kl": 0.070098876953125, + "learning_rate": 9.71923609365749e-09, + "loss": 7.008370448602363e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4717, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.375, + "completions/mean_length": 69.18750238418579, + "completions/min_length": 21.125, + "epoch": 9.379250434350956, + "grad_norm": 0.0033534467252879425, + "kl": 0.07012939453125, + "learning_rate": 9.657431935277627e-09, + "loss": 7.007951353443787e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4718, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 72.58333468437195, + "completions/min_length": 31.625, + "epoch": 9.381236038719285, + "grad_norm": 0.0025345169842983226, + "kl": 0.06121826171875, + "learning_rate": 9.595822992747582e-09, + "loss": 6.116722943261266e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4719, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.25, + "completions/mean_length": 74.47916746139526, + "completions/min_length": 25.875, + "epoch": 9.383221643087614, + "grad_norm": 0.0030452810210975035, + "kl": 0.075775146484375, + "learning_rate": 9.53440929059518e-09, + "loss": 7.574297342216596e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4720, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.875, + "completions/mean_length": 69.96875143051147, + "completions/min_length": 29.875, + "epoch": 9.385207247455945, + "grad_norm": 0.004986557874511063, + "kl": 0.06732177734375, + "learning_rate": 9.47319085327053e-09, + "loss": 6.73053291393444e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4721, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 61.67708444595337, + "completions/min_length": 18.25, + "epoch": 9.387192851824274, + "grad_norm": 0.00542078759092515, + "kl": 0.065216064453125, + "learning_rate": 9.412167705146024e-09, + "loss": 6.51925802230835e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4722, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 190.125, + "completions/mean_length": 81.77083587646484, + "completions/min_length": 29.5, + "epoch": 9.389178456192603, + "grad_norm": 0.004792953102784712, + "kl": 0.087127685546875, + "learning_rate": 9.351339870516229e-09, + "loss": 8.715003787074238e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4723, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.25, + "completions/mean_length": 70.01041841506958, + "completions/min_length": 20.5, + "epoch": 9.391164060560934, + "grad_norm": 0.002398671907439135, + "kl": 0.061004638671875, + "learning_rate": 9.29070737359805e-09, + "loss": 6.0993246734142303e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4724, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.625, + "completions/mean_length": 62.364585399627686, + "completions/min_length": 25.25, + "epoch": 9.393149664929263, + "grad_norm": 0.004064201325455677, + "kl": 0.0501861572265625, + "learning_rate": 9.230270238530625e-09, + "loss": 5.020498065277934e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4725, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.875, + "completions/mean_length": 65.00000238418579, + "completions/min_length": 27.75, + "epoch": 9.395135269297592, + "grad_norm": 0.0030962776888813653, + "kl": 0.067352294921875, + "learning_rate": 9.170028489375147e-09, + "loss": 6.734954513376579e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4726, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.125, + "completions/mean_length": 67.42708492279053, + "completions/min_length": 19.75, + "epoch": 9.397120873665923, + "grad_norm": 0.0028067142862925596, + "kl": 0.05645751953125, + "learning_rate": 9.10998215011527e-09, + "loss": 5.645436976919882e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4727, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.25, + "completions/mean_length": 70.15625286102295, + "completions/min_length": 27.75, + "epoch": 9.399106478034252, + "grad_norm": 0.0038383717763806387, + "kl": 0.06597900390625, + "learning_rate": 9.050131244656644e-09, + "loss": 6.601517816307023e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4728, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 65.64583492279053, + "completions/min_length": 26.25, + "epoch": 9.401092082402581, + "grad_norm": 0.004151834635545548, + "kl": 0.06622314453125, + "learning_rate": 8.990475796827269e-09, + "loss": 6.628276605624706e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4729, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 71.15625238418579, + "completions/min_length": 19.625, + "epoch": 9.40307768677091, + "grad_norm": 0.007674940628711286, + "kl": 0.07598876953125, + "learning_rate": 8.931015830377142e-09, + "loss": 7.603838457725942e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4730, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.0, + "completions/mean_length": 64.61458539962769, + "completions/min_length": 17.5, + "epoch": 9.405063291139241, + "grad_norm": 0.0026441533361155033, + "kl": 0.051788330078125, + "learning_rate": 8.871751368978553e-09, + "loss": 5.176360355108045e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4731, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 74.46875238418579, + "completions/min_length": 23.875, + "epoch": 9.40704889550757, + "grad_norm": 0.004012489639981499, + "kl": 0.070587158203125, + "learning_rate": 8.812682436226126e-09, + "loss": 7.064524106681347e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4732, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.125, + "completions/mean_length": 73.56250238418579, + "completions/min_length": 17.375, + "epoch": 9.409034499875899, + "grad_norm": 0.004597349706150479, + "kl": 0.062469482421875, + "learning_rate": 8.753809055636274e-09, + "loss": 6.2429258832708e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4733, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.25, + "completions/mean_length": 75.16666793823242, + "completions/min_length": 18.75, + "epoch": 9.41102010424423, + "grad_norm": 0.0034495975398877387, + "kl": 0.058807373046875, + "learning_rate": 8.695131250647802e-09, + "loss": 5.885846621822566e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4734, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 69.05208587646484, + "completions/min_length": 24.125, + "epoch": 9.413005708612559, + "grad_norm": 0.0029705571962952175, + "kl": 0.064056396484375, + "learning_rate": 8.636649044621635e-09, + "loss": 6.407409091480076e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4735, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.75, + "completions/mean_length": 69.70833492279053, + "completions/min_length": 22.75, + "epoch": 9.414991312980888, + "grad_norm": 0.004414790572474074, + "kl": 0.07684326171875, + "learning_rate": 8.578362460840705e-09, + "loss": 7.691128848819062e-05, + "memory(GiB)": 94.21, + "reward": 1.5625, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.5625, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4736, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 72.40625190734863, + "completions/min_length": 22.875, + "epoch": 9.416976917349219, + "grad_norm": 0.0034457476140950876, + "kl": 0.05279541015625, + "learning_rate": 8.520271522510225e-09, + "loss": 5.2802231948589906e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4737, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.875, + "completions/mean_length": 61.35416841506958, + "completions/min_length": 18.5, + "epoch": 9.418962521717548, + "grad_norm": 0.0030778779039292072, + "kl": 0.048736572265625, + "learning_rate": 8.462376252757474e-09, + "loss": 4.8792069719638675e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4738, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.875, + "completions/mean_length": 73.56250143051147, + "completions/min_length": 27.625, + "epoch": 9.420948126085877, + "grad_norm": 0.0035116356370842126, + "kl": 0.069671630859375, + "learning_rate": 8.404676674631684e-09, + "loss": 6.960570317460224e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4739, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.125, + "completions/mean_length": 67.29166841506958, + "completions/min_length": 21.0, + "epoch": 9.422933730454208, + "grad_norm": 0.0044157598974075216, + "kl": 0.05859375, + "learning_rate": 8.347172811104364e-09, + "loss": 5.867490472155623e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4740, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.875, + "completions/mean_length": 71.88541889190674, + "completions/min_length": 24.0, + "epoch": 9.424919334822537, + "grad_norm": 0.005147732832712355, + "kl": 0.07659912109375, + "learning_rate": 8.289864685069038e-09, + "loss": 7.666927558602765e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4741, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.5, + "completions/mean_length": 65.87500143051147, + "completions/min_length": 24.25, + "epoch": 9.426904939190866, + "grad_norm": 0.0031724021113877414, + "kl": 0.061981201171875, + "learning_rate": 8.232752319341174e-09, + "loss": 6.193173612700775e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4742, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.25, + "completions/mean_length": 65.2604193687439, + "completions/min_length": 20.875, + "epoch": 9.428890543559195, + "grad_norm": 0.9964460368223541, + "kl": 0.06390380859375, + "learning_rate": 8.175835736658587e-09, + "loss": -0.008546522818505764, + "memory(GiB)": 94.21, + "reward": 1.90625, + "reward_std": 0.03423266112804413, + "rewards/CineAccuracyORM/mean": 0.90625, + "rewards/CineAccuracyORM/std": 0.12181249633431435, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4743, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 69.46875238418579, + "completions/min_length": 26.25, + "epoch": 9.430876147927526, + "grad_norm": 0.00432103319440194, + "kl": 0.0723876953125, + "learning_rate": 8.119114959680929e-09, + "loss": 7.235478551592678e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4744, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 65.36458539962769, + "completions/min_length": 25.5, + "epoch": 9.432861752295855, + "grad_norm": 0.0024778085971514656, + "kl": 0.0513153076171875, + "learning_rate": 8.062590010989856e-09, + "loss": 5.134732782607898e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4745, + "train_speed(iter/s)": 0.022611 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 69.26041793823242, + "completions/min_length": 21.625, + "epoch": 9.434847356664184, + "grad_norm": 0.0036252210065991974, + "kl": 0.07421875, + "learning_rate": 8.006260913089314e-09, + "loss": 7.41733965696767e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4746, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.5, + "completions/mean_length": 81.91666841506958, + "completions/min_length": 27.0, + "epoch": 9.436832961032515, + "grad_norm": 0.005196864088847535, + "kl": 0.064544677734375, + "learning_rate": 7.950127688405028e-09, + "loss": 6.452913657994941e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4747, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.375, + "completions/mean_length": 72.18750190734863, + "completions/min_length": 25.875, + "epoch": 9.438818565400844, + "grad_norm": 0.0022245019094381596, + "kl": 0.06964111328125, + "learning_rate": 7.894190359284847e-09, + "loss": 6.965249485801905e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4748, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 66.62500238418579, + "completions/min_length": 23.75, + "epoch": 9.440804169769173, + "grad_norm": 0.004900162859226985, + "kl": 0.077392578125, + "learning_rate": 7.838448947998622e-09, + "loss": 7.730567449470982e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4749, + "train_speed(iter/s)": 0.02261 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.25, + "completions/mean_length": 60.364585399627686, + "completions/min_length": 25.0, + "epoch": 9.442789774137504, + "grad_norm": 0.005911987450447452, + "kl": 0.060821533203125, + "learning_rate": 7.782903476738156e-09, + "loss": 6.0835867770947516e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4750, + "train_speed(iter/s)": 0.022609 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 187.5, + "completions/mean_length": 78.60416889190674, + "completions/min_length": 22.5, + "epoch": 9.444775378505833, + "grad_norm": 0.0023933015481466626, + "kl": 0.063018798828125, + "learning_rate": 7.727553967617373e-09, + "loss": 6.30479771643877e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4751, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 74.62500190734863, + "completions/min_length": 23.875, + "epoch": 9.446760982874162, + "grad_norm": 0.003740260315444024, + "kl": 0.0643310546875, + "learning_rate": 7.672400442672088e-09, + "loss": 6.43393286736682e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4752, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 69.37500238418579, + "completions/min_length": 22.5, + "epoch": 9.448746587242493, + "grad_norm": 0.016130180695109995, + "kl": 0.078369140625, + "learning_rate": 7.617442923859962e-09, + "loss": 7.831267430447042e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4753, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.625, + "completions/mean_length": 69.44791889190674, + "completions/min_length": 22.0, + "epoch": 9.450732191610822, + "grad_norm": 0.004251603683274947, + "kl": 0.062957763671875, + "learning_rate": 7.562681433060936e-09, + "loss": 6.290737655945122e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4754, + "train_speed(iter/s)": 0.022608 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 59.77083444595337, + "completions/min_length": 18.125, + "epoch": 9.45271779597915, + "grad_norm": 0.003274038669974271, + "kl": 0.0611572265625, + "learning_rate": 7.508115992076625e-09, + "loss": 6.109652167651802e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4755, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 67.31250238418579, + "completions/min_length": 28.125, + "epoch": 9.45470340034748, + "grad_norm": 0.0024363351046544665, + "kl": 0.06524658203125, + "learning_rate": 7.453746622630707e-09, + "loss": 6.52419839752838e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4756, + "train_speed(iter/s)": 0.022607 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 195.0, + "completions/mean_length": 86.57291984558105, + "completions/min_length": 25.5, + "epoch": 9.45668900471581, + "grad_norm": 0.0030635568883373735, + "kl": 0.07037353515625, + "learning_rate": 7.3995733463688704e-09, + "loss": 7.033345173113048e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4757, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.125, + "completions/mean_length": 69.05208492279053, + "completions/min_length": 25.25, + "epoch": 9.45867460908414, + "grad_norm": 0.0033999767446773474, + "kl": 0.052490234375, + "learning_rate": 7.345596184858472e-09, + "loss": 5.249096648185514e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4758, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.75, + "completions/mean_length": 70.82291984558105, + "completions/min_length": 26.375, + "epoch": 9.460660213452469, + "grad_norm": 0.006809337357309042, + "kl": 0.074462890625, + "learning_rate": 7.291815159589154e-09, + "loss": 7.44737044442445e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4759, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.625, + "completions/mean_length": 61.583335399627686, + "completions/min_length": 26.125, + "epoch": 9.4626458178208, + "grad_norm": 0.005747356194290514, + "kl": 0.052032470703125, + "learning_rate": 7.238230291972236e-09, + "loss": 5.196863639866933e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4760, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 196.5, + "completions/mean_length": 78.22916889190674, + "completions/min_length": 27.75, + "epoch": 9.464631422189129, + "grad_norm": 0.004646863536684325, + "kl": 0.058380126953125, + "learning_rate": 7.18484160334093e-09, + "loss": 5.8331352192908525e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4761, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.0, + "completions/mean_length": 77.45833683013916, + "completions/min_length": 27.5, + "epoch": 9.466617026557458, + "grad_norm": 0.0024295272145912865, + "kl": 0.060028076171875, + "learning_rate": 7.131649114950511e-09, + "loss": 5.995645187795162e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4762, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.625, + "completions/mean_length": 75.20833539962769, + "completions/min_length": 25.125, + "epoch": 9.468602630925789, + "grad_norm": 0.006257636012124017, + "kl": 0.080169677734375, + "learning_rate": 7.078652847977984e-09, + "loss": 8.007712312974036e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4763, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 68.77083587646484, + "completions/min_length": 27.875, + "epoch": 9.470588235294118, + "grad_norm": 0.6966685689182638, + "kl": 0.0635986328125, + "learning_rate": 7.02585282352236e-09, + "loss": -0.0060100010596215725, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.10136350989341736, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4764, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 66.1354193687439, + "completions/min_length": 28.625, + "epoch": 9.472573839662447, + "grad_norm": 0.002478926389017352, + "kl": 0.070587158203125, + "learning_rate": 6.97324906260438e-09, + "loss": 7.055184687487781e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4765, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.875, + "completions/mean_length": 67.35416889190674, + "completions/min_length": 26.5, + "epoch": 9.474559444030778, + "grad_norm": 0.0028014915464666655, + "kl": 0.069366455078125, + "learning_rate": 6.920841586166737e-09, + "loss": 6.921897147549316e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4766, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 63.23958492279053, + "completions/min_length": 18.625, + "epoch": 9.476545048399107, + "grad_norm": 0.00392661371114939, + "kl": 0.06787109375, + "learning_rate": 6.868630415074017e-09, + "loss": 6.784753350075334e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4767, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 64.0104193687439, + "completions/min_length": 20.75, + "epoch": 9.478530652767436, + "grad_norm": 0.009148566525518756, + "kl": 0.06280517578125, + "learning_rate": 6.816615570112538e-09, + "loss": 6.287980795605108e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4768, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.25, + "completions/mean_length": 72.46875286102295, + "completions/min_length": 33.625, + "epoch": 9.480516257135765, + "grad_norm": 0.002558831474404926, + "kl": 0.066131591796875, + "learning_rate": 6.764797071990569e-09, + "loss": 6.613253935938701e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4769, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 70.63541889190674, + "completions/min_length": 26.875, + "epoch": 9.482501861504096, + "grad_norm": 0.003475030913036586, + "kl": 0.066680908203125, + "learning_rate": 6.713174941338162e-09, + "loss": 6.667665729764849e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4770, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 64.40625238418579, + "completions/min_length": 20.0, + "epoch": 9.484487465872425, + "grad_norm": 0.006578560369885898, + "kl": 0.0794677734375, + "learning_rate": 6.661749198707156e-09, + "loss": 7.930537685751915e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4771, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.75, + "completions/mean_length": 78.01041984558105, + "completions/min_length": 29.5, + "epoch": 9.486473070240754, + "grad_norm": 0.004318152639343838, + "kl": 0.05926513671875, + "learning_rate": 6.610519864571229e-09, + "loss": 5.9220179537078366e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4772, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.5, + "completions/mean_length": 74.70833683013916, + "completions/min_length": 20.875, + "epoch": 9.488458674609085, + "grad_norm": 0.0028507302305977194, + "kl": 0.061279296875, + "learning_rate": 6.559486959325955e-09, + "loss": 6.129234679974616e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4773, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.625, + "completions/mean_length": 67.145836353302, + "completions/min_length": 25.125, + "epoch": 9.490444278977414, + "grad_norm": 0.004541246729001895, + "kl": 0.06744384765625, + "learning_rate": 6.508650503288526e-09, + "loss": 6.748643500031903e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4774, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.25, + "completions/mean_length": 79.69791984558105, + "completions/min_length": 30.875, + "epoch": 9.492429883345743, + "grad_norm": 0.002368792328803445, + "kl": 0.0590972900390625, + "learning_rate": 6.458010516698087e-09, + "loss": 5.90520903642755e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4775, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.375, + "completions/mean_length": 73.19791746139526, + "completions/min_length": 23.25, + "epoch": 9.494415487714074, + "grad_norm": 0.0032097416247676733, + "kl": 0.076904296875, + "learning_rate": 6.407567019715454e-09, + "loss": 7.692919461987913e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4776, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.125, + "completions/mean_length": 73.0104193687439, + "completions/min_length": 28.0, + "epoch": 9.496401092082403, + "grad_norm": 0.005706181421722418, + "kl": 0.08062744140625, + "learning_rate": 6.357320032423285e-09, + "loss": 8.064581197686493e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4777, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 75.65625286102295, + "completions/min_length": 28.625, + "epoch": 9.498386696450732, + "grad_norm": 0.0030724518483858892, + "kl": 0.061187744140625, + "learning_rate": 6.307269574825969e-09, + "loss": 6.129412213340402e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4778, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.375, + "completions/mean_length": 78.34375190734863, + "completions/min_length": 27.75, + "epoch": 9.500372300819063, + "grad_norm": 0.002760299889754672, + "kl": 0.057586669921875, + "learning_rate": 6.257415666849674e-09, + "loss": 5.761609645560384e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4779, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.375, + "completions/mean_length": 61.60416841506958, + "completions/min_length": 18.5, + "epoch": 9.502357905187392, + "grad_norm": 0.002451652146583488, + "kl": 0.0584716796875, + "learning_rate": 6.2077583283423055e-09, + "loss": 5.843268445460126e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4780, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.125, + "completions/mean_length": 60.76041841506958, + "completions/min_length": 21.375, + "epoch": 9.50434350955572, + "grad_norm": 0.0037879893359206015, + "kl": 0.056854248046875, + "learning_rate": 6.158297579073546e-09, + "loss": 5.685799624188803e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4781, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.0, + "completions/mean_length": 66.59375190734863, + "completions/min_length": 21.875, + "epoch": 9.50632911392405, + "grad_norm": 0.009577424421303878, + "kl": 0.07562255859375, + "learning_rate": 6.109033438734646e-09, + "loss": 7.558182551292703e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4782, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 70.86458539962769, + "completions/min_length": 19.75, + "epoch": 9.50831471829238, + "grad_norm": 0.006677649818823071, + "kl": 0.064178466796875, + "learning_rate": 6.059965926938859e-09, + "loss": 6.418031989596784e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4783, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 68.2604193687439, + "completions/min_length": 26.625, + "epoch": 9.51030032266071, + "grad_norm": 0.0046917213130720755, + "kl": 0.058197021484375, + "learning_rate": 6.01109506322095e-09, + "loss": 5.8211277064401656e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4784, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.375, + "completions/mean_length": 62.57291793823242, + "completions/min_length": 19.875, + "epoch": 9.512285927029039, + "grad_norm": 0.003941936159635688, + "kl": 0.059051513671875, + "learning_rate": 5.96242086703741e-09, + "loss": 5.8996807638322935e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4785, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.125, + "completions/mean_length": 72.28125286102295, + "completions/min_length": 26.75, + "epoch": 9.51427153139737, + "grad_norm": 0.0027272384283332333, + "kl": 0.059722900390625, + "learning_rate": 5.913943357766571e-09, + "loss": 5.975658859824762e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4786, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 64.64583492279053, + "completions/min_length": 22.0, + "epoch": 9.516257135765699, + "grad_norm": 0.003102199991682703, + "kl": 0.0665283203125, + "learning_rate": 5.8656625547082725e-09, + "loss": 6.657680933130905e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4787, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.5, + "completions/mean_length": 68.07291984558105, + "completions/min_length": 29.75, + "epoch": 9.518242740134028, + "grad_norm": 0.004569857660072722, + "kl": 0.06243896484375, + "learning_rate": 5.817578477084251e-09, + "loss": 6.2411098042503e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4788, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.25, + "completions/mean_length": 59.07291793823242, + "completions/min_length": 22.75, + "epoch": 9.520228344502359, + "grad_norm": 0.00261731264564144, + "kl": 0.05914306640625, + "learning_rate": 5.769691144037692e-09, + "loss": 5.9095902543049306e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4789, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.125, + "completions/mean_length": 62.510417461395264, + "completions/min_length": 18.875, + "epoch": 9.522213948870688, + "grad_norm": 0.0032678584605705784, + "kl": 0.067779541015625, + "learning_rate": 5.722000574633568e-09, + "loss": 6.770661275368184e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4790, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 68.34375286102295, + "completions/min_length": 18.625, + "epoch": 9.524199553239017, + "grad_norm": 0.0033231228196449296, + "kl": 0.059814453125, + "learning_rate": 5.6745067878585796e-09, + "loss": 5.9849495301023126e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4791, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.875, + "completions/mean_length": 70.19791984558105, + "completions/min_length": 24.0, + "epoch": 9.526185157607348, + "grad_norm": 0.002759660461810293, + "kl": 0.06658935546875, + "learning_rate": 5.627209802620935e-09, + "loss": 6.660568033112213e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4792, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 70.35416841506958, + "completions/min_length": 22.0, + "epoch": 9.528170761975677, + "grad_norm": 0.0032470034251468743, + "kl": 0.060150146484375, + "learning_rate": 5.580109637750685e-09, + "loss": 6.012981612002477e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4793, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.0, + "completions/mean_length": 65.62500190734863, + "completions/min_length": 25.625, + "epoch": 9.530156366344006, + "grad_norm": 0.0025214673038298674, + "kl": 0.074920654296875, + "learning_rate": 5.533206311999328e-09, + "loss": 7.481173815904185e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4794, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.75, + "completions/mean_length": 62.10416889190674, + "completions/min_length": 17.75, + "epoch": 9.532141970712335, + "grad_norm": 0.005986730512673897, + "kl": 0.093017578125, + "learning_rate": 5.486499844040093e-09, + "loss": 9.28775843931362e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4795, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.75, + "completions/mean_length": 59.843751430511475, + "completions/min_length": 19.0, + "epoch": 9.534127575080666, + "grad_norm": 0.0026378644399682803, + "kl": 0.0726318359375, + "learning_rate": 5.4399902524678855e-09, + "loss": 7.262932194862515e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4796, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.375, + "completions/mean_length": 62.76041889190674, + "completions/min_length": 27.25, + "epoch": 9.536113179448995, + "grad_norm": 1.7010649115289804, + "kl": 0.0594482421875, + "learning_rate": 5.393677555799114e-09, + "loss": 0.004848122596740723, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.770833333954215, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4797, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 71.84375238418579, + "completions/min_length": 27.5, + "epoch": 9.538098783817324, + "grad_norm": 0.0037426321961469547, + "kl": 0.070343017578125, + "learning_rate": 5.347561772471809e-09, + "loss": 7.031289715087041e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4798, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.875, + "completions/mean_length": 60.52083492279053, + "completions/min_length": 22.75, + "epoch": 9.540084388185655, + "grad_norm": 0.0040342674013660925, + "kl": 0.06365966796875, + "learning_rate": 5.301642920845672e-09, + "loss": 6.362433487083763e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4799, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.125, + "completions/mean_length": 63.270836353302, + "completions/min_length": 24.0, + "epoch": 9.542069992553984, + "grad_norm": 0.0023877154705503655, + "kl": 0.05242919921875, + "learning_rate": 5.255921019202081e-09, + "loss": 5.241192411631346e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4800, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 183.625, + "completions/mean_length": 68.73958539962769, + "completions/min_length": 17.5, + "epoch": 9.544055596922313, + "grad_norm": 0.002967951181710472, + "kl": 0.064453125, + "learning_rate": 5.210396085743751e-09, + "loss": 6.44532628939487e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4801, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 184.5, + "completions/mean_length": 77.59375190734863, + "completions/min_length": 22.625, + "epoch": 9.546041201290643, + "grad_norm": 0.003174240797514496, + "kl": 0.0640869140625, + "learning_rate": 5.165068138595241e-09, + "loss": 6.404802843462676e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4802, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.125, + "completions/mean_length": 70.4479193687439, + "completions/min_length": 23.125, + "epoch": 9.548026805658973, + "grad_norm": 0.008426793584497974, + "kl": 0.08935546875, + "learning_rate": 5.119937195802504e-09, + "loss": 8.934881043387577e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4803, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.0, + "completions/mean_length": 64.03125095367432, + "completions/min_length": 22.5, + "epoch": 9.550012410027302, + "grad_norm": 0.005121012783382403, + "kl": 0.087554931640625, + "learning_rate": 5.0750032753331674e-09, + "loss": 8.746125968173146e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4804, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.125, + "completions/mean_length": 62.08333492279053, + "completions/min_length": 21.625, + "epoch": 9.551998014395632, + "grad_norm": 0.0030184847959814185, + "kl": 0.052642822265625, + "learning_rate": 5.0302663950764216e-09, + "loss": 5.265386062092148e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4805, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.625, + "completions/mean_length": 67.39583539962769, + "completions/min_length": 23.625, + "epoch": 9.553983618763962, + "grad_norm": 0.0025969432879313775, + "kl": 0.07159423828125, + "learning_rate": 4.985726572842852e-09, + "loss": 7.152646139729768e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4806, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.875, + "completions/mean_length": 69.31250143051147, + "completions/min_length": 20.625, + "epoch": 9.55596922313229, + "grad_norm": 0.004716517405570303, + "kl": 0.06732177734375, + "learning_rate": 4.941383826364831e-09, + "loss": 6.741842662449926e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4807, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.125, + "completions/mean_length": 64.54166793823242, + "completions/min_length": 15.375, + "epoch": 9.55795482750062, + "grad_norm": 0.0048574903224478045, + "kl": 0.055694580078125, + "learning_rate": 4.8972381732961256e-09, + "loss": 5.5724311096128076e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4808, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.875, + "completions/mean_length": 69.65625143051147, + "completions/min_length": 18.5, + "epoch": 9.55994043186895, + "grad_norm": 0.0023546975398983793, + "kl": 0.052581787109375, + "learning_rate": 4.853289631212065e-09, + "loss": 5.252580012893304e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4809, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 60.343751430511475, + "completions/min_length": 22.875, + "epoch": 9.56192603623728, + "grad_norm": 0.004855094272041856, + "kl": 0.053863525390625, + "learning_rate": 4.809538217609488e-09, + "loss": 5.37940941285342e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4810, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 182.125, + "completions/mean_length": 75.95833683013916, + "completions/min_length": 21.0, + "epoch": 9.563911640605609, + "grad_norm": 0.0028386396233875248, + "kl": 0.064697265625, + "learning_rate": 4.7659839499067934e-09, + "loss": 6.476941052824259e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4811, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.25, + "completions/mean_length": 75.52083683013916, + "completions/min_length": 22.125, + "epoch": 9.56589724497394, + "grad_norm": 0.004491564962713142, + "kl": 0.063262939453125, + "learning_rate": 4.722626845443778e-09, + "loss": 6.32830779068172e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4812, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 171.25, + "completions/mean_length": 79.48958539962769, + "completions/min_length": 30.875, + "epoch": 9.567882849342269, + "grad_norm": 0.005130581328552408, + "kl": 0.06463623046875, + "learning_rate": 4.679466921481912e-09, + "loss": 6.46094122203067e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4813, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.875, + "completions/mean_length": 72.56250190734863, + "completions/min_length": 23.125, + "epoch": 9.569868453710598, + "grad_norm": 0.006116430422971592, + "kl": 0.084075927734375, + "learning_rate": 4.636504195204061e-09, + "loss": 8.414929470745847e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4814, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.625, + "completions/mean_length": 78.26041984558105, + "completions/min_length": 29.625, + "epoch": 9.571854058078928, + "grad_norm": 0.002695661182693227, + "kl": 0.0675048828125, + "learning_rate": 4.593738683714654e-09, + "loss": 6.75319679430686e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4815, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.25, + "completions/mean_length": 64.0416693687439, + "completions/min_length": 21.5, + "epoch": 9.573839662447257, + "grad_norm": 0.007946555617805547, + "kl": 0.06597900390625, + "learning_rate": 4.5511704040394615e-09, + "loss": 6.597238825634122e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4816, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.625, + "completions/mean_length": 84.29166793823242, + "completions/min_length": 28.625, + "epoch": 9.575825266815587, + "grad_norm": 0.0034478098868416904, + "kl": 0.082550048828125, + "learning_rate": 4.5087993731259266e-09, + "loss": 8.244122727774084e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4817, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 68.38541841506958, + "completions/min_length": 18.375, + "epoch": 9.577810871183917, + "grad_norm": 0.0027447318827082757, + "kl": 0.0618896484375, + "learning_rate": 4.4666256078427775e-09, + "loss": 6.19066267972812e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4818, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 174.5, + "completions/mean_length": 72.42708587646484, + "completions/min_length": 15.875, + "epoch": 9.579796475552246, + "grad_norm": 0.006937354291975717, + "kl": 0.079833984375, + "learning_rate": 4.424649124980307e-09, + "loss": 7.977255154401064e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4819, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.75, + "completions/mean_length": 65.1041693687439, + "completions/min_length": 26.125, + "epoch": 9.581782079920576, + "grad_norm": 0.0025989805262227332, + "kl": 0.081787109375, + "learning_rate": 4.382869941250311e-09, + "loss": 8.165553299477324e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4820, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 187.25, + "completions/mean_length": 81.14583587646484, + "completions/min_length": 21.75, + "epoch": 9.583767684288905, + "grad_norm": 0.002489918925010924, + "kl": 0.059967041015625, + "learning_rate": 4.341288073285876e-09, + "loss": 5.999734275974333e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4821, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 70.56250238418579, + "completions/min_length": 21.75, + "epoch": 9.585753288657235, + "grad_norm": 0.004836301211722175, + "kl": 0.072784423828125, + "learning_rate": 4.299903537641703e-09, + "loss": 7.275366806425154e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4822, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 69.11458587646484, + "completions/min_length": 28.125, + "epoch": 9.587738893025564, + "grad_norm": 0.003333732682628272, + "kl": 0.0552978515625, + "learning_rate": 4.258716350793834e-09, + "loss": 5.5283759138546884e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4823, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.625, + "completions/mean_length": 62.708335399627686, + "completions/min_length": 17.75, + "epoch": 9.589724497393894, + "grad_norm": 0.00888327888230167, + "kl": 0.079925537109375, + "learning_rate": 4.2177265291397646e-09, + "loss": 7.981668022694066e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4824, + "train_speed(iter/s)": 0.022599 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.875, + "completions/mean_length": 72.80208492279053, + "completions/min_length": 23.875, + "epoch": 9.591710101762224, + "grad_norm": 0.005645911417661935, + "kl": 0.0799560546875, + "learning_rate": 4.176934088998496e-09, + "loss": 7.997317879926413e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4825, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 116.75, + "completions/mean_length": 62.59375238418579, + "completions/min_length": 22.875, + "epoch": 9.593695706130553, + "grad_norm": 0.0038043380761648513, + "kl": 0.058746337890625, + "learning_rate": 4.13633904661026e-09, + "loss": 5.867323125130497e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4826, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 71.72916889190674, + "completions/min_length": 21.875, + "epoch": 9.595681310498883, + "grad_norm": 0.0027613000382040735, + "kl": 0.062347412109375, + "learning_rate": 4.095941418136795e-09, + "loss": 6.241798109840602e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4827, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.125, + "completions/mean_length": 71.30208539962769, + "completions/min_length": 26.5, + "epoch": 9.597666914867213, + "grad_norm": 1.2831206839109994, + "kl": 0.06915283203125, + "learning_rate": 4.0557412196613464e-09, + "loss": 0.0050938064232468605, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4828, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 64.54166889190674, + "completions/min_length": 26.125, + "epoch": 9.599652519235542, + "grad_norm": 0.0024774840526502646, + "kl": 0.0501708984375, + "learning_rate": 4.015738467188501e-09, + "loss": 5.011974644730799e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4829, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.125, + "completions/mean_length": 70.34375333786011, + "completions/min_length": 23.0, + "epoch": 9.601638123603871, + "grad_norm": 0.00409935102665403, + "kl": 0.065887451171875, + "learning_rate": 3.975933176644075e-09, + "loss": 6.592516729142517e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4830, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.625, + "completions/mean_length": 62.93750238418579, + "completions/min_length": 27.0, + "epoch": 9.603623727972202, + "grad_norm": 0.00564334083552613, + "kl": 0.07342529296875, + "learning_rate": 3.936325363875503e-09, + "loss": 7.3415765655227e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4831, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 173.0, + "completions/mean_length": 78.87500143051147, + "completions/min_length": 21.5, + "epoch": 9.605609332340531, + "grad_norm": 0.018192002257694618, + "kl": 0.064453125, + "learning_rate": 3.896915044651505e-09, + "loss": 6.435334216803312e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4832, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.625, + "completions/mean_length": 69.54166793823242, + "completions/min_length": 25.5, + "epoch": 9.60759493670886, + "grad_norm": 0.004123358615490844, + "kl": 0.066864013671875, + "learning_rate": 3.8577022346621415e-09, + "loss": 6.688522262265906e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4833, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.5, + "completions/mean_length": 65.20833587646484, + "completions/min_length": 23.375, + "epoch": 9.60958054107719, + "grad_norm": 0.008269054210156446, + "kl": 0.073883056640625, + "learning_rate": 3.818686949518812e-09, + "loss": 7.384506898233667e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4834, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 64.20833444595337, + "completions/min_length": 20.75, + "epoch": 9.61156614544552, + "grad_norm": 0.0023979478233232825, + "kl": 0.054718017578125, + "learning_rate": 3.779869204754427e-09, + "loss": 5.4725631343899295e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4835, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.625, + "completions/mean_length": 67.83333492279053, + "completions/min_length": 27.25, + "epoch": 9.61355174981385, + "grad_norm": 1.6619016595638105, + "kl": 0.0772705078125, + "learning_rate": 3.741249015823178e-09, + "loss": 0.010840097442269325, + "memory(GiB)": 94.21, + "reward": 1.8541666716337204, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.8541666716337204, + "rewards/CineAccuracyORM/std": 0.17921441793441772, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4836, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.25, + "completions/mean_length": 69.59375333786011, + "completions/min_length": 27.875, + "epoch": 9.615537354182178, + "grad_norm": 0.003926457680621946, + "kl": 0.062530517578125, + "learning_rate": 3.7028263981005446e-09, + "loss": 6.253214087337255e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4837, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 101.0, + "completions/mean_length": 50.30208492279053, + "completions/min_length": 21.25, + "epoch": 9.61752295855051, + "grad_norm": 0.006517851772604092, + "kl": 0.053436279296875, + "learning_rate": 3.664601366883291e-09, + "loss": 5.342508666217327e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4838, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.75, + "completions/mean_length": 77.68750143051147, + "completions/min_length": 26.25, + "epoch": 9.619508562918838, + "grad_norm": 0.002781012486535216, + "kl": 0.073577880859375, + "learning_rate": 3.6265739373897985e-09, + "loss": 7.359175651799887e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4839, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.125, + "completions/mean_length": 60.427085638046265, + "completions/min_length": 21.25, + "epoch": 9.621494167287167, + "grad_norm": 0.0028123731067994664, + "kl": 0.06341552734375, + "learning_rate": 3.5887441247594574e-09, + "loss": 6.342627602862194e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4840, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 168.875, + "completions/mean_length": 73.66666841506958, + "completions/min_length": 23.0, + "epoch": 9.623479771655498, + "grad_norm": 0.006917475881395366, + "kl": 0.073272705078125, + "learning_rate": 3.5511119440531644e-09, + "loss": 7.335221744142473e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4841, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 72.46875238418579, + "completions/min_length": 25.625, + "epoch": 9.625465376023827, + "grad_norm": 0.00229037342357923, + "kl": 0.06781005859375, + "learning_rate": 3.5136774102531574e-09, + "loss": 6.784041761420667e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4842, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.75, + "completions/mean_length": 67.79166889190674, + "completions/min_length": 24.0, + "epoch": 9.627450980392156, + "grad_norm": 0.0030190256454811354, + "kl": 0.061920166015625, + "learning_rate": 3.4764405382628483e-09, + "loss": 6.18806152488105e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4843, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.25, + "completions/mean_length": 73.45833539962769, + "completions/min_length": 29.125, + "epoch": 9.629436584760487, + "grad_norm": 0.0051352166923351915, + "kl": 0.070159912109375, + "learning_rate": 3.4394013429071e-09, + "loss": 7.005871157161891e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4844, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 111.25, + "completions/mean_length": 61.01041793823242, + "completions/min_length": 28.875, + "epoch": 9.631422189128816, + "grad_norm": 0.004315634692808378, + "kl": 0.088134765625, + "learning_rate": 3.402559838931951e-09, + "loss": 8.816139597911388e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4845, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 62.90625333786011, + "completions/min_length": 26.5, + "epoch": 9.633407793497145, + "grad_norm": 0.005037263652277187, + "kl": 0.05731201171875, + "learning_rate": 3.3659160410047792e-09, + "loss": 5.7321150961797684e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4846, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.375, + "completions/mean_length": 76.45833587646484, + "completions/min_length": 27.75, + "epoch": 9.635393397865474, + "grad_norm": 0.005254992123200667, + "kl": 0.06842041015625, + "learning_rate": 3.329469963714249e-09, + "loss": 6.844510062364861e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4847, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 114.625, + "completions/mean_length": 57.88541841506958, + "completions/min_length": 17.25, + "epoch": 9.637379002233805, + "grad_norm": 0.005647927697036055, + "kl": 0.061187744140625, + "learning_rate": 3.293221621570419e-09, + "loss": 6.114768621046096e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4848, + "train_speed(iter/s)": 0.022606 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.125, + "completions/mean_length": 67.00000333786011, + "completions/min_length": 20.0, + "epoch": 9.639364606602134, + "grad_norm": 0.012355383865799277, + "kl": 0.08575439453125, + "learning_rate": 3.2571710290044684e-09, + "loss": 8.574880484957248e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4849, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.25, + "completions/mean_length": 65.61458444595337, + "completions/min_length": 18.125, + "epoch": 9.641350210970463, + "grad_norm": 0.002453766636556973, + "kl": 0.059783935546875, + "learning_rate": 3.2213182003689164e-09, + "loss": 5.9788675571326166e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4850, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.0, + "completions/mean_length": 64.86458396911621, + "completions/min_length": 20.0, + "epoch": 9.643335815338794, + "grad_norm": 0.0034688709493888945, + "kl": 0.074554443359375, + "learning_rate": 3.185663149937512e-09, + "loss": 7.457609171979129e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4851, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.125, + "completions/mean_length": 68.53125238418579, + "completions/min_length": 26.875, + "epoch": 9.645321419707123, + "grad_norm": 0.005565369656437917, + "kl": 0.056610107421875, + "learning_rate": 3.150205891905344e-09, + "loss": 5.6664528528926894e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4852, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 67.333336353302, + "completions/min_length": 18.625, + "epoch": 9.647307024075452, + "grad_norm": 0.0028327988180825068, + "kl": 0.050994873046875, + "learning_rate": 3.114946440388677e-09, + "loss": 5.107714969199151e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4853, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.5, + "completions/mean_length": 67.70833492279053, + "completions/min_length": 26.75, + "epoch": 9.649292628443783, + "grad_norm": 0.005824612746408752, + "kl": 0.067352294921875, + "learning_rate": 3.079884809425004e-09, + "loss": 6.741640390828252e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4854, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.5, + "completions/mean_length": 70.38541841506958, + "completions/min_length": 24.375, + "epoch": 9.651278232812112, + "grad_norm": 0.0035747472146432646, + "kl": 0.0963134765625, + "learning_rate": 3.0450210129732147e-09, + "loss": 9.628474799683318e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4855, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 167.75, + "completions/mean_length": 71.37500333786011, + "completions/min_length": 22.25, + "epoch": 9.653263837180441, + "grad_norm": 0.011097882465219129, + "kl": 0.080352783203125, + "learning_rate": 3.0103550649132616e-09, + "loss": 8.038054511416703e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4856, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.0, + "completions/mean_length": 68.73958492279053, + "completions/min_length": 26.875, + "epoch": 9.655249441548772, + "grad_norm": 0.003530127286092695, + "kl": 0.057464599609375, + "learning_rate": 2.9758869790463827e-09, + "loss": 5.7426324929110706e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4857, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 140.375, + "completions/mean_length": 59.97916793823242, + "completions/min_length": 20.0, + "epoch": 9.657235045917101, + "grad_norm": 0.0041520817658141215, + "kl": 0.05999755859375, + "learning_rate": 2.941616769095101e-09, + "loss": 6.0041958931833506e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4858, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.125, + "completions/mean_length": 66.73958539962769, + "completions/min_length": 30.0, + "epoch": 9.65922065028543, + "grad_norm": 0.0024100362084596956, + "kl": 0.05291748046875, + "learning_rate": 2.9075444487031142e-09, + "loss": 5.293916547088884e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4859, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.375, + "completions/mean_length": 69.11458492279053, + "completions/min_length": 21.875, + "epoch": 9.66120625465376, + "grad_norm": 0.0035572645378273977, + "kl": 0.059783935546875, + "learning_rate": 2.873670031435349e-09, + "loss": 5.982501897960901e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4860, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.75, + "completions/mean_length": 71.87500286102295, + "completions/min_length": 20.0, + "epoch": 9.66319185902209, + "grad_norm": 0.0024291370363734585, + "kl": 0.060791015625, + "learning_rate": 2.839993530777851e-09, + "loss": 6.076978024793789e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4861, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 77.58333492279053, + "completions/min_length": 25.875, + "epoch": 9.66517746339042, + "grad_norm": 0.002513044364039413, + "kl": 0.080718994140625, + "learning_rate": 2.806514960138062e-09, + "loss": 8.070183685049415e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4862, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 70.84375190734863, + "completions/min_length": 24.25, + "epoch": 9.667163067758748, + "grad_norm": 0.0029312276875060783, + "kl": 0.0899658203125, + "learning_rate": 2.773234332844487e-09, + "loss": 8.996226824820042e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4863, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.0, + "completions/mean_length": 72.58333492279053, + "completions/min_length": 22.25, + "epoch": 9.66914867212708, + "grad_norm": 0.0030330969875835712, + "kl": 0.06146240234375, + "learning_rate": 2.7401516621468057e-09, + "loss": 6.14297459833324e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4864, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.0, + "completions/mean_length": 70.91666889190674, + "completions/min_length": 32.125, + "epoch": 9.671134276495408, + "grad_norm": 0.002601525870039028, + "kl": 0.064453125, + "learning_rate": 2.7072669612159816e-09, + "loss": 6.442826270358637e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4865, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.125, + "completions/mean_length": 68.67708492279053, + "completions/min_length": 20.0, + "epoch": 9.673119880863737, + "grad_norm": 0.002564356415336455, + "kl": 0.0621490478515625, + "learning_rate": 2.674580243144153e-09, + "loss": 6.210828723851591e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4866, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 67.33333539962769, + "completions/min_length": 27.25, + "epoch": 9.675105485232068, + "grad_norm": 0.00246052440736179, + "kl": 0.060516357421875, + "learning_rate": 2.642091520944523e-09, + "loss": 6.050724186934531e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4867, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.0, + "completions/mean_length": 74.89583492279053, + "completions/min_length": 27.125, + "epoch": 9.677091089600397, + "grad_norm": 0.002951842850527218, + "kl": 0.095184326171875, + "learning_rate": 2.609800807551521e-09, + "loss": 9.522426989860833e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4868, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.625, + "completions/mean_length": 67.01041984558105, + "completions/min_length": 24.75, + "epoch": 9.679076693968726, + "grad_norm": 0.02051451334704056, + "kl": 0.060943603515625, + "learning_rate": 2.5777081158209203e-09, + "loss": 6.084067717893049e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4869, + "train_speed(iter/s)": 0.022605 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 79.61458683013916, + "completions/min_length": 23.375, + "epoch": 9.681062298337057, + "grad_norm": 0.00452776540368591, + "kl": 0.07049560546875, + "learning_rate": 2.5458134585293333e-09, + "loss": 7.046831160550937e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4870, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 64.86458492279053, + "completions/min_length": 19.0, + "epoch": 9.683047902705386, + "grad_norm": 0.024464361869878246, + "kl": 0.095489501953125, + "learning_rate": 2.5141168483748253e-09, + "loss": 9.550269896863028e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4871, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.625, + "completions/mean_length": 68.28125190734863, + "completions/min_length": 24.375, + "epoch": 9.685033507073715, + "grad_norm": 0.002328423368921501, + "kl": 0.063751220703125, + "learning_rate": 2.4826182979764686e-09, + "loss": 6.374135409714654e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4872, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.75, + "completions/mean_length": 65.77083492279053, + "completions/min_length": 26.125, + "epoch": 9.687019111442044, + "grad_norm": 0.0033201898603226433, + "kl": 0.053680419921875, + "learning_rate": 2.4513178198744542e-09, + "loss": 5.368553684093058e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4873, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 65.64583539962769, + "completions/min_length": 19.75, + "epoch": 9.689004715810375, + "grad_norm": 0.0031306706094000893, + "kl": 0.054962158203125, + "learning_rate": 2.420215426530259e-09, + "loss": 5.499834514921531e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4874, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.0, + "completions/mean_length": 67.43750143051147, + "completions/min_length": 25.625, + "epoch": 9.690990320178704, + "grad_norm": 0.8022520596962549, + "kl": 0.073455810546875, + "learning_rate": 2.3893111303262547e-09, + "loss": -0.005667926277965307, + "memory(GiB)": 94.21, + "reward": 1.8229166716337204, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8229166716337204, + "rewards/CineAccuracyORM/std": 0.19492431730031967, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4875, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.125, + "completions/mean_length": 66.91666793823242, + "completions/min_length": 19.125, + "epoch": 9.692975924547033, + "grad_norm": 0.31908655292375704, + "kl": 0.52294921875, + "learning_rate": 2.3586049435663224e-09, + "loss": 0.0005212367395870388, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4876, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 60.8854193687439, + "completions/min_length": 18.25, + "epoch": 9.694961528915364, + "grad_norm": 0.002394973473424306, + "kl": 0.05340576171875, + "learning_rate": 2.328096878475072e-09, + "loss": 5.3393385314848274e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4877, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.375, + "completions/mean_length": 78.25000238418579, + "completions/min_length": 29.875, + "epoch": 9.696947133283693, + "grad_norm": 0.002913896362997115, + "kl": 0.07086181640625, + "learning_rate": 2.29778694719851e-09, + "loss": 7.087054109433666e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4878, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 69.87500190734863, + "completions/min_length": 23.125, + "epoch": 9.698932737652022, + "grad_norm": 0.0032494313577079194, + "kl": 0.05859375, + "learning_rate": 2.2676751618036505e-09, + "loss": 5.8602083299774677e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4879, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.5, + "completions/mean_length": 74.56250190734863, + "completions/min_length": 24.5, + "epoch": 9.700918342020353, + "grad_norm": 0.002577562131957097, + "kl": 0.067047119140625, + "learning_rate": 2.2377615342785705e-09, + "loss": 6.701111851725727e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4880, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.875, + "completions/mean_length": 67.26041984558105, + "completions/min_length": 24.25, + "epoch": 9.702903946388682, + "grad_norm": 0.0046430579500008895, + "kl": 0.0819854736328125, + "learning_rate": 2.2080460765326325e-09, + "loss": 8.187860657926649e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4881, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.25, + "completions/mean_length": 75.04166793823242, + "completions/min_length": 26.125, + "epoch": 9.704889550757011, + "grad_norm": 0.0037047414384949323, + "kl": 0.0662841796875, + "learning_rate": 2.1785288003960954e-09, + "loss": 6.629362178500742e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4882, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.75, + "completions/mean_length": 67.04166841506958, + "completions/min_length": 23.625, + "epoch": 9.706875155125342, + "grad_norm": 0.0033560374313336104, + "kl": 0.06488037109375, + "learning_rate": 2.1492097176205036e-09, + "loss": 6.487013160949573e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4883, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 70.77083539962769, + "completions/min_length": 21.75, + "epoch": 9.708860759493671, + "grad_norm": 0.004543794419514337, + "kl": 0.063690185546875, + "learning_rate": 2.1200888398783532e-09, + "loss": 6.36466356809251e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4884, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.5, + "completions/mean_length": 73.79166889190674, + "completions/min_length": 24.125, + "epoch": 9.710846363862, + "grad_norm": 0.003299978949370532, + "kl": 0.055419921875, + "learning_rate": 2.0911661787633152e-09, + "loss": 5.542510189116001e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4885, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 175.5, + "completions/mean_length": 75.3229193687439, + "completions/min_length": 26.125, + "epoch": 9.71283196823033, + "grad_norm": 0.0044365228436324036, + "kl": 0.08062744140625, + "learning_rate": 2.0624417457900667e-09, + "loss": 8.057904778979719e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4886, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 65.364586353302, + "completions/min_length": 24.875, + "epoch": 9.71481757259866, + "grad_norm": 0.002772278052466705, + "kl": 0.057098388671875, + "learning_rate": 2.033915552394516e-09, + "loss": 5.712054553441703e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4887, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.375, + "completions/mean_length": 67.05208492279053, + "completions/min_length": 21.125, + "epoch": 9.71680317696699, + "grad_norm": 1.7824189762645273, + "kl": 0.061309814453125, + "learning_rate": 2.005587609933468e-09, + "loss": 0.012681696563959122, + "memory(GiB)": 94.21, + "reward": 1.8020833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8020833358168602, + "rewards/CineAccuracyORM/std": 0.2319217473268509, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4888, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.0, + "completions/mean_length": 56.98958492279053, + "completions/min_length": 26.375, + "epoch": 9.718788781335318, + "grad_norm": 0.0039207437734277715, + "kl": 0.049163818359375, + "learning_rate": 1.9774579296849004e-09, + "loss": 4.919374987366609e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4889, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.875, + "completions/mean_length": 60.48958492279053, + "completions/min_length": 26.125, + "epoch": 9.720774385703649, + "grad_norm": 0.010444253234628572, + "kl": 0.07244873046875, + "learning_rate": 1.9495265228478553e-09, + "loss": 7.251766510307789e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4890, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 63.9166693687439, + "completions/min_length": 15.375, + "epoch": 9.722759990071978, + "grad_norm": 0.0024499987047419632, + "kl": 0.06903076171875, + "learning_rate": 1.921793400542382e-09, + "loss": 6.894676334923133e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4891, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.75, + "completions/mean_length": 72.39583492279053, + "completions/min_length": 24.5, + "epoch": 9.724745594440307, + "grad_norm": 0.002566391580491315, + "kl": 0.061614990234375, + "learning_rate": 1.894258573809704e-09, + "loss": 6.15893368376419e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4892, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 59.770835638046265, + "completions/min_length": 21.375, + "epoch": 9.726731198808638, + "grad_norm": 0.004223567298165255, + "kl": 0.0660858154296875, + "learning_rate": 1.866922053611941e-09, + "loss": 6.609825504710898e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4893, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.5, + "completions/mean_length": 64.55208539962769, + "completions/min_length": 20.375, + "epoch": 9.728716803176967, + "grad_norm": 0.004624187021800558, + "kl": 0.059967041015625, + "learning_rate": 1.8397838508323881e-09, + "loss": 5.990253703203052e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4894, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.0, + "completions/mean_length": 64.54166793823242, + "completions/min_length": 22.625, + "epoch": 9.730702407545296, + "grad_norm": 0.007816360037811628, + "kl": 0.06573486328125, + "learning_rate": 1.8128439762754022e-09, + "loss": 6.576003943337128e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4895, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.125, + "completions/mean_length": 68.68750238418579, + "completions/min_length": 25.75, + "epoch": 9.732688011913627, + "grad_norm": 1.1492892909109909, + "kl": 0.081817626953125, + "learning_rate": 1.7861024406661817e-09, + "loss": 8.186760533135384e-05, + "memory(GiB)": 94.21, + "reward": 1.6770833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.6770833358168602, + "rewards/CineAccuracyORM/std": 0.3254825547337532, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4896, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.625, + "completions/mean_length": 60.08333444595337, + "completions/min_length": 21.75, + "epoch": 9.734673616281956, + "grad_norm": 0.003555358582824764, + "kl": 0.0537109375, + "learning_rate": 1.7595592546512106e-09, + "loss": 5.3691223001806065e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4897, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.875, + "completions/mean_length": 71.70833539962769, + "completions/min_length": 27.0, + "epoch": 9.736659220650285, + "grad_norm": 0.002806854217593576, + "kl": 0.079376220703125, + "learning_rate": 1.733214428797869e-09, + "loss": 7.926135003799573e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4898, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.75, + "completions/mean_length": 60.44791889190674, + "completions/min_length": 25.125, + "epoch": 9.738644825018614, + "grad_norm": 0.004106409016724521, + "kl": 0.06591796875, + "learning_rate": 1.7070679735946002e-09, + "loss": 6.585016672033817e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4899, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.375, + "completions/mean_length": 67.70833683013916, + "completions/min_length": 19.125, + "epoch": 9.740630429386945, + "grad_norm": 0.0032727363537347555, + "kl": 0.07171630859375, + "learning_rate": 1.6811198994508557e-09, + "loss": 7.17202783562243e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4900, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.25, + "completions/mean_length": 67.55208539962769, + "completions/min_length": 27.125, + "epoch": 9.742616033755274, + "grad_norm": 0.0031718823813094133, + "kl": 0.060821533203125, + "learning_rate": 1.6553702166971495e-09, + "loss": 6.073419353924692e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4901, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.375, + "completions/mean_length": 67.16666889190674, + "completions/min_length": 23.25, + "epoch": 9.744601638123603, + "grad_norm": 0.005897440937601853, + "kl": 0.0758056640625, + "learning_rate": 1.6298189355849478e-09, + "loss": 7.571832975372672e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4902, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 185.125, + "completions/mean_length": 78.77083587646484, + "completions/min_length": 24.625, + "epoch": 9.746587242491934, + "grad_norm": 0.002937303468559925, + "kl": 0.0721435546875, + "learning_rate": 1.6044660662867248e-09, + "loss": 7.219894177978858e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4903, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.625, + "completions/mean_length": 65.72916793823242, + "completions/min_length": 21.625, + "epoch": 9.748572846860263, + "grad_norm": 0.003237457740005051, + "kl": 0.059478759765625, + "learning_rate": 1.579311618896073e-09, + "loss": 5.946962482994422e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4904, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.0, + "completions/mean_length": 72.90625238418579, + "completions/min_length": 24.25, + "epoch": 9.750558451228592, + "grad_norm": 0.0028302712073289523, + "kl": 0.060699462890625, + "learning_rate": 1.5543556034274818e-09, + "loss": 6.071483949199319e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4905, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.125, + "completions/mean_length": 66.60416889190674, + "completions/min_length": 22.125, + "epoch": 9.752544055596923, + "grad_norm": 0.0038497218597631647, + "kl": 0.060821533203125, + "learning_rate": 1.5295980298165035e-09, + "loss": 6.070291419746354e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4906, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.375, + "completions/mean_length": 68.45833444595337, + "completions/min_length": 26.875, + "epoch": 9.754529659965252, + "grad_norm": 0.0026325006622195846, + "kl": 0.0587158203125, + "learning_rate": 1.505038907919587e-09, + "loss": 5.8668760175351053e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4907, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.875, + "completions/mean_length": 71.14583587646484, + "completions/min_length": 30.0, + "epoch": 9.756515264333581, + "grad_norm": 0.004062945118989607, + "kl": 0.068328857421875, + "learning_rate": 1.4806782475142999e-09, + "loss": 6.834537634858862e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4908, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.375, + "completions/mean_length": 57.75000190734863, + "completions/min_length": 17.5, + "epoch": 9.758500868701912, + "grad_norm": 0.0023128513146530435, + "kl": 0.061248779296875, + "learning_rate": 1.456516058299162e-09, + "loss": 6.127214874140918e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4909, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 73.53125190734863, + "completions/min_length": 27.25, + "epoch": 9.760486473070241, + "grad_norm": 0.00235816750721465, + "kl": 0.057769775390625, + "learning_rate": 1.43255234989359e-09, + "loss": 5.774916644440964e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4910, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.625, + "completions/mean_length": 77.54166984558105, + "completions/min_length": 29.875, + "epoch": 9.76247207743857, + "grad_norm": 0.0026992264437089557, + "kl": 0.07342529296875, + "learning_rate": 1.4087871318380628e-09, + "loss": 7.336567068705335e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4911, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 71.333336353302, + "completions/min_length": 24.875, + "epoch": 9.764457681806899, + "grad_norm": 0.0028740920228540064, + "kl": 0.0633544921875, + "learning_rate": 1.3852204135940682e-09, + "loss": 6.34082971373573e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4912, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 68.34375143051147, + "completions/min_length": 26.25, + "epoch": 9.76644328617523, + "grad_norm": 0.004442286294750017, + "kl": 0.07794189453125, + "learning_rate": 1.3618522045439896e-09, + "loss": 7.791213283780962e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4913, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.0, + "completions/mean_length": 61.114585399627686, + "completions/min_length": 24.0, + "epoch": 9.768428890543559, + "grad_norm": 0.9693114689966057, + "kl": 0.08258056640625, + "learning_rate": 1.3386825139912184e-09, + "loss": 0.004584218375384808, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515517219901085, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4914, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 132.75, + "completions/mean_length": 62.69791889190674, + "completions/min_length": 19.0, + "epoch": 9.770414494911888, + "grad_norm": 0.003953011698298157, + "kl": 0.063873291015625, + "learning_rate": 1.315711351160098e-09, + "loss": 6.383230356732383e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4915, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.125, + "completions/mean_length": 69.33333539962769, + "completions/min_length": 23.625, + "epoch": 9.772400099280219, + "grad_norm": 0.002988304915735757, + "kl": 0.052978515625, + "learning_rate": 1.2929387251959244e-09, + "loss": 5.2948726079193875e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4916, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 70.82291841506958, + "completions/min_length": 27.125, + "epoch": 9.774385703648548, + "grad_norm": 0.0025273976544686943, + "kl": 0.057861328125, + "learning_rate": 1.2703646451650007e-09, + "loss": 5.792102820123546e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4917, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 164.125, + "completions/mean_length": 65.98958539962769, + "completions/min_length": 17.75, + "epoch": 9.776371308016877, + "grad_norm": 0.0027761983235578855, + "kl": 0.060089111328125, + "learning_rate": 1.2479891200544712e-09, + "loss": 6.004751776345074e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4918, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.875, + "completions/mean_length": 75.28125190734863, + "completions/min_length": 24.625, + "epoch": 9.778356912385208, + "grad_norm": 0.0032506017737627545, + "kl": 0.06988525390625, + "learning_rate": 1.2258121587726545e-09, + "loss": 6.982425111345947e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4919, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 124.875, + "completions/mean_length": 62.01041841506958, + "completions/min_length": 21.5, + "epoch": 9.780342516753537, + "grad_norm": 0.004341238898533917, + "kl": 0.064849853515625, + "learning_rate": 1.2038337701485435e-09, + "loss": 6.488948565674946e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4920, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 66.50000238418579, + "completions/min_length": 24.125, + "epoch": 9.782328121121866, + "grad_norm": 0.002727750978269722, + "kl": 0.0704345703125, + "learning_rate": 1.1820539629322501e-09, + "loss": 7.044737867545336e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4921, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.875, + "completions/mean_length": 65.29166889190674, + "completions/min_length": 21.5, + "epoch": 9.784313725490197, + "grad_norm": 0.9147985395637744, + "kl": 0.076416015625, + "learning_rate": 1.1604727457947828e-09, + "loss": -0.007903304882347584, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4922, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.75, + "completions/mean_length": 65.15625190734863, + "completions/min_length": 18.0, + "epoch": 9.786299329858526, + "grad_norm": 0.0038524035881175485, + "kl": 0.0762939453125, + "learning_rate": 1.1390901273280462e-09, + "loss": 7.630523759871721e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4923, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 141.25, + "completions/mean_length": 69.18750333786011, + "completions/min_length": 23.0, + "epoch": 9.788284934226855, + "grad_norm": 0.004470685652187916, + "kl": 0.053466796875, + "learning_rate": 1.1179061160450088e-09, + "loss": 5.347974001779221e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4924, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 70.09375238418579, + "completions/min_length": 26.625, + "epoch": 9.790270538595184, + "grad_norm": 0.0025386537513485915, + "kl": 0.073272705078125, + "learning_rate": 1.0969207203793685e-09, + "loss": 7.335464761126786e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4925, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.375, + "completions/mean_length": 69.17708492279053, + "completions/min_length": 17.125, + "epoch": 9.792256142963515, + "grad_norm": 0.004527896516384505, + "kl": 0.08233642578125, + "learning_rate": 1.0761339486859422e-09, + "loss": 8.237495785579085e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4926, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 130.0, + "completions/mean_length": 65.95833396911621, + "completions/min_length": 24.375, + "epoch": 9.794241747331844, + "grad_norm": 0.0033412006027033793, + "kl": 0.0540771484375, + "learning_rate": 1.0555458092403325e-09, + "loss": 5.405517731560394e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4927, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.375, + "completions/mean_length": 73.00000143051147, + "completions/min_length": 26.25, + "epoch": 9.796227351700173, + "grad_norm": 0.0030554930211831146, + "kl": 0.0791015625, + "learning_rate": 1.0351563102392048e-09, + "loss": 7.910649583209306e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4928, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.5, + "completions/mean_length": 74.52083444595337, + "completions/min_length": 27.75, + "epoch": 9.798212956068504, + "grad_norm": 0.003961981033338274, + "kl": 0.0574951171875, + "learning_rate": 1.0149654597999545e-09, + "loss": 5.749250703956932e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4929, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.375, + "completions/mean_length": 61.87500238418579, + "completions/min_length": 21.375, + "epoch": 9.800198560436833, + "grad_norm": 0.0038670682705185125, + "kl": 0.07818603515625, + "learning_rate": 9.949732659609854e-10, + "loss": 7.819194433977827e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4930, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.5, + "completions/mean_length": 72.80208444595337, + "completions/min_length": 32.0, + "epoch": 9.802184164805162, + "grad_norm": 0.0035007702358087417, + "kl": 0.0740966796875, + "learning_rate": 9.751797366817083e-10, + "loss": 7.414381252601743e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4931, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.25, + "completions/mean_length": 75.177086353302, + "completions/min_length": 28.625, + "epoch": 9.804169769173493, + "grad_norm": 0.0028782338321037843, + "kl": 0.073333740234375, + "learning_rate": 9.555848798423195e-10, + "loss": 7.33111664885655e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4932, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 151.375, + "completions/mean_length": 61.53125286102295, + "completions/min_length": 20.875, + "epoch": 9.806155373541822, + "grad_norm": 0.004812318773267642, + "kl": 0.0544891357421875, + "learning_rate": 9.361887032438564e-10, + "loss": 5.44918148079887e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4933, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.5, + "completions/mean_length": 69.50000333786011, + "completions/min_length": 21.375, + "epoch": 9.808140977910151, + "grad_norm": 0.007386824070509152, + "kl": 0.08892822265625, + "learning_rate": 9.169912146084758e-10, + "loss": 8.903484558686614e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4934, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 71.958336353302, + "completions/min_length": 26.25, + "epoch": 9.810126582278482, + "grad_norm": 0.0025956159257214655, + "kl": 0.05419921875, + "learning_rate": 8.979924215790635e-10, + "loss": 5.4165440815268084e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4935, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.125, + "completions/mean_length": 61.46875238418579, + "completions/min_length": 21.875, + "epoch": 9.81211218664681, + "grad_norm": 0.0025961079585854205, + "kl": 0.0545654296875, + "learning_rate": 8.791923317194582e-10, + "loss": 5.451752804219723e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4936, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.375, + "completions/mean_length": 67.44791793823242, + "completions/min_length": 25.0, + "epoch": 9.81409779101514, + "grad_norm": 0.002320192232239995, + "kl": 0.05633544921875, + "learning_rate": 8.605909525143396e-10, + "loss": 5.624774712487124e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4937, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 154.875, + "completions/mean_length": 71.92708444595337, + "completions/min_length": 29.0, + "epoch": 9.816083395383469, + "grad_norm": 0.004987222309842723, + "kl": 0.065155029296875, + "learning_rate": 8.421882913692835e-10, + "loss": 6.519594171550125e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4938, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.125, + "completions/mean_length": 60.50000190734863, + "completions/min_length": 16.75, + "epoch": 9.8180689997518, + "grad_norm": 0.003964087890630469, + "kl": 0.0623779296875, + "learning_rate": 8.239843556108739e-10, + "loss": 6.227349513210356e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4939, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.875, + "completions/mean_length": 70.01041841506958, + "completions/min_length": 21.75, + "epoch": 9.820054604120129, + "grad_norm": 0.003700480245563324, + "kl": 0.061859130859375, + "learning_rate": 8.059791524864801e-10, + "loss": 6.183989171404392e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4940, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 71.92708587646484, + "completions/min_length": 29.25, + "epoch": 9.822040208488458, + "grad_norm": 0.003118603510522461, + "kl": 0.07806396484375, + "learning_rate": 7.881726891642571e-10, + "loss": 7.805461063981056e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4941, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.75, + "completions/mean_length": 68.65625190734863, + "completions/min_length": 25.25, + "epoch": 9.824025812856789, + "grad_norm": 0.004830495818646426, + "kl": 0.075469970703125, + "learning_rate": 7.705649727334784e-10, + "loss": 7.542136154370382e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4942, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 61.239585876464844, + "completions/min_length": 20.0, + "epoch": 9.826011417225118, + "grad_norm": 0.250625416287303, + "kl": 0.228759765625, + "learning_rate": 7.531560102040368e-10, + "loss": 0.00022838378208689392, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4943, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 158.875, + "completions/mean_length": 68.89583492279053, + "completions/min_length": 21.0, + "epoch": 9.827997021593447, + "grad_norm": 0.004143987166351701, + "kl": 0.063751220703125, + "learning_rate": 7.359458085068327e-10, + "loss": 6.372031202772632e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4944, + "train_speed(iter/s)": 0.022604 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 166.75, + "completions/mean_length": 77.02083587646484, + "completions/min_length": 23.75, + "epoch": 9.829982625961778, + "grad_norm": 0.00812242766943953, + "kl": 0.086761474609375, + "learning_rate": 7.189343744936627e-10, + "loss": 8.672293915878981e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4945, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.625, + "completions/mean_length": 73.32291984558105, + "completions/min_length": 27.75, + "epoch": 9.831968230330107, + "grad_norm": 0.003069944067159677, + "kl": 0.055694580078125, + "learning_rate": 7.021217149371095e-10, + "loss": 5.569959103013389e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4946, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.75, + "completions/mean_length": 80.520836353302, + "completions/min_length": 31.875, + "epoch": 9.833953834698436, + "grad_norm": 0.0029404915752553845, + "kl": 0.066192626953125, + "learning_rate": 6.855078365306521e-10, + "loss": 6.622498767683282e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4947, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 123.875, + "completions/mean_length": 57.11458492279053, + "completions/min_length": 19.5, + "epoch": 9.835939439066767, + "grad_norm": 0.006216118937372336, + "kl": 0.072662353515625, + "learning_rate": 6.690927458886109e-10, + "loss": 7.26415601093322e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4948, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 170.25, + "completions/mean_length": 71.93750190734863, + "completions/min_length": 23.875, + "epoch": 9.837925043435096, + "grad_norm": 0.003586536611286647, + "kl": 0.09130859375, + "learning_rate": 6.52876449546258e-10, + "loss": 9.13163967197761e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4949, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 137.375, + "completions/mean_length": 67.66666746139526, + "completions/min_length": 25.875, + "epoch": 9.839910647803425, + "grad_norm": 0.002208604457820552, + "kl": 0.05267333984375, + "learning_rate": 6.368589539595959e-10, + "loss": 5.267279266263358e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4950, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.375, + "completions/mean_length": 71.05208492279053, + "completions/min_length": 26.5, + "epoch": 9.841896252171754, + "grad_norm": 0.0029101850324022354, + "kl": 0.06866455078125, + "learning_rate": 6.21040265505468e-10, + "loss": 6.870054494356737e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4951, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.75, + "completions/mean_length": 69.93750238418579, + "completions/min_length": 19.75, + "epoch": 9.843881856540085, + "grad_norm": 0.006810479758389723, + "kl": 0.07965087890625, + "learning_rate": 6.054203904817812e-10, + "loss": 7.961005030665547e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4952, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.0, + "completions/mean_length": 71.52083539962769, + "completions/min_length": 21.125, + "epoch": 9.845867460908414, + "grad_norm": 0.00233024970663714, + "kl": 0.061920166015625, + "learning_rate": 5.899993351070054e-10, + "loss": 6.190160638652742e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4953, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 169.5, + "completions/mean_length": 73.84375238418579, + "completions/min_length": 19.375, + "epoch": 9.847853065276743, + "grad_norm": 0.003606115658858581, + "kl": 0.062957763671875, + "learning_rate": 5.747771055206741e-10, + "loss": 6.295074126683176e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4954, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.875, + "completions/mean_length": 66.42708444595337, + "completions/min_length": 24.375, + "epoch": 9.849838669645074, + "grad_norm": 0.004113093807762768, + "kl": 0.07177734375, + "learning_rate": 5.597537077830505e-10, + "loss": 7.180670218076557e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4955, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 121.0, + "completions/mean_length": 63.62500238418579, + "completions/min_length": 25.375, + "epoch": 9.851824274013403, + "grad_norm": 0.0029272398565606387, + "kl": 0.060760498046875, + "learning_rate": 5.449291478752394e-10, + "loss": 6.069597293389961e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4956, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.75, + "completions/mean_length": 77.5729193687439, + "completions/min_length": 31.5, + "epoch": 9.853809878381732, + "grad_norm": 0.00364253130880136, + "kl": 0.07098388671875, + "learning_rate": 5.303034316992417e-10, + "loss": 7.088730490067974e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4957, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 126.125, + "completions/mean_length": 61.01041841506958, + "completions/min_length": 17.25, + "epoch": 9.855795482750063, + "grad_norm": 0.0044567901921322975, + "kl": 0.0609130859375, + "learning_rate": 5.158765650778996e-10, + "loss": 6.0853693867102265e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4958, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 127.75, + "completions/mean_length": 63.92708444595337, + "completions/min_length": 18.625, + "epoch": 9.857781087118392, + "grad_norm": 0.0034395284342734057, + "kl": 0.05609130859375, + "learning_rate": 5.01648553754841e-10, + "loss": 5.60905973543413e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4959, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.5, + "completions/mean_length": 72.43750190734863, + "completions/min_length": 26.75, + "epoch": 9.85976669148672, + "grad_norm": 0.0022682243629432905, + "kl": 0.05438232421875, + "learning_rate": 4.876194033945347e-10, + "loss": 5.437176878331229e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4960, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 60.75000190734863, + "completions/min_length": 23.0, + "epoch": 9.861752295855052, + "grad_norm": 0.004860945948056546, + "kl": 0.052154541015625, + "learning_rate": 4.737891195822352e-10, + "loss": 5.2177434554323554e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4961, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 216.75, + "completions/mean_length": 79.11458587646484, + "completions/min_length": 25.375, + "epoch": 9.86373790022338, + "grad_norm": 0.002741801260354241, + "kl": 0.07061767578125, + "learning_rate": 4.601577078242047e-10, + "loss": 7.055123569443822e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4962, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.0, + "completions/mean_length": 69.23958539962769, + "completions/min_length": 20.375, + "epoch": 9.86572350459171, + "grad_norm": 0.002916452726004066, + "kl": 0.0592041015625, + "learning_rate": 4.4672517354721326e-10, + "loss": 5.9139008953934535e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4963, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.25, + "completions/mean_length": 69.09375190734863, + "completions/min_length": 20.875, + "epoch": 9.867709108960039, + "grad_norm": 0.0025810766734490885, + "kl": 0.055572509765625, + "learning_rate": 4.334915220992053e-10, + "loss": 5.548813351197168e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4964, + "train_speed(iter/s)": 0.022603 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.875, + "completions/mean_length": 83.06250095367432, + "completions/min_length": 29.25, + "epoch": 9.86969471332837, + "grad_norm": 0.004265449261202776, + "kl": 0.064239501953125, + "learning_rate": 4.204567587486885e-10, + "loss": 6.421327998396009e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4965, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 179.75, + "completions/mean_length": 70.1666693687439, + "completions/min_length": 19.5, + "epoch": 9.871680317696699, + "grad_norm": 0.0023019412380832793, + "kl": 0.054412841796875, + "learning_rate": 4.076208886851229e-10, + "loss": 5.441211033030413e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4966, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 65.79166889190674, + "completions/min_length": 19.625, + "epoch": 9.873665922065028, + "grad_norm": 0.00466966472491242, + "kl": 0.071746826171875, + "learning_rate": 3.949839170187541e-10, + "loss": 7.168490265030414e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4967, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 150.625, + "completions/mean_length": 70.17708492279053, + "completions/min_length": 30.375, + "epoch": 9.875651526433359, + "grad_norm": 0.04236118095267577, + "kl": 0.088104248046875, + "learning_rate": 3.8254584878055773e-10, + "loss": 8.810514555079862e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4968, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 176.0, + "completions/mean_length": 74.73958587646484, + "completions/min_length": 28.5, + "epoch": 9.877637130801688, + "grad_norm": 0.0028984809971913626, + "kl": 0.074920654296875, + "learning_rate": 3.703066889224615e-10, + "loss": 7.494209421565756e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4969, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.25, + "completions/mean_length": 66.67708444595337, + "completions/min_length": 20.125, + "epoch": 9.879622735170017, + "grad_norm": 0.0026172484586559075, + "kl": 0.06500244140625, + "learning_rate": 3.5826644231706784e-10, + "loss": 6.509361992357299e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4970, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 145.0, + "completions/mean_length": 68.98958539962769, + "completions/min_length": 24.625, + "epoch": 9.881608339538348, + "grad_norm": 0.0030047013288678685, + "kl": 0.05316162109375, + "learning_rate": 3.4642511375798657e-10, + "loss": 5.311940185492858e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4971, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 155.25, + "completions/mean_length": 64.98958492279053, + "completions/min_length": 24.5, + "epoch": 9.883593943906677, + "grad_norm": 0.002863542917715487, + "kl": 0.094451904296875, + "learning_rate": 3.3478270795933574e-10, + "loss": 9.464035974815488e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4972, + "train_speed(iter/s)": 0.022602 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 71.86458683013916, + "completions/min_length": 29.375, + "epoch": 9.885579548275006, + "grad_norm": 0.0027823123829232204, + "kl": 0.057586669921875, + "learning_rate": 3.2333922955635194e-10, + "loss": 5.755884194513783e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4973, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 143.25, + "completions/mean_length": 69.90625238418579, + "completions/min_length": 23.875, + "epoch": 9.887565152643337, + "grad_norm": 0.003311938889952719, + "kl": 0.064788818359375, + "learning_rate": 3.120946831048354e-10, + "loss": 6.473198300227523e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4974, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 163.625, + "completions/mean_length": 73.69791889190674, + "completions/min_length": 25.75, + "epoch": 9.889550757011666, + "grad_norm": 0.004843637806478451, + "kl": 0.062835693359375, + "learning_rate": 3.010490730815385e-10, + "loss": 6.293550541158766e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4975, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.125, + "completions/mean_length": 72.78125238418579, + "completions/min_length": 29.5, + "epoch": 9.891536361379995, + "grad_norm": 0.00258890425231928, + "kl": 0.067047119140625, + "learning_rate": 2.9020240388388794e-10, + "loss": 6.708676664857194e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4976, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 203.0, + "completions/mean_length": 81.65625333786011, + "completions/min_length": 28.5, + "epoch": 9.893521965748324, + "grad_norm": 0.002474235830371792, + "kl": 0.062713623046875, + "learning_rate": 2.7955467983026284e-10, + "loss": 6.276718340814114e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4977, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 129.375, + "completions/mean_length": 62.562501430511475, + "completions/min_length": 16.875, + "epoch": 9.895507570116655, + "grad_norm": 1.566695079567521, + "kl": 0.06591796875, + "learning_rate": 2.6910590515966113e-10, + "loss": 6.595502782147378e-05, + "memory(GiB)": 94.21, + "reward": 1.7708333432674408, + "reward_std": 0.03227486088871956, + "rewards/CineAccuracyORM/mean": 0.7708333358168602, + "rewards/CineAccuracyORM/std": 0.12682486698031425, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4978, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.5, + "completions/mean_length": 80.21875238418579, + "completions/min_length": 26.625, + "epoch": 9.897493174484984, + "grad_norm": 0.0023293936682199715, + "kl": 0.06182861328125, + "learning_rate": 2.588560840320331e-10, + "loss": 6.178909097798169e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4979, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.75, + "completions/mean_length": 70.69791841506958, + "completions/min_length": 25.25, + "epoch": 9.899478778853313, + "grad_norm": 0.0036043351625637122, + "kl": 0.058990478515625, + "learning_rate": 2.4880522052800334e-10, + "loss": 5.893385241506621e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4980, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 122.25, + "completions/mean_length": 65.61458492279053, + "completions/min_length": 16.875, + "epoch": 9.901464383221644, + "grad_norm": 0.0031271481790348596, + "kl": 0.071380615234375, + "learning_rate": 2.3895331864903776e-10, + "loss": 7.129264122340828e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4981, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.375, + "completions/mean_length": 66.50000095367432, + "completions/min_length": 22.25, + "epoch": 9.903449987589973, + "grad_norm": 0.002770663657091112, + "kl": 0.056396484375, + "learning_rate": 2.293003823174433e-10, + "loss": 5.6329437938984483e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4982, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.0, + "completions/mean_length": 59.66666889190674, + "completions/min_length": 21.0, + "epoch": 9.905435591958302, + "grad_norm": 0.012688918468588451, + "kl": 0.0679931640625, + "learning_rate": 2.198464153762014e-10, + "loss": 6.803065480198711e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4983, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 188.25, + "completions/mean_length": 86.96875190734863, + "completions/min_length": 34.125, + "epoch": 9.907421196326633, + "grad_norm": 0.002783874945721273, + "kl": 0.06610107421875, + "learning_rate": 2.1059142158919018e-10, + "loss": 6.611172284465283e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4984, + "train_speed(iter/s)": 0.022599 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 149.125, + "completions/mean_length": 63.82291889190674, + "completions/min_length": 21.5, + "epoch": 9.909406800694962, + "grad_norm": 0.003155548573825557, + "kl": 0.07037353515625, + "learning_rate": 2.015354046409623e-10, + "loss": 7.04998237779364e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4985, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.5, + "completions/mean_length": 62.70833444595337, + "completions/min_length": 20.625, + "epoch": 9.91139240506329, + "grad_norm": 0.0037685532454678873, + "kl": 0.065521240234375, + "learning_rate": 1.926783681369115e-10, + "loss": 6.549165118485689e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4986, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.125, + "completions/mean_length": 70.73958587646484, + "completions/min_length": 22.5, + "epoch": 9.913378009431622, + "grad_norm": 0.0026566020766932816, + "kl": 0.0701904296875, + "learning_rate": 1.840203156032727e-10, + "loss": 7.013032882241532e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4987, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.75, + "completions/mean_length": 85.82291793823242, + "completions/min_length": 29.875, + "epoch": 9.91536361379995, + "grad_norm": 0.002659269109254934, + "kl": 0.0943603515625, + "learning_rate": 1.7556125048695525e-10, + "loss": 9.437241533305496e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4988, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.625, + "completions/mean_length": 60.04166889190674, + "completions/min_length": 19.125, + "epoch": 9.91734921816828, + "grad_norm": 0.004663215058839115, + "kl": 0.086212158203125, + "learning_rate": 1.673011761557097e-10, + "loss": 8.620247535873204e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4989, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 133.25, + "completions/mean_length": 65.86458587646484, + "completions/min_length": 27.0, + "epoch": 9.919334822536609, + "grad_norm": 0.002386957079413601, + "kl": 0.060211181640625, + "learning_rate": 1.5924009589801668e-10, + "loss": 6.018680869601667e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4990, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.375, + "completions/mean_length": 65.12500095367432, + "completions/min_length": 24.25, + "epoch": 9.92132042690494, + "grad_norm": 0.0027547175043255586, + "kl": 0.067901611328125, + "learning_rate": 1.5137801292325336e-10, + "loss": 6.7899476562161e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4991, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.125, + "completions/mean_length": 60.083335876464844, + "completions/min_length": 18.125, + "epoch": 9.923306031273269, + "grad_norm": 0.005328498802481913, + "kl": 0.0599365234375, + "learning_rate": 1.437149303613605e-10, + "loss": 5.993892773403786e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4992, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 165.25, + "completions/mean_length": 64.21875143051147, + "completions/min_length": 21.0, + "epoch": 9.925291635641598, + "grad_norm": 0.003329082001985909, + "kl": 0.056915283203125, + "learning_rate": 1.362508512632865e-10, + "loss": 5.6879543990362436e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4993, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 160.625, + "completions/mean_length": 69.89583587646484, + "completions/min_length": 20.125, + "epoch": 9.927277240009929, + "grad_norm": 0.0031193247306549403, + "kl": 0.062896728515625, + "learning_rate": 1.2898577860054327e-10, + "loss": 6.286584539338946e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4994, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 134.5, + "completions/mean_length": 67.25000190734863, + "completions/min_length": 23.875, + "epoch": 9.929262844378258, + "grad_norm": 0.002466307950208977, + "kl": 0.081451416015625, + "learning_rate": 1.2191971526559485e-10, + "loss": 8.159526623785496e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4995, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 161.375, + "completions/mean_length": 67.15625238418579, + "completions/min_length": 19.5, + "epoch": 9.931248448746587, + "grad_norm": 0.0030281324538418268, + "kl": 0.05120849609375, + "learning_rate": 1.1505266407157987e-10, + "loss": 5.126166433910839e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4996, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 136.75, + "completions/mean_length": 66.802086353302, + "completions/min_length": 23.25, + "epoch": 9.933234053114917, + "grad_norm": 0.004728499306660028, + "kl": 0.066741943359375, + "learning_rate": 1.083846277523115e-10, + "loss": 6.680757360300049e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4997, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 135.5, + "completions/mean_length": 71.77083539962769, + "completions/min_length": 26.625, + "epoch": 9.935219657483247, + "grad_norm": 0.0031677994834557223, + "kl": 0.055572509765625, + "learning_rate": 1.0191560896261053e-10, + "loss": 5.560018325923011e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4998, + "train_speed(iter/s)": 0.022601 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.5, + "completions/mean_length": 74.16666793823242, + "completions/min_length": 20.125, + "epoch": 9.937205261851576, + "grad_norm": 0.0041143029368503085, + "kl": 0.08026123046875, + "learning_rate": 9.564561027791684e-11, + "loss": 8.02710055722855e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.32639559358358383, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 4999, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 144.5, + "completions/mean_length": 69.90625190734863, + "completions/min_length": 22.375, + "epoch": 9.939190866219906, + "grad_norm": 0.004320772430352747, + "kl": 0.065521240234375, + "learning_rate": 8.957463419434485e-11, + "loss": 6.551534170284867e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5000, + "train_speed(iter/s)": 0.0226 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 192.0, + "completions/mean_length": 79.81250143051147, + "completions/min_length": 24.375, + "epoch": 9.941176470588236, + "grad_norm": 0.003311299062815428, + "kl": 0.06561279296875, + "learning_rate": 8.370268312901662e-11, + "loss": 6.555959407705814e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5001, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 131.125, + "completions/mean_length": 68.21875095367432, + "completions/min_length": 21.75, + "epoch": 9.943162074956565, + "grad_norm": 0.0028011093755625142, + "kl": 0.061798095703125, + "learning_rate": 7.802975941956225e-11, + "loss": 6.170413689687848e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5002, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 125.75, + "completions/mean_length": 66.03125190734863, + "completions/min_length": 23.0, + "epoch": 9.945147679324894, + "grad_norm": 0.0029730229984516257, + "kl": 0.060546875, + "learning_rate": 7.255586532456392e-11, + "loss": 6.052818935131654e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5003, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 118.75, + "completions/mean_length": 58.677085399627686, + "completions/min_length": 18.875, + "epoch": 9.947133283693224, + "grad_norm": 0.003736969629975271, + "kl": 0.0577392578125, + "learning_rate": 6.728100302327844e-11, + "loss": 5.776435136795044e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5004, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 138.5, + "completions/mean_length": 63.26041793823242, + "completions/min_length": 17.625, + "epoch": 9.949118888061554, + "grad_norm": 0.0029548581729983373, + "kl": 0.05303955078125, + "learning_rate": 6.220517461574815e-11, + "loss": 5.301543205860071e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5005, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 128.125, + "completions/mean_length": 63.56250190734863, + "completions/min_length": 25.125, + "epoch": 9.951104492429883, + "grad_norm": 0.002751932672667616, + "kl": 0.052032470703125, + "learning_rate": 5.7328382122745487e-11, + "loss": 5.20942521688994e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5006, + "train_speed(iter/s)": 0.022598 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 72.75000143051147, + "completions/min_length": 25.125, + "epoch": 9.953090096798213, + "grad_norm": 0.0031973263226006974, + "kl": 0.069610595703125, + "learning_rate": 5.2650627485884005e-11, + "loss": 6.958014273550361e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5007, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.625, + "completions/mean_length": 70.23958539962769, + "completions/min_length": 24.75, + "epoch": 9.955075701166543, + "grad_norm": 0.0023261012574901053, + "kl": 0.074066162109375, + "learning_rate": 4.8171912567396275e-11, + "loss": 7.407159137073904e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5008, + "train_speed(iter/s)": 0.022598 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 110.375, + "completions/mean_length": 56.78125190734863, + "completions/min_length": 19.875, + "epoch": 9.957061305534872, + "grad_norm": 0.0030203454945909717, + "kl": 0.06207275390625, + "learning_rate": 4.3892239150411514e-11, + "loss": 6.200814095791429e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5009, + "train_speed(iter/s)": 0.022598 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 159.625, + "completions/mean_length": 69.86458539962769, + "completions/min_length": 26.5, + "epoch": 9.959046909903202, + "grad_norm": 0.0028704947241669477, + "kl": 0.06414794921875, + "learning_rate": 3.981160893873348e-11, + "loss": 6.415010284399614e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5010, + "train_speed(iter/s)": 0.022598 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.0, + "completions/mean_length": 73.86458492279053, + "completions/min_length": 23.75, + "epoch": 9.961032514271531, + "grad_norm": 0.003954299585060534, + "kl": 0.070037841796875, + "learning_rate": 3.593002355695152e-11, + "loss": 6.996475713094696e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5011, + "train_speed(iter/s)": 0.022598 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 152.625, + "completions/mean_length": 69.02083539962769, + "completions/min_length": 25.125, + "epoch": 9.96301811863986, + "grad_norm": 0.0031736604836958876, + "kl": 0.0596923828125, + "learning_rate": 3.224748455038506e-11, + "loss": 5.9735488321166486e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5012, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 142.0, + "completions/mean_length": 69.61458539962769, + "completions/min_length": 24.0, + "epoch": 9.965003723008191, + "grad_norm": 0.0048170577250103335, + "kl": 0.057830810546875, + "learning_rate": 2.876399338519464e-11, + "loss": 5.783676169812679e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5013, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 156.0, + "completions/mean_length": 68.30208539962769, + "completions/min_length": 18.875, + "epoch": 9.96698932737652, + "grad_norm": 0.003635558580733918, + "kl": 0.0771484375, + "learning_rate": 2.5479551448215342e-11, + "loss": 7.724007446086034e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5014, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 181.25, + "completions/mean_length": 79.06250238418579, + "completions/min_length": 30.625, + "epoch": 9.96897493174485, + "grad_norm": 0.003912979604279716, + "kl": 0.058502197265625, + "learning_rate": 2.2394160047012332e-11, + "loss": 5.846464046044275e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5015, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.5, + "completions/mean_length": 87.96875238418579, + "completions/min_length": 24.25, + "epoch": 9.970960536113179, + "grad_norm": 0.0030208456802302613, + "kl": 0.066802978515625, + "learning_rate": 1.950782040993637e-11, + "loss": 6.677482451777905e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5016, + "train_speed(iter/s)": 0.022596 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 217.875, + "completions/mean_length": 82.66666841506958, + "completions/min_length": 33.0, + "epoch": 9.97294614048151, + "grad_norm": 0.0038933502705710736, + "kl": 0.068206787109375, + "learning_rate": 1.6820533686179306e-11, + "loss": 6.82783720549196e-05, + "memory(GiB)": 94.21, + "reward": 1.8125, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.8125, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5017, + "train_speed(iter/s)": 0.022596 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 120.125, + "completions/mean_length": 58.520835399627686, + "completions/min_length": 20.375, + "epoch": 9.974931744849838, + "grad_norm": 0.0030747908520010647, + "kl": 0.0493621826171875, + "learning_rate": 1.4332300945552045e-11, + "loss": 4.9329944886267185e-05, + "memory(GiB)": 94.21, + "reward": 1.75, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.75, + "rewards/CineAccuracyORM/std": 0.26111647486686707, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5018, + "train_speed(iter/s)": 0.022596 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 139.75, + "completions/mean_length": 66.68750286102295, + "completions/min_length": 21.5, + "epoch": 9.976917349218168, + "grad_norm": 0.0031566961583329427, + "kl": 0.071258544921875, + "learning_rate": 1.2043123178651082e-11, + "loss": 7.123083923943341e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5019, + "train_speed(iter/s)": 0.022597 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 153.5, + "completions/mean_length": 73.39583587646484, + "completions/min_length": 29.125, + "epoch": 9.978902953586498, + "grad_norm": 0.0030517392417054394, + "kl": 0.074005126953125, + "learning_rate": 9.95300129691401e-12, + "loss": 7.393911073449999e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5020, + "train_speed(iter/s)": 0.022596 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 177.75, + "completions/mean_length": 67.25000238418579, + "completions/min_length": 18.75, + "epoch": 9.980888557954827, + "grad_norm": 0.00314708597922162, + "kl": 0.0772705078125, + "learning_rate": 8.061936132397473e-12, + "loss": 7.719548011664301e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5021, + "train_speed(iter/s)": 0.022596 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 162.875, + "completions/mean_length": 69.01041841506958, + "completions/min_length": 22.25, + "epoch": 9.982874162323156, + "grad_norm": 0.0037940584353413995, + "kl": 0.055023193359375, + "learning_rate": 6.369928438054728e-12, + "loss": 5.4935295338509604e-05, + "memory(GiB)": 94.21, + "reward": 1.6875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.6875, + "rewards/CineAccuracyORM/std": 0.1958373561501503, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5022, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 172.875, + "completions/mean_length": 71.22916889190674, + "completions/min_length": 25.625, + "epoch": 9.984859766691487, + "grad_norm": 0.002759925815876402, + "kl": 0.08148193359375, + "learning_rate": 4.876978887402572e-12, + "loss": 8.154282113537192e-05, + "memory(GiB)": 94.21, + "reward": 2.0, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 1.0, + "rewards/CineAccuracyORM/std": 0.0, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5023, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 148.0, + "completions/mean_length": 68.32291841506958, + "completions/min_length": 19.375, + "epoch": 9.986845371059816, + "grad_norm": 0.004699794446605444, + "kl": 0.0875244140625, + "learning_rate": 3.5830880749099237e-12, + "loss": 8.739631448406726e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5024, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.75, + "completions/mean_length": 69.64583587646484, + "completions/min_length": 26.125, + "epoch": 9.988830975428145, + "grad_norm": 0.002320346328278786, + "kl": 0.05517578125, + "learning_rate": 2.4882565156647552e-12, + "loss": 5.5175456509459764e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5025, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.5, + "completions/mean_length": 70.52083492279053, + "completions/min_length": 21.5, + "epoch": 9.990816579796476, + "grad_norm": 1.5461457951612276, + "kl": 0.068878173828125, + "learning_rate": 1.592484645540626e-12, + "loss": -0.013513864949345589, + "memory(GiB)": 94.21, + "reward": 1.8645833432674408, + "reward_std": 0.025515519082546234, + "rewards/CineAccuracyORM/mean": 0.8645833358168602, + "rewards/CineAccuracyORM/std": 0.16664262861013412, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5026, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.625, + "completions/mean_length": 66.52083492279053, + "completions/min_length": 21.75, + "epoch": 9.992802184164805, + "grad_norm": 0.0023987120296745787, + "kl": 0.06939697265625, + "learning_rate": 8.957728211411719e-13, + "loss": 6.932149699423462e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5027, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 146.625, + "completions/mean_length": 66.68750143051147, + "completions/min_length": 28.375, + "epoch": 9.994787788533134, + "grad_norm": 0.003938453124709751, + "kl": 0.059234619140625, + "learning_rate": 3.9812131985561547e-13, + "loss": 5.927008896833286e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5028, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 147.5, + "completions/mean_length": 70.76041889190674, + "completions/min_length": 24.5, + "epoch": 9.996773392901463, + "grad_norm": 0.005103637817761054, + "kl": 0.067138671875, + "learning_rate": 9.953033985876658e-14, + "loss": 6.719196971971542e-05, + "memory(GiB)": 94.21, + "reward": 1.875, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.875, + "rewards/CineAccuracyORM/std": 0.13055823743343353, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5029, + "train_speed(iter/s)": 0.022595 + }, + { + "clip_ratio": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 157.0, + "completions/mean_length": 67.50000238418579, + "completions/min_length": 26.125, + "epoch": 9.998758997269794, + "grad_norm": 0.002779238466724921, + "kl": 0.0516357421875, + "learning_rate": 0.0, + "loss": 5.166804476175457e-05, + "memory(GiB)": 94.21, + "reward": 1.9375, + "reward_std": 0.0, + "rewards/CineAccuracyORM/mean": 0.9375, + "rewards/CineAccuracyORM/std": 0.06527911871671677, + "rewards/Format/mean": 1.0, + "rewards/Format/std": 0.0, + "step": 5030, + "train_speed(iter/s)": 0.022595 } ], - "logging_steps": 5, - "max_steps": 841, + "logging_steps": 1, + "max_steps": 5030, "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 100, + "num_train_epochs": 10, + "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { @@ -1717,8 +105657,8 @@ "attributes": {} } }, - "total_flos": 101605807005696.0, - "train_batch_size": 1, + "total_flos": 0.0, + "train_batch_size": 2, "trial_name": null, "trial_params": null } diff --git a/training_args.bin b/training_args.bin index 0d03844590cf02ff96d7940937cc92bf67b11918..6f709d1474cbb5918bfcf355e428979373ef99d2 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f7dec8e5ecd03f7b60cd0b0dfcaa6013458cf9e6817c81c5c391d78b95ee914 -size 8312 +oid sha256:5ec30db82d58e502de1d84aeaa00ff3be6dcdbab98c36a7d211221c3f43dc348 +size 9976