Upload folder using huggingface_hub
Browse files- checkpoint-8752/config.json +46 -0
 - checkpoint-8752/model.safetensors +3 -0
 - checkpoint-8752/optimizer.pt +3 -0
 - checkpoint-8752/preprocessor_config.json +23 -0
 - checkpoint-8752/rng_state.pth +3 -0
 - checkpoint-8752/scheduler.pt +3 -0
 - checkpoint-8752/trainer_state.json +192 -0
 - checkpoint-8752/training_args.bin +3 -0
 - model.safetensors +1 -1
 - training_args.bin +1 -1
 
    	
        checkpoint-8752/config.json
    ADDED
    
    | 
         @@ -0,0 +1,46 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "_name_or_path": "google/vit-base-patch16-224-in21k",
         
     | 
| 3 | 
         
            +
              "architectures": [
         
     | 
| 4 | 
         
            +
                "ViTForImageClassification"
         
     | 
| 5 | 
         
            +
              ],
         
     | 
| 6 | 
         
            +
              "attention_probs_dropout_prob": 0.0,
         
     | 
| 7 | 
         
            +
              "encoder_stride": 16,
         
     | 
| 8 | 
         
            +
              "hidden_act": "gelu",
         
     | 
| 9 | 
         
            +
              "hidden_dropout_prob": 0.0,
         
     | 
| 10 | 
         
            +
              "hidden_size": 768,
         
     | 
| 11 | 
         
            +
              "id2label": {
         
     | 
| 12 | 
         
            +
                "0": "0-2",
         
     | 
| 13 | 
         
            +
                "1": "3-9",
         
     | 
| 14 | 
         
            +
                "2": "10-19",
         
     | 
| 15 | 
         
            +
                "3": "20-29",
         
     | 
| 16 | 
         
            +
                "4": "30-39",
         
     | 
| 17 | 
         
            +
                "5": "40-49",
         
     | 
| 18 | 
         
            +
                "6": "50-59",
         
     | 
| 19 | 
         
            +
                "7": "60-69",
         
     | 
| 20 | 
         
            +
                "8": "more than 70"
         
     | 
| 21 | 
         
            +
              },
         
     | 
| 22 | 
         
            +
              "image_size": 224,
         
     | 
| 23 | 
         
            +
              "initializer_range": 0.02,
         
     | 
| 24 | 
         
            +
              "intermediate_size": 3072,
         
     | 
| 25 | 
         
            +
              "label2id": {
         
     | 
| 26 | 
         
            +
                "0-2": 0,
         
     | 
| 27 | 
         
            +
                "10-19": 2,
         
     | 
| 28 | 
         
            +
                "20-29": 3,
         
     | 
| 29 | 
         
            +
                "3-9": 1,
         
     | 
| 30 | 
         
            +
                "30-39": 4,
         
     | 
| 31 | 
         
            +
                "40-49": 5,
         
     | 
| 32 | 
         
            +
                "50-59": 6,
         
     | 
| 33 | 
         
            +
                "60-69": 7,
         
     | 
| 34 | 
         
            +
                "more than 70": 8
         
     | 
| 35 | 
         
            +
              },
         
     | 
| 36 | 
         
            +
              "layer_norm_eps": 1e-12,
         
     | 
| 37 | 
         
            +
              "model_type": "vit",
         
     | 
| 38 | 
         
            +
              "num_attention_heads": 12,
         
     | 
| 39 | 
         
            +
              "num_channels": 3,
         
     | 
| 40 | 
         
            +
              "num_hidden_layers": 12,
         
     | 
| 41 | 
         
            +
              "patch_size": 16,
         
     | 
| 42 | 
         
            +
              "problem_type": "single_label_classification",
         
     | 
| 43 | 
         
            +
              "qkv_bias": true,
         
     | 
| 44 | 
         
            +
              "torch_dtype": "float32",
         
     | 
| 45 | 
         
            +
              "transformers_version": "4.47.0"
         
     | 
| 46 | 
         
            +
            }
         
     | 
    	
        checkpoint-8752/model.safetensors
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:cd3a725b1119d48b89118403515c77428316605407f871bbc7bba7fb50b32305
         
     | 
| 3 | 
         
            +
            size 343245508
         
     | 
    	
        checkpoint-8752/optimizer.pt
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:087e51816b138512d362a1022cf69ae38505cd21144fa8057f954593880b9011
         
     | 
| 3 | 
         
            +
            size 686611898
         
     | 
    	
        checkpoint-8752/preprocessor_config.json
    ADDED
    
    | 
         @@ -0,0 +1,23 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "do_convert_rgb": null,
         
     | 
| 3 | 
         
            +
              "do_normalize": true,
         
     | 
| 4 | 
         
            +
              "do_rescale": true,
         
     | 
| 5 | 
         
            +
              "do_resize": true,
         
     | 
| 6 | 
         
            +
              "image_mean": [
         
     | 
| 7 | 
         
            +
                0.5,
         
     | 
| 8 | 
         
            +
                0.5,
         
     | 
| 9 | 
         
            +
                0.5
         
     | 
| 10 | 
         
            +
              ],
         
     | 
| 11 | 
         
            +
              "image_processor_type": "ViTImageProcessor",
         
     | 
| 12 | 
         
            +
              "image_std": [
         
     | 
| 13 | 
         
            +
                0.5,
         
     | 
| 14 | 
         
            +
                0.5,
         
     | 
| 15 | 
         
            +
                0.5
         
     | 
| 16 | 
         
            +
              ],
         
     | 
| 17 | 
         
            +
              "resample": 2,
         
     | 
| 18 | 
         
            +
              "rescale_factor": 0.00392156862745098,
         
     | 
| 19 | 
         
            +
              "size": {
         
     | 
| 20 | 
         
            +
                "height": 224,
         
     | 
| 21 | 
         
            +
                "width": 224
         
     | 
| 22 | 
         
            +
              }
         
     | 
| 23 | 
         
            +
            }
         
     | 
    	
        checkpoint-8752/rng_state.pth
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:48ee9b73399c28d7e668360bf1d5a4d11095c4738bf96c13f7bb6fbff59f8ccb
         
     | 
| 3 | 
         
            +
            size 14244
         
     | 
    	
        checkpoint-8752/scheduler.pt
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:197fc8dcfe2b6e4805b14a2ff8529c35795cc2f0491c4b5a6bd2783753725f19
         
     | 
| 3 | 
         
            +
            size 1064
         
     | 
    	
        checkpoint-8752/trainer_state.json
    ADDED
    
    | 
         @@ -0,0 +1,192 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
              "best_metric": 1.0447938442230225,
         
     | 
| 3 | 
         
            +
              "best_model_checkpoint": "fairface_age_image_detection/checkpoint-8752",
         
     | 
| 4 | 
         
            +
              "epoch": 4.0,
         
     | 
| 5 | 
         
            +
              "eval_steps": 500,
         
     | 
| 6 | 
         
            +
              "global_step": 8752,
         
     | 
| 7 | 
         
            +
              "is_hyper_param_search": false,
         
     | 
| 8 | 
         
            +
              "is_local_process_zero": true,
         
     | 
| 9 | 
         
            +
              "is_world_process_zero": true,
         
     | 
| 10 | 
         
            +
              "log_history": [
         
     | 
| 11 | 
         
            +
                {
         
     | 
| 12 | 
         
            +
                  "epoch": 0.22851919561243145,
         
     | 
| 13 | 
         
            +
                  "grad_norm": 2.228593587875366,
         
     | 
| 14 | 
         
            +
                  "learning_rate": 3.7931509997701674e-06,
         
     | 
| 15 | 
         
            +
                  "loss": 1.8122,
         
     | 
| 16 | 
         
            +
                  "step": 500
         
     | 
| 17 | 
         
            +
                },
         
     | 
| 18 | 
         
            +
                {
         
     | 
| 19 | 
         
            +
                  "epoch": 0.4570383912248629,
         
     | 
| 20 | 
         
            +
                  "grad_norm": 3.4612433910369873,
         
     | 
| 21 | 
         
            +
                  "learning_rate": 3.5633187772925763e-06,
         
     | 
| 22 | 
         
            +
                  "loss": 1.4177,
         
     | 
| 23 | 
         
            +
                  "step": 1000
         
     | 
| 24 | 
         
            +
                },
         
     | 
| 25 | 
         
            +
                {
         
     | 
| 26 | 
         
            +
                  "epoch": 0.6855575868372943,
         
     | 
| 27 | 
         
            +
                  "grad_norm": 5.7468061447143555,
         
     | 
| 28 | 
         
            +
                  "learning_rate": 3.333486554814985e-06,
         
     | 
| 29 | 
         
            +
                  "loss": 1.3017,
         
     | 
| 30 | 
         
            +
                  "step": 1500
         
     | 
| 31 | 
         
            +
                },
         
     | 
| 32 | 
         
            +
                {
         
     | 
| 33 | 
         
            +
                  "epoch": 0.9140767824497258,
         
     | 
| 34 | 
         
            +
                  "grad_norm": 4.807565212249756,
         
     | 
| 35 | 
         
            +
                  "learning_rate": 3.1036543323373935e-06,
         
     | 
| 36 | 
         
            +
                  "loss": 1.2187,
         
     | 
| 37 | 
         
            +
                  "step": 2000
         
     | 
| 38 | 
         
            +
                },
         
     | 
| 39 | 
         
            +
                {
         
     | 
| 40 | 
         
            +
                  "epoch": 1.0,
         
     | 
| 41 | 
         
            +
                  "eval_accuracy": 0.5457,
         
     | 
| 42 | 
         
            +
                  "eval_loss": 1.1929566860198975,
         
     | 
| 43 | 
         
            +
                  "eval_model_preparation_time": 0.0035,
         
     | 
| 44 | 
         
            +
                  "eval_runtime": 876.5594,
         
     | 
| 45 | 
         
            +
                  "eval_samples_per_second": 11.408,
         
     | 
| 46 | 
         
            +
                  "eval_steps_per_second": 1.426,
         
     | 
| 47 | 
         
            +
                  "step": 2188
         
     | 
| 48 | 
         
            +
                },
         
     | 
| 49 | 
         
            +
                {
         
     | 
| 50 | 
         
            +
                  "epoch": 1.1425959780621573,
         
     | 
| 51 | 
         
            +
                  "grad_norm": 3.004606246948242,
         
     | 
| 52 | 
         
            +
                  "learning_rate": 2.873822109859802e-06,
         
     | 
| 53 | 
         
            +
                  "loss": 1.1713,
         
     | 
| 54 | 
         
            +
                  "step": 2500
         
     | 
| 55 | 
         
            +
                },
         
     | 
| 56 | 
         
            +
                {
         
     | 
| 57 | 
         
            +
                  "epoch": 1.3711151736745886,
         
     | 
| 58 | 
         
            +
                  "grad_norm": 4.531455039978027,
         
     | 
| 59 | 
         
            +
                  "learning_rate": 2.643989887382211e-06,
         
     | 
| 60 | 
         
            +
                  "loss": 1.1284,
         
     | 
| 61 | 
         
            +
                  "step": 3000
         
     | 
| 62 | 
         
            +
                },
         
     | 
| 63 | 
         
            +
                {
         
     | 
| 64 | 
         
            +
                  "epoch": 1.59963436928702,
         
     | 
| 65 | 
         
            +
                  "grad_norm": 3.8209729194641113,
         
     | 
| 66 | 
         
            +
                  "learning_rate": 2.4141576649046196e-06,
         
     | 
| 67 | 
         
            +
                  "loss": 1.105,
         
     | 
| 68 | 
         
            +
                  "step": 3500
         
     | 
| 69 | 
         
            +
                },
         
     | 
| 70 | 
         
            +
                {
         
     | 
| 71 | 
         
            +
                  "epoch": 1.8281535648994516,
         
     | 
| 72 | 
         
            +
                  "grad_norm": 3.4607913494110107,
         
     | 
| 73 | 
         
            +
                  "learning_rate": 2.184325442427028e-06,
         
     | 
| 74 | 
         
            +
                  "loss": 1.0773,
         
     | 
| 75 | 
         
            +
                  "step": 4000
         
     | 
| 76 | 
         
            +
                },
         
     | 
| 77 | 
         
            +
                {
         
     | 
| 78 | 
         
            +
                  "epoch": 2.0,
         
     | 
| 79 | 
         
            +
                  "eval_accuracy": 0.5754,
         
     | 
| 80 | 
         
            +
                  "eval_loss": 1.085502028465271,
         
     | 
| 81 | 
         
            +
                  "eval_model_preparation_time": 0.0035,
         
     | 
| 82 | 
         
            +
                  "eval_runtime": 892.4566,
         
     | 
| 83 | 
         
            +
                  "eval_samples_per_second": 11.205,
         
     | 
| 84 | 
         
            +
                  "eval_steps_per_second": 1.401,
         
     | 
| 85 | 
         
            +
                  "step": 4376
         
     | 
| 86 | 
         
            +
                },
         
     | 
| 87 | 
         
            +
                {
         
     | 
| 88 | 
         
            +
                  "epoch": 2.056672760511883,
         
     | 
| 89 | 
         
            +
                  "grad_norm": 4.261874675750732,
         
     | 
| 90 | 
         
            +
                  "learning_rate": 1.954493219949437e-06,
         
     | 
| 91 | 
         
            +
                  "loss": 1.0657,
         
     | 
| 92 | 
         
            +
                  "step": 4500
         
     | 
| 93 | 
         
            +
                },
         
     | 
| 94 | 
         
            +
                {
         
     | 
| 95 | 
         
            +
                  "epoch": 2.2851919561243146,
         
     | 
| 96 | 
         
            +
                  "grad_norm": 3.815229654312134,
         
     | 
| 97 | 
         
            +
                  "learning_rate": 1.7246609974718455e-06,
         
     | 
| 98 | 
         
            +
                  "loss": 1.036,
         
     | 
| 99 | 
         
            +
                  "step": 5000
         
     | 
| 100 | 
         
            +
                },
         
     | 
| 101 | 
         
            +
                {
         
     | 
| 102 | 
         
            +
                  "epoch": 2.5137111517367456,
         
     | 
| 103 | 
         
            +
                  "grad_norm": 4.119363307952881,
         
     | 
| 104 | 
         
            +
                  "learning_rate": 1.494828774994254e-06,
         
     | 
| 105 | 
         
            +
                  "loss": 1.0255,
         
     | 
| 106 | 
         
            +
                  "step": 5500
         
     | 
| 107 | 
         
            +
                },
         
     | 
| 108 | 
         
            +
                {
         
     | 
| 109 | 
         
            +
                  "epoch": 2.742230347349177,
         
     | 
| 110 | 
         
            +
                  "grad_norm": 4.172023296356201,
         
     | 
| 111 | 
         
            +
                  "learning_rate": 1.2649965525166627e-06,
         
     | 
| 112 | 
         
            +
                  "loss": 1.0126,
         
     | 
| 113 | 
         
            +
                  "step": 6000
         
     | 
| 114 | 
         
            +
                },
         
     | 
| 115 | 
         
            +
                {
         
     | 
| 116 | 
         
            +
                  "epoch": 2.9707495429616086,
         
     | 
| 117 | 
         
            +
                  "grad_norm": 3.792961597442627,
         
     | 
| 118 | 
         
            +
                  "learning_rate": 1.0351643300390714e-06,
         
     | 
| 119 | 
         
            +
                  "loss": 1.008,
         
     | 
| 120 | 
         
            +
                  "step": 6500
         
     | 
| 121 | 
         
            +
                },
         
     | 
| 122 | 
         
            +
                {
         
     | 
| 123 | 
         
            +
                  "epoch": 3.0,
         
     | 
| 124 | 
         
            +
                  "eval_accuracy": 0.5792,
         
     | 
| 125 | 
         
            +
                  "eval_loss": 1.0549699068069458,
         
     | 
| 126 | 
         
            +
                  "eval_model_preparation_time": 0.0035,
         
     | 
| 127 | 
         
            +
                  "eval_runtime": 902.9283,
         
     | 
| 128 | 
         
            +
                  "eval_samples_per_second": 11.075,
         
     | 
| 129 | 
         
            +
                  "eval_steps_per_second": 1.384,
         
     | 
| 130 | 
         
            +
                  "step": 6564
         
     | 
| 131 | 
         
            +
                },
         
     | 
| 132 | 
         
            +
                {
         
     | 
| 133 | 
         
            +
                  "epoch": 3.19926873857404,
         
     | 
| 134 | 
         
            +
                  "grad_norm": 6.58300256729126,
         
     | 
| 135 | 
         
            +
                  "learning_rate": 8.053321075614801e-07,
         
     | 
| 136 | 
         
            +
                  "loss": 0.9923,
         
     | 
| 137 | 
         
            +
                  "step": 7000
         
     | 
| 138 | 
         
            +
                },
         
     | 
| 139 | 
         
            +
                {
         
     | 
| 140 | 
         
            +
                  "epoch": 3.4277879341864717,
         
     | 
| 141 | 
         
            +
                  "grad_norm": 7.9831223487854,
         
     | 
| 142 | 
         
            +
                  "learning_rate": 5.754998850838887e-07,
         
     | 
| 143 | 
         
            +
                  "loss": 0.9792,
         
     | 
| 144 | 
         
            +
                  "step": 7500
         
     | 
| 145 | 
         
            +
                },
         
     | 
| 146 | 
         
            +
                {
         
     | 
| 147 | 
         
            +
                  "epoch": 3.656307129798903,
         
     | 
| 148 | 
         
            +
                  "grad_norm": 6.074549198150635,
         
     | 
| 149 | 
         
            +
                  "learning_rate": 3.456676626062974e-07,
         
     | 
| 150 | 
         
            +
                  "loss": 0.9826,
         
     | 
| 151 | 
         
            +
                  "step": 8000
         
     | 
| 152 | 
         
            +
                },
         
     | 
| 153 | 
         
            +
                {
         
     | 
| 154 | 
         
            +
                  "epoch": 3.8848263254113347,
         
     | 
| 155 | 
         
            +
                  "grad_norm": 6.298589706420898,
         
     | 
| 156 | 
         
            +
                  "learning_rate": 1.1583544012870603e-07,
         
     | 
| 157 | 
         
            +
                  "loss": 0.9858,
         
     | 
| 158 | 
         
            +
                  "step": 8500
         
     | 
| 159 | 
         
            +
                },
         
     | 
| 160 | 
         
            +
                {
         
     | 
| 161 | 
         
            +
                  "epoch": 4.0,
         
     | 
| 162 | 
         
            +
                  "eval_accuracy": 0.5809,
         
     | 
| 163 | 
         
            +
                  "eval_loss": 1.0447938442230225,
         
     | 
| 164 | 
         
            +
                  "eval_model_preparation_time": 0.0035,
         
     | 
| 165 | 
         
            +
                  "eval_runtime": 893.1973,
         
     | 
| 166 | 
         
            +
                  "eval_samples_per_second": 11.196,
         
     | 
| 167 | 
         
            +
                  "eval_steps_per_second": 1.399,
         
     | 
| 168 | 
         
            +
                  "step": 8752
         
     | 
| 169 | 
         
            +
                }
         
     | 
| 170 | 
         
            +
              ],
         
     | 
| 171 | 
         
            +
              "logging_steps": 500,
         
     | 
| 172 | 
         
            +
              "max_steps": 8752,
         
     | 
| 173 | 
         
            +
              "num_input_tokens_seen": 0,
         
     | 
| 174 | 
         
            +
              "num_train_epochs": 4,
         
     | 
| 175 | 
         
            +
              "save_steps": 500,
         
     | 
| 176 | 
         
            +
              "stateful_callbacks": {
         
     | 
| 177 | 
         
            +
                "TrainerControl": {
         
     | 
| 178 | 
         
            +
                  "args": {
         
     | 
| 179 | 
         
            +
                    "should_epoch_stop": false,
         
     | 
| 180 | 
         
            +
                    "should_evaluate": false,
         
     | 
| 181 | 
         
            +
                    "should_log": false,
         
     | 
| 182 | 
         
            +
                    "should_save": true,
         
     | 
| 183 | 
         
            +
                    "should_training_stop": true
         
     | 
| 184 | 
         
            +
                  },
         
     | 
| 185 | 
         
            +
                  "attributes": {}
         
     | 
| 186 | 
         
            +
                }
         
     | 
| 187 | 
         
            +
              },
         
     | 
| 188 | 
         
            +
              "total_flos": 2.169911838302208e+19,
         
     | 
| 189 | 
         
            +
              "train_batch_size": 32,
         
     | 
| 190 | 
         
            +
              "trial_name": null,
         
     | 
| 191 | 
         
            +
              "trial_params": null
         
     | 
| 192 | 
         
            +
            }
         
     | 
    	
        checkpoint-8752/training_args.bin
    ADDED
    
    | 
         @@ -0,0 +1,3 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:e97ef976af56a6fd0395073925187744ad7c56829cffa99ff2332a90f94307b2
         
     | 
| 3 | 
         
            +
            size 5304
         
     | 
    	
        model.safetensors
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 343245508
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:cd3a725b1119d48b89118403515c77428316605407f871bbc7bba7fb50b32305
         
     | 
| 3 | 
         
             
            size 343245508
         
     | 
    	
        training_args.bin
    CHANGED
    
    | 
         @@ -1,3 +1,3 @@ 
     | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            -
            oid sha256: 
     | 
| 3 | 
         
             
            size 5304
         
     | 
| 
         | 
|
| 1 | 
         
             
            version https://git-lfs.github.com/spec/v1
         
     | 
| 2 | 
         
            +
            oid sha256:e97ef976af56a6fd0395073925187744ad7c56829cffa99ff2332a90f94307b2
         
     | 
| 3 | 
         
             
            size 5304
         
     |