End of training

Files changed (7) hide show

README.md CHANGED Viewed

@@ -5,11 +5,11 @@ library_name: transformers
 model_name: trainer_output
 tags:
 - generated_from_trainer
 - sft
-- tool-use
 - unsloth
-- trl
-- linalg-zero
 licence: license
 ---

 model_name: trainer_output
 tags:
 - generated_from_trainer
+- linalg-zero
+- trl
 - sft
 - unsloth
+- tool-use
 licence: license
 ---

adapter_config.json CHANGED Viewed

@@ -29,13 +29,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
     "q_proj",
-    "k_proj",
     "gate_proj",
-    "o_proj",
-    "down_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
+    "o_proj",
     "v_proj",
     "q_proj",
+    "up_proj",
     "gate_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

all_results.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
     "epoch": 1.0,
     "eval_loss": 0.8699710965156555,
-    "eval_runtime": 8.6196,
     "eval_samples": 60,
-    "eval_samples_per_second": 6.961,
-    "eval_steps_per_second": 6.961,
-    "total_flos": 2717720554039296.0,
-    "train_loss": 0.0,
-    "train_runtime": 0.0042,
     "train_samples": 60,
-    "train_samples_per_second": 14376.363,
-    "train_steps_per_second": 239.606
 }

 {
     "epoch": 1.0,
     "eval_loss": 0.8699710965156555,
+    "eval_runtime": 8.5093,
     "eval_samples": 60,
+    "eval_samples_per_second": 7.051,
+    "eval_steps_per_second": 7.051,
+    "total_flos": 3989722274383872.0,
+    "train_loss": 0.7914382219314575,
+    "train_runtime": 48.4068,
     "train_samples": 60,
+    "train_samples_per_second": 1.239,
+    "train_steps_per_second": 0.021
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
     "eval_loss": 0.8699710965156555,
-    "eval_runtime": 8.6196,
     "eval_samples": 60,
-    "eval_samples_per_second": 6.961,
-    "eval_steps_per_second": 6.961
 }

 {
     "epoch": 1.0,
     "eval_loss": 0.8699710965156555,
+    "eval_runtime": 8.5093,
     "eval_samples": 60,
+    "eval_samples_per_second": 7.051,
+    "eval_steps_per_second": 7.051
 }

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
-    "total_flos": 2717720554039296.0,
-    "train_loss": 0.0,
-    "train_runtime": 0.0042,
     "train_samples": 60,
-    "train_samples_per_second": 14376.363,
-    "train_steps_per_second": 239.606
 }

 {
     "epoch": 1.0,
+    "total_flos": 3989722274383872.0,
+    "train_loss": 0.7914382219314575,
+    "train_runtime": 48.4068,
     "train_samples": 60,
+    "train_samples_per_second": 1.239,
+    "train_steps_per_second": 0.021
 }

trainer_state.json CHANGED Viewed

@@ -3,35 +3,27 @@
   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 1.0,
-  "eval_steps": 50,
   "global_step": 1,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
-    {
-      "epoch": 0,
-      "eval_loss": 0.8395363092422485,
-      "eval_runtime": 8.5871,
-      "eval_samples_per_second": 6.987,
-      "eval_steps_per_second": 6.987,
-      "step": 0
-    },
     {
       "epoch": 1.0,
       "step": 1,
-      "total_flos": 2717720554039296.0,
-      "train_loss": 0.0,
-      "train_runtime": 0.0042,
-      "train_samples_per_second": 14376.363,
-      "train_steps_per_second": 239.606
     }
   ],
   "logging_steps": 5,
   "max_steps": 1,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
-  "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -44,7 +36,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2717720554039296.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_metric": null,
   "best_model_checkpoint": null,
   "epoch": 1.0,
+  "eval_steps": 500,
   "global_step": 1,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 1.0,
       "step": 1,
+      "total_flos": 3989722274383872.0,
+      "train_loss": 0.7914382219314575,
+      "train_runtime": 48.4068,
+      "train_samples_per_second": 1.239,
+      "train_steps_per_second": 0.021
     }
   ],
   "logging_steps": 5,
   "max_steps": 1,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
+  "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
       "attributes": {}
     }
   },
+  "total_flos": 3989722274383872.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cff4ee75012ee385e734726f3dee6d7d4a60147422cc2344287ce24a45db845
 size 6097

 version https://git-lfs.github.com/spec/v1
+oid sha256:a33b7b850c5404ebde18cccd9d96cdc1f4b112f80e63cdb79e28b53df4e2a4c8
 size 6097