atomwalk12 committed on
Commit
15084a2
·
verified ·
1 Parent(s): 3886b30

End of training

Browse files
README.md CHANGED
@@ -5,11 +5,11 @@ library_name: transformers
5
  model_name: trainer_output
6
  tags:
7
  - generated_from_trainer
 
 
8
  - sft
9
- - tool-use
10
  - unsloth
11
- - trl
12
- - linalg-zero
13
  licence: license
14
  ---
15
 
 
5
  model_name: trainer_output
6
  tags:
7
  - generated_from_trainer
8
+ - linalg-zero
9
+ - trl
10
  - sft
 
11
  - unsloth
12
+ - tool-use
 
13
  licence: license
14
  ---
15
 
adapter_config.json CHANGED
@@ -29,13 +29,13 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
 
32
  "v_proj",
33
  "q_proj",
34
- "k_proj",
35
  "gate_proj",
36
- "o_proj",
37
- "down_proj",
38
- "up_proj"
39
  ],
40
  "task_type": "CAUSAL_LM",
41
  "trainable_token_indices": null,
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "k_proj",
33
+ "o_proj",
34
  "v_proj",
35
  "q_proj",
36
+ "up_proj",
37
  "gate_proj",
38
+ "down_proj"
 
 
39
  ],
40
  "task_type": "CAUSAL_LM",
41
  "trainable_token_indices": null,
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 1.0,
3
  "eval_loss": 0.8699710965156555,
4
- "eval_runtime": 8.6196,
5
  "eval_samples": 60,
6
- "eval_samples_per_second": 6.961,
7
- "eval_steps_per_second": 6.961,
8
- "total_flos": 2717720554039296.0,
9
- "train_loss": 0.0,
10
- "train_runtime": 0.0042,
11
  "train_samples": 60,
12
- "train_samples_per_second": 14376.363,
13
- "train_steps_per_second": 239.606
14
  }
 
1
  {
2
  "epoch": 1.0,
3
  "eval_loss": 0.8699710965156555,
4
+ "eval_runtime": 8.5093,
5
  "eval_samples": 60,
6
+ "eval_samples_per_second": 7.051,
7
+ "eval_steps_per_second": 7.051,
8
+ "total_flos": 3989722274383872.0,
9
+ "train_loss": 0.7914382219314575,
10
+ "train_runtime": 48.4068,
11
  "train_samples": 60,
12
+ "train_samples_per_second": 1.239,
13
+ "train_steps_per_second": 0.021
14
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "eval_loss": 0.8699710965156555,
4
- "eval_runtime": 8.6196,
5
  "eval_samples": 60,
6
- "eval_samples_per_second": 6.961,
7
- "eval_steps_per_second": 6.961
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "eval_loss": 0.8699710965156555,
4
+ "eval_runtime": 8.5093,
5
  "eval_samples": 60,
6
+ "eval_samples_per_second": 7.051,
7
+ "eval_steps_per_second": 7.051
8
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "total_flos": 2717720554039296.0,
4
- "train_loss": 0.0,
5
- "train_runtime": 0.0042,
6
  "train_samples": 60,
7
- "train_samples_per_second": 14376.363,
8
- "train_steps_per_second": 239.606
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "total_flos": 3989722274383872.0,
4
+ "train_loss": 0.7914382219314575,
5
+ "train_runtime": 48.4068,
6
  "train_samples": 60,
7
+ "train_samples_per_second": 1.239,
8
+ "train_steps_per_second": 0.021
9
  }
trainer_state.json CHANGED
@@ -3,35 +3,27 @@
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
- "eval_steps": 50,
7
  "global_step": 1,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
- {
13
- "epoch": 0,
14
- "eval_loss": 0.8395363092422485,
15
- "eval_runtime": 8.5871,
16
- "eval_samples_per_second": 6.987,
17
- "eval_steps_per_second": 6.987,
18
- "step": 0
19
- },
20
  {
21
  "epoch": 1.0,
22
  "step": 1,
23
- "total_flos": 2717720554039296.0,
24
- "train_loss": 0.0,
25
- "train_runtime": 0.0042,
26
- "train_samples_per_second": 14376.363,
27
- "train_steps_per_second": 239.606
28
  }
29
  ],
30
  "logging_steps": 5,
31
  "max_steps": 1,
32
  "num_input_tokens_seen": 0,
33
  "num_train_epochs": 1,
34
- "save_steps": 50,
35
  "stateful_callbacks": {
36
  "TrainerControl": {
37
  "args": {
@@ -44,7 +36,7 @@
44
  "attributes": {}
45
  }
46
  },
47
- "total_flos": 2717720554039296.0,
48
  "train_batch_size": 1,
49
  "trial_name": null,
50
  "trial_params": null
 
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
  "epoch": 1.0,
6
+ "eval_steps": 500,
7
  "global_step": 1,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
  "step": 1,
15
+ "total_flos": 3989722274383872.0,
16
+ "train_loss": 0.7914382219314575,
17
+ "train_runtime": 48.4068,
18
+ "train_samples_per_second": 1.239,
19
+ "train_steps_per_second": 0.021
20
  }
21
  ],
22
  "logging_steps": 5,
23
  "max_steps": 1,
24
  "num_input_tokens_seen": 0,
25
  "num_train_epochs": 1,
26
+ "save_steps": 500,
27
  "stateful_callbacks": {
28
  "TrainerControl": {
29
  "args": {
 
36
  "attributes": {}
37
  }
38
  },
39
+ "total_flos": 3989722274383872.0,
40
  "train_batch_size": 1,
41
  "trial_name": null,
42
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cff4ee75012ee385e734726f3dee6d7d4a60147422cc2344287ce24a45db845
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a33b7b850c5404ebde18cccd9d96cdc1f4b112f80e63cdb79e28b53df4e2a4c8
3
  size 6097