Commit
·
76ae46f
1
Parent(s):
18d0924
Update run_pretraining.py and configuration_aragpt2.py
Browse files
- README.md +19 -1
- configuration_aragpt2.py +1 -1
README.md
CHANGED
|
@@ -86,7 +86,25 @@ python create_pretraining_data.py
|
|
| 86 |
|
| 87 |
Finetuning:
|
| 88 |
```bash
|
| 89 |
-
python3 run_pretraining.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
```
|
| 91 |
# Model Sizes
|
| 92 |
|
|
|
|
| 86 |
|
| 87 |
Finetuning:
|
| 88 |
```bash
|
| 89 |
+
python3 run_pretraining.py \
|
| 90 |
+
--input_file="gs://<GS_BUCKET>/pretraining_data/*" \
|
| 91 |
+
--output_dir="gs://<GS_BUCKET>/pretraining_model/" \
|
| 92 |
+
--config_file="config/small_hparams.json" \
|
| 93 |
+
--batch_size=128 \
|
| 94 |
+
--eval_batch_size=8 \
|
| 95 |
+
--num_train_steps= \
|
| 96 |
+
--num_warmup_steps= \
|
| 97 |
+
--learning_rate= \
|
| 98 |
+
--save_checkpoints_steps= \
|
| 99 |
+
--max_seq_length=1024 \
|
| 100 |
+
--max_eval_steps= \
|
| 101 |
+
--optimizer="lamb" \
|
| 102 |
+
--iterations_per_loop=5000 \
|
| 103 |
+
--keep_checkpoint_max=10 \
|
| 104 |
+
--use_tpu=True \
|
| 105 |
+
--tpu_name=<TPU NAME> \
|
| 106 |
+
--do_train=True \
|
| 107 |
+
--do_eval=False
|
| 108 |
```
|
| 109 |
# Model Sizes
|
| 110 |
|
configuration_aragpt2.py
CHANGED
|
@@ -131,7 +131,7 @@ class AraGPT2Config(PretrainedConfig):
|
|
| 131 |
n_layer=12,
|
| 132 |
n_head=12,
|
| 133 |
n_inner=None,
|
| 134 |
-
activation_function="
|
| 135 |
resid_pdrop=0.1,
|
| 136 |
embd_pdrop=0.1,
|
| 137 |
attn_pdrop=0.1,
|
|
|
|
| 131 |
n_layer=12,
|
| 132 |
n_head=12,
|
| 133 |
n_inner=None,
|
| 134 |
+
activation_function="gelu_new",
|
| 135 |
resid_pdrop=0.1,
|
| 136 |
embd_pdrop=0.1,
|
| 137 |
attn_pdrop=0.1,
|