Spaces:
Running
Running
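Add training and evaluation parameters to the medical config (batch size, gradient accumulation steps, warmup steps, eval steps, eval/test ratios)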
Browse files
config/train_gpt_oss_medical_o1_sft.py
CHANGED
|
@@ -56,12 +56,13 @@ config = GPTOSSEnhancedCustomConfig(
|
|
| 56 |
# TRAINING HYPERPARAMETERS
|
| 57 |
# ============================================================================
|
| 58 |
num_train_epochs=1.0,
|
| 59 |
-
batch_size=
|
| 60 |
-
gradient_accumulation_steps=
|
| 61 |
learning_rate=2e-4,
|
| 62 |
min_lr=2e-5,
|
| 63 |
weight_decay=0.01,
|
| 64 |
warmup_ratio=0.03,
|
|
|
|
| 65 |
max_grad_norm=1.0,
|
| 66 |
|
| 67 |
# Sequence length
|
|
@@ -113,7 +114,7 @@ config = GPTOSSEnhancedCustomConfig(
|
|
| 113 |
# LOGGING & EVAL
|
| 114 |
# ============================================================================
|
| 115 |
eval_strategy="steps",
|
| 116 |
-
eval_steps=
|
| 117 |
logging_steps=10,
|
| 118 |
save_strategy="steps",
|
| 119 |
save_steps=500,
|
|
@@ -124,8 +125,8 @@ config = GPTOSSEnhancedCustomConfig(
|
|
| 124 |
load_best_model_at_end=False,
|
| 125 |
eval_accumulation_steps=2,
|
| 126 |
eval_batch_size=1,
|
| 127 |
-
eval_ratio=0.
|
| 128 |
-
test_ratio=0.
|
| 129 |
|
| 130 |
# ============================================================================
|
| 131 |
# MONITORING & HUB
|
|
|
|
| 56 |
# TRAINING HYPERPARAMETERS
|
| 57 |
# ============================================================================
|
| 58 |
num_train_epochs=1.0,
|
| 59 |
+
batch_size=4,
|
| 60 |
+
gradient_accumulation_steps=4,
|
| 61 |
learning_rate=2e-4,
|
| 62 |
min_lr=2e-5,
|
| 63 |
weight_decay=0.01,
|
| 64 |
warmup_ratio=0.03,
|
| 65 |
+
warmup_steps=50,
|
| 66 |
max_grad_norm=1.0,
|
| 67 |
|
| 68 |
# Sequence length
|
|
|
|
| 114 |
# LOGGING & EVAL
|
| 115 |
# ============================================================================
|
| 116 |
eval_strategy="steps",
|
| 117 |
+
eval_steps=100,
|
| 118 |
logging_steps=10,
|
| 119 |
save_strategy="steps",
|
| 120 |
save_steps=500,
|
|
|
|
| 125 |
load_best_model_at_end=False,
|
| 126 |
eval_accumulation_steps=2,
|
| 127 |
eval_batch_size=1,
|
| 128 |
+
eval_ratio=0.001,
|
| 129 |
+
test_ratio=0.0005,
|
| 130 |
|
| 131 |
# ============================================================================
|
| 132 |
# MONITORING & HUB
|