Spaces:
Running
Running
attempts to identify data bug
Browse files
- trainer.py +10 -0
trainer.py
CHANGED
|
@@ -55,11 +55,18 @@ class SmolLM3Trainer:
|
|
| 55 |
)
|
| 56 |
|
| 57 |
# Get datasets
|
|
|
|
| 58 |
train_dataset = self.dataset.get_train_dataset()
|
|
|
|
|
|
|
|
|
|
| 59 |
eval_dataset = self.dataset.get_eval_dataset()
|
|
|
|
| 60 |
|
| 61 |
# Get data collator
|
|
|
|
| 62 |
data_collator = self.dataset.get_data_collator()
|
|
|
|
| 63 |
|
| 64 |
# Add monitoring callback - temporarily disabled to debug
|
| 65 |
callbacks = []
|
|
@@ -116,6 +123,8 @@ class SmolLM3Trainer:
|
|
| 116 |
# logger.info("Continuing with console monitoring only")
|
| 117 |
|
| 118 |
# Try standard Trainer first (more stable with callbacks)
|
|
|
|
|
|
|
| 119 |
try:
|
| 120 |
trainer = Trainer(
|
| 121 |
model=self.model.model,
|
|
@@ -129,6 +138,7 @@ class SmolLM3Trainer:
|
|
| 129 |
logger.info("Using standard Hugging Face Trainer")
|
| 130 |
except Exception as e:
|
| 131 |
logger.warning(f"Standard Trainer failed: {e}")
|
|
|
|
| 132 |
# Fallback to SFTTrainer
|
| 133 |
trainer = SFTTrainer(
|
| 134 |
model=self.model.model,
|
|
|
|
| 55 |
)
|
| 56 |
|
| 57 |
# Get datasets
|
| 58 |
+
logger.info("Getting train dataset...")
|
| 59 |
train_dataset = self.dataset.get_train_dataset()
|
| 60 |
+
logger.info(f"Train dataset: {type(train_dataset)} with {len(train_dataset)} samples")
|
| 61 |
+
|
| 62 |
+
logger.info("Getting eval dataset...")
|
| 63 |
eval_dataset = self.dataset.get_eval_dataset()
|
| 64 |
+
logger.info(f"Eval dataset: {type(eval_dataset)} with {len(eval_dataset)} samples")
|
| 65 |
|
| 66 |
# Get data collator
|
| 67 |
+
logger.info("Getting data collator...")
|
| 68 |
data_collator = self.dataset.get_data_collator()
|
| 69 |
+
logger.info(f"Data collator: {type(data_collator)}")
|
| 70 |
|
| 71 |
# Add monitoring callback - temporarily disabled to debug
|
| 72 |
callbacks = []
|
|
|
|
| 123 |
# logger.info("Continuing with console monitoring only")
|
| 124 |
|
| 125 |
# Try standard Trainer first (more stable with callbacks)
|
| 126 |
+
logger.info("Creating Trainer with training arguments...")
|
| 127 |
+
logger.info(f"Training args keys: {list(training_args.keys())}")
|
| 128 |
try:
|
| 129 |
trainer = Trainer(
|
| 130 |
model=self.model.model,
|
|
|
|
| 138 |
logger.info("Using standard Hugging Face Trainer")
|
| 139 |
except Exception as e:
|
| 140 |
logger.warning(f"Standard Trainer failed: {e}")
|
| 141 |
+
logger.error(f"Trainer creation error details: {type(e).__name__}: {str(e)}")
|
| 142 |
# Fallback to SFTTrainer
|
| 143 |
trainer = SFTTrainer(
|
| 144 |
model=self.model.model,
|