Spaces:
Running
Running
enable external logging boolean correctly
Browse files
- model.py +2 -12
- trainer.py +12 -13
model.py
CHANGED
|
@@ -151,7 +151,7 @@ class SmolLM3Model:
|
|
| 151 |
# Only enable DDP if multiple GPUs are available
|
| 152 |
"ddp_backend": self.config.ddp_backend if torch.cuda.device_count() > 1 else None,
|
| 153 |
"ddp_find_unused_parameters": self.config.ddp_find_unused_parameters if torch.cuda.device_count() > 1 else False,
|
| 154 |
-
"report_to":
|
| 155 |
"remove_unused_columns": False,
|
| 156 |
"dataloader_pin_memory": False,
|
| 157 |
"group_by_length": True,
|
|
@@ -172,17 +172,7 @@ class SmolLM3Model:
|
|
| 172 |
# Override with kwargs
|
| 173 |
training_args.update(kwargs)
|
| 174 |
|
| 175 |
-
|
| 176 |
-
for key, value in training_args.items():
|
| 177 |
-
if isinstance(value, bool):
|
| 178 |
-
logger.info(f"Boolean argument: {key} = {value}")
|
| 179 |
-
|
| 180 |
-
try:
|
| 181 |
-
return TrainingArguments(**training_args)
|
| 182 |
-
except Exception as e:
|
| 183 |
-
logger.error(f"Failed to create TrainingArguments: {e}")
|
| 184 |
-
logger.error(f"Training arguments: {training_args}")
|
| 185 |
-
raise
|
| 186 |
|
| 187 |
def save_pretrained(self, path: str):
|
| 188 |
"""Save model and tokenizer"""
|
|
|
|
| 151 |
# Only enable DDP if multiple GPUs are available
|
| 152 |
"ddp_backend": self.config.ddp_backend if torch.cuda.device_count() > 1 else None,
|
| 153 |
"ddp_find_unused_parameters": self.config.ddp_find_unused_parameters if torch.cuda.device_count() > 1 else False,
|
| 154 |
+
"report_to": None, # Enable external logging (default)
|
| 155 |
"remove_unused_columns": False,
|
| 156 |
"dataloader_pin_memory": False,
|
| 157 |
"group_by_length": True,
|
|
|
|
| 172 |
# Override with kwargs
|
| 173 |
training_args.update(kwargs)
|
| 174 |
|
| 175 |
+
return TrainingArguments(**training_args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
def save_pretrained(self, path: str):
|
| 178 |
"""Save model and tokenizer"""
|
trainer.py
CHANGED
|
@@ -98,19 +98,18 @@ class SmolLM3Trainer:
|
|
| 98 |
callbacks.append(SimpleConsoleCallback())
|
| 99 |
logger.info("Added simple console monitoring callback")
|
| 100 |
|
| 101 |
-
# Try to add Trackio callback if available
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
# logger.info("Continuing with console monitoring only")
|
| 114 |
|
| 115 |
# Try standard Trainer first (more stable with callbacks)
|
| 116 |
try:
|
|
|
|
| 98 |
callbacks.append(SimpleConsoleCallback())
|
| 99 |
logger.info("Added simple console monitoring callback")
|
| 100 |
|
| 101 |
+
# Try to add Trackio callback if available
|
| 102 |
+
if self.monitor and self.monitor.enable_tracking:
|
| 103 |
+
try:
|
| 104 |
+
trackio_callback = self.monitor.create_monitoring_callback()
|
| 105 |
+
if trackio_callback:
|
| 106 |
+
callbacks.append(trackio_callback)
|
| 107 |
+
logger.info("Added Trackio monitoring callback")
|
| 108 |
+
else:
|
| 109 |
+
logger.warning("Failed to create Trackio callback")
|
| 110 |
+
except Exception as e:
|
| 111 |
+
logger.error(f"Error creating Trackio callback: {e}")
|
| 112 |
+
logger.info("Continuing with console monitoring only")
|
|
|
|
| 113 |
|
| 114 |
# Try standard Trainer first (more stable with callbacks)
|
| 115 |
try:
|