Spaces:
Running
Running
adds normalized variables on update
Browse files
- src/dataset_utils.py +42 -3
- src/monitoring.py +48 -0
- templates/spaces/trackio/app.py +18 -4
src/dataset_utils.py
CHANGED
|
@@ -253,19 +253,58 @@ class TrackioDatasetManager:
|
|
| 253 |
inc_params = _parse_json_field(incoming.get('parameters'), {})
|
| 254 |
inc_artifacts = _parse_json_field(incoming.get('artifacts'), [])
|
| 255 |
inc_logs = _parse_json_field(incoming.get('logs'), [])
|
| 256 |
-
# Merge metrics with de-dup
|
| 257 |
merged_metrics = []
|
| 258 |
seen = set()
|
| 259 |
for entry in base_metrics + inc_metrics:
|
| 260 |
try:
|
| 261 |
-
# Use the original entry so _metrics_key can properly
|
| 262 |
-
# distinguish dict vs non-dict entries
|
| 263 |
key = _metrics_key(entry)
|
| 264 |
except Exception:
|
| 265 |
key = (None, None)
|
| 266 |
if key not in seen:
|
| 267 |
seen.add(key)
|
| 268 |
merged_metrics.append(entry)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
# Merge params
|
| 270 |
merged_params = {}
|
| 271 |
if isinstance(base_params, dict):
|
|
|
|
| 253 |
inc_params = _parse_json_field(incoming.get('parameters'), {})
|
| 254 |
inc_artifacts = _parse_json_field(incoming.get('artifacts'), [])
|
| 255 |
inc_logs = _parse_json_field(incoming.get('logs'), [])
|
| 256 |
+
# Merge metrics with de-dup (by step+timestamp) then collapse per step
|
| 257 |
merged_metrics = []
|
| 258 |
seen = set()
|
| 259 |
for entry in base_metrics + inc_metrics:
|
| 260 |
try:
|
|
|
|
|
|
|
| 261 |
key = _metrics_key(entry)
|
| 262 |
except Exception:
|
| 263 |
key = (None, None)
|
| 264 |
if key not in seen:
|
| 265 |
seen.add(key)
|
| 266 |
merged_metrics.append(entry)
|
| 267 |
+
|
| 268 |
+
# Collapse duplicate steps by merging their metric dicts and keeping the latest timestamp
|
| 269 |
+
try:
|
| 270 |
+
step_to_entry: Dict[Any, Dict[str, Any]] = {}
|
| 271 |
+
for e in merged_metrics:
|
| 272 |
+
if not isinstance(e, dict):
|
| 273 |
+
continue
|
| 274 |
+
# Ensure nested structure {timestamp, step, metrics}
|
| 275 |
+
if 'metrics' not in e:
|
| 276 |
+
e = {
|
| 277 |
+
'timestamp': e.get('timestamp'),
|
| 278 |
+
'step': e.get('step'),
|
| 279 |
+
'metrics': {k: v for k, v in e.items() if k not in ('step', 'timestamp')}
|
| 280 |
+
}
|
| 281 |
+
step_val = e.get('step')
|
| 282 |
+
if step_val in step_to_entry:
|
| 283 |
+
existing_e = step_to_entry[step_val]
|
| 284 |
+
try:
|
| 285 |
+
existing_metrics_dict = existing_e.get('metrics', {})
|
| 286 |
+
if isinstance(existing_metrics_dict, dict):
|
| 287 |
+
existing_metrics_dict.update(e.get('metrics', {}))
|
| 288 |
+
else:
|
| 289 |
+
existing_e['metrics'] = e.get('metrics', {})
|
| 290 |
+
except Exception:
|
| 291 |
+
existing_e['metrics'] = e.get('metrics', {})
|
| 292 |
+
try:
|
| 293 |
+
if str(e.get('timestamp', '')) > str(existing_e.get('timestamp', '')):
|
| 294 |
+
existing_e['timestamp'] = e.get('timestamp')
|
| 295 |
+
except Exception:
|
| 296 |
+
pass
|
| 297 |
+
else:
|
| 298 |
+
step_to_entry[step_val] = dict(e)
|
| 299 |
+
def _step_key(x: Dict[str, Any]):
|
| 300 |
+
try:
|
| 301 |
+
return float(x.get('step'))
|
| 302 |
+
except Exception:
|
| 303 |
+
return -1.0
|
| 304 |
+
merged_metrics = sorted(step_to_entry.values(), key=_step_key)
|
| 305 |
+
except Exception:
|
| 306 |
+
# On any error, keep the de-duplicated list
|
| 307 |
+
pass
|
| 308 |
# Merge params
|
| 309 |
merged_params = {}
|
| 310 |
if isinstance(base_params, dict):
|
src/monitoring.py
CHANGED
|
@@ -310,6 +310,54 @@ class SmolLM3Monitor:
|
|
| 310 |
except Exception:
|
| 311 |
pass
|
| 312 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
# Merge artifacts if provided
|
| 314 |
if 'artifacts' in experiment_data and isinstance(experiment_data['artifacts'], list):
|
| 315 |
# De-duplicate while preserving order
|
|
|
|
| 310 |
except Exception:
|
| 311 |
pass
|
| 312 |
|
| 313 |
+
# Collapse duplicate step entries by merging their metric dictionaries
|
| 314 |
+
try:
|
| 315 |
+
def _collapse_by_step(entries: list) -> list:
|
| 316 |
+
step_to_entry: dict = {}
|
| 317 |
+
for e in entries:
|
| 318 |
+
if not isinstance(e, dict):
|
| 319 |
+
continue
|
| 320 |
+
# Normalize to nested structure
|
| 321 |
+
if 'metrics' not in e:
|
| 322 |
+
e = {
|
| 323 |
+
'timestamp': e.get('timestamp'),
|
| 324 |
+
'step': e.get('step'),
|
| 325 |
+
'metrics': {k: v for k, v in e.items() if k not in ('step', 'timestamp')}
|
| 326 |
+
}
|
| 327 |
+
step_val = e.get('step')
|
| 328 |
+
if step_val in step_to_entry:
|
| 329 |
+
# Merge metrics into existing entry for the same step
|
| 330 |
+
existing_e = step_to_entry[step_val]
|
| 331 |
+
try:
|
| 332 |
+
existing_e_metrics = existing_e.get('metrics', {})
|
| 333 |
+
if isinstance(existing_e_metrics, dict):
|
| 334 |
+
existing_e_metrics.update(e.get('metrics', {}))
|
| 335 |
+
else:
|
| 336 |
+
existing_e['metrics'] = e.get('metrics', {})
|
| 337 |
+
except Exception:
|
| 338 |
+
existing_e['metrics'] = e.get('metrics', {})
|
| 339 |
+
# Prefer the latest timestamp (ISO strings compare lexicographically)
|
| 340 |
+
try:
|
| 341 |
+
if str(e.get('timestamp', '')) > str(existing_e.get('timestamp', '')):
|
| 342 |
+
existing_e['timestamp'] = e.get('timestamp')
|
| 343 |
+
except Exception:
|
| 344 |
+
pass
|
| 345 |
+
else:
|
| 346 |
+
step_to_entry[step_val] = dict(e)
|
| 347 |
+
# Sort by step (None/non-numeric steps fall back to -1.0 and therefore sort first)
|
| 348 |
+
def _step_key(x):
|
| 349 |
+
val = x.get('step')
|
| 350 |
+
try:
|
| 351 |
+
return float(val)
|
| 352 |
+
except Exception:
|
| 353 |
+
return -1.0
|
| 354 |
+
return sorted(step_to_entry.values(), key=_step_key)
|
| 355 |
+
|
| 356 |
+
merged_metrics = _collapse_by_step(merged_metrics)
|
| 357 |
+
except Exception:
|
| 358 |
+
# If anything goes wrong, keep original list
|
| 359 |
+
pass
|
| 360 |
+
|
| 361 |
# Merge artifacts if provided
|
| 362 |
if 'artifacts' in experiment_data and isinstance(experiment_data['artifacts'], list):
|
| 363 |
# De-duplicate while preserving order
|
templates/spaces/trackio/app.py
CHANGED
|
@@ -661,18 +661,32 @@ class TrackioSpace:
|
|
| 661 |
if not experiment['metrics']:
|
| 662 |
return pd.DataFrame()
|
| 663 |
|
| 664 |
-
# Convert metrics to DataFrame
|
| 665 |
data = []
|
| 666 |
for metric_entry in experiment['metrics']:
|
| 667 |
step = metric_entry.get('step', 0)
|
| 668 |
timestamp = metric_entry.get('timestamp', '')
|
| 669 |
metrics = metric_entry.get('metrics', {})
|
| 670 |
-
|
| 671 |
row = {'step': step, 'timestamp': timestamp}
|
| 672 |
row.update(metrics)
|
| 673 |
data.append(row)
|
| 674 |
-
|
| 675 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
|
| 677 |
# Global instance
|
| 678 |
trackio_space = TrackioSpace()
|
|
|
|
| 661 |
if not experiment['metrics']:
|
| 662 |
return pd.DataFrame()
|
| 663 |
|
| 664 |
+
# Convert metrics to DataFrame (merge duplicate steps)
|
| 665 |
data = []
|
| 666 |
for metric_entry in experiment['metrics']:
|
| 667 |
step = metric_entry.get('step', 0)
|
| 668 |
timestamp = metric_entry.get('timestamp', '')
|
| 669 |
metrics = metric_entry.get('metrics', {})
|
| 670 |
+
|
| 671 |
row = {'step': step, 'timestamp': timestamp}
|
| 672 |
row.update(metrics)
|
| 673 |
data.append(row)
|
| 674 |
+
|
| 675 |
+
if not data:
|
| 676 |
+
return pd.DataFrame()
|
| 677 |
+
|
| 678 |
+
df = pd.DataFrame(data)
|
| 679 |
+
# Ensure a 'step' column exists; default it to 0 when the column is missing
|
| 680 |
+
if 'step' not in df.columns:
|
| 681 |
+
df['step'] = 0
|
| 682 |
+
# For duplicate steps, keep the latest timestamp and merge columns by last valid value
|
| 683 |
+
try:
|
| 684 |
+
df.sort_values(['step', 'timestamp'], inplace=True)
|
| 685 |
+
# Per step, take the last non-null value of each column (rows are ordered by timestamp)
|
| 686 |
+
df = df.groupby('step', as_index=False).last()
|
| 687 |
+
except Exception:
|
| 688 |
+
pass
|
| 689 |
+
return df
|
| 690 |
|
| 691 |
# Global instance
|
| 692 |
trackio_space = TrackioSpace()
|