Julian Bilcke committed
Commit e8518d0 · Parent(s): 9cdaa70

less logs
vms/ui/project/services/training.py
CHANGED
@@ -1664,25 +1664,25 @@ class TrainingService:
         # Check in lora_weights directory
         lora_weights_dir = self.app.output_path / "lora_weights"
         if lora_weights_dir.exists():
-            logger.info(f"Found lora_weights directory: {lora_weights_dir}")
+            #logger.info(f"Found lora_weights directory: {lora_weights_dir}")
 
             # Look for the latest checkpoint directory in lora_weights
             lora_checkpoints = [d for d in lora_weights_dir.glob("*") if d.is_dir() and d.name.isdigit()]
             if lora_checkpoints:
                 latest_lora_checkpoint = max(lora_checkpoints, key=lambda x: int(x.name))
-                logger.info(f"Found latest LoRA checkpoint: {latest_lora_checkpoint}")
+                #logger.info(f"Found latest LoRA checkpoint: {latest_lora_checkpoint}")
 
                 # Extract step count from directory name
                 result["steps"] = int(latest_lora_checkpoint.name)
 
                 # List contents of the latest checkpoint directory
                 checkpoint_contents = list(latest_lora_checkpoint.glob("*"))
-                logger.info(f"Contents of LoRA checkpoint {latest_lora_checkpoint.name}: {checkpoint_contents}")
+                #logger.info(f"Contents of LoRA checkpoint {latest_lora_checkpoint.name}: {checkpoint_contents}")
 
                 # Check for weights in the latest LoRA checkpoint
                 lora_safetensors = latest_lora_checkpoint / "pytorch_lora_weights.safetensors"
                 if lora_safetensors.exists():
-                    logger.info(f"Found weights in latest LoRA checkpoint: {lora_safetensors}")
+                    #logger.info(f"Found weights in latest LoRA checkpoint: {lora_safetensors}")
                     result["path"] = str(lora_safetensors)
                     return result
 
@@ -1697,14 +1697,14 @@ class TrainingService:
                 for weight_file in possible_weight_files:
                     weight_path = latest_lora_checkpoint / weight_file
                     if weight_path.exists():
-                        logger.info(f"Found weights file {weight_file} in latest LoRA checkpoint: {weight_path}")
+                        #logger.info(f"Found weights file {weight_file} in latest LoRA checkpoint: {weight_path}")
                         result["path"] = str(weight_path)
                         return result
 
                 # Check if any .safetensors files exist
                 safetensors_files = list(latest_lora_checkpoint.glob("*.safetensors"))
                 if safetensors_files:
-                    logger.info(f"Found .safetensors files in LoRA checkpoint: {safetensors_files}")
+                    #logger.info(f"Found .safetensors files in LoRA checkpoint: {safetensors_files}")
                     # Return the first .safetensors file found
                     result["path"] = str(safetensors_files[0])
                     return result
@@ -1712,11 +1712,12 @@ class TrainingService:
             # Fallback: check for direct safetensors file in lora_weights root
             lora_safetensors = lora_weights_dir / "pytorch_lora_weights.safetensors"
             if lora_safetensors.exists():
-                logger.info(f"Found weights in lora_weights directory: {lora_safetensors}")
+                #logger.info(f"Found weights in lora_weights directory: {lora_safetensors}")
                 result["path"] = str(lora_safetensors)
                 return result
             else:
                 logger.info(f"pytorch_lora_weights.safetensors not found in lora_weights directory")
+                pass
 
         # If not found in root or lora_weights, log the issue and check fallback
         logger.warning(f"Model weights not found at expected location: {model_output_safetensors_path}")
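
For context, the logic these hunks touch scans output_path/lora_weights for checkpoint directories named by training step, picks the highest-numbered one, and looks for pytorch_lora_weights.safetensors inside it, falling back to a file in the lora_weights root. Below is a minimal standalone sketch of that lookup; the function name find_latest_lora_weights and its free-function signature are illustrative assumptions (the real code is a TrainingService method and also consults possible_weight_files and model_output_safetensors_path, which these hunks only partially show). Instead of commenting the messages out, the sketch demotes them to logger.debug, which gives the same "less logs" behavior at the default INFO level while keeping the messages recoverable through log configuration.

import logging
from pathlib import Path

logger = logging.getLogger(__name__)

def find_latest_lora_weights(output_path: Path) -> dict:
    # Hypothetical standalone version of the checkpoint lookup shown in the diff.
    result = {"path": None, "steps": None}
    lora_weights_dir = output_path / "lora_weights"
    if not lora_weights_dir.exists():
        return result
    logger.debug("Found lora_weights directory: %s", lora_weights_dir)

    # Checkpoint directories are named by global step, e.g. "500", "1000".
    checkpoints = [d for d in lora_weights_dir.glob("*") if d.is_dir() and d.name.isdigit()]
    if checkpoints:
        latest = max(checkpoints, key=lambda d: int(d.name))
        logger.debug("Found latest LoRA checkpoint: %s", latest)
        result["steps"] = int(latest.name)

        weights = latest / "pytorch_lora_weights.safetensors"
        if weights.exists():
            result["path"] = str(weights)
            return result

    # Fallback: a safetensors file directly in the lora_weights root.
    direct = lora_weights_dir / "pytorch_lora_weights.safetensors"
    if direct.exists():
        result["path"] = str(direct)
    else:
        logger.debug("pytorch_lora_weights.safetensors not found in lora_weights directory")
    return result

With this approach the verbose path can be re-enabled at runtime, e.g. via logging.getLogger("vms").setLevel(logging.DEBUG), rather than by editing the source again.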