Spaces:

Tonic
/

SmolFactory

Running

Tonic committed on Jul 19

Commit

0de9de2

verified ·

1 Parent(s): bb64084

improves requirements and dependencies

Files changed (5) hide show

config/__init__.py CHANGED Viewed

@@ -7,11 +7,30 @@ from .train_smollm3_openhermes_fr import SmolLM3ConfigOpenHermesFR, get_config a
 from .train_smollm3_openhermes_fr_a100_large import SmolLM3ConfigOpenHermesFRA100Large, get_config as get_a100_large_config
 from .train_smollm3_openhermes_fr_a100_multiple_passes import SmolLM3ConfigOpenHermesFRMultiplePasses, get_config as get_multiple_passes_config
 __all__ = [
     'SmolLM3Config',
     'SmolLM3ConfigOpenHermesFR',
     'SmolLM3ConfigOpenHermesFRA100Large',
     'SmolLM3ConfigOpenHermesFRMultiplePasses',
     'get_base_config',
     'get_openhermes_fr_config',
     'get_a100_large_config',

 from .train_smollm3_openhermes_fr_a100_large import SmolLM3ConfigOpenHermesFRA100Large, get_config as get_a100_large_config
 from .train_smollm3_openhermes_fr_a100_multiple_passes import SmolLM3ConfigOpenHermesFRMultiplePasses, get_config as get_multiple_passes_config
+# Generic get_config function that dispatches on the config file name
+def get_config(config_path: str):
+    """Load a config, picking the loader from the config file's name.
+
+    Only the final path component is inspected, so a parent directory
+    that happens to contain e.g. "a100_large" cannot misroute a config.
+
+    Args:
+        config_path: Path to the config file.
+
+    Returns:
+        Whatever the selected ``get_*_config`` loader returns. A path that
+        does not exist, or whose file name matches no known variant, is
+        handed to ``get_base_config``.
+    """
+    import os
+    if not os.path.exists(config_path):
+        return get_base_config(config_path)
+    # Match on the file name only; directory names must not pick the loader
+    filename = os.path.basename(config_path)
+    # Order matters: the A100 variants' module names also contain
+    # "openhermes_fr", so the more specific markers are tested first
+    if "a100_large" in filename:
+        return get_a100_large_config(config_path)
+    elif "a100_multiple_passes" in filename:
+        return get_multiple_passes_config(config_path)
+    elif "openhermes_fr" in filename:
+        return get_openhermes_fr_config(config_path)
+    else:
+        return get_base_config(config_path)
 __all__ = [
     'SmolLM3Config',
     'SmolLM3ConfigOpenHermesFR',
     'SmolLM3ConfigOpenHermesFRA100Large',
     'SmolLM3ConfigOpenHermesFRMultiplePasses',
+    'get_config',
     'get_base_config',
     'get_openhermes_fr_config',
     'get_a100_large_config',

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-# Core dependencies
 torch>=2.0.0
 transformers>=4.53.0
 datasets>=2.14.0
@@ -11,32 +11,27 @@ tokenizers>=0.13.0
 # Training and optimization
 flash-attn>=2.0.0
-xformers>=0.0.20
 bitsandbytes>=0.41.0
-# Utilities
 numpy>=1.24.0
-pandas>=2.0.0
-scikit-learn>=1.3.0
 tqdm>=4.65.0
-wandb>=0.15.0
-# Optional: for evaluation
-lighteval>=0.1.0
-evaluate>=0.4.0
-# Optional: for deployment
-vllm>=0.2.0
-sentencepiece>=0.1.99
-# Development
-pytest>=7.0.0
-black>=23.0.0
-isort>=5.12.0
-# Experiment tracking and monitoring
-trackio>=0.1.0
-psutil>=5.9.0
-# Hugging Face Hub integration
-huggingface_hub>=0.16.0

+# Core dependencies - essential for training
 torch>=2.0.0
 transformers>=4.53.0
 datasets>=2.14.0
 # Training and optimization
 flash-attn>=2.0.0
 bitsandbytes>=0.41.0
+# Basic utilities
 numpy>=1.24.0
 tqdm>=4.65.0
+# Experiment tracking
+trackio>=0.1.0
+# Optional: for evaluation (commented out to reduce conflicts)
+# lighteval>=0.1.0
+# evaluate>=0.4.0
+# Optional: for deployment (commented out to reduce conflicts)
+# vllm>=0.2.0
+# sentencepiece>=0.1.99
+# Development tools (commented out to reduce conflicts)
+# pytest>=7.0.0
+# black>=23.0.0
+# isort>=5.12.0
+# System monitoring
+psutil>=5.9.0

requirements_core.txt ADDED Viewed

+# Core requirements for SmolLM3 training (without flash-attn)
+torch>=2.0.0
+transformers>=4.53.0
+datasets>=2.14.0
+accelerate>=0.20.0
+trl>=0.7.0
+huggingface-hub>=0.16.0
+tokenizers>=0.13.0
+bitsandbytes>=0.41.0
+numpy>=1.24.0
+tqdm>=4.65.0
+trackio>=0.1.0
+psutil>=5.9.0

requirements_minimal.txt ADDED Viewed

+# Minimal requirements for SmolLM3 training
+torch>=2.0.0
+transformers>=4.53.0
+datasets>=2.14.0
+accelerate>=0.20.0
+trl>=0.7.0
+huggingface-hub>=0.16.0
+tokenizers>=0.13.0
+flash-attn>=2.0.0
+bitsandbytes>=0.41.0
+numpy>=1.24.0
+tqdm>=4.65.0
+trackio>=0.1.0
+psutil>=5.9.0

run_a100_large_experiment.py CHANGED Viewed

@@ -40,11 +40,13 @@ def main():
     )
     parser.add_argument(
         "--trackio-url",
         type=str,
         help="Trackio URL for experiment tracking"
     )
     parser.add_argument(
         "--trackio-token",
         type=str,
         help="Trackio token for authentication"
     )

     )
     parser.add_argument(
         "--trackio-url",
+        "--trackio_url",
         type=str,
         help="Trackio URL for experiment tracking"
     )
     parser.add_argument(
         "--trackio-token",
+        "--trackio_token",
         type=str,
         help="Trackio token for authentication"
     )