Spaces:
Running
Running
adds repoid only based on repo name, adds version-robust sfttrainer
Browse files
launch.sh
CHANGED
|
@@ -831,8 +831,11 @@ get_input "Experiment name" "smollm3_finetune_$(date +%Y%m%d_%H%M%S)" EXPERIMENT
|
|
| 831 |
|
| 832 |
# Configure model repository name (customizable)
|
| 833 |
print_info "Setting up model repository name..."
|
| 834 |
-
|
| 835 |
-
|
|
|
|
|
|
|
|
|
|
| 836 |
print_status "Model repository: $REPO_NAME"
|
| 837 |
|
| 838 |
# Automatically create dataset repository
|
|
@@ -1311,10 +1314,10 @@ export HF_USERNAME="$HF_USERNAME"
|
|
| 1311 |
--hf-username "$HF_USERNAME" \
|
| 1312 |
--model-id "$DEMO_MODEL_ID" \
|
| 1313 |
--subfolder "$DEMO_SUBFOLDER" \
|
| 1314 |
-
--space-name "${
|
| 1315 |
|
| 1316 |
if [ $? -eq 0 ]; then
|
| 1317 |
-
DEMO_SPACE_URL="https://huggingface.co/spaces/$HF_USERNAME/${
|
| 1318 |
print_status "✅ Demo space deployed successfully: $DEMO_SPACE_URL"
|
| 1319 |
else
|
| 1320 |
print_warning "⚠️ Demo space deployment failed, but continuing with pipeline"
|
|
@@ -1385,7 +1388,7 @@ echo "๐ Trackio: $TRACKIO_URL"
|
|
| 1385 |
echo "๐ Experiment: $EXPERIMENT_NAME"
|
| 1386 |
echo "๐ Dataset: https://huggingface.co/datasets/$TRACKIO_DATASET_REPO"
|
| 1387 |
$(if [ "$DEPLOY_DEMO" = "y" ] || [ "$DEPLOY_DEMO" = "Y" ]; then
|
| 1388 |
-
echo "🎮 Demo: https://huggingface.co/spaces/$HF_USERNAME/${
|
| 1389 |
fi)
|
| 1390 |
echo ""
|
| 1391 |
echo "๐ Summary report saved to: training_summary.md"
|
|
|
|
| 831 |
|
| 832 |
# Configure model repository name (customizable)
|
| 833 |
print_info "Setting up model repository name..."
|
| 834 |
+
# Ask only for short repo name; we'll prefix with username automatically
|
| 835 |
+
DEFAULT_SHORT_REPO="smolfactory-$(date +%Y%m%d)"
|
| 836 |
+
get_input "Model repository name (repo only, no username/)" "$DEFAULT_SHORT_REPO" REPO_SHORT
|
| 837 |
+
# Build full repo id using detected username
|
| 838 |
+
REPO_NAME="$HF_USERNAME/$REPO_SHORT"
|
| 839 |
print_status "Model repository: $REPO_NAME"
|
| 840 |
|
| 841 |
# Automatically create dataset repository
|
|
|
|
| 1314 |
--hf-username "$HF_USERNAME" \
|
| 1315 |
--model-id "$DEMO_MODEL_ID" \
|
| 1316 |
--subfolder "$DEMO_SUBFOLDER" \
|
| 1317 |
+
--space-name "${REPO_SHORT}-demo"
|
| 1318 |
|
| 1319 |
if [ $? -eq 0 ]; then
|
| 1320 |
+
DEMO_SPACE_URL="https://huggingface.co/spaces/$HF_USERNAME/${REPO_SHORT}-demo"
|
| 1321 |
print_status "✅ Demo space deployed successfully: $DEMO_SPACE_URL"
|
| 1322 |
else
|
| 1323 |
print_warning "⚠️ Demo space deployment failed, but continuing with pipeline"
|
|
|
|
| 1388 |
echo "๐ Experiment: $EXPERIMENT_NAME"
|
| 1389 |
echo "๐ Dataset: https://huggingface.co/datasets/$TRACKIO_DATASET_REPO"
|
| 1390 |
$(if [ "$DEPLOY_DEMO" = "y" ] || [ "$DEPLOY_DEMO" = "Y" ]; then
|
| 1391 |
+
echo "🎮 Demo: https://huggingface.co/spaces/$HF_USERNAME/${REPO_SHORT}-demo"
|
| 1392 |
fi)
|
| 1393 |
echo ""
|
| 1394 |
echo "๐ Summary report saved to: training_summary.md"
|
scripts/deploy_demo_space.py
CHANGED
|
@@ -42,9 +42,10 @@ class DemoSpaceDeployer:
|
|
| 42 |
demo_type: Optional[str] = None):
|
| 43 |
self.hf_token = hf_token
|
| 44 |
self.hf_username = hf_username
|
| 45 |
-
|
|
|
|
| 46 |
self.subfolder = subfolder
|
| 47 |
-
self.space_name = space_name or f"{model_id.split('/')[-1]}-demo"
|
| 48 |
self.space_id = f"{hf_username}/{self.space_name}"
|
| 49 |
self.space_url = f"https://huggingface.co/spaces/{self.space_id}"
|
| 50 |
|
|
|
|
| 42 |
demo_type: Optional[str] = None):
|
| 43 |
self.hf_token = hf_token
|
| 44 |
self.hf_username = hf_username
|
| 45 |
+
# Allow passing just a repo name without username and auto-prefix
|
| 46 |
+
self.model_id = model_id if "/" in model_id else f"{hf_username}/{model_id}"
|
| 47 |
self.subfolder = subfolder
|
| 48 |
+
self.space_name = space_name or f"{self.model_id.split('/')[-1]}-demo"
|
| 49 |
self.space_id = f"{hf_username}/{self.space_name}"
|
| 50 |
self.space_url = f"https://huggingface.co/spaces/{self.space_id}"
|
| 51 |
|
scripts/model_tonic/push_gpt_oss_to_huggingface.py
CHANGED
|
@@ -247,12 +247,35 @@ This model is licensed under the MIT License.
|
|
| 247 |
|
| 248 |
return card_content
|
| 249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experiment_name, dataset_repo, author_name, model_description, training_config_type=None, model_name=None, dataset_name=None, batch_size=None, learning_rate=None, max_epochs=None, max_seq_length=None, trainer_type=None):
|
| 251 |
"""Push GPT-OSS model to Hugging Face Hub"""
|
| 252 |
|
| 253 |
print("=== GPT-OSS Model Push Pipeline ===")
|
| 254 |
print(f"Checkpoint: {checkpoint_path}")
|
| 255 |
-
|
|
|
|
| 256 |
print(f"Experiment: {experiment_name}")
|
| 257 |
print(f"Author: {author_name}")
|
| 258 |
|
|
@@ -276,7 +299,7 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
|
|
| 276 |
# Create model card
|
| 277 |
print("Creating model card...")
|
| 278 |
model_card_content = create_gpt_oss_model_card(
|
| 279 |
-
model_name=
|
| 280 |
experiment_name=experiment_name,
|
| 281 |
trackio_url=trackio_url,
|
| 282 |
dataset_repo=dataset_repo,
|
|
@@ -297,18 +320,18 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
|
|
| 297 |
f.write(model_card_content)
|
| 298 |
|
| 299 |
# Push to Hugging Face Hub
|
| 300 |
-
print(f"Pushing model to: {
|
| 301 |
|
| 302 |
# Set HF token
|
| 303 |
os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
|
| 304 |
|
| 305 |
# Push using transformers
|
| 306 |
from huggingface_hub import HfApi
|
| 307 |
-
api = HfApi()
|
| 308 |
|
| 309 |
# Create repository if it doesn't exist
|
| 310 |
try:
|
| 311 |
-
api.create_repo(
|
| 312 |
except Exception as e:
|
| 313 |
print(f"Warning: Could not create repository: {e}")
|
| 314 |
|
|
@@ -316,12 +339,12 @@ def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experi
|
|
| 316 |
print("Uploading model files...")
|
| 317 |
api.upload_folder(
|
| 318 |
folder_path=temp_output,
|
| 319 |
-
repo_id=
|
| 320 |
repo_type="model"
|
| 321 |
)
|
| 322 |
|
| 323 |
print("✅ GPT-OSS model pushed successfully!")
|
| 324 |
-
print(f"Model URL: https://huggingface.co/{
|
| 325 |
|
| 326 |
# Clean up
|
| 327 |
import shutil
|
|
|
|
| 247 |
|
| 248 |
return card_content
|
| 249 |
|
| 250 |
+
def _resolve_repo_id(repo_name: str, hf_token: str) -> str:
|
| 251 |
+
"""Resolve to username/repo if only repo name was provided."""
|
| 252 |
+
try:
|
| 253 |
+
if "/" in repo_name:
|
| 254 |
+
return repo_name
|
| 255 |
+
from huggingface_hub import HfApi
|
| 256 |
+
username = None
|
| 257 |
+
if hf_token:
|
| 258 |
+
try:
|
| 259 |
+
api = HfApi(token=hf_token)
|
| 260 |
+
info = api.whoami()
|
| 261 |
+
username = info.get("name") or info.get("username")
|
| 262 |
+
except Exception:
|
| 263 |
+
username = None
|
| 264 |
+
if not username:
|
| 265 |
+
username = os.getenv("HF_USERNAME")
|
| 266 |
+
if not username:
|
| 267 |
+
raise ValueError("Could not determine HF username. Set HF_USERNAME or pass username/repo.")
|
| 268 |
+
return f"{username}/{repo_name}"
|
| 269 |
+
except Exception:
|
| 270 |
+
return repo_name
|
| 271 |
+
|
| 272 |
def push_gpt_oss_model(checkpoint_path, repo_name, hf_token, trackio_url, experiment_name, dataset_repo, author_name, model_description, training_config_type=None, model_name=None, dataset_name=None, batch_size=None, learning_rate=None, max_epochs=None, max_seq_length=None, trainer_type=None):
|
| 273 |
"""Push GPT-OSS model to Hugging Face Hub"""
|
| 274 |
|
| 275 |
print("=== GPT-OSS Model Push Pipeline ===")
|
| 276 |
print(f"Checkpoint: {checkpoint_path}")
|
| 277 |
+
full_repo_id = _resolve_repo_id(repo_name, hf_token)
|
| 278 |
+
print(f"Repository: {full_repo_id}")
|
| 279 |
print(f"Experiment: {experiment_name}")
|
| 280 |
print(f"Author: {author_name}")
|
| 281 |
|
|
|
|
| 299 |
# Create model card
|
| 300 |
print("Creating model card...")
|
| 301 |
model_card_content = create_gpt_oss_model_card(
|
| 302 |
+
model_name=full_repo_id,
|
| 303 |
experiment_name=experiment_name,
|
| 304 |
trackio_url=trackio_url,
|
| 305 |
dataset_repo=dataset_repo,
|
|
|
|
| 320 |
f.write(model_card_content)
|
| 321 |
|
| 322 |
# Push to Hugging Face Hub
|
| 323 |
+
print(f"Pushing model to: {full_repo_id}")
|
| 324 |
|
| 325 |
# Set HF token
|
| 326 |
os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
|
| 327 |
|
| 328 |
# Push using transformers
|
| 329 |
from huggingface_hub import HfApi
|
| 330 |
+
api = HfApi(token=hf_token)
|
| 331 |
|
| 332 |
# Create repository if it doesn't exist
|
| 333 |
try:
|
| 334 |
+
api.create_repo(full_repo_id, private=False, exist_ok=True)
|
| 335 |
except Exception as e:
|
| 336 |
print(f"Warning: Could not create repository: {e}")
|
| 337 |
|
|
|
|
| 339 |
print("Uploading model files...")
|
| 340 |
api.upload_folder(
|
| 341 |
folder_path=temp_output,
|
| 342 |
+
repo_id=full_repo_id,
|
| 343 |
repo_type="model"
|
| 344 |
)
|
| 345 |
|
| 346 |
print("✅ GPT-OSS model pushed successfully!")
|
| 347 |
+
print(f"Model URL: https://huggingface.co/{full_repo_id}")
|
| 348 |
|
| 349 |
# Clean up
|
| 350 |
import shutil
|
scripts/model_tonic/push_to_huggingface.py
CHANGED
|
@@ -73,6 +73,7 @@ class HuggingFacePusher:
|
|
| 73 |
trainer_type: Optional[str] = None
|
| 74 |
):
|
| 75 |
self.model_path = Path(model_path)
|
|
|
|
| 76 |
self.repo_name = repo_name
|
| 77 |
self.token = token or hf_token or os.getenv('HF_TOKEN')
|
| 78 |
self.private = private
|
|
@@ -101,6 +102,9 @@ class HuggingFacePusher:
|
|
| 101 |
else:
|
| 102 |
raise ImportError("huggingface_hub is required. Install with: pip install huggingface_hub")
|
| 103 |
|
|
|
|
|
|
|
|
|
|
| 104 |
# Initialize monitoring if available
|
| 105 |
self.monitor = None
|
| 106 |
if MONITORING_AVAILABLE:
|
|
@@ -112,25 +116,60 @@ class HuggingFacePusher:
|
|
| 112 |
dataset_repo=self.dataset_repo
|
| 113 |
)
|
| 114 |
|
| 115 |
-
logger.info(f"Initialized HuggingFacePusher for {
|
| 116 |
logger.info(f"Dataset repository: {self.dataset_repo}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
def create_repository(self) -> bool:
|
| 119 |
"""Create the Hugging Face repository"""
|
| 120 |
try:
|
| 121 |
-
logger.info(f"Creating repository: {self.
|
| 122 |
|
| 123 |
# Create repository with timeout handling
|
| 124 |
try:
|
| 125 |
# Create repository
|
| 126 |
create_repo(
|
| 127 |
-
repo_id=self.
|
| 128 |
token=self.token,
|
| 129 |
private=self.private,
|
| 130 |
exist_ok=True
|
| 131 |
)
|
| 132 |
|
| 133 |
-
logger.info(f"✅ Repository created: https://huggingface.co/{self.
|
| 134 |
return True
|
| 135 |
|
| 136 |
except Exception as e:
|
|
@@ -189,8 +228,8 @@ class HuggingFacePusher:
|
|
| 189 |
|
| 190 |
# Update with actual values
|
| 191 |
variables.update({
|
| 192 |
-
"repo_name": self.
|
| 193 |
-
"model_name": self.
|
| 194 |
"experiment_name": self.experiment_name or "model_push",
|
| 195 |
"dataset_repo": self.dataset_repo,
|
| 196 |
"author_name": self.author_name or "Model Author",
|
|
@@ -238,7 +277,7 @@ pipeline_tag: text-generation
|
|
| 238 |
base_model: HuggingFaceTB/SmolLM3-3B
|
| 239 |
---
|
| 240 |
|
| 241 |
-
# {self.
|
| 242 |
|
| 243 |
This is a fine-tuned SmolLM3 model based on the HuggingFaceTB/SmolLM3-3B architecture.
|
| 244 |
|
|
@@ -269,8 +308,8 @@ This is a fine-tuned SmolLM3 model based on the HuggingFaceTB/SmolLM3-3B archite
|
|
| 269 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 270 |
|
| 271 |
# Load model and tokenizer
|
| 272 |
-
model = AutoModelForCausalLM.from_pretrained("{self.
|
| 273 |
-
tokenizer = AutoTokenizer.from_pretrained("{self.
|
| 274 |
|
| 275 |
# Generate text
|
| 276 |
inputs = tokenizer("Hello, how are you?", return_tensors="pt")
|
|
@@ -346,7 +385,7 @@ This model is licensed under the Apache 2.0 License.
|
|
| 346 |
upload_file(
|
| 347 |
path_or_fileobj=str(file_path),
|
| 348 |
path_in_repo=remote_path,
|
| 349 |
-
repo_id=self.
|
| 350 |
token=self.token
|
| 351 |
)
|
| 352 |
logger.info(f"✅ Uploaded {relative_path}")
|
|
@@ -381,7 +420,7 @@ This model is licensed under the Apache 2.0 License.
|
|
| 381 |
upload_file(
|
| 382 |
path_or_fileobj=str(file_path),
|
| 383 |
path_in_repo=f"training_results/{file_name}",
|
| 384 |
-
repo_id=self.
|
| 385 |
token=self.token
|
| 386 |
)
|
| 387 |
|
|
@@ -397,7 +436,7 @@ This model is licensed under the Apache 2.0 License.
|
|
| 397 |
try:
|
| 398 |
logger.info("Creating README.md...")
|
| 399 |
|
| 400 |
-
readme_content = f"""# {self.
|
| 401 |
|
| 402 |
A fine-tuned SmolLM3 model for text generation tasks.
|
| 403 |
|
|
@@ -406,8 +445,8 @@ A fine-tuned SmolLM3 model for text generation tasks.
|
|
| 406 |
```python
|
| 407 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 408 |
|
| 409 |
-
model = AutoModelForCausalLM.from_pretrained("{self.
|
| 410 |
-
tokenizer = AutoTokenizer.from_pretrained("{self.
|
| 411 |
|
| 412 |
# Generate text
|
| 413 |
text = "Hello, how are you?"
|
|
@@ -463,7 +502,7 @@ MIT License
|
|
| 463 |
path_or_fileobj=str(readme_path),
|
| 464 |
path_in_repo="README.md",
|
| 465 |
token=self.token,
|
| 466 |
-
repo_id=self.
|
| 467 |
)
|
| 468 |
|
| 469 |
# Clean up
|
|
@@ -483,7 +522,7 @@ MIT License
|
|
| 483 |
# Log to Trackio
|
| 484 |
self.monitor.log_metrics({
|
| 485 |
"push_action": action,
|
| 486 |
-
"repo_name": self.
|
| 487 |
"model_size_gb": self._get_model_size(),
|
| 488 |
"dataset_repo": self.dataset_repo,
|
| 489 |
**details
|
|
@@ -492,7 +531,7 @@ MIT License
|
|
| 492 |
# Log training summary
|
| 493 |
self.monitor.log_training_summary({
|
| 494 |
"model_push": True,
|
| 495 |
-
"model_repo": self.
|
| 496 |
"dataset_repo": self.dataset_repo,
|
| 497 |
"push_date": datetime.now().isoformat(),
|
| 498 |
**details
|
|
@@ -505,7 +544,7 @@ MIT License
|
|
| 505 |
def push_model(self, training_config: Optional[Dict[str, Any]] = None,
|
| 506 |
results: Optional[Dict[str, Any]] = None) -> bool:
|
| 507 |
"""Complete model push process with HF Datasets integration"""
|
| 508 |
-
logger.info(f"๐ Starting model push to {self.
|
| 509 |
logger.info(f"๐ Dataset repository: {self.dataset_repo}")
|
| 510 |
|
| 511 |
# Validate model path
|
|
@@ -533,7 +572,7 @@ MIT License
|
|
| 533 |
upload_file(
|
| 534 |
path_or_fileobj=str(model_card_path),
|
| 535 |
path_in_repo="README.md",
|
| 536 |
-
repo_id=self.
|
| 537 |
token=self.token
|
| 538 |
)
|
| 539 |
finally:
|
|
@@ -556,7 +595,7 @@ MIT License
|
|
| 556 |
"results": results
|
| 557 |
})
|
| 558 |
|
| 559 |
-
logger.info(f"๐ Model successfully pushed to: https://huggingface.co/{self.
|
| 560 |
logger.info(f"๐ Experiment data stored in: {self.dataset_repo}")
|
| 561 |
return True
|
| 562 |
|
|
@@ -582,7 +621,7 @@ def parse_args():
|
|
| 582 |
|
| 583 |
# Required arguments
|
| 584 |
parser.add_argument('model_path', type=str, help='Path to trained model directory')
|
| 585 |
-
parser.add_argument('repo_name', type=str, help='Hugging Face repository name (
|
| 586 |
|
| 587 |
# Optional arguments
|
| 588 |
parser.add_argument('--token', type=str, default=None, help='Hugging Face token')
|
|
|
|
| 73 |
trainer_type: Optional[str] = None
|
| 74 |
):
|
| 75 |
self.model_path = Path(model_path)
|
| 76 |
+
# Original user input (may be just the repo name without username)
|
| 77 |
self.repo_name = repo_name
|
| 78 |
self.token = token or hf_token or os.getenv('HF_TOKEN')
|
| 79 |
self.private = private
|
|
|
|
| 102 |
else:
|
| 103 |
raise ImportError("huggingface_hub is required. Install with: pip install huggingface_hub")
|
| 104 |
|
| 105 |
+
# Resolve the full repo id (username/repo) if user only provided repo name
|
| 106 |
+
self.repo_id = self._resolve_repo_id(self.repo_name)
|
| 107 |
+
|
| 108 |
# Initialize monitoring if available
|
| 109 |
self.monitor = None
|
| 110 |
if MONITORING_AVAILABLE:
|
|
|
|
| 116 |
dataset_repo=self.dataset_repo
|
| 117 |
)
|
| 118 |
|
| 119 |
+
logger.info(f"Initialized HuggingFacePusher for {self.repo_id}")
|
| 120 |
logger.info(f"Dataset repository: {self.dataset_repo}")
|
| 121 |
+
|
| 122 |
+
def _resolve_repo_id(self, repo_name: str) -> str:
|
| 123 |
+
"""Return a fully-qualified repo id in the form username/repo.
|
| 124 |
+
|
| 125 |
+
If the provided name already contains a '/', it is returned unchanged.
|
| 126 |
+
Otherwise, we attempt to derive the username from the authenticated token
|
| 127 |
+
or from the HF_USERNAME environment variable.
|
| 128 |
+
"""
|
| 129 |
+
try:
|
| 130 |
+
if "/" in repo_name:
|
| 131 |
+
return repo_name
|
| 132 |
+
|
| 133 |
+
# Need a username. Prefer API whoami(), fallback to env HF_USERNAME
|
| 134 |
+
username: Optional[str] = None
|
| 135 |
+
if self.token:
|
| 136 |
+
try:
|
| 137 |
+
user_info = self.api.whoami()
|
| 138 |
+
username = user_info.get("name") or user_info.get("username")
|
| 139 |
+
except Exception:
|
| 140 |
+
username = None
|
| 141 |
+
|
| 142 |
+
if not username:
|
| 143 |
+
username = os.getenv("HF_USERNAME")
|
| 144 |
+
|
| 145 |
+
if not username:
|
| 146 |
+
raise ValueError(
|
| 147 |
+
"Username could not be determined. Provide a token or set HF_USERNAME, "
|
| 148 |
+
"or pass a fully-qualified repo id 'username/repo'."
|
| 149 |
+
)
|
| 150 |
+
|
| 151 |
+
return f"{username}/{repo_name}"
|
| 152 |
+
except Exception as resolve_error:
|
| 153 |
+
logger.error(f"Failed to resolve full repo id for '{repo_name}': {resolve_error}")
|
| 154 |
+
# Fall back to provided value (may fail later at create/upload)
|
| 155 |
+
return repo_name
|
| 156 |
|
| 157 |
def create_repository(self) -> bool:
|
| 158 |
"""Create the Hugging Face repository"""
|
| 159 |
try:
|
| 160 |
+
logger.info(f"Creating repository: {self.repo_id}")
|
| 161 |
|
| 162 |
# Create repository with timeout handling
|
| 163 |
try:
|
| 164 |
# Create repository
|
| 165 |
create_repo(
|
| 166 |
+
repo_id=self.repo_id,
|
| 167 |
token=self.token,
|
| 168 |
private=self.private,
|
| 169 |
exist_ok=True
|
| 170 |
)
|
| 171 |
|
| 172 |
+
logger.info(f"✅ Repository created: https://huggingface.co/{self.repo_id}")
|
| 173 |
return True
|
| 174 |
|
| 175 |
except Exception as e:
|
|
|
|
| 228 |
|
| 229 |
# Update with actual values
|
| 230 |
variables.update({
|
| 231 |
+
"repo_name": self.repo_id,
|
| 232 |
+
"model_name": self.repo_id.split('/')[-1],
|
| 233 |
"experiment_name": self.experiment_name or "model_push",
|
| 234 |
"dataset_repo": self.dataset_repo,
|
| 235 |
"author_name": self.author_name or "Model Author",
|
|
|
|
| 277 |
base_model: HuggingFaceTB/SmolLM3-3B
|
| 278 |
---
|
| 279 |
|
| 280 |
+
# {self.repo_id.split('/')[-1]}
|
| 281 |
|
| 282 |
This is a fine-tuned SmolLM3 model based on the HuggingFaceTB/SmolLM3-3B architecture.
|
| 283 |
|
|
|
|
| 308 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 309 |
|
| 310 |
# Load model and tokenizer
|
| 311 |
+
model = AutoModelForCausalLM.from_pretrained("{self.repo_id}")
|
| 312 |
+
tokenizer = AutoTokenizer.from_pretrained("{self.repo_id}")
|
| 313 |
|
| 314 |
# Generate text
|
| 315 |
inputs = tokenizer("Hello, how are you?", return_tensors="pt")
|
|
|
|
| 385 |
upload_file(
|
| 386 |
path_or_fileobj=str(file_path),
|
| 387 |
path_in_repo=remote_path,
|
| 388 |
+
repo_id=self.repo_id,
|
| 389 |
token=self.token
|
| 390 |
)
|
| 391 |
logger.info(f"✅ Uploaded {relative_path}")
|
|
|
|
| 420 |
upload_file(
|
| 421 |
path_or_fileobj=str(file_path),
|
| 422 |
path_in_repo=f"training_results/{file_name}",
|
| 423 |
+
repo_id=self.repo_id,
|
| 424 |
token=self.token
|
| 425 |
)
|
| 426 |
|
|
|
|
| 436 |
try:
|
| 437 |
logger.info("Creating README.md...")
|
| 438 |
|
| 439 |
+
readme_content = f"""# {self.repo_id.split('/')[-1]}
|
| 440 |
|
| 441 |
A fine-tuned SmolLM3 model for text generation tasks.
|
| 442 |
|
|
|
|
| 445 |
```python
|
| 446 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 447 |
|
| 448 |
+
model = AutoModelForCausalLM.from_pretrained("{self.repo_id}")
|
| 449 |
+
tokenizer = AutoTokenizer.from_pretrained("{self.repo_id}")
|
| 450 |
|
| 451 |
# Generate text
|
| 452 |
text = "Hello, how are you?"
|
|
|
|
| 502 |
path_or_fileobj=str(readme_path),
|
| 503 |
path_in_repo="README.md",
|
| 504 |
token=self.token,
|
| 505 |
+
repo_id=self.repo_id
|
| 506 |
)
|
| 507 |
|
| 508 |
# Clean up
|
|
|
|
| 522 |
# Log to Trackio
|
| 523 |
self.monitor.log_metrics({
|
| 524 |
"push_action": action,
|
| 525 |
+
"repo_name": self.repo_id,
|
| 526 |
"model_size_gb": self._get_model_size(),
|
| 527 |
"dataset_repo": self.dataset_repo,
|
| 528 |
**details
|
|
|
|
| 531 |
# Log training summary
|
| 532 |
self.monitor.log_training_summary({
|
| 533 |
"model_push": True,
|
| 534 |
+
"model_repo": self.repo_id,
|
| 535 |
"dataset_repo": self.dataset_repo,
|
| 536 |
"push_date": datetime.now().isoformat(),
|
| 537 |
**details
|
|
|
|
| 544 |
def push_model(self, training_config: Optional[Dict[str, Any]] = None,
|
| 545 |
results: Optional[Dict[str, Any]] = None) -> bool:
|
| 546 |
"""Complete model push process with HF Datasets integration"""
|
| 547 |
+
logger.info(f"๐ Starting model push to {self.repo_id}")
|
| 548 |
logger.info(f"๐ Dataset repository: {self.dataset_repo}")
|
| 549 |
|
| 550 |
# Validate model path
|
|
|
|
| 572 |
upload_file(
|
| 573 |
path_or_fileobj=str(model_card_path),
|
| 574 |
path_in_repo="README.md",
|
| 575 |
+
repo_id=self.repo_id,
|
| 576 |
token=self.token
|
| 577 |
)
|
| 578 |
finally:
|
|
|
|
| 595 |
"results": results
|
| 596 |
})
|
| 597 |
|
| 598 |
+
logger.info(f"๐ Model successfully pushed to: https://huggingface.co/{self.repo_id}")
|
| 599 |
logger.info(f"๐ Experiment data stored in: {self.dataset_repo}")
|
| 600 |
return True
|
| 601 |
|
|
|
|
| 621 |
|
| 622 |
# Required arguments
|
| 623 |
parser.add_argument('model_path', type=str, help='Path to trained model directory')
|
| 624 |
+
parser.add_argument('repo_name', type=str, help='Hugging Face repository name (repo-name). Username will be auto-detected from your token.')
|
| 625 |
|
| 626 |
# Optional arguments
|
| 627 |
parser.add_argument('--token', type=str, default=None, help='Hugging Face token')
|
scripts/training/train_gpt_oss.py
CHANGED
|
@@ -537,16 +537,38 @@ def train_gpt_oss(config_path, experiment_name, output_dir, trackio_url, trainer
|
|
| 537 |
# Create SFT configuration
|
| 538 |
sft_config = create_sft_config(config, output_dir)
|
| 539 |
|
| 540 |
-
# Create trainer
|
| 541 |
print("Creating SFT trainer...")
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
|
| 551 |
# Start training
|
| 552 |
print("Starting GPT-OSS training...")
|
|
|
|
| 537 |
# Create SFT configuration
|
| 538 |
sft_config = create_sft_config(config, output_dir)
|
| 539 |
|
| 540 |
+
# Create trainer with version-robust kwargs
|
| 541 |
print("Creating SFT trainer...")
|
| 542 |
+
try:
|
| 543 |
+
sft_sig = inspect.signature(SFTTrainer.__init__)
|
| 544 |
+
sft_params = set(sft_sig.parameters.keys())
|
| 545 |
+
except Exception:
|
| 546 |
+
sft_params = {"model", "args", "train_dataset", "tokenizer", "dataset_text_field", "max_seq_length"}
|
| 547 |
+
|
| 548 |
+
sft_kwargs = {
|
| 549 |
+
"model": peft_model,
|
| 550 |
+
"args": sft_config,
|
| 551 |
+
"train_dataset": dataset,
|
| 552 |
+
}
|
| 553 |
+
|
| 554 |
+
# Prefer passing tokenizer if supported; otherwise try processing_class
|
| 555 |
+
if "tokenizer" in sft_params:
|
| 556 |
+
sft_kwargs["tokenizer"] = tokenizer
|
| 557 |
+
elif "processing_class" in sft_params:
|
| 558 |
+
sft_kwargs["processing_class"] = tokenizer
|
| 559 |
+
|
| 560 |
+
# Pass dataset text field if supported (we produced a 'text' column)
|
| 561 |
+
if "dataset_text_field" in sft_params:
|
| 562 |
+
sft_kwargs["dataset_text_field"] = "text"
|
| 563 |
+
|
| 564 |
+
# Pass max sequence length if supported
|
| 565 |
+
if "max_seq_length" in sft_params:
|
| 566 |
+
sft_kwargs["max_seq_length"] = getattr(config, 'max_seq_length', 2048)
|
| 567 |
+
|
| 568 |
+
# Remove any None values
|
| 569 |
+
sft_kwargs = {k: v for k, v in sft_kwargs.items() if v is not None}
|
| 570 |
+
|
| 571 |
+
trainer = SFTTrainer(**sft_kwargs)
|
| 572 |
|
| 573 |
# Start training
|
| 574 |
print("Starting GPT-OSS training...")
|