# This script is based on examples/dora_finetuning/dora_finetuning.py
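# Example invocation (a sketch; adjust the model, dataset, and flags to your setup):
#   python randlora_finetuning.py --base_model huggyllama/llama-7b \
#       --data_path timdettmers/openassistant-guanaco --quantize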
import os
import torch
from datasets import load_dataset
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
DataCollatorForLanguageModeling,
Trainer,
TrainingArguments,
)
from peft import LoraConfig, RandLoraConfig, get_peft_model, prepare_model_for_kbit_training
def train_model(
base_model: str,
data_path: str,
output_dir: str,
batch_size: int,
num_epochs: int,
learning_rate: float,
cutoff_len: int,
val_set_size: int,
use_lora: bool,
quantize: bool,
eval_step: int,
save_step: int,
device: str,
rank: int,
randlora_alpha: int,
randlora_dropout: float,
randlora_target_modules: str,
hub_model_id: str,
push_to_hub: bool,
sparse: bool,
very_sparse: bool,
):
os.environ["TOKENIZERS_PARALLELISM"] = "false"
hf_token = os.getenv("HF_TOKEN")
# Setup device
device = torch.device(device)
print(f"Using device: {device}")
    # Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, token=hf_token)
    # Pick a compute dtype: prefer bf16 where the accelerator supports it, else fp16
    device_type = device.type
    device_module = getattr(torch, device_type, torch.cuda)
    bf16_supported = device_module.is_available() and getattr(device_module, "is_bf16_supported", lambda: False)()
    torch_dtype = torch.bfloat16 if bf16_supported else torch.float16
    # QRandLoRA (quantized RandLoRA): optionally load the base model in 4-bit
if quantize:
model = AutoModelForCausalLM.from_pretrained(
base_model,
token=hf_token,
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
                bnb_4bit_compute_dtype=torch_dtype,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
),
torch_dtype=torch_dtype,
)
# setup for quantized training
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
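        # prepare_model_for_kbit_training freezes the quantized base weights, upcasts
        # norm layers to float32 for numerical stability, and enables input gradients
        # so gradient checkpointing works with a frozen base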
else:
model = AutoModelForCausalLM.from_pretrained(
base_model,
torch_dtype=torch_dtype,
token=hf_token,
)
    # LoRA or RandLoRA config for the PEFT model
if use_lora:
peft_config = LoraConfig(
            r=rank,  # rank of the LoRA update matrices
lora_alpha=randlora_alpha,
target_modules=(randlora_target_modules.split(",") if randlora_target_modules else ["k_proj", "v_proj"]),
lora_dropout=randlora_dropout,
bias="none",
)
else:
peft_config = RandLoraConfig(
r=rank, # Rank of random bases
randlora_alpha=randlora_alpha,
target_modules=(randlora_target_modules.split(",") if randlora_target_modules else ["k_proj", "v_proj"]),
randlora_dropout=randlora_dropout,
bias="none",
sparse=sparse,
very_sparse=very_sparse,
)
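    # Note: RandLoRA keeps its random low-rank bases frozen (shared across layers) and
    # trains only small scaling coefficients, which lets the summed update reach full
    # rank; note the much larger default randlora_alpha (640) than typical LoRA alphas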
    # Wrap the base model with the chosen PEFT config
model = get_peft_model(model, peft_config)
    model.to(device)  # move the model to the target accelerator
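    # LLaMA-style tokenizers define no pad token by default; reuse EOS for padding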
tokenizer.pad_token = tokenizer.eos_token
# Load the dataset
dataset = load_dataset(data_path)
def tokenize_function(examples):
inputs = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=cutoff_len)
inputs["labels"] = inputs["input_ids"].copy() # setting labels for a language modeling task
return inputs
# Tokenize the dataset and prepare for training
tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=dataset["train"].column_names)
    # Data collator for causal language modeling; it rebuilds the labels from input_ids
    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
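    # Since tokenize_function already pads to max_length, the collator's main job here
    # is building the labels tensor, with pad positions set to -100 so the loss skips them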
    # Compute the total number of optimizer steps (after gradient accumulation) for warmup
    grad_accum_steps = max(16 // batch_size, 1)  # keep an effective batch size of ~16; recommended for good performance
    max_steps = int(len(dataset["train"]) // (batch_size * grad_accum_steps) * num_epochs)
# Define training arguments
training_args = TrainingArguments(
output_dir=output_dir,
num_train_epochs=num_epochs,
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
        warmup_steps=int(max_steps * 0.1),  # warm up over 10% of total training steps
weight_decay=0.01,
logging_dir="./logs",
logging_steps=eval_step,
save_steps=save_step,
save_total_limit=2,
push_to_hub=push_to_hub,
hub_model_id=hub_model_id,
        gradient_accumulation_steps=grad_accum_steps,  # computed above to keep the effective batch size at ~16
learning_rate=learning_rate,
hub_token=hf_token,
label_names=["labels"],
)
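    # Example: with the default batch_size=1, grad_accum_steps is 16 // 1 = 16,
    # giving an effective batch size of 1 * 16 = 16 per optimizer update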
    # Clear the accelerator cache to free memory (not every backend exposes empty_cache)
    if hasattr(device_module, "empty_cache"):
        device_module.empty_cache()
# Initialize the Trainer
trainer = Trainer(
model=model,
args=training_args,
train_dataset=tokenized_datasets["train"],
eval_dataset=tokenized_datasets["test"],
data_collator=data_collator,
)
# Start model training
trainer.train()
# Save and push the trained model and tokenizer
if push_to_hub:
# Push the main model to the hub
trainer.push_to_hub(commit_message="Fine-tuned model")
# Save the model and tokenizer locally
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
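    # To reuse the adapter later (a sketch; assumes output_dir holds the saved adapter):
    #   from peft import PeftModel
    #   base = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch_dtype)
    #   model = PeftModel.from_pretrained(base, output_dir)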
if __name__ == "__main__":
import argparse
    parser = argparse.ArgumentParser(description="Fine-tune LLaMA with RandLoRA and PEFT")
parser.add_argument("--base_model", type=str, default="huggyllama/llama-7b", help="Base model path or name")
parser.add_argument(
"--data_path", type=str, default="timdettmers/openassistant-guanaco", help="Dataset path or name"
)
parser.add_argument(
"--output_dir", type=str, default="path/to/output", help="Output directory for the fine-tuned model"
)
parser.add_argument("--batch_size", type=int, default=1, help="Batch size")
parser.add_argument("--num_epochs", type=int, default=1, help="Number of training epochs")
parser.add_argument("--learning_rate", type=float, default=3e-4, help="Learning rate")
parser.add_argument("--cutoff_len", type=int, default=512, help="Cutoff length for tokenization")
parser.add_argument("--val_set_size", type=int, default=500, help="Validation set size")
parser.add_argument("--use_lora", action="store_true", help="Apply Lora instead of RandLora")
parser.add_argument("--quantize", action="store_true", help="Use quantization")
parser.add_argument("--eval_step", type=int, default=10, help="Evaluation step interval")
parser.add_argument("--save_step", type=int, default=100, help="Save step interval")
parser.add_argument("--device", type=str, default="auto", help="Device to use for training")
parser.add_argument("--rank", type=int, default=32, help="RandLora basis rank")
parser.add_argument("--randlora_alpha", type=int, default=640, help="RandLora alpha")
parser.add_argument("--randlora_dropout", type=float, default=0.05, help="RandLora dropout rate")
parser.add_argument(
"--randlora_target_modules", type=str, default=None, help="Comma-separated list of target modules for RandLora"
)
parser.add_argument("--sparse", action="store_true", help="Use sparse matrix multiplication")
parser.add_argument("--very_sparse", action="store_true", help="Use very sparse matrix multiplication")
parser.add_argument(
"--hub_model_id",
type=str,
default="path/to/repo",
help="Repository name to push the model on the Hugging Face Hub",
)
parser.add_argument("--push_to_hub", action="store_true", help="Whether to push the model to Hugging Face Hub")
args = parser.parse_args()
    if args.device == "auto":
        # torch.accelerator is only available in newer PyTorch; fall back to CUDA otherwise
        accelerator = torch.accelerator.current_accelerator() if hasattr(torch, "accelerator") else None
        args.device = accelerator.type if accelerator is not None else "cuda"
train_model(
base_model=args.base_model,
data_path=args.data_path,
output_dir=args.output_dir,
batch_size=args.batch_size,
num_epochs=args.num_epochs,
learning_rate=args.learning_rate,
cutoff_len=args.cutoff_len,
val_set_size=args.val_set_size,
use_lora=args.use_lora,
quantize=args.quantize,
eval_step=args.eval_step,
save_step=args.save_step,
device=args.device,
rank=args.rank,
randlora_alpha=args.randlora_alpha,
randlora_dropout=args.randlora_dropout,
randlora_target_modules=args.randlora_target_modules,
hub_model_id=args.hub_model_id,
push_to_hub=args.push_to_hub,
sparse=args.sparse,
very_sparse=args.very_sparse,
)