Ramzes / examples /eva_finetuning /eva_finetuning.py
Bordoglor's picture
Upload folder using huggingface_hub
302920f verified
# Copyright 2024-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from utils import DataCollator, TokenizerMetaMath
from peft import EvaConfig, LoraConfig, get_peft_model, initialize_lora_eva_weights
DEVICE = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
# config
model_name = "meta-llama/Llama-3.1-8B"
max_seq_len = 512
rank = 16
alpha = 1
rho = 2.0
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
svd_batch_size = 4 # can be different from the batch size used in finetuning
batch_size = 4
learning_rate = 5e-4
gradient_accumulation_steps = 8
num_epochs = 1
output_dir = "outputs"
bf16 = True
# load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# load dataset
dataset = load_dataset("meta-math/MetaMathQA")
dataset = dataset.map(
TokenizerMetaMath(model_name),
batched=True,
remove_columns=dataset["train"].column_names,
)
dataset.set_format(type="torch")
# data collator
data_collator = DataCollator(tokenizer.eos_token_id, max_length=max_seq_len)
# dataloader
dataloader = DataLoader(
dataset["train"],
batch_size=svd_batch_size,
collate_fn=data_collator,
)
# setup peft config
eva_config = EvaConfig(rho=rho)
peft_config = LoraConfig(
r=rank, lora_alpha=alpha, target_modules=target_modules, init_lora_weights="eva", eva_config=eva_config
)
# move model to accelerator
model = model.to(DEVICE)
# to optimize memory usage during eva initialization, set low_cpu_mem_usage=True
peft_model = get_peft_model(model, peft_config, low_cpu_mem_usage=True)
initialize_lora_eva_weights(peft_model, dataloader)
# setup training arguments
training_args = TrainingArguments(
per_device_train_batch_size=batch_size,
learning_rate=learning_rate,
gradient_accumulation_steps=gradient_accumulation_steps,
num_train_epochs=num_epochs,
output_dir=output_dir,
remove_unused_columns=False,
bf16=bf16,
)
# continue with standard finetuning
trainer = Trainer(
model=peft_model,
args=training_args,
train_dataset=dataset["train"],
data_collator=data_collator,
)
trainer.train()