import comet_ml  # import before transformers/trl so report_to="comet_ml" can hook in

from unsloth import PatchDPOTrainer
from accelerate import Accelerator
from config import SAVED_MODEL

# Patch TRL's DPOTrainer with Unsloth's memory-efficient implementation
# before trl is imported below.
PatchDPOTrainer()

import torch
from transformers import TextStreamer
from datasets import load_dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import DPOConfig, DPOTrainer
from accelerate import init_empty_weights

class MyLlamaModel:
    max_seq_length = 256
    NUM_TRAIN_EPOCHS = 6
    beta = 0.5
    LOAD_IN_4BIT = False
    device_map = "auto"
    save_method = "lora"  # "lora" saves just the adapters; the "merged_*" methods save the whole model, not only the adapters
    lora_dropout = 0.0
    lora_alpha = 32
    learning_rate = 2e-5
    r = 32
    base_output_dir = (
        f"{SAVED_MODEL}/{max_seq_length}maxSeqLen_{NUM_TRAIN_EPOCHS}Epochs_"
        f"{device_map}devmap_4Bit{LOAD_IN_4BIT}_{save_method}_beta{beta}_"
        f"loraDropout{lora_dropout}_r{r}_lora_alpha{lora_alpha}_lr{learning_rate}/"
    )
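
    # DPO in brief: the policy is trained to prefer "chosen" over "rejected"
    # completions relative to a frozen reference model (Rafailov et al., 2023):
    #   loss = -log sigmoid(beta * (log pi(y_w|x)/pi_ref(y_w|x)
    #                               - log pi(y_l|x)/pi_ref(y_l|x)))
    # beta scales how strongly the policy may drift from the reference;
    # beta=0.5 is a fairly strong preference signal (TRL's default is 0.1).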

    def __init__(self):
        self.model_name = "unsloth/DeepSeek-R1-GGUF"
        self.model_path = f"{self.base_output_dir}/{self.model_name}"

    def get_model_tokenizer(self, model_name: str):
        print(f"Using model {model_name}")
        self.model_name = model_name
        self.model_path = f"{self.base_output_dir}/{model_name}"
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.model_name,
            # max_seq_length=self.max_seq_length,
            load_in_4bit=self.LOAD_IN_4BIT,  # "You can activate QLoRA by setting load_in_4bit to True" (LLM Engineering, p. 251)
            # quantization_config=bnb_config,  # helped with memory but produced degenerate probabilities when demoed
            # device_map=self.device_map,  # worth trying
            trust_remote_code=True,
        )
        return model, tokenizer
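
    # Rough arithmetic behind load_in_4bit (illustrative, not measured here):
    # bf16 weights cost 2 bytes/param while 4-bit NF4 costs about 0.5, so an
    # 8B-parameter model shrinks from ~16 GB to ~4 GB of weight memory, traded
    # against dequantization overhead and some precision loss.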

    def train_and_save(self):
        model, tokenizer = self.get_model_tokenizer(self.model_name)
        with init_empty_weights():
            model = FastLanguageModel.get_peft_model(
                model,
                r=self.r,
                lora_alpha=self.lora_alpha,
                lora_dropout=self.lora_dropout,
                target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
            )
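        # LoRA adds a low-rank update to each targeted projection:
        #   W' = W + (lora_alpha / r) * B @ A
        # With r = lora_alpha = 32 the scaling factor is 1.0, and only the
        # small A and B matrices are trained.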
        torch.nn.Module.to_empty(model, device=torch.device("cuda"))  # avoids 'NotImplementedError: Cannot copy out of meta tensor'
        accelerator = Accelerator(mixed_precision="bf16", cpu=True)  # enable mixed precision for memory efficiency
        # Let Accelerate move the model to the appropriate device
        model = accelerator.prepare(model)
        self.do_dpo(model, tokenizer)
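
    # Training shape: per_device_train_batch_size=1 with
    # gradient_accumulation_steps=8 yields an effective batch size of 8, and
    # eval_steps=0.2 is a ratio, so evaluation runs every 20% of total steps.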

    def do_dpo(self, model, tokenizer):
        dataset = self.load_prepared_dataset(tokenizer.eos_token)
        # NOTE: passing tokenizer/beta/max_length to DPOTrainer directly matches
        # older TRL releases; newer TRL expects beta and the length limits in
        # DPOConfig and the tokenizer as processing_class.
        trainer = DPOTrainer(
            model=model,
            ref_model=None,  # with a PEFT model, the base weights (adapters disabled) serve as the implicit reference
            tokenizer=tokenizer,
            beta=self.beta,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            max_length=self.max_seq_length // 2,
            max_prompt_length=self.max_seq_length // 2,
            args=DPOConfig(
                learning_rate=self.learning_rate,
                lr_scheduler_type="linear",
                per_device_train_batch_size=1,
                per_device_eval_batch_size=1,
                gradient_accumulation_steps=8,
                num_train_epochs=self.NUM_TRAIN_EPOCHS,
                fp16=not is_bfloat16_supported(),
                bf16=is_bfloat16_supported(),
                weight_decay=0.01,
                warmup_steps=10,
                output_dir="output",
                eval_strategy="steps",
                eval_steps=0.2,
                logging_steps=1,
                report_to="comet_ml",
                seed=0,
            ),
        )
        trainer.train()
        model.save_pretrained_merged(self.model_path, tokenizer=tokenizer, save_method=self.save_method)  # other options include "merged_4bit_forced"
        generate_text_using(model, tokenizer)

    def load_prepared_dataset(self, eos_token):
        alpaca_template = """Below is an instruction that describes a task.
Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
"""

        def format_samples(example):
            example["prompt"] = alpaca_template.format(example["prompt"])
            example["chosen"] = example["chosen"] + eos_token
            example["rejected"] = example["rejected"] + eos_token
            return {"prompt": example["prompt"], "chosen": example["chosen"], "rejected": example["rejected"]}

        dataset = load_dataset("mlabonne/llmtwin-dpo", split="train")
        dataset = dataset.map(format_samples)
        dataset = dataset.train_test_split(test_size=0.05)
        return dataset
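
    # Each mapped record carries the three keys DPOTrainer expects, e.g.
    # (illustrative values, not real dataset rows):
    #   {"prompt":   "Below is an instruction ... ### Response:\n",
    #    "chosen":   "a preferred answer</s>",
    #    "rejected": "a dispreferred answer</s>"}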

def generate_text_using(model, tokenizer):
    print(f"Model of type {type(model)}, tokenizer of type {type(tokenizer)}")
    # return_tensors="pt" gives PyTorch tensors (alternatives: "tf", "np", "jax", "mlx")
    inputs = tokenizer(
        ["Who are the creators of the course that is under the 'Decoding ML' umbrella?"],
        return_tensors="pt",
    ).to("cuda")
    text_streamer = TextStreamer(tokenizer)
    FastLanguageModel.for_inference(model)  # switch the Unsloth model into inference mode
    _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=MyLlamaModel.max_seq_length, use_cache=True)

if __name__ == "__main__":
    my_model = MyLlamaModel()
    my_model.train_and_save()
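
# A minimal reload-for-inference sketch (an assumption, untested here: it
# presumes the adapters were written with save_method="lora" to model_path):
#
#   model, tokenizer = FastLanguageModel.from_pretrained(
#       model_name="<base_output_dir>/<model_name>",  # path written above
#       load_in_4bit=False,
#   )
#   FastLanguageModel.for_inference(model)
#   generate_text_using(model, tokenizer)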