Spaces:

PhillHenry
/

MyLlmPlayground

Running

App Files Files Community

PhillHenry commited on Feb 3

Commit

9aa17f3

verified ·

1 Parent(s): 440412f

Upload LlmEngChap6.py

Browse files

Files changed (1) hide show

LlmEngChap6.py +135 -0

LlmEngChap6.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import comet_ml
+from unsloth import PatchDPOTrainer
+from accelerate import Accelerator
+from config import SAVED_MODEL
+PatchDPOTrainer()
+import torch
+from transformers import TextStreamer, AutoTokenizer
+from datasets import load_dataset
+from unsloth import FastLanguageModel, is_bfloat16_supported
+from trl import DPOConfig, DPOTrainer
+from accelerate import init_empty_weights
+class MyLlamaModel:
+    max_seq_length = 256
+    NUM_TRAIN_EPOCHS = 6
+    beta = 0.5
+    LOAD_IN_4BIT = False
+    device_map = "auto"
+    save_method = "lora"  # merged_X just means the whole model is saved, not just the transformer
+    lora_dropout = 0.
+    lora_alpha = 32
+    learning_rate=2e-5
+    r = 32
+    base_output_dir = f"{SAVED_MODEL}/{max_seq_length}maxSeqLen_{NUM_TRAIN_EPOCHS}Epochs_{device_map}devmap_4Bit{LOAD_IN_4BIT}_{save_method}_beta{beta}_loraDropout{lora_dropout}_r{r}_lora_alpha{lora_alpha}_lr{learning_rate}/"
+    def __init__(self):
+        self.model_name="unsloth/DeepSeek-R1-GGUF"
+        self.model_path = f"{self.base_output_dir}/{self.model_name}"
+    def get_model_tokenizer(self, model_name: str):
+        print(f"Using model {model_name}")
+        self.model_name = model_name
+        self.model_path = f"{self.base_output_dir}/{model_name}"
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=self.model_name,
+            # max_seq_length=self.max_seq_length,
+            load_in_4bit=self.LOAD_IN_4BIT, # "You can activate QLoRA by setting load_in_4bit to True"  LLMEngineering, p251
+            # quantization_config=bnb_config, # helped with memory but caused non-zero probabilities when demoed
+            # # device_map=self.device_map, # try this
+            trust_remote_code=True,
+        )
+        return model, tokenizer
+    def train_and_save(self):
+        model, tokenizer = self.get_model_tokenizer(self.model_name)
+        with init_empty_weights():
+            model = FastLanguageModel.get_peft_model(
+                model,
+                r=self.r,
+                lora_alpha=self.lora_alpha,
+                lora_dropout=self.lora_dropout,
+                target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
+            )
+            torch.nn.Module.to_empty(model, device=torch.device("cuda"))  # this eliminates 'NotImplementedError: Cannot copy out of meta tensor'
+            accelerator = Accelerator(mixed_precision="bf16", cpu=True)  # Enable mixed precision for memory efficiency
+            device = accelerator.device
+            # model.to(device)
+            # optimizer = AdamW(params=model.parameters(), lr=3e-2)
+            # Move the model to the appropriate device
+            model = accelerator.prepare(model)
+            self.do_dpo(model, tokenizer)
+    def do_dpo(self, model, tokenizer):
+        dataset = self.load_prepared_dataset(tokenizer.eos_token)
+        trainer = DPOTrainer(
+            model=model,
+            ref_model=None,
+            tokenizer=tokenizer,
+            beta=self.beta,
+            train_dataset=dataset["train"],
+            eval_dataset=dataset["test"],
+            max_length=self.max_seq_length // 2,
+            max_prompt_length=self.max_seq_length // 2,
+            args=DPOConfig(
+                learning_rate=self.learning_rate,
+                lr_scheduler_type="linear",
+                per_device_train_batch_size=1,
+                per_device_eval_batch_size=1,
+                gradient_accumulation_steps=8,
+                num_train_epochs=self.NUM_TRAIN_EPOCHS,
+                fp16=not is_bfloat16_supported(),
+                bf16=is_bfloat16_supported(),
+                weight_decay=0.01,
+                warmup_steps=10,
+                output_dir="output",
+                eval_strategy="steps",
+                eval_steps=0.2,
+                logging_steps=1,
+                report_to="comet_ml",
+                seed=0,
+            ),
+        )
+        trainer.train()
+        model.save_pretrained_merged(self.model_path, tokenizer=tokenizer, save_method=self.save_method) # merged_4bit_forced
+        generate_text_using(model, tokenizer)
+    @staticmethod
+    def load_prepared_dataset(eos_token):
+        alpaca_template = """Below is an instruction that describes a task.
+        Write a response that appropriately completes the request.
+        ### Instruction:
+        {}
+        ### Response:
+        """
+        def format_samples(example):
+            example["prompt"] = alpaca_template.format(example["prompt"])
+            example["chosen"] = example['chosen'] + eos_token
+            example["rejected"] = example['rejected'] + eos_token
+            return {"prompt": example["prompt"], "chosen":
+                example["chosen"], "rejected": example["rejected"]}
+        dataset = load_dataset("mlabonne/llmtwin-dpo", split="train")
+        dataset = dataset.map(format_samples)
+        dataset = dataset.train_test_split(test_size=0.05)
+        return dataset
+def generate_text_using(model, tokenizer):
+    print(f"Model of type {type(model)}, tokenizer of type {type(tokenizer)}")
+    #"pt",  "tf",  "np", "jax", "mlx"
+    inputs = tokenizer(["Who are the creators of the course that is under the 'Decoding ML' umbrella?"], return_tensors="pt").to("cuda")
+    text_streamer = TextStreamer(tokenizer)
+    FastLanguageModel.for_inference(model)
+    _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=MyLlamaModel.max_seq_length, use_cache=True)
+if __name__ == "__main__":
+    my_model = MyLlamaModel()
+    my_model.train_and_save()