Final_Assignment_Template3

Sleeping

bwilkie committed on Jul 22

Commit

391fe34

verified ·

1 Parent(s): a94813f

Update myagent.py

Files changed (1) hide show

myagent.py CHANGED Viewed

@@ -41,7 +41,15 @@ class BasicAgent:
             return error
 # Create a wrapper class that matches the expected interface
 class LocalLlamaModel:
@@ -50,23 +58,10 @@ class LocalLlamaModel:
         self.tokenizer = tokenizer
         self.device = model.device if hasattr(model, 'device') else 'cpu'
-    def generate(self, prompt: str, max_new_tokens=512, **kwargs):
-        """Generate text using the local model"""
-        input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
-        with torch.no_grad():
-            output_ids = self.model.generate(
-                input_ids,
-                max_new_tokens=max_new_tokens,
-                do_sample=True,
-                temperature=0.7,
-                pad_token_id=self.tokenizer.eos_token_id,
-                **kwargs
-            )
-        # Decode only the new tokens (excluding the input)
-        new_tokens = output_ids[0][input_ids.shape[1]:]
-        output = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
         return output
     def __call__(self, prompt: str, max_new_tokens=512, **kwargs):

             return error
+# Model configuration
+model_id = "bartowski/Llama-3.2-3B-Instruct-GGUF"
+filename = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
+model_init = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename, torch_dtype=torch_dtype)
 # Create a wrapper class that matches the expected interface
 class LocalLlamaModel:
         self.tokenizer = tokenizer
         self.device = model.device if hasattr(model, 'device') else 'cpu'
+    def generate(self, prompt: str, max_new_tokens=512*10, **kwargs):
+        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
+        output_ids = model.generate(input_ids, max_new_tokens=max_new_tokens)
+        output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
         return output
     def __call__(self, prompt: str, max_new_tokens=512, **kwargs):