Update myagent.py
myagent.py  CHANGED  (+37 -20)
@@ -61,26 +61,43 @@ class LocalLlamaModel:
         self.device = model.device if hasattr(model, 'device') else 'cpu'

     def generate(self, prompt: str, max_new_tokens=512, **kwargs):
-        [20 lines removed - the old implementation is not rendered in the extracted diff]
+        try:
+            # Generate answer using the provided prompt - following the recommended pattern
+            input_ids = self.tokenizer.apply_chat_template(
+                [{"role": "user", "content": str(prompt)}],
+                add_generation_prompt=True,
+                return_tensors="pt",
+                tokenize=True,
+            ).to(self.model.device)
+
+            # Generate output - exactly as in recommended code
+            output = self.model.generate(
+                input_ids,
+                do_sample=True,
+                temperature=0.3,
+                min_p=0.15,
+                repetition_penalty=1.05,
+                max_new_tokens=max_new_tokens,
+            )
+
+            # Decode the full output - as in recommended code
+            decoded_output = self.tokenizer.decode(output[0], skip_special_tokens=False)
+
+            # Extract only the assistant's response (after the last <|im_start|>assistant)
+            if "<|im_start|>assistant" in decoded_output:
+                assistant_response = decoded_output.split("<|im_start|>assistant")[-1]
+                # Remove any trailing special tokens
+                assistant_response = assistant_response.replace("<|im_end|>", "").strip()
+                return assistant_response
+            else:
+                # Fallback: return the full decoded output
+                return decoded_output
+
+        except Exception as e:
+            print(f"Error in model generation: {e}")
+            return f"Error generating response: {str(e)}"
+
+
     def __call__(self, prompt: str, max_new_tokens=512, **kwargs):
         """Make the model callable like a function"""
         return self.generate(prompt, max_new_tokens, **kwargs)
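
For context, a minimal usage sketch of the updated class. The diff implies that __init__ stores a Hugging Face model and tokenizer as self.model and self.tokenizer; the constructor signature and the checkpoint name below are assumptions for illustration, not part of the commit. The tokenizer's chat template must use ChatML-style markers for the <|im_start|>assistant extraction above to apply.

# Usage sketch - assumptions: LocalLlamaModel(model, tokenizer) is the constructor
# signature (not shown in the diff), and the checkpoint is purely illustrative.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen2.5-0.5B-Instruct"       # illustrative ChatML-style chat model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

llm = LocalLlamaModel(model, tokenizer)        # assumed constructor
print(llm("What is the capital of France?"))   # __call__ forwards to generate()

The sampling settings (temperature=0.3 with min_p=0.15 and a mild repetition penalty) bias generation toward conservative, low-variance answers, which suits an agent whose output is consumed downstream.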
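
One caveat on the extraction step: <|im_start|> and <|im_end|> are ChatML markers, so the string-splitting above is template-specific. A template-agnostic alternative (a hypothetical sketch, not the committed code) slices off the prompt tokens and decodes only what was generated, relying on the fact that transformers' generate() returns prompt plus continuation for decoder-only models:

# Template-agnostic extraction sketch (assumption: output[0] still begins with
# the prompt tokens, the default for decoder-only models in transformers).
prompt_len = input_ids.shape[-1]
response = self.tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)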