Final_Assignment_Template3

Sleeping

App Files Files Community

bwilkie committed on Jul 22

Commit

d9f0f18

verified ·

1 Parent(s): 78f6f4d

Update myagent.py

Browse files

Files changed (1) hide show

myagent.py +57 -39

myagent.py CHANGED Viewed

@@ -5,14 +5,14 @@ from tools.fetch import fetch_webpage
 from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
 import myprompts
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-import torch
 # --- Basic Agent Definition ---
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         try:
@@ -40,14 +40,12 @@ class BasicAgent:
             print(error)
             return error
 # Load model and tokenizer
 model_id = "LiquidAI/LFM2-1.2B"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
-    torch_dtype="bfloat16",
     trust_remote_code=True,
     # attn_implementation="flash_attention_2"  # <- uncomment on compatible GPU
 )
@@ -58,52 +56,74 @@ class LocalLlamaModel:
     def __init__(self, model, tokenizer):
         self.model = model
         self.tokenizer = tokenizer
-        self.device = 'cpu'
-    def generate(self, prompt: str, max_new_tokens=512*5, **kwargs):
         try:
-            # Generate answer using the provided prompt - following the recommended pattern
-            # input_ids = self.tokenizer.apply_chat_template(
-            #     [{"role": "user", "content": str(prompt)}],
-            #     add_generation_prompt=True,
-            #     return_tensors="pt",
-            #     tokenize=True,
-            # ).to(self.model.device)
             print("Prompt: ", prompt)
             print("Prompt type: ", type(prompt))
-            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
-            # Generate output - exactly as in recommended code
-            output = self.model.generate(
-                input_ids,
-                do_sample=True,
-                temperature=0.3,
-                min_p=0.15,
-                repetition_penalty=1.05,
-                max_new_tokens=max_new_tokens,
-            )
-            # Decode the full output - as in recommended code
-            decoded_output = self.tokenizer.decode(output[0], skip_special_tokens=False)
-            # Extract only the assistant's response (after the last <|im_start|>assistant)
-            if "<|im_start|>assistant" in decoded_output:
-                assistant_response = decoded_output.split("<|im_start|>assistant")[-1]
-                # Remove any trailing special tokens
-                assistant_response = assistant_response.replace("<|im_end|>", "").strip()
-                return assistant_response
-            else:
-                # Fallback: return the full decoded output
-                return decoded_output
         except Exception as e:
             print(f"Error in model generation: {e}")
             return f"Error generating response: {str(e)}"
-    def __call__(self, prompt: str, max_new_tokens=512, **kwargs):
         """Make the model callable like a function"""
         return self.generate(prompt, max_new_tokens, **kwargs)
@@ -118,8 +138,6 @@ gaia_agent = CodeAgent(
     model=wrapped_model
 )
 if __name__ == "__main__":
     # Example usage
     question = "What was the actual enrollment of the Malko competition in 2023?"

 from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
 import myprompts
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
 # --- Basic Agent Definition ---
 class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
         try:
             print(error)
             return error
 # Load model and tokenizer
 model_id = "LiquidAI/LFM2-1.2B"
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
+    torch_dtype=torch.bfloat16,  # Fixed: was string, should be torch dtype
     trust_remote_code=True,
     # attn_implementation="flash_attention_2"  # <- uncomment on compatible GPU
 )
     def __init__(self, model, tokenizer):
         self.model = model
         self.tokenizer = tokenizer
+        self.device = model.device if hasattr(model, 'device') else 'cpu'
+    def _extract_text_from_messages(self, messages):
+        """Extract text content from ChatMessage objects or handle string input"""
+        if isinstance(messages, str):
+            return messages
+        elif isinstance(messages, list):
+            # Handle list of ChatMessage objects
+            text_parts = []
+            for msg in messages:
+                if hasattr(msg, 'content'):
+                    # Handle ChatMessage with content attribute
+                    if isinstance(msg.content, list):
+                        # Content is a list of content items
+                        for content_item in msg.content:
+                            if isinstance(content_item, dict) and 'text' in content_item:
+                                text_parts.append(content_item['text'])
+                            elif hasattr(content_item, 'text'):
+                                text_parts.append(content_item.text)
+                    elif isinstance(msg.content, str):
+                        text_parts.append(msg.content)
+                elif isinstance(msg, dict) and 'content' in msg:
+                    # Handle dictionary format
+                    text_parts.append(str(msg['content']))
+                else:
+                    # Fallback: convert to string
+                    text_parts.append(str(msg))
+            return '\n'.join(text_parts)
+        else:
+            return str(messages)
+    def generate(self, prompt, max_new_tokens=512*5, **kwargs):
         try:
             print("Prompt: ", prompt)
             print("Prompt type: ", type(prompt))
+            # Extract text from the prompt (which might be ChatMessage objects)
+            text_prompt = self._extract_text_from_messages(prompt)
+            print("Extracted text prompt:", text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt)
+            # Tokenize the text prompt
+            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.model.device)
+            input_ids = inputs['input_ids']
+            # Generate output
+            with torch.no_grad():
+                output = self.model.generate(
+                    input_ids,
+                    do_sample=True,
+                    temperature=0.3,
+                    min_p=0.15,
+                    repetition_penalty=1.05,
+                    max_new_tokens=max_new_tokens,
+                    pad_token_id=self.tokenizer.eos_token_id,  # Handle padding
+                )
+            # Decode only the new tokens (exclude the input)
+            new_tokens = output[0][len(input_ids[0]):]
+            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
+            return response.strip()
         except Exception as e:
             print(f"Error in model generation: {e}")
             return f"Error generating response: {str(e)}"
+    def __call__(self, prompt, max_new_tokens=512, **kwargs):
         """Make the model callable like a function"""
         return self.generate(prompt, max_new_tokens, **kwargs)
     model=wrapped_model
 )
 if __name__ == "__main__":
     # Example usage
     question = "What was the actual enrollment of the Malko competition in 2023?"