Update myagent.py

myagent.py (+55 -113)
CHANGED

@@ -8,139 +8,81 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
--- myagent.py (old)

 import torch
 
 # --- Basic Agent Definition ---
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        try:
-            # Use the reviewer agent to determine if the question can be answered by a model or requires code
-            print("Calling reviewer agent...")
-            reviewer_answer = reviewer_agent.run(myprompts.review_prompt + "\nThe question is:\n" + question)
-            print(f"Reviewer agent answer: {reviewer_answer}")
-
-            question = question + '\n' + myprompts.output_format
-            fixed_answer = ""
-
-            if reviewer_answer == "code":
-                fixed_answer = gaia_agent.run(question)
-                print(f"Code agent answer: {fixed_answer}")
-            elif reviewer_answer == "model":
-                # If the reviewer agent suggests using the model, we can proceed with the model agent
-                print("Using model agent to answer the question.")
-                fixed_answer = model_agent.run(myprompts.model_prompt + "\nThe question is:\n" + question)
-                print(f"Model agent answer: {fixed_answer}")
-
-            return fixed_answer
-        except Exception as e:
-            error = f"An error occurred while processing the question: {e}"
-            print(error)
-            return error
-
-# Load model and tokenizer
-model_id = "LiquidAI/LFM2-1.2B"
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    device_map="auto",
-    torch_dtype=torch.bfloat16,  # Fixed: was string, should be torch dtype
-    trust_remote_code=True,
-    # attn_implementation="flash_attention_2"  # <- uncomment on a compatible GPU
-)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-# Create a wrapper class that matches the expected interface
 class LocalLlamaModel:
     def __init__(self, model, tokenizer):
         self.model = model
         self.tokenizer = tokenizer
         self.device = model.device if hasattr(model, 'device') else 'cpu'
-
-    def _extract_text_from_messages(self, messages):
-        # Extract plain text from a string, a list of ChatMessage objects, or a list of dicts
-        # (signature reconstructed from the call site below; body partially truncated in this render)
-        if isinstance(messages, str):
-            return messages
-        elif isinstance(messages, list):
-            text_parts = []
-            for msg in messages:
-                if hasattr(msg, 'content'):
-                    if isinstance(msg.content, list):
-                        # Content is a list of content items
-                        for content_item in msg.content:
-                            if isinstance(content_item, dict) and 'text' in content_item:
-                                text_parts.append(content_item['text'])
-                            elif hasattr(content_item, 'text'):
-                                text_parts.append(content_item.text)
-                    elif isinstance(msg.content, str):
-                        text_parts.append(msg.content)
-                elif isinstance(msg, dict) and 'content' in msg:
-                    # Handle dictionary format
-                    text_parts.append(str(msg['content']))
-                else:
-                    # Fallback: convert to string
-                    text_parts.append(str(msg))
-            return '\n'.join(text_parts)
         else:
-            return str(messages)
-
-    def generate(self, prompt, max_new_tokens=512, **kwargs):
         try:
-            print("Prompt: ", prompt)
-            print("Prompt type: ", type(prompt))
-            # Extract text from the prompt (which might be ChatMessage objects)
-            text_prompt = self._extract_text_from_messages(prompt)
-
-            print("Extracted text prompt:", text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt)
-
-            # Tokenize the text prompt
-            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.model.device)
-            input_ids = inputs['input_ids']
-
-            # Generate output
             with torch.no_grad():
                 output = self.model.generate(
-                    input_ids,
                     do_sample=True,
                     temperature=0.3,
                     min_p=0.15,
                     repetition_penalty=1.05,
                     max_new_tokens=max_new_tokens,
-                    pad_token_id=self.tokenizer.eos_token_id,
                 )
-
-            # Decode only the new tokens (exclude the input)
             new_tokens = output[0][len(input_ids[0]):]
-
-            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
-            return response.strip()
-
-        except Exception as e:
-            print(f"Error in model generation: {e}")
-            return f"Error generating response: {str(e)}"
 
-    def __call__(self, prompt, max_new_tokens=512, **kwargs):
-        """Make the model callable like a function"""
-        return self.generate(prompt, max_new_tokens, **kwargs)
-
 
-#
-
 
 if __name__ == "__main__":
-
-
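Note: the removed _extract_text_from_messages above also unpacked list-valued msg.content (items like [{"type": "text", "text": ...}]), while the new _extract_prompt below joins msg.content directly and its fallback assumes dict messages. A minimal sketch of a variant that keeps the old behavior follows; the message shapes are assumed from the surrounding diff, and this helper is illustrative, not part of the commit.

    # Illustrative sketch (not part of the commit): a variant of the new
    # _extract_prompt that keeps the old handling of list-valued message
    # content and does not assume dict messages in the fallback.
    def _extract_prompt(self, prompt):
        if isinstance(prompt, str):
            return prompt
        if isinstance(prompt, list):
            text_parts = []
            for msg in prompt:
                # ChatMessage-style objects carry .content; dicts carry ['content']
                content = getattr(msg, "content", None)
                if content is None and isinstance(msg, dict):
                    content = msg.get("content")
                if isinstance(content, list):
                    # Unpack [{"type": "text", "text": ...}] style items, as the old code did
                    for item in content:
                        if isinstance(item, dict) and "text" in item:
                            text_parts.append(item["text"])
                        elif hasattr(item, "text"):
                            text_parts.append(item.text)
                elif isinstance(content, str):
                    text_parts.append(content)
                else:
                    # Fallback: convert the whole message to a string
                    text_parts.append(str(msg))
            return "\n".join(text_parts)
        return str(prompt)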
+++ myagent.py (new)

 import torch
 
 # --- Basic Agent Definition ---
 
+# Basic model wrapper for local inference with debug info
 class LocalLlamaModel:
     def __init__(self, model, tokenizer):
         self.model = model
         self.tokenizer = tokenizer
         self.device = model.device if hasattr(model, 'device') else 'cpu'
+        print(f"Model device: {self.device}")
+
+    def _extract_prompt(self, prompt):
+        if isinstance(prompt, str):
+            return prompt
+        elif isinstance(prompt, list):
+            # Convert a list of ChatMessages or dicts to plain text
+            return "\n".join(
+                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
+                for msg in prompt
+            )
         else:
+            return str(prompt)
+
+    def generate(self, prompt, max_new_tokens=512):
         try:
+            print("\n[DEBUG] Raw prompt input:", prompt)
+            text_prompt = self._extract_prompt(prompt)
+            print("[DEBUG] Extracted prompt text:", text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt)
+
+            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
+            input_ids = inputs["input_ids"]
+
+            print("[DEBUG] Tokenized input shape:", input_ids.shape)
             with torch.no_grad():
                 output = self.model.generate(
+                    input_ids=input_ids,
                     do_sample=True,
                     temperature=0.3,
                     min_p=0.15,
                     repetition_penalty=1.05,
                     max_new_tokens=max_new_tokens,
+                    pad_token_id=self.tokenizer.eos_token_id,
                 )
+
             new_tokens = output[0][len(input_ids[0]):]
+            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
+            print("[DEBUG] Decoded output:", decoded.strip())
 
+            return decoded.strip()
 
+        except Exception as e:
+            print(f"[ERROR] Generation failed: {e}")
+            return f"Error generating response: {e}"
+
+    def __call__(self, prompt, max_new_tokens=512):
+        return self.generate(prompt, max_new_tokens)
 
+# Load your model and tokenizer
+def load_model(model_id="LiquidAI/LFM2-1.2B"):
+    print(f"Loading model: {model_id}")
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        trust_remote_code=True,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    return LocalLlamaModel(model, tokenizer)
 
+# Run minimal test
 if __name__ == "__main__":
+    model = load_model()
+
+    # Example prompt
+    prompt = "What is the capital of France?"
+
+    print("\n[TEST] Asking a simple question...")
+    response = model(prompt)
+    print("\nFinal Answer:", response)
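One follow-up worth noting: LFM2-1.2B is a chat-tuned model, and feeding it plain concatenated text bypasses the tokenizer's chat template, which can degrade answers. A minimal sketch of template-based generation under that assumption follows; the generate_chat helper and its defaults are illustrative and not part of the commit, while apply_chat_template is the standard transformers API.

import torch

# Illustrative helper (not part of the commit): run one chat turn through the
# tokenizer's chat template instead of raw text. Assumes `wrapper` is a
# LocalLlamaModel from load_model() and that the tokenizer ships a chat template.
def generate_chat(wrapper, question, max_new_tokens=512):
    messages = [{"role": "user", "content": question}]
    input_ids = wrapper.tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant-turn marker
        return_tensors="pt",
    ).to(wrapper.device)

    with torch.no_grad():
        output = wrapper.model.generate(
            input_ids=input_ids,
            do_sample=True,
            temperature=0.3,
            min_p=0.15,
            repetition_penalty=1.05,
            max_new_tokens=max_new_tokens,
            pad_token_id=wrapper.tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens
    new_tokens = output[0][input_ids.shape[1]:]
    return wrapper.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Example usage: print(generate_chat(load_model(), "What is the capital of France?"))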