import os

from smolagents import CodeAgent, ToolCallingAgent, OpenAIServerModel
from tools.fetch import fetch_webpage
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
import myprompts
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
from groq_api import GrokApi

# NOTE: the smolagents, tools, myprompts, pipeline, and GrokApi imports are not
# used in this section; they are presumably referenced elsewhere in the Space.

# --- Basic Agent Definition ---
# Basic model wrapper for local inference with debug info
class BasicAgent:
    def __init__(self):
        print("[INFO] Loading default model...")
        model_id = "LiquidAI/LFM2-1.2B"
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.device = self.model.device if hasattr(self.model, "device") else "cpu"
        print(f"Model device: {self.device}")

    def _extract_prompt(self, prompt):
        if isinstance(prompt, str):
            return prompt
        elif isinstance(prompt, list):
            # Convert a list of ChatMessages or dicts to plain text
            return "\n".join(
                msg.content if hasattr(msg, "content") else msg.get("content", str(msg))
                for msg in prompt
            )
        else:
            return str(prompt)

    def generate(self, prompt, max_new_tokens=512):
        try:
            print("\n[DEBUG] Raw prompt input:", prompt)
            text_prompt = self._extract_prompt(prompt)
            print(
                "[DEBUG] Extracted prompt text:",
                text_prompt[:200] + "..." if len(text_prompt) > 200 else text_prompt,
            )
            inputs = self.tokenizer(text_prompt, return_tensors="pt").to(self.device)
            input_ids = inputs["input_ids"]
            print("[DEBUG] Tokenized input shape:", input_ids.shape)
            with torch.no_grad():
                output = self.model.generate(
                    input_ids=input_ids,
                    do_sample=True,
                    temperature=0.3,  # low temperature keeps answers focused
                    min_p=0.15,  # min-p sampling drops low-probability tail tokens
                    repetition_penalty=1.05,
                    max_new_tokens=max_new_tokens,
                    pad_token_id=self.tokenizer.eos_token_id,
                )
            # Decode only the newly generated tokens, not the echoed prompt.
            new_tokens = output[0][len(input_ids[0]):]
            decoded = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            print("[DEBUG] Decoded output:", decoded.strip())
            return decoded.strip()
        except Exception as e:
            print(f"[ERROR] Generation failed: {e}")
            return f"Error generating response: {e}"

    def __call__(self, prompt, max_new_tokens=2048):
        return self.generate(prompt, max_new_tokens)
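

# A minimal sketch, not part of the original script: BasicAgent also accepts
# chat-style inputs, because _extract_prompt flattens dicts or ChatMessage-like
# objects into plain text before tokenization. demo_chat_prompt is a
# hypothetical helper added purely for illustration.
def demo_chat_prompt(agent):
    messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Name one prime number."},
    ]
    # __call__ -> generate -> _extract_prompt, which joins the "content" fields.
    return agent(messages, max_new_tokens=32)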


# Run minimal test
if __name__ == "__main__":
    model = BasicAgent()

    # Example prompt
    prompt = "What is the capital of France?"
    print("\n[TEST] Asking a simple question...")
    response = model(prompt)
    print("\nFinal Answer:", response)