import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from smolagents import CodeAgent, Model, ChatMessage

import tools.tools as tls  # Your tool definitions

load_dotenv()
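# load_dotenv() reads a local .env file into the environment. The variable name
# matches the os.getenv("HF_TOKEN") call in HuggingFaceChatModel below; a
# typical entry would look like (placeholder value, use your own token):
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxx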
def enforce_strict_role_alternation(messages):
    """
    Ensure that messages follow the required pattern
    'user/assistant/user/assistant/...', optionally preceded by a single
    'system' message.

    This is necessary because many chat-based models (e.g., ChatCompletion
    APIs) expect the conversation format to alternate strictly between user
    and assistant roles, possibly after one leading system message.

    Parameters:
    -----------
    messages : list of dict
        The message history. Each message is expected to be a dictionary with
        a 'role' key ('user', 'assistant', or 'system') and a 'content' key.

    Returns:
    --------
    cleaned : list of dict
        A sanitized version of the messages list that follows the correct
        role alternation rules.
    """
    cleaned = []      # List to store the cleaned message sequence
    last_role = None  # Tracks the last valid role added to ensure alternation

    for msg in messages:
        role = msg["role"]

        # Skip any message that doesn't have a valid role
        if role not in ("user", "assistant", "system"):
            continue

        # Allow a single 'system' message, and only at the very beginning;
        # system messages appearing later in the history are dropped so the
        # user/assistant alternation is preserved
        if role == "system":
            if not cleaned:
                cleaned.append(msg)
            continue

        # Skip messages with the same role as the previous one (breaks alternation)
        if role == last_role:
            continue

        # Add the valid message to the cleaned list
        cleaned.append(msg)
        last_role = role  # Update the last role for the next iteration

    return cleaned
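# Hypothetical example: a duplicated user turn is dropped, so the result
# alternates correctly after the leading system message:
#   enforce_strict_role_alternation([
#       {"role": "system", "content": "Be concise."},
#       {"role": "user", "content": "Hi"},
#       {"role": "user", "content": "Hello?"},       # same role twice -> dropped
#       {"role": "assistant", "content": "Hello!"},
#   ])
#   -> [system, user "Hi", assistant "Hello!"]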
# Custom model class that wraps Hugging Face's InferenceClient for chat-based models
class HuggingFaceChatModel(Model):
    def __init__(self):
        super().__init__()
        # The specific Hugging Face model ID to use
        model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
        # Create an InferenceClient with the model ID and the Hugging Face
        # token from your environment
        self.client = InferenceClient(model=model_id, token=os.getenv("HF_TOKEN"))

    def generate(self, messages, stop_sequences=None, **kwargs):
        """
        Generate a response from the chat model based on the input message history.

        Parameters:
        -----------
        messages : list of dict
            A list of message dicts in OpenAI-style format, e.g.:
            [{"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi!"}]
        stop_sequences : list of str, optional
            A list of strings that will stop generation when encountered.
            Default is ["Task"].

        Returns:
        --------
        ChatMessage
            A formatted response object with role='assistant' and the
            model-generated content.
        """
        # Set default stop sequences if none provided
        if stop_sequences is None:
            stop_sequences = ["Task"]

        # Preprocess: enforce valid alternation of user/assistant messages
        cleaned_messages = enforce_strict_role_alternation(messages)

        # Call the Hugging Face chat API with the cleaned messages
        response = self.client.chat_completion(
            messages=cleaned_messages,
            stop=stop_sequences,
            max_tokens=1024,  # Limit the number of tokens generated in the reply
        )

        # Extract the content from the model response and wrap it in a ChatMessage
        content = response.choices[0].message.content
        return ChatMessage(role="assistant", content=content)
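# Usage sketch (requires a valid HF_TOKEN; the prompt is hypothetical):
#   model = HuggingFaceChatModel()
#   reply = model.generate([{"role": "user", "content": "Say hello."}])
#   print(reply.content)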
# Basic agent built with smolagents
class BasicAgent:
    def __init__(self):
        # Informative log to indicate that the agent is being initialized
        print("BasicAgent initialized with Hugging Face chat model.")

        # Instantiate the custom model that wraps the Hugging Face InferenceClient
        self.model = HuggingFaceChatModel()

        # Create the CodeAgent, which combines the tools and the chat model
        self.agent = CodeAgent(
            tools=[tls.search_tool, tls.calculate_cargo_travel_time],  # Your list of tools
            model=self.model,  # The model that generates tool-using responses
            additional_authorized_imports=["pandas"],  # Optional: allow pandas in generated code
            max_steps=20,  # Limit the number of planning steps (tool calls + reasoning)
        )
    def __call__(self, messages) -> str:
        """
        Handle a call to the agent with either a single question or a message history.

        Parameters:
        -----------
        messages : Union[str, List[Dict[str, str]]]
            The input from the chat interface, either:
            - a plain string (just one message), or
            - a list of dicts, like [{"role": "user", "content": "What's the weather?"}]

        Returns:
        --------
        str
            The assistant's response as a string.
        """
        # If the input is a chat history (list of messages), get the most recent one
        if isinstance(messages, list):
            question = messages[-1]["content"]  # Extract the last message's content
        else:
            question = messages  # If it's just a string, use it directly

        # Log the input for debugging
        print(f"Received question: {question[:60]}...")

        # Run the CodeAgent to get a response (may include tool use);
        # agent.run() can return non-string results (e.g., numbers), so coerce to str
        response = str(self.agent.run(question))

        # Log the response for debugging
        print(f"Response generated: {response[:60]}...")

        return response  # Return the final result
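# Minimal smoke test when run as a script (the question is hypothetical and
# assumes HF_TOKEN plus the tools.tools module are available):
if __name__ == "__main__":
    agent = BasicAgent()
    print(agent("What is the capital of France?"))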