File size: 4,683 Bytes
fcaa164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

from typing import Any, Dict, List, Optional, Union

from typing_extensions import Self

from camel.agents import ChatAgent
from camel.data_collector.base import BaseDataCollector
from camel.messages import AlpacaItem, BaseMessage
from camel.schemas import OpenAISchemaConverter

# ruff: noqa: E501
DEFAULT_CONVERTER_PROMPTS = """
    Extract key entities and attributes from the conversations
    and convert them into a structured JSON format.
    For example:
    Instruction: You are a helpful assistant. 
    User: When is the release date of the video game Portal?
    Assistant: The release date of the video game Portal is October 9.
    Your output should be:
    {
        "instruction": "You are a helpful assistant. When is the release date of the video game Portal?",
        "input": "",
        "output": "The release date of the video game Portal is October 9."
    }
"""


class AlpacaDataCollector(BaseDataCollector):
    def __init__(self) -> None:
        super().__init__()
        self.system_message: Optional[BaseMessage] = None
        self.agent_name: Optional[str] = None

    def record(
        self,
        agent: Union[List[ChatAgent], ChatAgent],
    ) -> Self:
        r"""Inject an agent into the data collector.

        Args:
            agent (Union[List[ChatAgent], ChatAgent]):
                The agent to inject.
        """
        if not self.agent_name:
            _agent = agent if isinstance(agent, ChatAgent) else agent[0]
            self.agent_name = _agent.role_name
            self.system_message = _agent._system_message
        super().record(agent)
        return self

    def convert(self) -> Dict[str, Any]:
        r"""Convert the collected data into a dictionary."""
        if self.agent_name is None:
            raise ValueError("No agent injected")

        history = self.get_agent_history(self.agent_name)
        if not history:
            raise ValueError("No data collected.")

        # Validate and process history
        if len(history) == 3 and history[0].role == "system":
            history = history[1:]  # Ignore the system message.
        elif len(history) != 2:
            raise ValueError(
                f"AlpacaDataCollector only supports one message pair, but "
                f"got {len(history)}"
            )

        input_message, output_message = history
        instruction = (
            self.system_message.content if self.system_message else ""
        ) + str(input_message.message)

        data = {
            "instruction": instruction,
            "input": "",
            "output": output_message.message,
        }
        self.data.append(data)
        return data

    def llm_convert(
        self,
        converter: Optional[OpenAISchemaConverter] = None,
        prompt: Optional[str] = None,
    ) -> Dict[str, str]:
        r"""Convert collected data using an LLM schema converter.

        Args:
            converter (Optional[OpenAISchemaConverter], optional):
                The converter to use. (default: :obj:`OpenAISchemaConverter`)
            prompt (Optional[str], optional): Prompt to guide the conversion.
                (default: :obj:`DEFAULT_CONVERTER_PROMPTS`)

        Returns:
            Dict[str, str]: The converted data.

        Raises:
            ValueError: If no agent is injected or data cannot be collected.
        """
        prompt = prompt or DEFAULT_CONVERTER_PROMPTS
        converter = converter or OpenAISchemaConverter()

        system = self.system_message.content if self.system_message else ""
        context = [f"Instruction: {system}\n"]

        for message in self.get_agent_history(str(self.agent_name)):
            if message.role == "user":
                context.append(f"User: {message.message}\n")
            else:
                context.append(f"{message.name}: {message.message}\n")
        return converter.convert(
            "\n".join(context), AlpacaItem, prompt=prompt
        ).model_dump()