Spaces:
Runtime error
Runtime error
| import re | |
| from typing import Literal, cast | |
| from pydantic import BaseModel, Field | |
| from utils import format_docstring | |
# Closed set of action kinds an agent can take on a turn.
ActionType = Literal[
    "none",
    "speak",
    "non-verbal communication",
    "action",
    "leave",
]
# NOTE: the class docstring is kept verbatim on purpose; pydantic exposes a
# model's docstring as its schema description, so rewording it would change
# generated schemas.
class Message(BaseModel):
    """
    An interface for messages.
    There is only one required method: to_natural_language
    """

    def to_natural_language(self) -> str:
        """Render this message as plain text.

        Raises:
            NotImplementedError: always, on this base class; concrete
                message types override the method.
        """
        raise NotImplementedError
# NOTE: the class docstring is kept verbatim on purpose; pydantic exposes a
# model's docstring as its schema description.
class SimpleMessage(Message):
    """
    A simple message with a single string field.
    """

    # The text payload; returned verbatim by to_natural_language.
    message: str = Field(description="the message")

    def to_natural_language(self) -> str:
        """Return the stored text unchanged."""
        text = self.message
        return text
# What an agent is shown before acting: the text of the previous turn, the
# current turn counter, and the action types it may pick from.
# (Documented with comments rather than a class docstring so the pydantic
# schema description stays unset.)
class Observation(Message):
    last_turn: str = Field(description="the last turn of the conversation")
    turn_number: int = Field(description="the turn number of the conversation")
    available_actions: list[ActionType] = Field(description="the available actions")

    def to_natural_language(self) -> str:
        """Render the observation as text.

        Turn 0 is rendered as the opening of the conversation (the
        ``last_turn`` text followed by a "Conversation Starts:" marker).
        Any other turn is rendered as ``Turn #<turn_number - 1>: <last_turn>``.
        """
        if self.turn_number != 0:
            # The observed text belongs to the turn before the current one.
            previous_turn = self.turn_number - 1
            return f"Turn #{previous_turn}: {self.last_turn}\n"
        return f"\n{self.last_turn}\nConversation Starts:\n"
# Static context of an episode: the scenario plus each participant's name,
# background and goal.
# (Documented with comments rather than a class docstring so the pydantic
# schema description stays unset.)
class ScriptBackground(Message):
    scenario: str = Field(description="scenario of the episode")
    p1_name: str = Field(description="name of participant 1")
    p2_name: str = Field(description="name of participant 2")
    p1_background: str = Field(description="background of participant 1")
    p2_background: str = Field(description="background of participant 2")
    p1_goal: str = Field(description="goal of participant 1")
    p2_goal: str = Field(description="goal of participant 2")

    def to_natural_language(self) -> str:
        """Render the episode context as text.

        When neither background is set, the background lines are omitted
        entirely. When at least one is set, both lines are emitted and a
        missing one is shown as "Unknown". The text is passed through the
        project helper ``format_docstring`` before being returned.
        """
        # The template bodies below are kept flush-left so the text handed to
        # format_docstring is exactly what the templates spell out.
        neither_background_known = not (self.p1_background or self.p2_background)
        if neither_background_known:
            return format_docstring(
                f"""Here is the context of this interaction:
Scenario: {self.scenario}
Participants: {self.p1_name} and {self.p2_name}
{self.p1_name}'s goal: {self.p1_goal}
{self.p2_name}'s goal: {self.p2_goal}
"""
            )

        # One known background is enough to show both lines: between
        # strangers a background may be hidden, and the hidden one is
        # reported as "Unknown".
        first_background = self.p1_background or "Unknown"
        second_background = self.p2_background or "Unknown"
        return format_docstring(
            f"""Here is the context of this interaction:
Scenario: {self.scenario}
Participants: {self.p1_name} and {self.p2_name}
{self.p1_name}'s background: {first_background}
{self.p2_name}'s background: {second_background}
{self.p1_name}'s goal: {self.p1_goal}
{self.p2_name}'s goal: {self.p2_goal}
"""
        )
# The environment's verdict for a turn: whether the conversation ended, an
# optional rating per participant, and optional supporting comments.
# (Documented with comments rather than a class docstring so the pydantic
# schema description stays unset.)
class ScriptEnvironmentResponse(Message):
    terminated: bool = Field(
        description="whether the conversation is terminated",
        default_factory=lambda: False,
    )
    # A rating is either a bare score or a (score, {label: score}) pair.
    p1_rate: float | tuple[float, dict[str, float]] | None = Field(
        description="rating of participant 1, on the scale of 1 to 10"
    )
    p2_rate: float | tuple[float, dict[str, float]] | None = Field(
        description="rating of participant 2, on the scale of 1 to 10"
    )
    comments: str | None = Field(
        description="All of the comments supporting the termination and rating"
    )

    def to_natural_language(self) -> str:
        """Render the environment response as text, one item per line.

        Only the parts that are present are emitted: the termination notice
        when ``terminated`` is true, each rating that is not None, and the
        comments when not None. Blank lines are dropped, and every remaining
        line ends with a newline.
        """
        termination_note = "The conversation is terminated." if self.terminated else ""
        # Separate label and value with ": "; without it a rating of 8 for
        # participant 1 would read "Rating of participant 18".
        p1_note = (
            f"Rating of participant 1: {self.p1_rate}"
            if self.p1_rate is not None
            else ""
        )
        p2_note = (
            f"Rating of participant 2: {self.p2_rate}"
            if self.p2_rate is not None
            else ""
        )
        comment_note = self.comments if self.comments is not None else ""

        # format_docstring is a project helper; its exact normalisation is
        # not visible here, so the template keeps one item per line.
        summary = format_docstring(
            f"""Environment response:
{termination_note}
{p1_note}
{p2_note}
{comment_note}
"""
        )
        # Absent parts leave empty lines behind; keep only non-blank lines.
        return "".join(f"{line}\n" for line in summary.split("\n") if line.strip())
# A single action taken by an agent: the kind of action plus its free-text
# argument.
# (Documented with comments rather than a class docstring so the pydantic
# schema description stays unset.)
class AgentAction(Message):
    action_type: ActionType = Field(
        description="whether to speak at this turn or choose to not do anything"
    )
    argument: str = Field(
        description="the utterance if choose to speak, the expression or gesture if choose non-verbal communication, or the physical action if choose action"
    )

    def to_natural_language(self) -> str:
        """Render the action as text.

        "none" and "leave" map to fixed phrases, "speak" yields the bare
        argument, and the two bracketed kinds yield ``[<kind>] <argument>``.
        """
        kind = self.action_type
        if kind == "none":
            return "did nothing"
        if kind == "speak":
            return f"{self.argument}"
        if kind in ("non-verbal communication", "action"):
            # Both kinds share one layout: the kind in brackets, then the argument.
            return f"[{kind}] {self.argument}"
        if kind == "leave":
            return "left the conversation"
        # Any other value falls through and yields None.
# Shape of a parsed script:
#   [0] one list per turn of (str, str, Message) triples, apparently
#       (sender, receiver, message)
#   [1] a flat list of (agent name, Message) pairs
ScriptInteractionReturnType = tuple[
    list[list[tuple[str, str, Message]]],
    list[tuple[str, Message]],
]
# A whole scripted conversation held as one text blob in the "Turn #x" format
# described by the field below, plus helpers to split and parse that text.
# (Documented with comments rather than a class docstring so the pydantic
# schema description stays unset.)
class ScriptInteraction(Message):
    # The description is runtime text (it is part of the model schema), so it
    # is reproduced exactly, flush-left.
    interactions: str = Field(
        description="""The interaction between the two participants in maximum 20 turns. Each turn is separated by a newline, and should only describe one agent. Following the structure:
Turn #x
[participant's name] [action] {argument for some actions}
You can use different types of actions, but only use one in each turn. You should move other information into argument part. Below shows a python code snippet of the format for each action type:
match self.action_type:
case "none":
return "did nothing"
case "speak":
return f'said: "{self.argument}"'
case "non-verbal communication":
return f"[{self.action_type}] {self.argument}"
case "action":
return f"[{self.action_type}] {self.argument}"
case "leave":
return "left the conversation"
For example, the following is acceptable:
Turn #x
Oliver Thompson said: "Hey Esmeralda, what's wrong? You seem upset."
Turn #x
Esmeralda Solis [action] moved closer
Turn #x
Oliver Thompson [non-verbal communication] smiled
Turn #x
Esmeralda Solis did nothing
Turn #x
Oliver Thompson left the conversation
Turn #x
Esmeralda Solis [action] leaned in and lowered her voice: "Sorry"
And the following is not acceptable:
Turn #1
Oliver Thompson [speak] said: "Hey Esmeralda, what's wrong? You seem upset."
Turn #1
Esmeralda Solis non-verbal communication moved closer
"""
    )

    def to_natural_language(self) -> str:
        """Return the raw script text unchanged."""
        return self.interactions

    def parse(
        self, agent_names: list[str], background: str
    ) -> tuple[list[list[tuple[str, str, Message]]], list[tuple[str, Message]]]:
        """Parse the script into per-turn messages and a flat action list.

        Args:
            agent_names: names of the participants. The turn bookkeeping
                indexes positions 0 and 1, so exactly two names are assumed.
            background: text placed in the opening observation (turn 0).

        Returns:
            ``(results, agent_results)``. ``results[0]`` holds one opening
            observation per agent. Each later entry holds, for one turn, an
            observation per agent followed by the acting agent's action and
            a "none" action for the other agent. ``agent_results`` lists
            ``(name, action)`` for every turn in order.

        Raises:
            ValueError: from ``split_by_turn`` when a turn spans three or
                more lines, or from ``parse_single_dialogue`` when a turn
                header cannot be matched.
            Exception: any other error raised while parsing a turn (for
                example model validation of an unrecognised action) is
                printed and re-raised unchanged.
        """
        turns = self.split_by_turn(self.interactions)

        agent_results: list[tuple[str, Message]] = []
        results: list[list[tuple[str, str, Message]]] = [
            [
                (
                    "Environment",
                    observer,
                    Observation(
                        last_turn=background,
                        turn_number=0,
                        available_actions=["none"],
                    ),
                )
                for observer in agent_names
            ]
        ]

        for line_idx, line in enumerate(turns):
            try:
                res = self.parse_single_dialogue(line)
                action = cast(ActionType, res["action"])
                argument = cast(str, res["argument"])
                name = cast(str, res["name"])
                parsed_action = AgentAction(action_type=action, argument=argument)
                if name not in agent_names:
                    print(
                        f"The name of the agent, {name}, is not in the list of agent names, {agent_names}"
                    )
                    # TODO: unclear which name is right here; fall back to
                    # strict alternation between the two agents.
                    name = agent_names[line_idx % 2]
            except Exception as e:
                print(
                    f"Error when parsing the dialogue: {line}",
                    f"The error is: {e}",
                )
                # A malformed turn aborts the whole parse; the caller decides
                # how to recover.
                raise

            inactive_agent_name = (
                agent_names[0] if name == agent_names[1] else agent_names[1]
            )
            turn_messages: list[tuple[str, str, Message]] = [
                (
                    "Environment",
                    observer,
                    Observation(
                        last_turn="environment is the agent",
                        turn_number=line_idx + 1,
                        available_actions=["none"],
                    ),
                )
                for observer in agent_names
            ]
            turn_messages.append((name, "Environment", parsed_action))
            turn_messages.append(
                (
                    inactive_agent_name,
                    "Environment",
                    AgentAction(action_type="none", argument="did nothing"),
                )
            )
            results.append(turn_messages)
            agent_results.append((name, parsed_action))

        return (results, agent_results)

    def parse_single_dialogue(
        self, dialogue: str
    ) -> dict[str, str | int | AgentAction | None]:
        """Parse one turn and return its turn number, name, action and argument.

        Args:
            dialogue: one turn of the form ``Turn #<n>`` on the first line
                and ``<Name> <action text>`` on the next.

        Returns:
            A dict with keys ``"turn"`` (int), ``"name"`` (str), ``"action"``
            (an action-type string, or None when the action text is not
            recognised) and ``"argument"`` (str, or None likewise).

        Raises:
            ValueError: when the turn header / agent name cannot be matched.
        """
        # Agent names are assumed to be capitalised words made of lowercase
        # letters and apostrophes.
        match_turn_name = re.match(
            r"Turn #?(\d+):?\s*\n((?:[A-Z]['a-z]* ?)+)", dialogue
        )
        if not match_turn_name:
            raise ValueError(
                f"The dialogue does not match the expected format: {dialogue}"
            )

        turn, name = match_turn_name.groups()
        # Everything after the matched header and name is the action text.
        action_content = dialogue[match_turn_name.end() :].strip()

        action: str | None
        argument: str | None
        # Structured prefixes are checked before the free-text phrases so an
        # utterance such as: said: "I did nothing wrong" is parsed as speech
        # instead of being mistaken for a "none" action.
        if speech := re.match(r'(?:\[speak\] )?said: "(.*?)"', action_content):
            action, argument = "speak", speech.group(1).strip()
        elif tagged := re.match(
            r"\[(non-verbal communication|action)\] (.*)", action_content
        ):
            action, argument = tagged.groups()
        elif "did nothing" in action_content:
            action, argument = "none", ""
        elif "left the conversation" in action_content:
            # Substring test so trailing punctuation ("left the
            # conversation.") is tolerated.
            action, argument = "leave", ""
        else:
            action, argument = None, None

        return {
            "turn": int(turn),
            "name": name.strip(),
            "action": action,
            "argument": argument,
        }

    def split_by_turn(self, input_string: str) -> list[str]:
        """Split the script text into one string per turn.

        Args:
            input_string: the full script text.

        Returns:
            The stripped turn chunks, each starting with "Turn". Text before
            the first turn marker is discarded. The list is empty when the
            input contains no turn marker.

        Raises:
            ValueError: when any turn spans three or more lines, which is
                treated as more than one agent acting in a turn.
        """
        # Split in front of every "Turn #<n>" / "Turn <n>" marker; the
        # lookahead keeps the marker at the start of each chunk.
        chunks = (chunk.strip() for chunk in re.split(r"(?=Turn #?\d+)", input_string))
        dialogues = [chunk for chunk in chunks if chunk.startswith("Turn")]
        if not dialogues:
            # No turn found: nothing to trim or validate.
            return []

        # Keep only the header and action line of the final turn; anything
        # after that is trailing text.
        dialogues[-1] = "\n".join(dialogues[-1].split("\n")[:2])

        for dialogue in dialogues:
            # TODO: current workaround for several agents acting in one turn.
            if len(dialogue.split("\n")) >= 3:
                raise ValueError("Only one agent can act per turn.")
        return dialogues
def default_value_for_return_type() -> ScriptInteractionReturnType:
    """Build a placeholder value with the shape of a parsed script.

    Returns:
        A pair of: a single opening turn holding one turn-0 observation for
        each of two agents named "none", and a one-element action list
        holding an empty "none" action under an empty agent name.
    """
    placeholder_names = ("none", "none")
    opening_turn: list[tuple[str, str, Message]] = [
        (
            "Environment",
            agent,
            Observation(
                last_turn="Environment is the agent",
                turn_number=0,
                available_actions=["none"],
            ),
        )
        for agent in placeholder_names
    ]
    turn_messages: list[list[tuple[str, str, Message]]] = [opening_turn]

    empty_action = AgentAction(action_type="none", argument="")
    agent_actions: list[tuple[str, Message]] = [("", empty_action)]
    return (turn_messages, agent_actions)