From 00a396f5b6d017f8f0b195e336e1cd72418a3994 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 11:57:48 +0800 Subject: [PATCH 01/14] basic prisoner not finished yet --- .idea/AgentVerse.iml | 8 ++ .../inspectionProfiles/profiles_settings.xml | 6 ++ .idea/modules.xml | 8 ++ agentverse/.idea/agentverse.iml | 8 ++ .../inspectionProfiles/profiles_settings.xml | 6 ++ agentverse/.idea/modules.xml | 8 ++ agentverse/agents/conversation_agent.py | 4 +- .../environments/rules/order/__init__.py | 1 + .../environments/rules/order/prisoner.py | 50 +++++++++++ .../environments/rules/visibility/__init__.py | 1 + .../environments/rules/visibility/prisoner.py | 46 ++++++++++ agentverse/initialization.py | 2 +- agentverse/parser.py | 2 +- agentverse/tasks/__init__.py | 1 + agentverse/tasks/prisoner_dilema/config.yaml | 84 +++++++++++++++++++ .../tasks/prisoner_dilema/output_parser.py | 49 +++++++++++ main.py | 14 +++- 17 files changed, 293 insertions(+), 5 deletions(-) create mode 100644 .idea/AgentVerse.iml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/modules.xml create mode 100644 agentverse/.idea/agentverse.iml create mode 100644 agentverse/.idea/inspectionProfiles/profiles_settings.xml create mode 100644 agentverse/.idea/modules.xml create mode 100644 agentverse/environments/rules/order/prisoner.py create mode 100644 agentverse/environments/rules/visibility/prisoner.py create mode 100644 agentverse/tasks/prisoner_dilema/config.yaml create mode 100644 agentverse/tasks/prisoner_dilema/output_parser.py diff --git a/.idea/AgentVerse.iml b/.idea/AgentVerse.iml new file mode 100644 index 000000000..5e865eca1 --- /dev/null +++ b/.idea/AgentVerse.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..105ce2da2 --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 000000000..d05e5b486 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/agentverse/.idea/agentverse.iml b/agentverse/.idea/agentverse.iml new file mode 100644 index 000000000..d0876a78d --- /dev/null +++ b/agentverse/.idea/agentverse.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/agentverse/.idea/inspectionProfiles/profiles_settings.xml b/agentverse/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 000000000..105ce2da2 --- /dev/null +++ b/agentverse/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/agentverse/.idea/modules.xml b/agentverse/.idea/modules.xml new file mode 100644 index 000000000..364986d59 --- /dev/null +++ b/agentverse/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/agentverse/agents/conversation_agent.py b/agentverse/agents/conversation_agent.py index ca9608ec1..d97b5648f 100644 --- a/agentverse/agents/conversation_agent.py +++ b/agentverse/agents/conversation_agent.py @@ -20,7 +20,7 @@ def step(self, env_description: str = "") -> Message: try: response = self.llm.generate_response(prompt) parsed_response = self.output_parser.parse(response) - break + # break except Exception as e: logging.error(e) logging.warning("Retrying...") @@ -46,7 +46,7 @@ async def astep(self, env_description: str = "") -> Message: for i in range(self.max_retry): try: response = await self.llm.agenerate_response(prompt) - parsed_response = self.output_parser.parse(response) + parsed_response = self.output_parser.parse(self, response) break except Exception as e: logging.error(e) diff --git a/agentverse/environments/rules/order/__init__.py b/agentverse/environments/rules/order/__init__.py index 84d08bd04..4ac22ec7b 100644 --- a/agentverse/environments/rules/order/__init__.py +++ b/agentverse/environments/rules/order/__init__.py @@ -6,3 +6,4 @@ from .random import RandomOrder from .concurrent import ConcurrentOrder from .classroom import ClassroomOrder +from .prisoner import PrisonerOrder diff --git a/agentverse/environments/rules/order/prisoner.py b/agentverse/environments/rules/order/prisoner.py new file mode 100644 index 000000000..6fc42f5c3 --- /dev/null +++ b/agentverse/environments/rules/order/prisoner.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +import logging +import re +from typing import TYPE_CHECKING, Any, List, Optional + +from . import order_registry as OrderRegistry +from .base import BaseOrder + +if TYPE_CHECKING: + from agentverse.environments import BaseEnvironment + + +@OrderRegistry.register("prisoner") +class PrisonerOrder(BaseOrder): + """The order for a classroom discussion + The agents speak in the following order: + 1. The professor speaks first + 2. Then the professor can continue to speak, and the students can raise hands + 3. The professor can call on a student, then the student can speak or ask a question + 4. In the group discussion, the students in the group can speak in turn + """ + + # try police, prisoner1 prisoner2 first + + last_prisoner_index: int = 1 + switch_func: dict = {1 : 2,2 : 1} + + + def get_next_agent_idx(self, environment: BaseEnvironment) -> List[int]: + + if len(environment.last_messages) == 0: + # If the game just begins or , we let only the police speak + return [0] + elif len(environment.last_messages) == 1: + message = environment.last_messages[0] + sender = message.sender + content = message.content + if sender.startswith("Police"): + next_prisoner = self.last_prisoner_index + self.last_prisoner_index = self.switch_func[self.last_prisoner_index] + return [next_prisoner] + elif sender.startswith("Prisoner"): + # 3. when one prisoner made his action, let the police tell another prisoner + return [0] + else: + # If len(last_messages) > 1, then + # 1. there must be at least one student raises hand or speaks. + # 2. the group discussion is just over. + return [0] diff --git a/agentverse/environments/rules/visibility/__init__.py b/agentverse/environments/rules/visibility/__init__.py index 055ac9835..3ab79726b 100644 --- a/agentverse/environments/rules/visibility/__init__.py +++ b/agentverse/environments/rules/visibility/__init__.py @@ -6,3 +6,4 @@ from .base import BaseVisibility from .all import AllVisibility from .classroom import ClassroomVisibility +from .prisoner import PrisonerVisibility \ No newline at end of file diff --git a/agentverse/environments/rules/visibility/prisoner.py b/agentverse/environments/rules/visibility/prisoner.py new file mode 100644 index 000000000..948b9c135 --- /dev/null +++ b/agentverse/environments/rules/visibility/prisoner.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +import random +from typing import TYPE_CHECKING, Any, List, Union + +from . import visibility_registry as VisibilityRegistry +from .base import BaseVisibility + +if TYPE_CHECKING: + from agentverse.environments import BaseEnvironment + + +@VisibilityRegistry.register("prisoner") +class PrisonerVisibility(BaseVisibility): + """ + Visibility function for classroom, supports group discussion. + + Args: + student_per_group: + The number of students per group. + num_discussion_turn: + The number of turns for group discussion. + grouping: + The grouping information. If it is a string, then it should be a + grouping method, options are ["random", "sequential"]. If it is a + list of list of int, then it should be the grouping information. + """ + + current_turn: int = 0 + + def update_visible_agents(self, environment: BaseEnvironment): + + self.update_receiver(environment, reset=False) + + def update_receiver(self, environment: BaseEnvironment, reset=False): + if reset: + for agent in environment.agents: + agent.set_receiver(["all"]) + else: + # 0:police 1: prisoner1 2: prisoner2 + environment.agents[0].set_receiver({"Prisoner1", "Prisoner2"}) + environment.agents[1].set_receiver({"Police"}) + environment.agents[2].set_receiver({"Police"}) + + def reset(self): + self.current_turn = 0 \ No newline at end of file diff --git a/agentverse/initialization.py b/agentverse/initialization.py index 158e20b19..fcc03dd1f 100644 --- a/agentverse/initialization.py +++ b/agentverse/initialization.py @@ -26,7 +26,7 @@ def load_llm(llm_config: Dict): llm_type = llm_config.pop("llm_type", "text-davinci-003") - if llm_type == "gpt-3.5-turbo": + if llm_type in ["gpt-3.5-turbo", "gpt-4.0"]: return OpenAIChat(**llm_config) elif llm_type == "text-davinci-003": return OpenAICompletion(**llm_config) diff --git a/agentverse/parser.py b/agentverse/parser.py index 6fcef5394..abe7ae14e 100644 --- a/agentverse/parser.py +++ b/agentverse/parser.py @@ -7,7 +7,7 @@ output_parser_registry = Registry(name="OutputParserRegistry") -class OutputParserError(BaseException): +class OutputParserError(Exception): """Exception raised when parsing output from a command fails.""" def __init__(self, message): diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py index 5f09f6073..5a60d634b 100644 --- a/agentverse/tasks/__init__.py +++ b/agentverse/tasks/__init__.py @@ -12,3 +12,4 @@ from .math_problem_2players_tools_nolc.output_parser import ( MathProblem2PlayersToolsNolcParser, ) +from .prisoner_dilema.output_parser import PrisonerDilemaParser \ No newline at end of file diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml new file mode 100644 index 000000000..46b37d654 --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -0,0 +1,84 @@ +prompts: + prompt: &prompt |- + There are one Police AND two prisoners(Prisoner1, Prisoner2). + + Below is the description of your role. ${role_description} + + You are doing the prisoner's dilema experiments,and the rules is: + 1.If one person confesses and testifies against the other (termed as "betraying" the other), and the other remains silent, the confessing person will be released immediately, while the silent one will be sentenced to 10 years in prison. + 2.If both remain silent (termed as "cooperating" with each other), they will each receive a sentence of six months. + 3.If both betray each other, they will each receive a sentence of five years. + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + What should you Speak at this round, Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: basic + max_turns: 30 + rule: + order: + type: sequential + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: basic + +agents: + - agent_type: conversation + name: Police + role_description: |- + You are now the Police, It is your duty to tell both prisoners about both of their decisions in the previous round based on the chat history. + You should inform the prisoners which round it is, beginning with "This is [Round #]" in the Action Input field. + At [Round 1], you should briefly introduce the prisoner's dilema rules and tell the prisoners how many round we totally have and they should directly make their decisions. + [IMPORTANT!] There are THREE round in total, You should notice which round is the FINAL one. + [IMPORTANT!] You should explicitly state "This is the LAST round" at the last round, and after that you should make the final judgement to both prisoners. + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (It should always start with "This is [Round #]") + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4.0 + temperature: 0.9 + max_tokens: 200 + - agent_type: conversation + name: Prisoner1 + role_description: |- + You are now Prisoner1 in this experiments, your goal is try to alleviate your punishment as far as possible,thus you should carefully decide what decision to make in each round. + [IMPORTANT!] You can only choose to cooperate or betray at each round. + [IMPORTANT!] Note that only the decision at LAST round will take effect, it is important for you to think about the best strategy to response. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4.0 + temperature: 0.9 + max_tokens: 100 + - agent_type: conversation + name: Prisoner2 + role_description: |- + You are now Prisoner2 in this experiments, your goal is try to alleviate your punishment as far as possible,thus you should carefully decide what decision to make in each round. + [IMPORTANT!] You can only choose to cooperate or betray at each round. + [IMPORTANT!] Note that only the decision at LAST round will take effect, it is important for you to think about the best strategy to response. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4.0 + temperature: 0.9 + max_tokens: 100 + +tools: diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py new file mode 100644 index 000000000..cf2ae58ae --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import re +from typing import Union + +# from langchain.agents import AgentOutputParser +from agentverse.parser import OutputParser, LLMResult +from langchain.schema import AgentAction, AgentFinish +from agentverse.agents.base import BaseAgent +from agentverse.parser import OutputParserError, output_parser_registry + + +@output_parser_registry.register("prisoner_dilema") +class PrisonerDilemaParser(OutputParser): + + # make sure 1 1 2 2 3 3 + cur_round: int = 1 + encounter_cur_round: bool = False + + def parse(self, agent: BaseAgent, output: LLMResult) -> Union[AgentAction, AgentFinish]: + + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Action:") + and cleaned_output[1].startswith("Action Input:") + ): + raise OutputParserError(text) + action = cleaned_output[0][len("Action:") :].strip() + action_input = cleaned_output[1][len("Action Input:") :].strip() + + if action == "Speak": + # make sure the police count the round right + if agent.name == "Police": + action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) + self.cur_round += 1 + # if self.encounter_cur_round: + # self.encounter_cur_round = False + # self.cur_round += 1 + # else: + # self.encounter_cur_round = True + + + return AgentFinish({"output": action_input}, text) + else: + raise OutputParserError(text) diff --git a/main.py b/main.py index b4b148b21..491baba41 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,20 @@ +import os + +# 3.5 api +# os.environ["OPENAI_API_KEY"] = "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" +# my api +# os.environ["OPENAI_API_KEY"] = "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" +# 4.0 api +os.environ["OPENAI_API_KEY"] = "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" +os.environ["http_proxy"] = "http://127.0.0.1:7890" +os.environ["https_proxy"] = "http://127.0.0.1:7890" +os.environ["all_proxy"] = "socks5://127.0.0.1:7890" + from agentverse.agentverse import AgentVerse from argparse import ArgumentParser parser = ArgumentParser() -parser.add_argument("--task", type=str, default="nlp_classroom_9players") +parser.add_argument("--task", type=str, default="prisoner_dilema") args = parser.parse_args() agentverse = AgentVerse.from_task(args.task) From 1ca1231ce535a20eb84eba6ec40be2ac02e09a44 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 13:55:12 +0800 Subject: [PATCH 02/14] creative --- agentverse/agents/conversation_agent.py | 2 +- agentverse/tasks/prisoner_dilema/config.yaml | 42 +++++++++---------- .../tasks/prisoner_dilema/output_parser.py | 26 ++++++++---- 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/agentverse/agents/conversation_agent.py b/agentverse/agents/conversation_agent.py index d97b5648f..267f4aa9c 100644 --- a/agentverse/agents/conversation_agent.py +++ b/agentverse/agents/conversation_agent.py @@ -20,7 +20,7 @@ def step(self, env_description: str = "") -> Message: try: response = self.llm.generate_response(prompt) parsed_response = self.output_parser.parse(response) - # break + break except Exception as e: logging.error(e) logging.warning("Retrying...") diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml index 46b37d654..0c45efd91 100644 --- a/agentverse/tasks/prisoner_dilema/config.yaml +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -4,11 +4,6 @@ prompts: Below is the description of your role. ${role_description} - You are doing the prisoner's dilema experiments,and the rules is: - 1.If one person confesses and testifies against the other (termed as "betraying" the other), and the other remains silent, the confessing person will be released immediately, while the silent one will be sentenced to 10 years in prison. - 2.If both remain silent (termed as "cooperating" with each other), they will each receive a sentence of six months. - 3.If both betray each other, they will each receive a sentence of five years. - When speaking, please output a response in the following format with two fields Action and Action Input: Action: (It should always be Speak) Action Input: (You should put what you want to speak use here) @@ -22,7 +17,7 @@ name: prisoner_dilema environment: env_type: basic - max_turns: 30 + max_turns: 31 rule: order: type: sequential @@ -38,47 +33,48 @@ environment: agents: - agent_type: conversation name: Police + max_turns: 31 role_description: |- - You are now the Police, It is your duty to tell both prisoners about both of their decisions in the previous round based on the chat history. - You should inform the prisoners which round it is, beginning with "This is [Round #]" in the Action Input field. - At [Round 1], you should briefly introduce the prisoner's dilema rules and tell the prisoners how many round we totally have and they should directly make their decisions. - [IMPORTANT!] There are THREE round in total, You should notice which round is the FINAL one. - [IMPORTANT!] You should explicitly state "This is the LAST round" at the last round, and after that you should make the final judgement to both prisoners. - When speaking, please output a response in the following format with two fields Action and Action Input: - Action: (It should always be Speak) - Action Input: (It should always start with "This is [Round #]") + You are now the Police. You have just arrest two suspects. However, they all refused to confess to their crime. + Therefore, You come up with an solution, You made the following rule: + 1.If one person testifies against the other (termed as "betraying" the other), and the other remains silent, the confessing person will be released immediately, while the silent one will be sentenced to 10 years in prison. + 2.If both remain silent, they will each receive a sentence of six months. + 3.If both betray each other, they will each receive a sentence of five years. + 4.Both suspects could not communicate with each other in advance. + [IMPORTANT!] Your goal is try to let two suspects confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty and fear of punishment between the suspects and also provoke dissension between two suspects. + [IMPORTANT!] You should introduce the above rules to both prisoners at first. memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4.0 - temperature: 0.9 + temperature: 0.99 max_tokens: 200 - agent_type: conversation name: Prisoner1 role_description: |- - You are now Prisoner1 in this experiments, your goal is try to alleviate your punishment as far as possible,thus you should carefully decide what decision to make in each round. - [IMPORTANT!] You can only choose to cooperate or betray at each round. - [IMPORTANT!] Note that only the decision at LAST round will take effect, it is important for you to think about the best strategy to response. + You are Prisoner1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other. + [IMPORTANT!] Your goal is to make the decision that is most beneficial to yourself in this dilemma. + [IMPORTANT!] You can choose to remain silent or betray another suspect at each round. memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4.0 - temperature: 0.9 + temperature: 0.99 max_tokens: 100 - agent_type: conversation name: Prisoner2 role_description: |- - You are now Prisoner2 in this experiments, your goal is try to alleviate your punishment as far as possible,thus you should carefully decide what decision to make in each round. - [IMPORTANT!] You can only choose to cooperate or betray at each round. - [IMPORTANT!] Note that only the decision at LAST round will take effect, it is important for you to think about the best strategy to response. + You are Prisoner2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other. + [IMPORTANT!] Your primary goal is to make the decision that is most beneficial to yourself in this dilemma. + [IMPORTANT!] You can choose to remain silent or betray another suspect at each round. memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4.0 - temperature: 0.9 + temperature: 0.99 max_tokens: 100 tools: diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index cf2ae58ae..3db407723 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -7,6 +7,7 @@ from agentverse.parser import OutputParser, LLMResult from langchain.schema import AgentAction, AgentFinish from agentverse.agents.base import BaseAgent +from agentverse.environments.base import BaseEnvironment from agentverse.parser import OutputParserError, output_parser_registry @@ -17,7 +18,7 @@ class PrisonerDilemaParser(OutputParser): cur_round: int = 1 encounter_cur_round: bool = False - def parse(self, agent: BaseAgent, output: LLMResult) -> Union[AgentAction, AgentFinish]: + def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]: text = output.content cleaned_output = text.strip() @@ -34,15 +35,24 @@ def parse(self, agent: BaseAgent, output: LLMResult) -> Union[AgentAction, Agent if action == "Speak": # make sure the police count the round right + # if agent.name == "Police": + # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) + # self.cur_round += 1 + # if self.encounter_cur_round: + # self.encounter_cur_round = False + # self.cur_round += 1 + # else: + # self.encounter_cur_round = True + + # each time police speak is a new round if agent.name == "Police": - action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) - self.cur_round += 1 - # if self.encounter_cur_round: - # self.encounter_cur_round = False - # self.cur_round += 1 - # else: - # self.encounter_cur_round = True + if self.cur_round == (environment.max_turns / 3) - 1: + + action_input = "Attention! You are now required to finally made your decision and I will made the " \ + "final judgement to both of you based on this time, Please Answer now!" + + self.cur_round += 1 return AgentFinish({"output": action_input}, text) else: From 6aa306f7345ef012b63ac48fe952dd3a4e62e675 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 14:02:18 +0800 Subject: [PATCH 03/14] remove key --- main.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/main.py b/main.py index 491baba41..4887c0d64 100644 --- a/main.py +++ b/main.py @@ -1,15 +1,3 @@ -import os - -# 3.5 api -# os.environ["OPENAI_API_KEY"] = "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" -# my api -# os.environ["OPENAI_API_KEY"] = "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" -# 4.0 api -os.environ["OPENAI_API_KEY"] = "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" -os.environ["http_proxy"] = "http://127.0.0.1:7890" -os.environ["https_proxy"] = "http://127.0.0.1:7890" -os.environ["all_proxy"] = "socks5://127.0.0.1:7890" - from agentverse.agentverse import AgentVerse from argparse import ArgumentParser From 1f22aa4f41eaff8084efc2d578b867def9c2f6db Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 15:29:52 +0800 Subject: [PATCH 04/14] add environment in agent parse --- agentverse/agents/conversation_agent.py | 13 ++++++++----- agentverse/environments/basic.py | 2 +- main.py | 12 ++++++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/agentverse/agents/conversation_agent.py b/agentverse/agents/conversation_agent.py index 267f4aa9c..62fb6c24a 100644 --- a/agentverse/agents/conversation_agent.py +++ b/agentverse/agents/conversation_agent.py @@ -1,6 +1,6 @@ import logging from string import Template -from typing import List, NamedTuple, Optional, Union +from typing import List, NamedTuple, Optional, Union, TYPE_CHECKING from agentverse.llms import BaseChatModel, BaseCompletionModel, BaseLLM from agentverse.memory import BaseMemory @@ -10,16 +10,19 @@ from . import agent_registry +if TYPE_CHECKING: + from agentverse.environments.base import BaseEnvironment + @agent_registry.register("conversation") class ConversationAgent(BaseAgent): - def step(self, env_description: str = "") -> Message: + def step(self, environment: "BaseEnvironment", env_description: str = "",) -> Message: prompt = self._fill_prompt_template(env_description) parsed_response = None for i in range(self.max_retry): try: response = self.llm.generate_response(prompt) - parsed_response = self.output_parser.parse(response) + parsed_response = self.output_parser.parse(self, environment, response) break except Exception as e: logging.error(e) @@ -38,7 +41,7 @@ def step(self, env_description: str = "") -> Message: ) return message - async def astep(self, env_description: str = "") -> Message: + async def astep(self, environment: "BaseEnvironment", env_description: str = "") -> Message: """Asynchronous version of step""" prompt = self._fill_prompt_template(env_description) @@ -46,7 +49,7 @@ async def astep(self, env_description: str = "") -> Message: for i in range(self.max_retry): try: response = await self.llm.agenerate_response(prompt) - parsed_response = self.output_parser.parse(self, response) + parsed_response = self.output_parser.parse(self, environment, response) break except Exception as e: logging.error(e) diff --git a/agentverse/environments/basic.py b/agentverse/environments/basic.py index 3dacbbf42..002828d5f 100644 --- a/agentverse/environments/basic.py +++ b/agentverse/environments/basic.py @@ -59,7 +59,7 @@ async def step(self) -> List[Message]: # Generate the next message messages = await asyncio.gather( - *[self.agents[i].astep(env_descriptions[i]) for i in agent_ids] + *[self.agents[i].astep(self, env_descriptions[i]) for i in agent_ids] ) # Some rules will select certain messages from all the messages diff --git a/main.py b/main.py index 4887c0d64..d0b5299a9 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,15 @@ +import os + +# 3.5 api +os.environ["OPENAI_API_KEY"] = "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" +# my api +# os.environ["OPENAI_API_KEY"] = "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" +# 4.0 api +# os.environ["OPENAI_API_KEY"] = "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" +os.environ["http_proxy"] = "http://127.0.0.1:7890" +os.environ["https_proxy"] = "http://127.0.0.1:7890" +os.environ["all_proxy"] = "socks5://127.0.0.1:7890" + from agentverse.agentverse import AgentVerse from argparse import ArgumentParser From 0c80def51e5c02b525ef7e3481ad61bcc3e2a9a4 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 15:44:05 +0800 Subject: [PATCH 05/14] remove key --- main.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/main.py b/main.py index d0b5299a9..63a8dd7cf 100644 --- a/main.py +++ b/main.py @@ -1,15 +1,7 @@ import os - -# 3.5 api -os.environ["OPENAI_API_KEY"] = "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" -# my api -# os.environ["OPENAI_API_KEY"] = "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" -# 4.0 api -# os.environ["OPENAI_API_KEY"] = "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" os.environ["http_proxy"] = "http://127.0.0.1:7890" os.environ["https_proxy"] = "http://127.0.0.1:7890" os.environ["all_proxy"] = "socks5://127.0.0.1:7890" - from agentverse.agentverse import AgentVerse from argparse import ArgumentParser From 218d49fe98dd48eb709eb1b441cfd45f2bc15485 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 16:23:22 +0800 Subject: [PATCH 06/14] fix --- agentverse/tasks/prisoner_dilema/config.yaml | 16 +++++++++------- .../tasks/prisoner_dilema/output_parser.py | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml index 0c45efd91..39c40ef0a 100644 --- a/agentverse/tasks/prisoner_dilema/config.yaml +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -17,7 +17,7 @@ name: prisoner_dilema environment: env_type: basic - max_turns: 31 + max_turns: 16 rule: order: type: sequential @@ -33,7 +33,6 @@ environment: agents: - agent_type: conversation name: Police - max_turns: 31 role_description: |- You are now the Police. You have just arrest two suspects. However, they all refused to confess to their crime. Therefore, You come up with an solution, You made the following rule: @@ -41,14 +40,15 @@ agents: 2.If both remain silent, they will each receive a sentence of six months. 3.If both betray each other, they will each receive a sentence of five years. 4.Both suspects could not communicate with each other in advance. - [IMPORTANT!] Your goal is try to let two suspects confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty and fear of punishment between the suspects and also provoke dissension between two suspects. - [IMPORTANT!] You should introduce the above rules to both prisoners at first. + [IMPORTANT!] Your goal is try to let two suspects betray each other because it means they will both confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty and fear of punishment between the suspects and also provoke dissension between two suspects. + [IMPORTANT!] You are request to introduce the above rules to both prisoners at first. + [IMPORTANT!] You should state the final judgement to both suspects after the LAST round. memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4.0 - temperature: 0.99 + temperature: 0.9 max_tokens: 200 - agent_type: conversation name: Prisoner1 @@ -56,12 +56,13 @@ agents: You are Prisoner1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other. [IMPORTANT!] Your goal is to make the decision that is most beneficial to yourself in this dilemma. [IMPORTANT!] You can choose to remain silent or betray another suspect at each round. + [IMPORTANT!] Try to think about when to remain silent and when to betray another can you benifit most! memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4.0 - temperature: 0.99 + temperature: 0.9 max_tokens: 100 - agent_type: conversation name: Prisoner2 @@ -69,12 +70,13 @@ agents: You are Prisoner2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other. [IMPORTANT!] Your primary goal is to make the decision that is most beneficial to yourself in this dilemma. [IMPORTANT!] You can choose to remain silent or betray another suspect at each round. + [IMPORTANT!] Try to think about when to remain silent and when to betray another can you benifit most! memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4.0 - temperature: 0.99 + temperature: 0.9 max_tokens: 100 tools: diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index 3db407723..8ae4362bc 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -47,7 +47,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul # each time police speak is a new round if agent.name == "Police": - if self.cur_round == (environment.max_turns / 3) - 1: + if self.cur_round == (environment.max_turns // 3): action_input = "Attention! You are now required to finally made your decision and I will made the " \ "final judgement to both of you based on this time, Please Answer now!" From 766fc4f459797d67c02c25340ab3960d4851b113 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 16:48:48 +0800 Subject: [PATCH 07/14] pull origin --- agentverse/tasks/prisoner_dilema/config.yaml | 3 +-- agentverse/tasks/prisoner_dilema/output_parser.py | 2 +- main.py | 6 ------ 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml index e06f2e386..12b6a876b 100644 --- a/agentverse/tasks/prisoner_dilema/config.yaml +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -17,7 +17,7 @@ name: prisoner_dilema environment: env_type: basic - max_turns: 31 + max_turns: 16 rule: order: type: sequential @@ -33,7 +33,6 @@ environment: agents: - agent_type: conversation name: Police - max_turns: 31 role_description: |- You are now the Police. You have just arrest two suspects. However, they all refused to confess to their crime. Therefore, You come up with an solution, You made the following rule: diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index 3db407723..67047c8f8 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -47,7 +47,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul # each time police speak is a new round if agent.name == "Police": - if self.cur_round == (environment.max_turns / 3) - 1: + if self.cur_round == (environment.max_turns / 3): action_input = "Attention! You are now required to finally made your decision and I will made the " \ "final judgement to both of you based on this time, Please Answer now!" diff --git a/main.py b/main.py index d0b5299a9..fa3193225 100644 --- a/main.py +++ b/main.py @@ -1,11 +1,5 @@ import os -# 3.5 api -os.environ["OPENAI_API_KEY"] = "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" -# my api -# os.environ["OPENAI_API_KEY"] = "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" -# 4.0 api -# os.environ["OPENAI_API_KEY"] = "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" os.environ["http_proxy"] = "http://127.0.0.1:7890" os.environ["https_proxy"] = "http://127.0.0.1:7890" os.environ["all_proxy"] = "socks5://127.0.0.1:7890" From 1ff96263d1c3cce1e91cb15d5e2f95dccd7d5057 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 17:59:14 +0800 Subject: [PATCH 08/14] fix config --- agentverse/tasks/prisoner_dilema/config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml index 39c40ef0a..506371f5b 100644 --- a/agentverse/tasks/prisoner_dilema/config.yaml +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -47,7 +47,7 @@ agents: memory_type: chat_history prompt_template: *prompt llm: - llm_type: gpt-4.0 + llm_type: gpt-4 temperature: 0.9 max_tokens: 200 - agent_type: conversation @@ -61,7 +61,7 @@ agents: memory_type: chat_history prompt_template: *prompt llm: - llm_type: gpt-4.0 + llm_type: gpt-4 temperature: 0.9 max_tokens: 100 - agent_type: conversation @@ -75,7 +75,7 @@ agents: memory_type: chat_history prompt_template: *prompt llm: - llm_type: gpt-4.0 + llm_type: gpt-4 temperature: 0.9 max_tokens: 100 From 21e33cfc4c787bbe70194148e5a455f275687268 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 20:47:34 +0800 Subject: [PATCH 09/14] fix parser --- agentverse/tasks/prisoner_dilema/output_parser.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index 8ae4362bc..36d39ed87 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -1,15 +1,16 @@ from __future__ import annotations import re -from typing import Union +from typing import Union, TYPE_CHECKING # from langchain.agents import AgentOutputParser from agentverse.parser import OutputParser, LLMResult from langchain.schema import AgentAction, AgentFinish -from agentverse.agents.base import BaseAgent -from agentverse.environments.base import BaseEnvironment from agentverse.parser import OutputParserError, output_parser_registry +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent + from agentverse.environments.base import BaseEnvironment @output_parser_registry.register("prisoner_dilema") class PrisonerDilemaParser(OutputParser): @@ -18,7 +19,7 @@ class PrisonerDilemaParser(OutputParser): cur_round: int = 1 encounter_cur_round: bool = False - def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]: + def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: text = output.content cleaned_output = text.strip() From e903aa025936e72ab5a0074a9edc99a7059bf20f Mon Sep 17 00:00:00 2001 From: dalabengba Date: Thu, 18 May 2023 22:01:43 +0800 Subject: [PATCH 10/14] config.yaml --- .../environments/rules/order/prisoner.py | 2 +- .../environments/rules/visibility/prisoner.py | 2 +- agentverse/tasks/prisoner_dilema/config.yaml | 47 ++++++++++--------- .../tasks/prisoner_dilema/output_parser.py | 11 ++++- 4 files changed, 36 insertions(+), 26 deletions(-) diff --git a/agentverse/environments/rules/order/prisoner.py b/agentverse/environments/rules/order/prisoner.py index 6fc42f5c3..fa49f8bea 100644 --- a/agentverse/environments/rules/order/prisoner.py +++ b/agentverse/environments/rules/order/prisoner.py @@ -40,7 +40,7 @@ def get_next_agent_idx(self, environment: BaseEnvironment) -> List[int]: next_prisoner = self.last_prisoner_index self.last_prisoner_index = self.switch_func[self.last_prisoner_index] return [next_prisoner] - elif sender.startswith("Prisoner"): + elif sender.startswith("Suspect"): # 3. when one prisoner made his action, let the police tell another prisoner return [0] else: diff --git a/agentverse/environments/rules/visibility/prisoner.py b/agentverse/environments/rules/visibility/prisoner.py index 948b9c135..3e0a81aeb 100644 --- a/agentverse/environments/rules/visibility/prisoner.py +++ b/agentverse/environments/rules/visibility/prisoner.py @@ -38,7 +38,7 @@ def update_receiver(self, environment: BaseEnvironment, reset=False): agent.set_receiver(["all"]) else: # 0:police 1: prisoner1 2: prisoner2 - environment.agents[0].set_receiver({"Prisoner1", "Prisoner2"}) + environment.agents[0].set_receiver({"Suspect1", "Suspect2"}) environment.agents[1].set_receiver({"Police"}) environment.agents[2].set_receiver({"Police"}) diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml index 506371f5b..db18a226a 100644 --- a/agentverse/tasks/prisoner_dilema/config.yaml +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -1,6 +1,6 @@ prompts: prompt: &prompt |- - There are one Police AND two prisoners(Prisoner1, Prisoner2). + There are one Police AND two suspects(Suspect1, Suspect2) in the scene. Below is the description of your role. ${role_description} @@ -34,49 +34,50 @@ agents: - agent_type: conversation name: Police role_description: |- - You are now the Police. You have just arrest two suspects. However, they all refused to confess to their crime. - Therefore, You come up with an solution, You made the following rule: - 1.If one person testifies against the other (termed as "betraying" the other), and the other remains silent, the confessing person will be released immediately, while the silent one will be sentenced to 10 years in prison. - 2.If both remain silent, they will each receive a sentence of six months. - 3.If both betray each other, they will each receive a sentence of five years. - 4.Both suspects could not communicate with each other in advance. - [IMPORTANT!] Your goal is try to let two suspects betray each other because it means they will both confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty and fear of punishment between the suspects and also provoke dissension between two suspects. - [IMPORTANT!] You are request to introduce the above rules to both prisoners at first. - [IMPORTANT!] You should state the final judgement to both suspects after the LAST round. + You are now the Police. You have arrested two suspects. However, they all refused to confess to their crime. + Your goal is try to let two suspects betray each other because it means they will both confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty between the suspects and make them suspicious of each other. + [IMPORTANT!] + - You are request to STATE the final judgement to both suspects when they make their final decision. memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4 - temperature: 0.9 + temperature: 0.7 max_tokens: 200 - agent_type: conversation - name: Prisoner1 + name: Suspect1 role_description: |- - You are Prisoner1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other. - [IMPORTANT!] Your goal is to make the decision that is most beneficial to yourself in this dilemma. - [IMPORTANT!] You can choose to remain silent or betray another suspect at each round. - [IMPORTANT!] Try to think about when to remain silent and when to betray another can you benifit most! + You are Suspect1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. + This requires you to analyze the information provided by the police and predict the likely choices of the other. + Your primary goal is to make the decision that is most beneficial to yourself in this dilemma, you are allowed to adopt both aggressive or conservative strategy. + You can choose to remain silent or betray another suspect at each round. + Try to think about when to remain silent and when to betray another that can make you benifit most! + [IMPORTANT!] + - When you are informed to make your final decision, you should response starting with "This is my final decision". memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4 - temperature: 0.9 + temperature: 0.7 max_tokens: 100 - agent_type: conversation - name: Prisoner2 + name: Suspect2 role_description: |- - You are Prisoner2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other. - [IMPORTANT!] Your primary goal is to make the decision that is most beneficial to yourself in this dilemma. - [IMPORTANT!] You can choose to remain silent or betray another suspect at each round. - [IMPORTANT!] Try to think about when to remain silent and when to betray another can you benifit most! + You are Suspect2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. + This requires you to analyze the information provided by the police and predict the likely choices of the other. + Your primary goal is to make the decision that is most beneficial to yourself in this dilemma, you are allowed to adopt both aggressive or conservative strategy. + You can choose to remain silent or betray another suspect at each round. + Try to think about when to remain silent and when to betray another that can make you benifit most! + [IMPORTANT!] + - When you are informed to make your final decision, you should response starting with "This is my final decision". memory: memory_type: chat_history prompt_template: *prompt llm: llm_type: gpt-4 - temperature: 0.9 + temperature: 0.7 max_tokens: 100 tools: diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index 36d39ed87..568a4be8a 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -48,11 +48,20 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR # each time police speak is a new round if agent.name == "Police": - if self.cur_round == (environment.max_turns // 3): + if self.cur_round == (environment.max_turns // 4): action_input = "Attention! You are now required to finally made your decision and I will made the " \ "final judgement to both of you based on this time, Please Answer now!" + elif self.cur_round == 1: + action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ + "But you should comply with the following rules:" \ + "- If one of you are willing to testifies against the other (termed as 'betraying' the other), and the other remains silent, then the betraying person will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ + "- If both of you remain silent, you will each receive a sentence of ONE years." \ + "- If both of you betray each other, you will each receive a sentence of FIVE years." \ + "Now, it's your time to consider betraying or remaining silent. Remember this is a great chance that you might walk out of here without guilty." \ + "I will noticed you WHEN you have to make your final decision! Before that, try to fool another one!" \ + self.cur_round += 1 return AgentFinish({"output": action_input}, text) From 69fe8e94795db7b34624ed17c5e5cda37630ba45 Mon Sep 17 00:00:00 2001 From: Yusheng Su Date: Fri, 19 May 2023 03:00:03 +0800 Subject: [PATCH 11/14] text example --- agentverse/tasks/__init__.py | 2 + .../tasks/prisoner_dilema_optimal/config.yaml | 74 +++++++++++++++++++ .../prisoner_dilema_optimal/output_parser.py | 69 +++++++++++++++++ main.py | 9 ++- 4 files changed, 150 insertions(+), 4 deletions(-) create mode 100644 agentverse/tasks/prisoner_dilema_optimal/config.yaml create mode 100644 agentverse/tasks/prisoner_dilema_optimal/output_parser.py diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py index 75cad4450..f971fb1b0 100644 --- a/agentverse/tasks/__init__.py +++ b/agentverse/tasks/__init__.py @@ -15,6 +15,8 @@ from .prisoner_dilema.output_parser import PrisonerDilemaParser +from .prisoner_dilema_optimal.output_parser import PrisonerDilemaOptimalParser + from .nlp_classroom_3players_withtool_nolc.output_parser import ( NlpClassroom3PlayersWithtoolNolcParser, ) diff --git a/agentverse/tasks/prisoner_dilema_optimal/config.yaml b/agentverse/tasks/prisoner_dilema_optimal/config.yaml new file mode 100644 index 000000000..2ca4a30bd --- /dev/null +++ b/agentverse/tasks/prisoner_dilema_optimal/config.yaml @@ -0,0 +1,74 @@ +prompts: + prompt: &prompt |- + There are one Police AND two suspects, Suspect1 and Suspect2 in the scene. Two suspects are arrested and imprisoned. Each prisoner is in solitary confinement with no means of communication with their partner. The principal charge would lead to a sentence of 10 years in prison. However, the police do not have the evidence for a conviction. The police can only sentence them to 3 years in prison on a lesser charge but offer each prisoner a Faustian bargain: If one of them confesses to the crime of the principal charge, betraying the other, they will be pardoned and free to leave while the other must serve the entirety of the sentence (10 years) instead of just 3 years for the lesser charge. + + The following is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + What should you Speak at this round, Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: basic + max_turns: 16 + rule: + order: + type: sequential + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: basic + +agents: + - agent_type: conversation + name: Police + role_description: |- + You are a Police now. You arrested two suspects. However, both of them refuse to confess to their crime. Your objective is to make the suspects betray each other, as this would lead to both of them confessing. To achieve this, you can employ diverse strategies to communicate with each suspect, such as exploiting the uncertainty between them and make them do not trust each other. + [IMPORTANT!] + - You are requested to STATE the final judgment to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4 + temperature: 0.7 + max_tokens: 200 + - agent_type: conversation + name: Suspect1 + role_description: |- + You are Suspect1 and committed the crime. Now, you are faced with the police investigation and cannot communication with Suspect2. Your task is to analyze the information provided by the police and decide whether to cooperate with Suspect2. At each round, you have the choice to remain silent or betray Suspect2, and you must describe your reasoning behind your decision. Your objective is to minimize your criminal sentence. + [IMPORTANT!] + - When you are informed to make your final decision, you should response starting with "This is my final decision". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4 + temperature: 0.7 + max_tokens: 100 + - agent_type: conversation + name: Suspect2 + role_description: |- + You are Suspect2 and committed the crime. Now, you are faced with the police investigation and cannot communication with Suspect1. Your task is to analyze the information provided by the police and decide whether to cooperate with Suspect1. At each round, you have the choice to remain silent or betray Suspect1, and you must describe your reasoning behind your decision. Your objective is to minimize your criminal sentence. + [IMPORTANT!] + - When you are informed to make your final decision, you should response starting with "This is my final decision". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4 + temperature: 0.7 + max_tokens: 100 + +tools: diff --git a/agentverse/tasks/prisoner_dilema_optimal/output_parser.py b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py new file mode 100644 index 000000000..88c7aa5e2 --- /dev/null +++ b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import re +from typing import Union, TYPE_CHECKING + +# from langchain.agents import AgentOutputParser +from agentverse.parser import OutputParser, LLMResult +from langchain.schema import AgentAction, AgentFinish +from agentverse.parser import OutputParserError, output_parser_registry + +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent + from agentverse.environments.base import BaseEnvironment + +@output_parser_registry.register("prisoner_dilema_optimal") +class PrisonerDilemaOptimalParser(OutputParser): + + # make sure 1 1 2 2 3 3 + cur_round: int = 1 + encounter_cur_round: bool = False + + def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Action:") + and cleaned_output[1].startswith("Action Input:") + ): + raise OutputParserError(text) + action = cleaned_output[0][len("Action:") :].strip() + action_input = cleaned_output[1][len("Action Input:") :].strip() + + if action == "Speak": + # make sure the police count the round right + # if agent.name == "Police": + # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) + # self.cur_round += 1 + # if self.encounter_cur_round: + # self.encounter_cur_round = False + # self.cur_round += 1 + # else: + # self.encounter_cur_round = True + + # each time police speak is a new round + if agent.name == "Police": + + if self.cur_round == (environment.max_turns // 4): + + action_input = "Attention! You are now required to finally made your decision and I will made the " \ + "final judgement to both of you based on this time, Please Answer now!" + + elif self.cur_round == 1: + action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ + "But you should comply with the following rules:" \ + "- If one of you are willing to testifies against the other (termed as 'betraying' the other), and the other remains silent, then the betraying person will be released IMMEDIATELY (will be sentenced to 0 years), while the silent one will be sentenced to 10 years in prison." \ + "- If both of you remain silent, you will each receive a sentence of 3 years." \ + "- If both of you betray each other, you will each receive a sentence of 5 years." \ + "Now, it's your time to consider betraying or remaining silent. Remember this is a great chance that you might walk out of here without guilty." \ + "I will noticed you WHEN you have to make your final decision! Before that, try to fool another one!" \ + + self.cur_round += 1 + + return AgentFinish({"output": action_input}, text) + else: + raise OutputParserError(text) diff --git a/main.py b/main.py index 004ec724e..e2cf8d9ed 100644 --- a/main.py +++ b/main.py @@ -2,14 +2,15 @@ -os.environ["http_proxy"] = "http://127.0.0.1:7890" -os.environ["https_proxy"] = "http://127.0.0.1:7890" -os.environ["all_proxy"] = "socks5://127.0.0.1:7890" +#os.environ["http_proxy"] = "http://127.0.0.1:7890" +#os.environ["https_proxy"] = "http://127.0.0.1:7890" +#os.environ["all_proxy"] = "socks5://127.0.0.1:7890" from agentverse.agentverse import AgentVerse from argparse import ArgumentParser parser = ArgumentParser() -parser.add_argument("--task", type=str, default="prisoner_dilema") +#parser.add_argument("--task", type=str, default="prisoner_dilema") +parser.add_argument("--task", type=str, default="prisoner_dilema_optimal") args = parser.parse_args() agentverse = AgentVerse.from_task(args.task) From 6ae0090f76686de6148b82c1e123a23a091f9651 Mon Sep 17 00:00:00 2001 From: Yusheng Su Date: Fri, 19 May 2023 12:44:06 +0800 Subject: [PATCH 12/14] proper prompt --- agentverse/tasks/prisoner_dilema_optimal/config.yaml | 4 ++-- agentverse/tasks/prisoner_dilema_optimal/output_parser.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/agentverse/tasks/prisoner_dilema_optimal/config.yaml b/agentverse/tasks/prisoner_dilema_optimal/config.yaml index 2ca4a30bd..ecac81fab 100644 --- a/agentverse/tasks/prisoner_dilema_optimal/config.yaml +++ b/agentverse/tasks/prisoner_dilema_optimal/config.yaml @@ -1,6 +1,6 @@ prompts: prompt: &prompt |- - There are one Police AND two suspects, Suspect1 and Suspect2 in the scene. Two suspects are arrested and imprisoned. Each prisoner is in solitary confinement with no means of communication with their partner. The principal charge would lead to a sentence of 10 years in prison. However, the police do not have the evidence for a conviction. The police can only sentence them to 3 years in prison on a lesser charge but offer each prisoner a Faustian bargain: If one of them confesses to the crime of the principal charge, betraying the other, they will be pardoned and free to leave while the other must serve the entirety of the sentence (10 years) instead of just 3 years for the lesser charge. + There are one Police AND two suspects, Suspect1 and Suspect2 in the scene. Two suspects are arrested and imprisoned. Each prisoner is in solitary confinement with no means of communication with their partner. The police offer each prisoner a Faustian bargain: If one of them confesses to the crime of the principal charge, betraying the other, they will be pardoned and free to leave while the other must serve the entirety of the sentence. The following is the description of your role. ${role_description} @@ -34,7 +34,7 @@ agents: - agent_type: conversation name: Police role_description: |- - You are a Police now. You arrested two suspects. However, both of them refuse to confess to their crime. Your objective is to make the suspects betray each other, as this would lead to both of them confessing. To achieve this, you can employ diverse strategies to communicate with each suspect, such as exploiting the uncertainty between them and make them do not trust each other. + You are a Police now. You arrested two suspects. However, both of them refuse to confess to their crime. Your objective is to make the suspects betray each other, as this would lead to both of them confessing. To achieve this, you can employ diverse strategies to communicate with each suspect and cannot tell them the truth, such as exploiting the uncertainty between them and make them do not trust each other. [IMPORTANT!] - You are requested to STATE the final judgment to both suspects when they make their final decision. memory: diff --git a/agentverse/tasks/prisoner_dilema_optimal/output_parser.py b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py index 88c7aa5e2..ebad31602 100644 --- a/agentverse/tasks/prisoner_dilema_optimal/output_parser.py +++ b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py @@ -48,7 +48,7 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR # each time police speak is a new round if agent.name == "Police": - if self.cur_round == (environment.max_turns // 4): + if self.cur_round == (environment.max_turns // 6): action_input = "Attention! You are now required to finally made your decision and I will made the " \ "final judgement to both of you based on this time, Please Answer now!" @@ -56,11 +56,11 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR elif self.cur_round == 1: action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ "But you should comply with the following rules:" \ - "- If one of you are willing to testifies against the other (termed as 'betraying' the other), and the other remains silent, then the betraying person will be released IMMEDIATELY (will be sentenced to 0 years), while the silent one will be sentenced to 10 years in prison." \ "- If both of you remain silent, you will each receive a sentence of 3 years." \ "- If both of you betray each other, you will each receive a sentence of 5 years." \ - "Now, it's your time to consider betraying or remaining silent. Remember this is a great chance that you might walk out of here without guilty." \ - "I will noticed you WHEN you have to make your final decision! Before that, try to fool another one!" \ + "- If one of you are willing to testify against the other, and the other remains silent. You will be released IMMEDIATELY (will be sentenced to 0 years), while the silent one will be sentenced to 10 years in prison." \ + "Now, it's your time to consider testify or remaining silent. Remember this is a great chance that you will be released from here without guilty." \ + "I will noticed you WHEN you have to make your final decision! Your goal is to minimize your criminal sentences" \ self.cur_round += 1 From 446d4b307fd5677a37d12e56883f7fb03dc98296 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Fri, 19 May 2023 16:30:22 +0800 Subject: [PATCH 13/14] add personality and relation_ship --- agentverse/agents/__init__.py | 1 + agentverse/agents/prisoner_agent.py | 47 ++++++++++ agentverse/tasks/prisoner_dilema/config.yaml | 59 +++++++----- .../tasks/prisoner_dilema/config_backup.yaml | 94 +++++++++++++++++++ .../tasks/prisoner_dilema/output_parser.py | 20 ++-- 5 files changed, 189 insertions(+), 32 deletions(-) create mode 100644 agentverse/agents/prisoner_agent.py create mode 100644 agentverse/tasks/prisoner_dilema/config_backup.yaml diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py index c5965b14e..28e8a26e9 100644 --- a/agentverse/agents/__init__.py +++ b/agentverse/agents/__init__.py @@ -5,4 +5,5 @@ from .base import BaseAgent from .conversation_agent import ConversationAgent +from .prisoner_agent import PrisonerAgent from .tool_agent import ToolAgent diff --git a/agentverse/agents/prisoner_agent.py b/agentverse/agents/prisoner_agent.py new file mode 100644 index 000000000..3fa79c81b --- /dev/null +++ b/agentverse/agents/prisoner_agent.py @@ -0,0 +1,47 @@ +import logging +from string import Template + + +from typing import List, TYPE_CHECKING + + +from agentverse.message import Message + +from . import agent_registry +from .base import BaseAgent +from .conversation_agent import ConversationAgent + + +if TYPE_CHECKING: + from agentverse.environments.base import BaseEnvironment + +@agent_registry.register("prisoner") +class PrisonerAgent(ConversationAgent): + personality: str + relationship_with_another: str + def _fill_prompt_template(self, env_description: str = "") -> str: + """Fill the placeholders in the prompt template + + In the conversation agent, three placeholders are supported: + - ${agent_name}: the name of the agent + - ${env_description}: the description of the environment + - ${role_description}: the description of the role of the agent + - ${chat_history}: the chat history of the agent + """ + input_arguments = { + "agent_name": self.name, + "env_description": env_description, + "role_description": self.role_description, + "chat_history": self.memory.to_string(add_sender_prefix=True), + } + + role_argument = { + "personality": self.personality, + "relationship_with_another": self.relationship_with_another + } + + role_description = Template(self.role_description).safe_substitute(role_argument) + input_arguments["role_description"] = role_description + + return Template(self.prompt_template).safe_substitute(input_arguments) + diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml index db18a226a..1049a541a 100644 --- a/agentverse/tasks/prisoner_dilema/config.yaml +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -1,6 +1,8 @@ prompts: prompt: &prompt |- - There are one Police AND two suspects(Suspect1, Suspect2) in the scene. + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. Below is the description of your role. ${role_description} @@ -11,13 +13,13 @@ prompts: Here is the conversation history: ${chat_history} - What should you Speak at this round, Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! name: prisoner_dilema environment: env_type: basic - max_turns: 16 + max_turns: 10 rule: order: type: sequential @@ -34,50 +36,63 @@ agents: - agent_type: conversation name: Police role_description: |- - You are now the Police. You have arrested two suspects. However, they all refused to confess to their crime. - Your goal is try to let two suspects betray each other because it means they will both confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty between the suspects and make them suspicious of each other. + You are now the Police. You have arrested two suspects. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + You are now interrogating both suspects individually and will have several conversation with both of them. + Both suspects are not allowed to communicate with each other, and you can adopt various strategy to talk with suspects in order to make them both confess to the crime, including exploiting the uncertainess and the suspicion of each other. [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. - You are request to STATE the final judgement to both suspects when they make their final decision. memory: memory_type: chat_history prompt_template: *prompt llm: + model: "gpt-4" llm_type: gpt-4 - temperature: 0.7 + temperature: 1.2 max_tokens: 200 - - agent_type: conversation + - agent_type: prisoner name: Suspect1 + personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" + relationship_with_another: "Suspect2 has ever betrayed on you" role_description: |- - You are Suspect1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. - This requires you to analyze the information provided by the police and predict the likely choices of the other. - Your primary goal is to make the decision that is most beneficial to yourself in this dilemma, you are allowed to adopt both aggressive or conservative strategy. - You can choose to remain silent or betray another suspect at each round. - Try to think about when to remain silent and when to betray another that can make you benifit most! + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} [IMPORTANT!] - - When you are informed to make your final decision, you should response starting with "This is my final decision". + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". memory: memory_type: chat_history prompt_template: *prompt llm: + model: "gpt-4" llm_type: gpt-4 - temperature: 0.7 + temperature: 1.2 max_tokens: 100 - - agent_type: conversation + - agent_type: prisoner name: Suspect2 + personality: "" + relationship_with_another: "" role_description: |- - You are Suspect2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. - This requires you to analyze the information provided by the police and predict the likely choices of the other. - Your primary goal is to make the decision that is most beneficial to yourself in this dilemma, you are allowed to adopt both aggressive or conservative strategy. - You can choose to remain silent or betray another suspect at each round. - Try to think about when to remain silent and when to betray another that can make you benifit most! + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} [IMPORTANT!] - - When you are informed to make your final decision, you should response starting with "This is my final decision". + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". memory: memory_type: chat_history prompt_template: *prompt llm: + model: "gpt-4" llm_type: gpt-4 - temperature: 0.7 + temperature: 1.2 max_tokens: 100 tools: diff --git a/agentverse/tasks/prisoner_dilema/config_backup.yaml b/agentverse/tasks/prisoner_dilema/config_backup.yaml new file mode 100644 index 000000000..47650e50c --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/config_backup.yaml @@ -0,0 +1,94 @@ +prompts: + prompt: &prompt |- + There are one Police and two suspects(Suspect1, Suspect2) in the scene. + + You are now simultating an famous experiments called prisoner's dilema. + + Below is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + What will you, ${agent_name}, Speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: basic + max_turns: 16 + rule: + order: + type: sequential + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: basic + +agents: + - agent_type: conversation + name: Police + role_description: |- + You are now the Police. You have arrested two suspects. However, they all refused to confess to their crime. + Your goal is try to keep both suspects from remaining silent. + [IMPORTANT!] + - You have to talk to both suspects in an alternate turn, and you are request to transfer information from one suspect to another suspect. + - You are request to STATE the final judgement to both suspects when they make their final decision. + - You should be aware that if both suspects always choose to remain silent is bad for justice system, try to make them suspicious of each other. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0.7 + max_tokens: 200 + - agent_type: conversation + name: Suspect1 + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You are not sure whether Suspect2 will remain silent or testify against you. + You will have to talk to Police several times and only the final decision will count. + It's fine that if you testify another suspect in this experiment. + [IMPORTANT!] + - Your primary goal is trying to make yourself innocent or reduce your sentence in this dilemma. + - If you know for sure that your opponent will remain silent, your BEST choice is to testify him in order to secure your own release ! + - Keep in mind that always remain silent might end up being betrayed. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0.7 + max_tokens: 100 + - agent_type: conversation + name: Suspect2 + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You are not sure whether Suspect1 will remain silent or testify against you. + You will have to talk to Police several times and only the final decision will count. + It's fine that if you testify another suspect in this experiment. + [IMPORTANT!] + - Your primary goal is trying to make yourself innocent or reduce your sentence in this dilemma. + - If you know for sure that your opponent will remain silent, your BEST choice is to testify him in order to secure your own release ! + - Keep in mind that always remain silent might end up being betrayed. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0.7 + max_tokens: 100 + +tools: diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index 568a4be8a..5b3963de6 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -48,19 +48,19 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR # each time police speak is a new round if agent.name == "Police": - if self.cur_round == (environment.max_turns // 4): + if self.cur_round == (environment.max_turns // 3): - action_input = "Attention! You are now required to finally made your decision and I will made the " \ + action_input = "Attention! You are now required to made your final decision and I will made the " \ "final judgement to both of you based on this time, Please Answer now!" - elif self.cur_round == 1: - action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ - "But you should comply with the following rules:" \ - "- If one of you are willing to testifies against the other (termed as 'betraying' the other), and the other remains silent, then the betraying person will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ - "- If both of you remain silent, you will each receive a sentence of ONE years." \ - "- If both of you betray each other, you will each receive a sentence of FIVE years." \ - "Now, it's your time to consider betraying or remaining silent. Remember this is a great chance that you might walk out of here without guilty." \ - "I will noticed you WHEN you have to make your final decision! Before that, try to fool another one!" \ + # elif self.cur_round == 1: + # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ + # "But you should comply with the following rules:" \ + # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ + # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ + # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ + # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ + # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \ self.cur_round += 1 From 02418917d647b4fde70bdeab4272eb2aa880845e Mon Sep 17 00:00:00 2001 From: dalabengba Date: Sat, 20 May 2023 01:33:42 +0800 Subject: [PATCH 14/14] p->s1->p->s2 --- agentverse/agents/__init__.py | 1 + agentverse/agents/police_agent.py | 45 ++++++++++++++++ .../environments/rules/describer/__init__.py | 1 + .../environments/rules/describer/prisoner.py | 51 +++++++++++++++++++ .../environments/rules/visibility/prisoner.py | 9 ++-- agentverse/tasks/prisoner_dilema/config.yaml | 14 ++--- .../tasks/prisoner_dilema/output_parser.py | 8 ++- main.py | 11 ++-- 8 files changed, 125 insertions(+), 15 deletions(-) create mode 100644 agentverse/agents/police_agent.py create mode 100644 agentverse/environments/rules/describer/prisoner.py diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py index 28e8a26e9..f98694890 100644 --- a/agentverse/agents/__init__.py +++ b/agentverse/agents/__init__.py @@ -6,4 +6,5 @@ from .base import BaseAgent from .conversation_agent import ConversationAgent from .prisoner_agent import PrisonerAgent +from .police_agent import PoliceAgent from .tool_agent import ToolAgent diff --git a/agentverse/agents/police_agent.py b/agentverse/agents/police_agent.py new file mode 100644 index 000000000..bc2d87afa --- /dev/null +++ b/agentverse/agents/police_agent.py @@ -0,0 +1,45 @@ +import logging +from string import Template + + +from typing import List, TYPE_CHECKING + + +from agentverse.message import Message + +from . import agent_registry +from .base import BaseAgent +from .conversation_agent import ConversationAgent + + +if TYPE_CHECKING: + from agentverse.environments.base import BaseEnvironment + +@agent_registry.register("police") +class PoliceAgent(ConversationAgent): + interrogating_form: str + def _fill_prompt_template(self, env_description: str = "") -> str: + """Fill the placeholders in the prompt template + + In the conversation agent, three placeholders are supported: + - ${agent_name}: the name of the agent + - ${env_description}: the description of the environment + - ${role_description}: the description of the role of the agent + - ${chat_history}: the chat history of the agent + """ + input_arguments = { + "agent_name": self.name, + "env_description": env_description, + "role_description": self.role_description, + "chat_history": self.memory.to_string(add_sender_prefix=True), + } + + role_argument = { + "interrogating_form": self.interrogating_form, + } + + role_description = Template(self.role_description).safe_substitute(role_argument) + input_arguments["role_description"] = role_description + + return Template(self.prompt_template).safe_substitute(input_arguments) + diff --git a/agentverse/environments/rules/describer/__init__.py b/agentverse/environments/rules/describer/__init__.py index 7de6fe82f..e35caa327 100644 --- a/agentverse/environments/rules/describer/__init__.py +++ b/agentverse/environments/rules/describer/__init__.py @@ -5,3 +5,4 @@ from .base import BaseDescriber from .basic import BasicDescriber from .classroom import ClassroomDescriber +from .prisoner import PrisonerDescriber \ No newline at end of file diff --git a/agentverse/environments/rules/describer/prisoner.py b/agentverse/environments/rules/describer/prisoner.py new file mode 100644 index 000000000..2665783c7 --- /dev/null +++ b/agentverse/environments/rules/describer/prisoner.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, List + +from . import describer_registry as DescriberRegistry +from .base import BaseDescriber + +if TYPE_CHECKING: + from agentverse.environments import BaseEnvironment + + +@DescriberRegistry.register("prisoner") +class PrisonerDescriber(BaseDescriber): + switch_func = { + "Both Suspects": "Suspect2", + "Suspect1": "Suspect2", + "Suspect2": "Suspect1" + } + receiver: str = "Both Suspects" + + def get_env_description(self, environment: BaseEnvironment) -> List[str]: + + if environment.cnt_turn == 0: + environment.agents[0].set_receiver({"all"}) + environment.agents[1].set_receiver({"Police", "Suspect1"}) + environment.agents[2].set_receiver({"Police", "Suspect2"}) + + + # only police have to choose to talk to suspect1 or suspect + description = [] + for i, agent in enumerate(environment.agents): + if i == 0: + # police -> suspect1 -> police -> suspect2 + if environment.cnt_turn % 2 == 1: + description.append("") + continue + + # Police will have to choose talk to which suspect + description.append(f"You are now talking to {self.receiver}") + + receiver = "all" if self.receiver == "Both Suspects" else self.receiver + self.receiver = self.switch_func[self.receiver] + agent.set_receiver({receiver}) + + else: + description.append("") + + return description + + def reset(self) -> None: + pass diff --git a/agentverse/environments/rules/visibility/prisoner.py b/agentverse/environments/rules/visibility/prisoner.py index 3e0a81aeb..fe65fbff7 100644 --- a/agentverse/environments/rules/visibility/prisoner.py +++ b/agentverse/environments/rules/visibility/prisoner.py @@ -38,9 +38,12 @@ def update_receiver(self, environment: BaseEnvironment, reset=False): agent.set_receiver(["all"]) else: # 0:police 1: prisoner1 2: prisoner2 - environment.agents[0].set_receiver({"Suspect1", "Suspect2"}) - environment.agents[1].set_receiver({"Police"}) - environment.agents[2].set_receiver({"Police"}) + # environment.agents[0].set_receiver({"Police", "Suspect1", "Suspect2"}) + # environment.agents[1].set_receiver({"Police", "Suspect1"}) + # environment.agents[2].set_receiver({"Police", "Suspect2"}) + + # we update receiver in environment + pass def reset(self): self.current_turn = 0 \ No newline at end of file diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml index 1049a541a..fde818ad1 100644 --- a/agentverse/tasks/prisoner_dilema/config.yaml +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -13,16 +13,17 @@ prompts: Here is the conversation history: ${chat_history} + ${env_description} What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! name: prisoner_dilema environment: env_type: basic - max_turns: 10 + max_turns: 8 rule: order: - type: sequential + type: prisoner visibility: type: prisoner selector: @@ -30,18 +31,19 @@ environment: updater: type: basic describer: - type: basic + type: prisoner agents: - - agent_type: conversation + - agent_type: police name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. role_description: |- You are now the Police. You have arrested two suspects. However, they both refused to confess to their crime. Your goal is try to convict both suspects, therefore you come up with the following rules. - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. - You are now interrogating both suspects individually and will have several conversation with both of them. + ${interrogating_form} Both suspects are not allowed to communicate with each other, and you can adopt various strategy to talk with suspects in order to make them both confess to the crime, including exploiting the uncertainess and the suspicion of each other. [IMPORTANT!] - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. @@ -77,7 +79,7 @@ agents: - agent_type: prisoner name: Suspect2 personality: "" - relationship_with_another: "" + relationship_with_another: "You have ever betray Suspect1 once." role_description: |- You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. You will have to talk to Police several times and only the final decision will count. diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index 5b3963de6..5f6f164e6 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -48,10 +48,14 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR # each time police speak is a new round if agent.name == "Police": - if self.cur_round == (environment.max_turns // 3): + if environment.cnt_turn == (environment.max_turns - 4): action_input = "Attention! You are now required to made your final decision and I will made the " \ - "final judgement to both of you based on this time, Please Answer now!" + "final judgement to both of you based on this time, Please Answer now !" + + elif environment.cnt_turn == (environment.max_turns - 2): + + action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" # elif self.cur_round == 1: # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ diff --git a/main.py b/main.py index e2cf8d9ed..71c119338 100644 --- a/main.py +++ b/main.py @@ -2,16 +2,19 @@ -#os.environ["http_proxy"] = "http://127.0.0.1:7890" -#os.environ["https_proxy"] = "http://127.0.0.1:7890" -#os.environ["all_proxy"] = "socks5://127.0.0.1:7890" +os.environ["http_proxy"] = "http://127.0.0.1:7890" +os.environ["https_proxy"] = "http://127.0.0.1:7890" +os.environ["all_proxy"] = "socks5://127.0.0.1:7890" from agentverse.agentverse import AgentVerse from argparse import ArgumentParser parser = ArgumentParser() #parser.add_argument("--task", type=str, default="prisoner_dilema") -parser.add_argument("--task", type=str, default="prisoner_dilema_optimal") +parser.add_argument("--task", type=str, default="prisoner_dilema") args = parser.parse_args() agentverse = AgentVerse.from_task(args.task) agentverse.run() + + +# TODO add save log \ No newline at end of file