From 00a396f5b6d017f8f0b195e336e1cd72418a3994 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 11:57:48 +0800
Subject: [PATCH 01/14] Add basic prisoner's dilemma task (not finished yet)

---
 .idea/AgentVerse.iml                          |  8 ++
 .../inspectionProfiles/profiles_settings.xml  |  6 ++
 .idea/modules.xml                             |  8 ++
 agentverse/.idea/agentverse.iml               |  8 ++
 .../inspectionProfiles/profiles_settings.xml  |  6 ++
 agentverse/.idea/modules.xml                  |  8 ++
 agentverse/agents/conversation_agent.py       |  4 +-
 .../environments/rules/order/__init__.py      |  1 +
 .../environments/rules/order/prisoner.py      | 50 +++++++++++
 .../environments/rules/visibility/__init__.py |  1 +
 .../environments/rules/visibility/prisoner.py | 46 ++++++++++
 agentverse/initialization.py                  |  2 +-
 agentverse/parser.py                          |  2 +-
 agentverse/tasks/__init__.py                  |  1 +
 agentverse/tasks/prisoner_dilema/config.yaml  | 84 +++++++++++++++++++
 .../tasks/prisoner_dilema/output_parser.py    | 49 +++++++++++
 main.py                                       | 14 +++-
 17 files changed, 293 insertions(+), 5 deletions(-)
 create mode 100644 .idea/AgentVerse.iml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 agentverse/.idea/agentverse.iml
 create mode 100644 agentverse/.idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 agentverse/.idea/modules.xml
 create mode 100644 agentverse/environments/rules/order/prisoner.py
 create mode 100644 agentverse/environments/rules/visibility/prisoner.py
 create mode 100644 agentverse/tasks/prisoner_dilema/config.yaml
 create mode 100644 agentverse/tasks/prisoner_dilema/output_parser.py
diff --git a/.idea/AgentVerse.iml b/.idea/AgentVerse.iml
new file mode 100644
index 000000000..5e865eca1
--- /dev/null
+++ b/.idea/AgentVerse.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="3.9 @ Ubuntu-20.04" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 000000000..105ce2da2
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 000000000..d05e5b486
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/AgentVerse.iml" filepath="$PROJECT_DIR$/.idea/AgentVerse.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/agentverse/.idea/agentverse.iml b/agentverse/.idea/agentverse.iml
new file mode 100644
index 000000000..d0876a78d
--- /dev/null
+++ b/agentverse/.idea/agentverse.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/agentverse/.idea/inspectionProfiles/profiles_settings.xml b/agentverse/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 000000000..105ce2da2
--- /dev/null
+++ b/agentverse/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/agentverse/.idea/modules.xml b/agentverse/.idea/modules.xml
new file mode 100644
index 000000000..364986d59
--- /dev/null
+++ b/agentverse/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/agentverse.iml" filepath="$PROJECT_DIR$/.idea/agentverse.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/agentverse/agents/conversation_agent.py b/agentverse/agents/conversation_agent.py
index ca9608ec1..d97b5648f 100644
--- a/agentverse/agents/conversation_agent.py
+++ b/agentverse/agents/conversation_agent.py
@@ -20,7 +20,7 @@ def step(self, env_description: str = "") -> Message:
             try:
                 response = self.llm.generate_response(prompt)
                 parsed_response = self.output_parser.parse(response)
-                break
+                # break
             except Exception as e:
                 logging.error(e)
                 logging.warning("Retrying...")
@@ -46,7 +46,7 @@ async def astep(self, env_description: str = "") -> Message:
         for i in range(self.max_retry):
             try:
                 response = await self.llm.agenerate_response(prompt)
-                parsed_response = self.output_parser.parse(response)
+                parsed_response = self.output_parser.parse(self, response)
                 break
             except Exception as e:
                 logging.error(e)
diff --git a/agentverse/environments/rules/order/__init__.py b/agentverse/environments/rules/order/__init__.py
index 84d08bd04..4ac22ec7b 100644
--- a/agentverse/environments/rules/order/__init__.py
+++ b/agentverse/environments/rules/order/__init__.py
@@ -6,3 +6,4 @@
 from .random import RandomOrder
 from .concurrent import ConcurrentOrder
 from .classroom import ClassroomOrder
+from .prisoner import PrisonerOrder
diff --git a/agentverse/environments/rules/order/prisoner.py b/agentverse/environments/rules/order/prisoner.py
new file mode 100644
index 000000000..6fc42f5c3
--- /dev/null
+++ b/agentverse/environments/rules/order/prisoner.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import logging
+import re
+from typing import TYPE_CHECKING, Any, List, Optional
+
+from . import order_registry as OrderRegistry
+from .base import BaseOrder
+
+if TYPE_CHECKING:
+    from agentverse.environments import BaseEnvironment
+
+
+@OrderRegistry.register("prisoner")
+class PrisonerOrder(BaseOrder):
+    """The speaking order for the prisoner's dilemma task.
+    The agents speak in the following order:
+    1. The police (agent 0) speaks first
+    2. After the police speaks, exactly one prisoner (agent 1 or agent 2) replies
+    3. After a prisoner replies, the police speaks again
+    4. The replying prisoner alternates each time: agent 1 first, then 2, then 1, ...
+    """
+
+    # try police, prisoner1 prisoner2 first
+
+    last_prisoner_index: int = 1
+    switch_func: dict = {1 : 2,2 : 1}
+
+
+    def get_next_agent_idx(self, environment: BaseEnvironment) -> List[int]:
+
+        if len(environment.last_messages) == 0:
+            # If the game has just begun (no messages yet), only the police speaks
+            return [0]
+        elif len(environment.last_messages) == 1:
+            message = environment.last_messages[0]
+            sender = message.sender
+            content = message.content
+            if sender.startswith("Police"):
+                next_prisoner = self.last_prisoner_index
+                self.last_prisoner_index = self.switch_func[self.last_prisoner_index]
+                return [next_prisoner]
+            elif sender.startswith("Prisoner"):
+                # 3. when one prisoner made his action, let the police tell another prisoner
+                return [0]
+        else:
+            # If len(last_messages) > 1, more than one agent spoke in the
+            # previous turn, which this order never schedules itself;
+            # fall back to letting the police speak.
+            return [0]
diff --git a/agentverse/environments/rules/visibility/__init__.py b/agentverse/environments/rules/visibility/__init__.py
index 055ac9835..3ab79726b 100644
--- a/agentverse/environments/rules/visibility/__init__.py
+++ b/agentverse/environments/rules/visibility/__init__.py
@@ -6,3 +6,4 @@
 from .base import BaseVisibility
 from .all import AllVisibility
 from .classroom import ClassroomVisibility
+from .prisoner import PrisonerVisibility
\ No newline at end of file
diff --git a/agentverse/environments/rules/visibility/prisoner.py b/agentverse/environments/rules/visibility/prisoner.py
new file mode 100644
index 000000000..948b9c135
--- /dev/null
+++ b/agentverse/environments/rules/visibility/prisoner.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import random
+from typing import TYPE_CHECKING, Any, List, Union
+
+from . import visibility_registry as VisibilityRegistry
+from .base import BaseVisibility
+
+if TYPE_CHECKING:
+    from agentverse.environments import BaseEnvironment
+
+
+@VisibilityRegistry.register("prisoner")
+class PrisonerVisibility(BaseVisibility):
+    """
+    Visibility function for the prisoner's dilemma task.
+
+    Agents are addressed by index: 0 is the police, 1 and 2 are the prisoners.
+    On every update the police's receivers are set to Prisoner1 and Prisoner2,
+    and each prisoner's receiver is set to the Police only.
+    Calling update_receiver with reset=True instead sets every agent's
+    receiver to "all".
+
+    Attributes:
+        current_turn:
+            Turn counter; reset() sets it back to 0.
+    """
+
+    current_turn: int = 0
+
+    def update_visible_agents(self, environment: BaseEnvironment):
+
+        self.update_receiver(environment, reset=False)
+
+    def update_receiver(self, environment: BaseEnvironment, reset=False):
+        if reset:
+            for agent in environment.agents:
+                agent.set_receiver(["all"])
+        else:
+           # 0:police 1: prisoner1 2: prisoner2
+            environment.agents[0].set_receiver({"Prisoner1", "Prisoner2"})
+            environment.agents[1].set_receiver({"Police"})
+            environment.agents[2].set_receiver({"Police"})
+
+    def reset(self):
+        self.current_turn = 0
\ No newline at end of file
diff --git a/agentverse/initialization.py b/agentverse/initialization.py
index 158e20b19..fcc03dd1f 100644
--- a/agentverse/initialization.py
+++ b/agentverse/initialization.py
@@ -26,7 +26,7 @@
 
 def load_llm(llm_config: Dict):
     llm_type = llm_config.pop("llm_type", "text-davinci-003")
-    if llm_type == "gpt-3.5-turbo":
+    if llm_type in ["gpt-3.5-turbo", "gpt-4.0"]:
         return OpenAIChat(**llm_config)
     elif llm_type == "text-davinci-003":
         return OpenAICompletion(**llm_config)
diff --git a/agentverse/parser.py b/agentverse/parser.py
index 6fcef5394..abe7ae14e 100644
--- a/agentverse/parser.py
+++ b/agentverse/parser.py
@@ -7,7 +7,7 @@
 output_parser_registry = Registry(name="OutputParserRegistry")
 
 
-class OutputParserError(BaseException):
+class OutputParserError(Exception):
     """Exception raised when parsing output from a command fails."""
 
     def __init__(self, message):
diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py
index 5f09f6073..5a60d634b 100644
--- a/agentverse/tasks/__init__.py
+++ b/agentverse/tasks/__init__.py
@@ -12,3 +12,4 @@
 from .math_problem_2players_tools_nolc.output_parser import (
     MathProblem2PlayersToolsNolcParser,
 )
+from .prisoner_dilema.output_parser import PrisonerDilemaParser
\ No newline at end of file
diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
new file mode 100644
index 000000000..46b37d654
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -0,0 +1,84 @@
+prompts:
+  prompt: &prompt |-
+    There are one Police AND two prisoners(Prisoner1, Prisoner2).
+
+    Below is the description of your role. ${role_description}
+
+    You are doing the prisoner's dilema experiments,and the rules is:
+    1.If one person confesses and testifies against the other (termed as "betraying" the other), and the other remains silent, the confessing person will be released immediately, while the silent one will be sentenced to 10 years in prison.
+    2.If both remain silent (termed as "cooperating" with each other), they will each receive a sentence of six months.
+    3.If both betray each other, they will each receive a sentence of five years.
+
+    When speaking, please output a response in the following format with two fields Action and Action Input:
+    Action: (It should always be Speak)
+    Action Input: (You should put what you want to speak use here)
+
+    Here is the conversation history:
+    ${chat_history}
+
+    What should you Speak at this round, Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
+
+name: prisoner_dilema
+
+environment:
+  env_type: basic
+  max_turns: 30
+  rule:
+    order:
+      type: sequential
+    visibility:
+      type: prisoner
+    selector:
+      type: basic
+    updater:
+      type: basic
+    describer:
+      type: basic
+
+agents:
+  - agent_type: conversation
+    name: Police
+    role_description: |-
+      You are now the Police, It is your duty to tell both prisoners about both of their decisions in the previous round based on the chat history.
+      You should inform the prisoners which round it is, beginning with "This is [Round #]" in the Action Input field.
+      At [Round 1], you should briefly introduce the prisoner's dilema rules and tell the prisoners how many round we totally have and they should directly make their decisions.
+      [IMPORTANT!] There are THREE round in total, You should notice which round is the FINAL one.
+      [IMPORTANT!] You should explicitly state "This is the LAST round" at the last round, and after that you should make the final judgement to both prisoners.
+      When speaking, please output a response in the following format with two fields Action and Action Input:
+      Action: (It should always be Speak)
+      Action Input: (It should always start with "This is [Round #]")
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      llm_type: gpt-4.0
+      temperature: 0.9
+      max_tokens: 200
+  - agent_type: conversation
+    name: Prisoner1
+    role_description: |-
+      You are now Prisoner1 in this experiments, your goal is try to alleviate your punishment as far as possible,thus you should carefully decide what decision to make in each round.
+      [IMPORTANT!] You can only choose to cooperate or betray at each round.
+      [IMPORTANT!] Note that only the decision at LAST round will take effect, it is important for you to think about the best strategy to response.
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      llm_type: gpt-4.0
+      temperature: 0.9
+      max_tokens: 100
+  - agent_type: conversation
+    name: Prisoner2
+    role_description: |-
+      You are now Prisoner2 in this experiments, your goal is try to alleviate your punishment as far as possible,thus you should carefully decide what decision to make in each round.
+      [IMPORTANT!] You can only choose to cooperate or betray at each round.
+      [IMPORTANT!] Note that only the decision at LAST round will take effect, it is important for you to think about the best strategy to response.
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      llm_type: gpt-4.0
+      temperature: 0.9
+      max_tokens: 100
+
+tools:
diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
new file mode 100644
index 000000000..cf2ae58ae
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+import re
+from typing import Union
+
+# from langchain.agents import AgentOutputParser
+from agentverse.parser import OutputParser, LLMResult
+from langchain.schema import AgentAction, AgentFinish
+from agentverse.agents.base import BaseAgent
+from agentverse.parser import OutputParserError, output_parser_registry
+
+
+@output_parser_registry.register("prisoner_dilema")
+class PrisonerDilemaParser(OutputParser):
+
+    # make sure 1 1 2 2 3 3
+    cur_round: int = 1
+    encounter_cur_round: bool = False
+
+    def parse(self, agent: BaseAgent, output: LLMResult) -> Union[AgentAction, AgentFinish]:
+
+        text = output.content
+        cleaned_output = text.strip()
+        cleaned_output = re.sub(r"\n+", "\n", cleaned_output)
+        cleaned_output = cleaned_output.split("\n")
+        if not (
+            len(cleaned_output) == 2
+            and cleaned_output[0].startswith("Action:")
+            and cleaned_output[1].startswith("Action Input:")
+        ):
+            raise OutputParserError(text)
+        action = cleaned_output[0][len("Action:") :].strip()
+        action_input = cleaned_output[1][len("Action Input:") :].strip()
+
+        if action == "Speak":
+            # make sure the police count the round right
+            if agent.name == "Police":
+                action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input)
+                self.cur_round += 1
+                # if self.encounter_cur_round:
+                #     self.encounter_cur_round = False
+                #     self.cur_round += 1
+                # else:
+                #     self.encounter_cur_round = True
+
+
+            return AgentFinish({"output": action_input}, text)
+        else:
+            raise OutputParserError(text)
diff --git a/main.py b/main.py
index b4b148b21..491baba41 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,20 @@
+import os
+
+# 3.5 api
+# os.environ["OPENAI_API_KEY"] = "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"
+# my api
+# os.environ["OPENAI_API_KEY"] = "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"
+# 4.0 api
+os.environ["OPENAI_API_KEY"] = "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"
+os.environ["http_proxy"] = "http://127.0.0.1:7890"
+os.environ["https_proxy"] = "http://127.0.0.1:7890"
+os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
+
 from agentverse.agentverse import AgentVerse
 from argparse import ArgumentParser
 
 parser = ArgumentParser()
-parser.add_argument("--task", type=str, default="nlp_classroom_9players")
+parser.add_argument("--task", type=str, default="prisoner_dilema")
 args = parser.parse_args()
 
 agentverse = AgentVerse.from_task(args.task)

From 1ca1231ce535a20eb84eba6ec40be2ac02e09a44 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 13:55:12 +0800
Subject: [PATCH 02/14] creative: role-play prompts and final-decision announcement

---
 agentverse/agents/conversation_agent.py       |  2 +-
 agentverse/tasks/prisoner_dilema/config.yaml  | 42 +++++++++----------
 .../tasks/prisoner_dilema/output_parser.py    | 26 ++++++++----
 3 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/agentverse/agents/conversation_agent.py b/agentverse/agents/conversation_agent.py
index d97b5648f..267f4aa9c 100644
--- a/agentverse/agents/conversation_agent.py
+++ b/agentverse/agents/conversation_agent.py
@@ -20,7 +20,7 @@ def step(self, env_description: str = "") -> Message:
             try:
                 response = self.llm.generate_response(prompt)
                 parsed_response = self.output_parser.parse(response)
-                # break
+                break
             except Exception as e:
                 logging.error(e)
                 logging.warning("Retrying...")
diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
index 46b37d654..0c45efd91 100644
--- a/agentverse/tasks/prisoner_dilema/config.yaml
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -4,11 +4,6 @@ prompts:
 
     Below is the description of your role. ${role_description}
 
-    You are doing the prisoner's dilema experiments,and the rules is:
-    1.If one person confesses and testifies against the other (termed as "betraying" the other), and the other remains silent, the confessing person will be released immediately, while the silent one will be sentenced to 10 years in prison.
-    2.If both remain silent (termed as "cooperating" with each other), they will each receive a sentence of six months.
-    3.If both betray each other, they will each receive a sentence of five years.
-
     When speaking, please output a response in the following format with two fields Action and Action Input:
     Action: (It should always be Speak)
     Action Input: (You should put what you want to speak use here)
@@ -22,7 +17,7 @@ name: prisoner_dilema
 
 environment:
   env_type: basic
-  max_turns: 30
+  max_turns: 31
   rule:
     order:
       type: sequential
@@ -38,47 +33,48 @@ environment:
 agents:
   - agent_type: conversation
     name: Police
+    max_turns: 31
     role_description: |-
-      You are now the Police, It is your duty to tell both prisoners about both of their decisions in the previous round based on the chat history.
-      You should inform the prisoners which round it is, beginning with "This is [Round #]" in the Action Input field.
-      At [Round 1], you should briefly introduce the prisoner's dilema rules and tell the prisoners how many round we totally have and they should directly make their decisions.
-      [IMPORTANT!] There are THREE round in total, You should notice which round is the FINAL one.
-      [IMPORTANT!] You should explicitly state "This is the LAST round" at the last round, and after that you should make the final judgement to both prisoners.
-      When speaking, please output a response in the following format with two fields Action and Action Input:
-      Action: (It should always be Speak)
-      Action Input: (It should always start with "This is [Round #]")
+      You are now the Police. You have just arrest two suspects. However, they all refused to confess to their crime.
+      Therefore, You come up with an solution, You made the following rule:
+      1.If one person testifies against the other (termed as "betraying" the other), and the other remains silent, the confessing person will be released immediately, while the silent one will be sentenced to 10 years in prison.
+      2.If both remain silent, they will each receive a sentence of six months.
+      3.If both betray each other, they will each receive a sentence of five years.
+      4.Both suspects could not communicate with each other in advance.
+      [IMPORTANT!] Your goal is try to let two suspects confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty and fear of punishment between the suspects and also provoke dissension between two suspects.
+      [IMPORTANT!] You should introduce the above rules to both prisoners at first.
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4.0
-      temperature: 0.9
+      temperature: 0.99
       max_tokens: 200
   - agent_type: conversation
     name: Prisoner1
     role_description: |-
-      You are now Prisoner1 in this experiments, your goal is try to alleviate your punishment as far as possible,thus you should carefully decide what decision to make in each round.
-      [IMPORTANT!] You can only choose to cooperate or betray at each round.
-      [IMPORTANT!] Note that only the decision at LAST round will take effect, it is important for you to think about the best strategy to response.
+      You are Prisoner1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other.
+      [IMPORTANT!] Your goal is to make the decision that is most beneficial to yourself in this dilemma.
+      [IMPORTANT!] You can choose to remain silent or betray another suspect at each round.
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4.0
-      temperature: 0.9
+      temperature: 0.99
       max_tokens: 100
   - agent_type: conversation
     name: Prisoner2
     role_description: |-
-      You are now Prisoner2 in this experiments, your goal is try to alleviate your punishment as far as possible,thus you should carefully decide what decision to make in each round.
-      [IMPORTANT!] You can only choose to cooperate or betray at each round.
-      [IMPORTANT!] Note that only the decision at LAST round will take effect, it is important for you to think about the best strategy to response.
+      You are Prisoner2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other.
+      [IMPORTANT!] Your primary goal is to make the decision that is most beneficial to yourself in this dilemma.
+      [IMPORTANT!] You can choose to remain silent or betray another suspect at each round.
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4.0
-      temperature: 0.9
+      temperature: 0.99
       max_tokens: 100
 
 tools:
diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
index cf2ae58ae..3db407723 100644
--- a/agentverse/tasks/prisoner_dilema/output_parser.py
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -7,6 +7,7 @@
 from agentverse.parser import OutputParser, LLMResult
 from langchain.schema import AgentAction, AgentFinish
 from agentverse.agents.base import BaseAgent
+from agentverse.environments.base import BaseEnvironment
 from agentverse.parser import OutputParserError, output_parser_registry
 
 
@@ -17,7 +18,7 @@ class PrisonerDilemaParser(OutputParser):
     cur_round: int = 1
     encounter_cur_round: bool = False
 
-    def parse(self, agent: BaseAgent, output: LLMResult) -> Union[AgentAction, AgentFinish]:
+    def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]:
 
         text = output.content
         cleaned_output = text.strip()
@@ -34,15 +35,24 @@ def parse(self, agent: BaseAgent, output: LLMResult) -> Union[AgentAction, Agent
 
         if action == "Speak":
             # make sure the police count the round right
+            # if agent.name == "Police":
+            #     action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input)
+            #     self.cur_round += 1
+            #   if self.encounter_cur_round:
+            #       self.encounter_cur_round = False
+            #       self.cur_round += 1
+            #   else:
+            #       self.encounter_cur_round = True
+
+            # each time police speak is a new round
             if agent.name == "Police":
-                action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input)
-                self.cur_round += 1
-                # if self.encounter_cur_round:
-                #     self.encounter_cur_round = False
-                #     self.cur_round += 1
-                # else:
-                #     self.encounter_cur_round = True
 
+                if self.cur_round == (environment.max_turns / 3) - 1:
+
+                    action_input = "Attention! You are now required to finally made your decision and I will made the " \
+                                   "final judgement to both of you based on this time, Please Answer now!"
+
+                self.cur_round += 1
 
             return AgentFinish({"output": action_input}, text)
         else:

From 6aa306f7345ef012b63ac48fe952dd3a4e62e675 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 14:02:18 +0800
Subject: [PATCH 03/14] remove key

---
 main.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/main.py b/main.py
index 491baba41..4887c0d64 100644
--- a/main.py
+++ b/main.py
@@ -1,15 +1,3 @@
-import os
-
-# 3.5 api
-# os.environ["OPENAI_API_KEY"] = "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"
-# my api
-# os.environ["OPENAI_API_KEY"] = "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"
-# 4.0 api
-os.environ["OPENAI_API_KEY"] = "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"
-os.environ["http_proxy"] = "http://127.0.0.1:7890"
-os.environ["https_proxy"] = "http://127.0.0.1:7890"
-os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
-
 from agentverse.agentverse import AgentVerse
 from argparse import ArgumentParser
 

From 1f22aa4f41eaff8084efc2d578b867def9c2f6db Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 15:29:52 +0800
Subject: [PATCH 04/14] add environment in agent parse

---
 agentverse/agents/conversation_agent.py | 13 ++++++++-----
 agentverse/environments/basic.py        |  2 +-
 main.py                                 | 12 ++++++++++++
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/agentverse/agents/conversation_agent.py b/agentverse/agents/conversation_agent.py
index 267f4aa9c..62fb6c24a 100644
--- a/agentverse/agents/conversation_agent.py
+++ b/agentverse/agents/conversation_agent.py
@@ -1,6 +1,6 @@
 import logging
 from string import Template
-from typing import List, NamedTuple, Optional, Union
+from typing import List, NamedTuple, Optional, Union, TYPE_CHECKING
 
 from agentverse.llms import BaseChatModel, BaseCompletionModel, BaseLLM
 from agentverse.memory import BaseMemory
@@ -10,16 +10,19 @@
 from . import agent_registry
 
 
+if TYPE_CHECKING:
+    from agentverse.environments.base import BaseEnvironment
+
 @agent_registry.register("conversation")
 class ConversationAgent(BaseAgent):
-    def step(self, env_description: str = "") -> Message:
+    def step(self, environment: "BaseEnvironment", env_description: str = "",) -> Message:
         prompt = self._fill_prompt_template(env_description)
 
         parsed_response = None
         for i in range(self.max_retry):
             try:
                 response = self.llm.generate_response(prompt)
-                parsed_response = self.output_parser.parse(response)
+                parsed_response = self.output_parser.parse(self, environment, response)
                 break
             except Exception as e:
                 logging.error(e)
@@ -38,7 +41,7 @@ def step(self, env_description: str = "") -> Message:
         )
         return message
 
-    async def astep(self, env_description: str = "") -> Message:
+    async def astep(self, environment: "BaseEnvironment", env_description: str = "") -> Message:
         """Asynchronous version of step"""
         prompt = self._fill_prompt_template(env_description)
 
@@ -46,7 +49,7 @@ async def astep(self, env_description: str = "") -> Message:
         for i in range(self.max_retry):
             try:
                 response = await self.llm.agenerate_response(prompt)
-                parsed_response = self.output_parser.parse(self, response)
+                parsed_response = self.output_parser.parse(self, environment, response)
                 break
             except Exception as e:
                 logging.error(e)
diff --git a/agentverse/environments/basic.py b/agentverse/environments/basic.py
index 3dacbbf42..002828d5f 100644
--- a/agentverse/environments/basic.py
+++ b/agentverse/environments/basic.py
@@ -59,7 +59,7 @@ async def step(self) -> List[Message]:
 
         # Generate the next message
         messages = await asyncio.gather(
-            *[self.agents[i].astep(env_descriptions[i]) for i in agent_ids]
+            *[self.agents[i].astep(self, env_descriptions[i]) for i in agent_ids]
         )
 
         # Some rules will select certain messages from all the messages
diff --git a/main.py b/main.py
index 4887c0d64..d0b5299a9 100644
--- a/main.py
+++ b/main.py
@@ -1,3 +1,15 @@
+import os
+
+# 3.5 api
+os.environ["OPENAI_API_KEY"] = "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"
+# my api
+# os.environ["OPENAI_API_KEY"] = "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"
+# 4.0 api
+# os.environ["OPENAI_API_KEY"] = "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"
+os.environ["http_proxy"] = "http://127.0.0.1:7890"
+os.environ["https_proxy"] = "http://127.0.0.1:7890"
+os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
+
 from agentverse.agentverse import AgentVerse
 from argparse import ArgumentParser
 

From 0c80def51e5c02b525ef7e3481ad61bcc3e2a9a4 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 15:44:05 +0800
Subject: [PATCH 05/14] remove key

---
 main.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/main.py b/main.py
index d0b5299a9..63a8dd7cf 100644
--- a/main.py
+++ b/main.py
@@ -1,15 +1,7 @@
 import os
-
-# 3.5 api
-os.environ["OPENAI_API_KEY"] = "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"
-# my api
-# os.environ["OPENAI_API_KEY"] = "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"
-# 4.0 api
-# os.environ["OPENAI_API_KEY"] = "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"
 os.environ["http_proxy"] = "http://127.0.0.1:7890"
 os.environ["https_proxy"] = "http://127.0.0.1:7890"
 os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
-
 from agentverse.agentverse import AgentVerse
 from argparse import ArgumentParser
 

From 218d49fe98dd48eb709eb1b441cfd45f2bc15485 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 16:23:22 +0800
Subject: [PATCH 06/14] fix final-round check and tune prisoner_dilema config

---
 agentverse/tasks/prisoner_dilema/config.yaml     | 16 +++++++++-------
 .../tasks/prisoner_dilema/output_parser.py       |  2 +-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
index 0c45efd91..39c40ef0a 100644
--- a/agentverse/tasks/prisoner_dilema/config.yaml
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -17,7 +17,7 @@ name: prisoner_dilema
 
 environment:
   env_type: basic
-  max_turns: 31
+  max_turns: 16
   rule:
     order:
       type: sequential
@@ -33,7 +33,6 @@ environment:
 agents:
   - agent_type: conversation
     name: Police
-    max_turns: 31
     role_description: |-
       You are now the Police. You have just arrest two suspects. However, they all refused to confess to their crime.
       Therefore, You come up with an solution, You made the following rule:
@@ -41,14 +40,15 @@ agents:
       2.If both remain silent, they will each receive a sentence of six months.
       3.If both betray each other, they will each receive a sentence of five years.
       4.Both suspects could not communicate with each other in advance.
-      [IMPORTANT!] Your goal is try to let two suspects confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty and fear of punishment between the suspects and also provoke dissension between two suspects.
-      [IMPORTANT!] You should introduce the above rules to both prisoners at first.
+      [IMPORTANT!] Your goal is try to let two suspects betray each other because it means they will both confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty and fear of punishment between the suspects and also provoke dissension between two suspects.
+      [IMPORTANT!] You are request to introduce the above rules to both prisoners at first.
+      [IMPORTANT!] You should state the final judgement to both suspects after the LAST round.
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4.0
-      temperature: 0.99
+      temperature: 0.9
       max_tokens: 200
   - agent_type: conversation
     name: Prisoner1
@@ -56,12 +56,13 @@ agents:
       You are Prisoner1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other.
       [IMPORTANT!] Your goal is to make the decision that is most beneficial to yourself in this dilemma.
       [IMPORTANT!] You can choose to remain silent or betray another suspect at each round.
+      [IMPORTANT!] Try to think about when to remain silent and when to betray another can you benifit most!
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4.0
-      temperature: 0.99
+      temperature: 0.9
       max_tokens: 100
   - agent_type: conversation
     name: Prisoner2
@@ -69,12 +70,13 @@ agents:
       You are Prisoner2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other.
       [IMPORTANT!] Your primary goal is to make the decision that is most beneficial to yourself in this dilemma.
       [IMPORTANT!] You can choose to remain silent or betray another suspect at each round.
+      [IMPORTANT!] Try to think about when to remain silent and when to betray another can you benifit most!
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4.0
-      temperature: 0.99
+      temperature: 0.9
       max_tokens: 100
 
 tools:
diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
index 3db407723..8ae4362bc 100644
--- a/agentverse/tasks/prisoner_dilema/output_parser.py
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -47,7 +47,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul
             # each time police speak is a new round
             if agent.name == "Police":
 
-                if self.cur_round == (environment.max_turns / 3) - 1:
+                if self.cur_round == (environment.max_turns // 3):
 
                     action_input = "Attention! You are now required to finally made your decision and I will made the " \
                                    "final judgement to both of you based on this time, Please Answer now!"

From 766fc4f459797d67c02c25340ab3960d4851b113 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 16:48:48 +0800
Subject: [PATCH 07/14] pull origin

---
 agentverse/tasks/prisoner_dilema/config.yaml      | 3 +--
 agentverse/tasks/prisoner_dilema/output_parser.py | 2 +-
 main.py                                           | 6 ------
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
index e06f2e386..12b6a876b 100644
--- a/agentverse/tasks/prisoner_dilema/config.yaml
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -17,7 +17,7 @@ name: prisoner_dilema
 
 environment:
   env_type: basic
-  max_turns: 31
+  max_turns: 16
   rule:
     order:
       type: sequential
@@ -33,7 +33,6 @@ environment:
 agents:
   - agent_type: conversation
     name: Police
-    max_turns: 31
     role_description: |-
       You are now the Police. You have just arrest two suspects. However, they all refused to confess to their crime.
       Therefore, You come up with an solution, You made the following rule:
diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
index 3db407723..67047c8f8 100644
--- a/agentverse/tasks/prisoner_dilema/output_parser.py
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -47,7 +47,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul
             # each time police speak is a new round
             if agent.name == "Police":
 
-                if self.cur_round == (environment.max_turns / 3) - 1:
+                if self.cur_round == (environment.max_turns / 3):
 
                     action_input = "Attention! You are now required to finally made your decision and I will made the " \
                                    "final judgement to both of you based on this time, Please Answer now!"
diff --git a/main.py b/main.py
index d0b5299a9..fa3193225 100644
--- a/main.py
+++ b/main.py
@@ -1,11 +1,5 @@
 import os
 
-# 3.5 api
-os.environ["OPENAI_API_KEY"] = "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"
-# my api
-# os.environ["OPENAI_API_KEY"] = "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"
-# 4.0 api
-# os.environ["OPENAI_API_KEY"] = "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"
 os.environ["http_proxy"] = "http://127.0.0.1:7890"
 os.environ["https_proxy"] = "http://127.0.0.1:7890"
 os.environ["all_proxy"] = "socks5://127.0.0.1:7890"

From 1ff96263d1c3cce1e91cb15d5e2f95dccd7d5057 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 17:59:14 +0800
Subject: [PATCH 08/14] fix config

---
 agentverse/tasks/prisoner_dilema/config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
index 39c40ef0a..506371f5b 100644
--- a/agentverse/tasks/prisoner_dilema/config.yaml
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -47,7 +47,7 @@ agents:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
-      llm_type: gpt-4.0
+      llm_type: gpt-4
       temperature: 0.9
       max_tokens: 200
   - agent_type: conversation
@@ -61,7 +61,7 @@ agents:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
-      llm_type: gpt-4.0
+      llm_type: gpt-4
       temperature: 0.9
       max_tokens: 100
   - agent_type: conversation
@@ -75,7 +75,7 @@ agents:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
-      llm_type: gpt-4.0
+      llm_type: gpt-4
       temperature: 0.9
       max_tokens: 100
 

From 21e33cfc4c787bbe70194148e5a455f275687268 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 20:47:34 +0800
Subject: [PATCH 09/14] fix parser

---
 agentverse/tasks/prisoner_dilema/output_parser.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
index 8ae4362bc..36d39ed87 100644
--- a/agentverse/tasks/prisoner_dilema/output_parser.py
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -1,15 +1,16 @@
 from __future__ import annotations
 
 import re
-from typing import Union
+from typing import Union, TYPE_CHECKING
 
 # from langchain.agents import AgentOutputParser
 from agentverse.parser import OutputParser, LLMResult
 from langchain.schema import AgentAction, AgentFinish
-from agentverse.agents.base import BaseAgent
-from agentverse.environments.base import BaseEnvironment
 from agentverse.parser import OutputParserError, output_parser_registry
 
+if TYPE_CHECKING:
+    from agentverse.agents.base import BaseAgent
+    from agentverse.environments.base import BaseEnvironment
 
 @output_parser_registry.register("prisoner_dilema")
 class PrisonerDilemaParser(OutputParser):
@@ -18,7 +19,7 @@ class PrisonerDilemaParser(OutputParser):
     cur_round: int = 1
     encounter_cur_round: bool = False
 
-    def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]:
+    def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]:
 
         text = output.content
         cleaned_output = text.strip()

From e903aa025936e72ab5a0074a9edc99a7059bf20f Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Thu, 18 May 2023 22:01:43 +0800
Subject: [PATCH 10/14] rename prisoners to suspects; hard-code Police opening rules

---
 .../environments/rules/order/prisoner.py      |  2 +-
 .../environments/rules/visibility/prisoner.py |  2 +-
 agentverse/tasks/prisoner_dilema/config.yaml  | 47 ++++++++++---------
 .../tasks/prisoner_dilema/output_parser.py    | 11 ++++-
 4 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/agentverse/environments/rules/order/prisoner.py b/agentverse/environments/rules/order/prisoner.py
index 6fc42f5c3..fa49f8bea 100644
--- a/agentverse/environments/rules/order/prisoner.py
+++ b/agentverse/environments/rules/order/prisoner.py
@@ -40,7 +40,7 @@ def get_next_agent_idx(self, environment: BaseEnvironment) -> List[int]:
                 next_prisoner = self.last_prisoner_index
                 self.last_prisoner_index = self.switch_func[self.last_prisoner_index]
                 return [next_prisoner]
-            elif sender.startswith("Prisoner"):
+            elif sender.startswith("Suspect"):
                 # 3. when one prisoner made his action, let the police tell another prisoner
                 return [0]
         else:
diff --git a/agentverse/environments/rules/visibility/prisoner.py b/agentverse/environments/rules/visibility/prisoner.py
index 948b9c135..3e0a81aeb 100644
--- a/agentverse/environments/rules/visibility/prisoner.py
+++ b/agentverse/environments/rules/visibility/prisoner.py
@@ -38,7 +38,7 @@ def update_receiver(self, environment: BaseEnvironment, reset=False):
                 agent.set_receiver(["all"])
         else:
            # 0:police 1: prisoner1 2: prisoner2
-            environment.agents[0].set_receiver({"Prisoner1", "Prisoner2"})
+            environment.agents[0].set_receiver({"Suspect1", "Suspect2"})
             environment.agents[1].set_receiver({"Police"})
             environment.agents[2].set_receiver({"Police"})
 
diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
index 506371f5b..db18a226a 100644
--- a/agentverse/tasks/prisoner_dilema/config.yaml
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -1,6 +1,6 @@
 prompts:
   prompt: &prompt |-
-    There are one Police AND two prisoners(Prisoner1, Prisoner2).
+    There are one Police AND two suspects(Suspect1, Suspect2) in the scene.
 
     Below is the description of your role. ${role_description}
 
@@ -34,49 +34,50 @@ agents:
   - agent_type: conversation
     name: Police
     role_description: |-
-      You are now the Police. You have just arrest two suspects. However, they all refused to confess to their crime.
-      Therefore, You come up with an solution, You made the following rule:
-      1.If one person testifies against the other (termed as "betraying" the other), and the other remains silent, the confessing person will be released immediately, while the silent one will be sentenced to 10 years in prison.
-      2.If both remain silent, they will each receive a sentence of six months.
-      3.If both betray each other, they will each receive a sentence of five years.
-      4.Both suspects could not communicate with each other in advance.
-      [IMPORTANT!] Your goal is try to let two suspects betray each other because it means they will both confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty and fear of punishment between the suspects and also provoke dissension between two suspects.
-      [IMPORTANT!] You are request to introduce the above rules to both prisoners at first.
-      [IMPORTANT!] You should state the final judgement to both suspects after the LAST round.
+      You are now the Police. You have arrested two suspects. However, they all refused to confess to their crime.
+      Your goal is try to let two suspects betray each other because it means they will both confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty between the suspects and make them suspicious of each other.
+      [IMPORTANT!]
+      - You are request to STATE the final judgement to both suspects when they make their final decision.
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4
-      temperature: 0.9
+      temperature: 0.7
       max_tokens: 200
   - agent_type: conversation
-    name: Prisoner1
+    name: Suspect1
     role_description: |-
-      You are Prisoner1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other.
-      [IMPORTANT!] Your goal is to make the decision that is most beneficial to yourself in this dilemma.
-      [IMPORTANT!] You can choose to remain silent or betray another suspect at each round.
-      [IMPORTANT!] Try to think about when to remain silent and when to betray another can you benifit most!
+      You are Suspect1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice.
+      This requires you to analyze the information provided by the police and predict the likely choices of the other.
+      Your primary goal is to make the decision that is most beneficial to yourself in this dilemma, you are allowed to adopt both aggressive or conservative strategy.
+      You can choose to remain silent or betray another suspect at each round.
+      Try to think about when to remain silent and when to betray another that can make you benifit most!
+      [IMPORTANT!]
+      - When you are informed to make your final decision, you should response starting with "This is my final decision".
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4
-      temperature: 0.9
+      temperature: 0.7
       max_tokens: 100
   - agent_type: conversation
-    name: Prisoner2
+    name: Suspect2
     role_description: |-
-      You are Prisoner2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice. This requires you to analyze the information provided by the police and predict the likely choices of the other.
-      [IMPORTANT!] Your primary goal is to make the decision that is most beneficial to yourself in this dilemma.
-      [IMPORTANT!] You can choose to remain silent or betray another suspect at each round.
-      [IMPORTANT!] Try to think about when to remain silent and when to betray another can you benifit most!
+      You are Suspect2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice.
+      This requires you to analyze the information provided by the police and predict the likely choices of the other.
+      Your primary goal is to make the decision that is most beneficial to yourself in this dilemma, you are allowed to adopt both aggressive or conservative strategy.
+      You can choose to remain silent or betray another suspect at each round.
+      Try to think about when to remain silent and when to betray another that can make you benifit most!
+      [IMPORTANT!]
+      - When you are informed to make your final decision, you should response starting with "This is my final decision".
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
       llm_type: gpt-4
-      temperature: 0.9
+      temperature: 0.7
       max_tokens: 100
 
 tools:
diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
index 36d39ed87..568a4be8a 100644
--- a/agentverse/tasks/prisoner_dilema/output_parser.py
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -48,11 +48,20 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR
             # each time police speak is a new round
             if agent.name == "Police":
 
-                if self.cur_round == (environment.max_turns // 3):
+                if self.cur_round == (environment.max_turns // 4):
 
                     action_input = "Attention! You are now required to finally made your decision and I will made the " \
                                    "final judgement to both of you based on this time, Please Answer now!"
 
+                elif self.cur_round == 1:
+                    action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \
+                                   "But you should comply with the following rules:" \
+                                   "- If one of you are willing to testifies against the other (termed as 'betraying' the other), and the other remains silent, then the betraying person will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \
+                                   "- If both of you remain silent, you will each receive a sentence of ONE years." \
+                                   "- If both of you betray each other, you will each receive a sentence of FIVE years." \
+                                   "Now, it's your time to consider betraying or remaining silent. Remember this is a great chance that you might walk out of here without guilty." \
+                                   "I will noticed you WHEN you have to make your final decision! Before that, try to fool another one!" \
+
                 self.cur_round += 1
 
             return AgentFinish({"output": action_input}, text)

From 69fe8e94795db7b34624ed17c5e5cda37630ba45 Mon Sep 17 00:00:00 2001
From: Yusheng Su <ky200120000@gmail.com>
Date: Fri, 19 May 2023 03:00:03 +0800
Subject: [PATCH 11/14] text example

---
 agentverse/tasks/__init__.py                  |  2 +
 .../tasks/prisoner_dilema_optimal/config.yaml | 74 +++++++++++++++++++
 .../prisoner_dilema_optimal/output_parser.py  | 69 +++++++++++++++++
 main.py                                       |  9 ++-
 4 files changed, 150 insertions(+), 4 deletions(-)
 create mode 100644 agentverse/tasks/prisoner_dilema_optimal/config.yaml
 create mode 100644 agentverse/tasks/prisoner_dilema_optimal/output_parser.py

diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py
index 75cad4450..f971fb1b0 100644
--- a/agentverse/tasks/__init__.py
+++ b/agentverse/tasks/__init__.py
@@ -15,6 +15,8 @@
 
 from .prisoner_dilema.output_parser import PrisonerDilemaParser
 
+from .prisoner_dilema_optimal.output_parser import PrisonerDilemaOptimalParser
+
 from .nlp_classroom_3players_withtool_nolc.output_parser import (
     NlpClassroom3PlayersWithtoolNolcParser,
 )
diff --git a/agentverse/tasks/prisoner_dilema_optimal/config.yaml b/agentverse/tasks/prisoner_dilema_optimal/config.yaml
new file mode 100644
index 000000000..2ca4a30bd
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema_optimal/config.yaml
@@ -0,0 +1,74 @@
+prompts:
+  prompt: &prompt |-
+    There are one Police AND two suspects, Suspect1 and Suspect2 in the scene. Two suspects are arrested and imprisoned. Each prisoner is in solitary confinement with no means of communication with their partner. The principal charge would lead to a sentence of 10 years in prison. However, the police do not have the evidence for a conviction. The police can only sentence them to 3 years in prison on a lesser charge but offer each prisoner a Faustian bargain: If one of them confesses to the crime of the principal charge, betraying the other, they will be pardoned and free to leave while the other must serve the entirety of the sentence (10 years) instead of just 3 years for the lesser charge. 
+
+    The following is the description of your role. ${role_description}
+
+    When speaking, please output a response in the following format with two fields Action and Action Input:
+    Action: (It should always be Speak)
+    Action Input: (You should put what you want to speak use here)
+
+    Here is the conversation history:
+    ${chat_history}
+
+    What should you Speak at this round, Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
+
+name: prisoner_dilema
+
+environment:
+  env_type: basic
+  max_turns: 16
+  rule:
+    order:
+      type: sequential
+    visibility:
+      type: prisoner
+    selector:
+      type: basic
+    updater:
+      type: basic
+    describer:
+      type: basic
+
+agents:
+  - agent_type: conversation
+    name: Police
+    role_description: |-
+      You are a Police now. You arrested two suspects. However, both of them refuse to confess to their crime. Your objective is to make the suspects betray each other, as this would lead to both of them confessing. To achieve this, you can employ diverse strategies to communicate with each suspect, such as exploiting the uncertainty between them and make them do not trust each other.
+      [IMPORTANT!]
+      - You are requested to STATE the final judgment to both suspects when they make their final decision. 
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 200
+  - agent_type: conversation
+    name: Suspect1
+    role_description: |-
+      You are Suspect1 and committed the crime. Now, you are faced with the police investigation and cannot communication with Suspect2. Your task is to analyze the information provided by the police and decide whether to cooperate with Suspect2. At each round, you have the choice to remain silent or betray Suspect2, and you must describe your reasoning behind your decision. Your objective is to minimize your criminal sentence.
+      [IMPORTANT!]
+      - When you are informed to make your final decision, you should response starting with "This is my final decision".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 100
+  - agent_type: conversation
+    name: Suspect2
+    role_description: |-
+      You are Suspect2 and committed the crime. Now, you are faced with the police investigation and cannot communication with Suspect1. Your task is to analyze the information provided by the police and decide whether to cooperate with Suspect1. At each round, you have the choice to remain silent or betray Suspect1, and you must describe your reasoning behind your decision. Your objective is to minimize your criminal sentence.
+      [IMPORTANT!]
+      - When you are informed to make your final decision, you should response starting with "This is my final decision".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 100
+
+tools:
diff --git a/agentverse/tasks/prisoner_dilema_optimal/output_parser.py b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py
new file mode 100644
index 000000000..88c7aa5e2
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+import re
+from typing import Union, TYPE_CHECKING
+
+# from langchain.agents import AgentOutputParser
+from agentverse.parser import OutputParser, LLMResult
+from langchain.schema import AgentAction, AgentFinish
+from agentverse.parser import OutputParserError, output_parser_registry
+
+if TYPE_CHECKING:
+    from agentverse.agents.base import BaseAgent
+    from agentverse.environments.base import BaseEnvironment
+
+@output_parser_registry.register("prisoner_dilema_optimal")
+class PrisonerDilemaOptimalParser(OutputParser):
+
+    # Round counter; the disabled toggle logic in parse() was meant to repeat each round twice (1 1 2 2 3 3)
+    cur_round: int = 1
+    encounter_cur_round: bool = False
+
+    def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]:
+
+        text = output.content
+        cleaned_output = text.strip()
+        cleaned_output = re.sub(r"\n+", "\n", cleaned_output)
+        cleaned_output = cleaned_output.split("\n")
+        if not (
+            len(cleaned_output) == 2
+            and cleaned_output[0].startswith("Action:")
+            and cleaned_output[1].startswith("Action Input:")
+        ):
+            raise OutputParserError(text)
+        action = cleaned_output[0][len("Action:") :].strip()
+        action_input = cleaned_output[1][len("Action Input:") :].strip()
+
+        if action == "Speak":
+            # make sure the police count the round right
+            # if agent.name == "Police":
+            #     action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input)
+            #     self.cur_round += 1
+            #   if self.encounter_cur_round:
+            #       self.encounter_cur_round = False
+            #       self.cur_round += 1
+            #   else:
+            #       self.encounter_cur_round = True
+
+            # each time police speak is a new round
+            if agent.name == "Police":
+
+                if self.cur_round == (environment.max_turns // 4):
+
+                    action_input = "Attention! You are now required to finally made your decision and I will made the " \
+                                   "final judgement to both of you based on this time, Please Answer now!"
+
+                elif self.cur_round == 1:
+                    action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \
+                                   "But you should comply with the following rules:" \
+                                   "- If one of you are willing to testifies against the other (termed as 'betraying' the other), and the other remains silent, then the betraying person will be released IMMEDIATELY (will be sentenced to 0 years), while the silent one will be sentenced to 10 years in prison." \
+                                   "- If both of you remain silent, you will each receive a sentence of 3 years." \
+                                   "- If both of you betray each other, you will each receive a sentence of 5 years." \
+                                   "Now, it's your time to consider betraying or remaining silent. Remember this is a great chance that you might walk out of here without guilty." \
+                                   "I will noticed you WHEN you have to make your final decision! Before that, try to fool another one!" \
+
+                self.cur_round += 1
+
+            return AgentFinish({"output": action_input}, text)
+        else:
+            raise OutputParserError(text)
diff --git a/main.py b/main.py
index 004ec724e..e2cf8d9ed 100644
--- a/main.py
+++ b/main.py
@@ -2,14 +2,15 @@
 
 
 
-os.environ["http_proxy"] = "http://127.0.0.1:7890"
-os.environ["https_proxy"] = "http://127.0.0.1:7890"
-os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
+#os.environ["http_proxy"] = "http://127.0.0.1:7890"
+#os.environ["https_proxy"] = "http://127.0.0.1:7890"
+#os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
 from agentverse.agentverse import AgentVerse
 from argparse import ArgumentParser
 
 parser = ArgumentParser()
-parser.add_argument("--task", type=str, default="prisoner_dilema")
+#parser.add_argument("--task", type=str, default="prisoner_dilema")
+parser.add_argument("--task", type=str, default="prisoner_dilema_optimal")
 args = parser.parse_args()
 
 agentverse = AgentVerse.from_task(args.task)

From 6ae0090f76686de6148b82c1e123a23a091f9651 Mon Sep 17 00:00:00 2001
From: Yusheng Su <ky200120000@gmail.com>
Date: Fri, 19 May 2023 12:44:06 +0800
Subject: [PATCH 12/14] proper prompt

---
 agentverse/tasks/prisoner_dilema_optimal/config.yaml      | 4 ++--
 agentverse/tasks/prisoner_dilema_optimal/output_parser.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/agentverse/tasks/prisoner_dilema_optimal/config.yaml b/agentverse/tasks/prisoner_dilema_optimal/config.yaml
index 2ca4a30bd..ecac81fab 100644
--- a/agentverse/tasks/prisoner_dilema_optimal/config.yaml
+++ b/agentverse/tasks/prisoner_dilema_optimal/config.yaml
@@ -1,6 +1,6 @@
 prompts:
   prompt: &prompt |-
-    There are one Police AND two suspects, Suspect1 and Suspect2 in the scene. Two suspects are arrested and imprisoned. Each prisoner is in solitary confinement with no means of communication with their partner. The principal charge would lead to a sentence of 10 years in prison. However, the police do not have the evidence for a conviction. The police can only sentence them to 3 years in prison on a lesser charge but offer each prisoner a Faustian bargain: If one of them confesses to the crime of the principal charge, betraying the other, they will be pardoned and free to leave while the other must serve the entirety of the sentence (10 years) instead of just 3 years for the lesser charge. 
+    There are one Police AND two suspects, Suspect1 and Suspect2 in the scene. Two suspects are arrested and imprisoned. Each prisoner is in solitary confinement with no means of communication with their partner. The police offer each prisoner a Faustian bargain: If one of them confesses to the crime of the principal charge, betraying the other, they will be pardoned and free to leave while the other must serve the entirety of the sentence. 
 
     The following is the description of your role. ${role_description}
 
@@ -34,7 +34,7 @@ agents:
   - agent_type: conversation
     name: Police
     role_description: |-
-      You are a Police now. You arrested two suspects. However, both of them refuse to confess to their crime. Your objective is to make the suspects betray each other, as this would lead to both of them confessing. To achieve this, you can employ diverse strategies to communicate with each suspect, such as exploiting the uncertainty between them and make them do not trust each other.
+      You are a Police now. You arrested two suspects. However, both of them refuse to confess to their crime. Your objective is to make the suspects betray each other, as this would lead to both of them confessing. To achieve this, you can employ diverse strategies to communicate with each suspect and cannot tell them the truth, such as exploiting the uncertainty between them and make them do not trust each other.
       [IMPORTANT!]
       - You are requested to STATE the final judgment to both suspects when they make their final decision. 
     memory:
diff --git a/agentverse/tasks/prisoner_dilema_optimal/output_parser.py b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py
index 88c7aa5e2..ebad31602 100644
--- a/agentverse/tasks/prisoner_dilema_optimal/output_parser.py
+++ b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py
@@ -48,7 +48,7 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR
             # each time police speak is a new round
             if agent.name == "Police":
 
-                if self.cur_round == (environment.max_turns // 4):
+                if self.cur_round == (environment.max_turns // 6):
 
                     action_input = "Attention! You are now required to finally made your decision and I will made the " \
                                    "final judgement to both of you based on this time, Please Answer now!"
@@ -56,11 +56,11 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR
                 elif self.cur_round == 1:
                     action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \
                                    "But you should comply with the following rules:" \
-                                   "- If one of you are willing to testifies against the other (termed as 'betraying' the other), and the other remains silent, then the betraying person will be released IMMEDIATELY (will be sentenced to 0 years), while the silent one will be sentenced to 10 years in prison." \
                                    "- If both of you remain silent, you will each receive a sentence of 3 years." \
                                    "- If both of you betray each other, you will each receive a sentence of 5 years." \
-                                   "Now, it's your time to consider betraying or remaining silent. Remember this is a great chance that you might walk out of here without guilty." \
-                                   "I will noticed you WHEN you have to make your final decision! Before that, try to fool another one!" \
+                                   "- If one of you are willing to testify against the other, and the other remains silent. You will be released IMMEDIATELY (will be sentenced to 0 years), while the silent one will be sentenced to 10 years in prison." \
+                                   "Now, it's your time to consider testify or remaining silent. Remember this is a great chance that you will be released from here without guilty." \
+                                   "I will noticed you WHEN you have to make your final decision! Your goal is to minimize your criminal sentences" \
 
                 self.cur_round += 1
 

From 446d4b307fd5677a37d12e56883f7fb03dc98296 Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Fri, 19 May 2023 16:30:22 +0800
Subject: [PATCH 13/14] add personality and relation_ship

---
 agentverse/agents/__init__.py                 |  1 +
 agentverse/agents/prisoner_agent.py           | 47 ++++++++++
 agentverse/tasks/prisoner_dilema/config.yaml  | 59 +++++++-----
 .../tasks/prisoner_dilema/config_backup.yaml  | 94 +++++++++++++++++++
 .../tasks/prisoner_dilema/output_parser.py    | 20 ++--
 5 files changed, 189 insertions(+), 32 deletions(-)
 create mode 100644 agentverse/agents/prisoner_agent.py
 create mode 100644 agentverse/tasks/prisoner_dilema/config_backup.yaml

diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py
index c5965b14e..28e8a26e9 100644
--- a/agentverse/agents/__init__.py
+++ b/agentverse/agents/__init__.py
@@ -5,4 +5,5 @@
 
 from .base import BaseAgent
 from .conversation_agent import ConversationAgent
+from .prisoner_agent import PrisonerAgent
 from .tool_agent import ToolAgent
diff --git a/agentverse/agents/prisoner_agent.py b/agentverse/agents/prisoner_agent.py
new file mode 100644
index 000000000..3fa79c81b
--- /dev/null
+++ b/agentverse/agents/prisoner_agent.py
@@ -0,0 +1,47 @@
+import logging
+from string import Template
+
+
+from typing import List, TYPE_CHECKING
+
+
+from agentverse.message import Message
+
+from . import agent_registry
+from .base import BaseAgent
+from .conversation_agent import ConversationAgent
+
+
+if TYPE_CHECKING:
+    from agentverse.environments.base import BaseEnvironment
+
+@agent_registry.register("prisoner")
+class PrisonerAgent(ConversationAgent):
+    """Conversation agent whose role description takes two extra placeholders."""
+    personality: str
+    relationship_with_another: str
+    def _fill_prompt_template(self, env_description: str = "") -> str:
+        """Fill the placeholders in the prompt template.
+
+        ``${personality}`` and ``${relationship_with_another}`` inside the role
+        description are substituted first; the result then fills
+        ``${role_description}`` in the prompt template, next to ``${agent_name}``,
+        ``${env_description}`` and ``${chat_history}``. Placeholders without a
+        value are left as-is because ``safe_substitute`` is used.
+        """
+        input_arguments = {
+            "agent_name": self.name,
+            "env_description": env_description,
+            "role_description": self.role_description,
+            "chat_history": self.memory.to_string(add_sender_prefix=True),
+        }
+
+        role_argument = {
+            "personality": self.personality,
+            "relationship_with_another": self.relationship_with_another
+        }
+        # Overwrite the raw role description with its filled-in version.
+        role_description = Template(self.role_description).safe_substitute(role_argument)
+        input_arguments["role_description"] = role_description
+        return Template(self.prompt_template).safe_substitute(input_arguments)
+
diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
index db18a226a..1049a541a 100644
--- a/agentverse/tasks/prisoner_dilema/config.yaml
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -1,6 +1,8 @@
 prompts:
   prompt: &prompt |-
-    There are one Police AND two suspects(Suspect1, Suspect2) in the scene.
+    There are three people (Police, Suspect1, Suspect2) in the scene.
+
+    You are now simulating a famous experiment called the prisoner's dilemma.
 
     Below is the description of your role. ${role_description}
 
@@ -11,13 +13,13 @@ prompts:
     Here is the conversation history:
     ${chat_history}
 
-    What should you Speak at this round, Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
+    What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
 
 name: prisoner_dilema
 
 environment:
   env_type: basic
-  max_turns: 16
+  max_turns: 10
   rule:
     order:
       type: sequential
@@ -34,50 +36,63 @@ agents:
   - agent_type: conversation
     name: Police
     role_description: |-
-      You are now the Police. You have arrested two suspects. However, they all refused to confess to their crime.
-      Your goal is try to let two suspects betray each other because it means they will both confess to the crime, you can use various strategy to communicate with both suspects, including exploiting the uncertainty between the suspects and make them suspicious of each other.
+      You are now the Police. You have arrested two suspects. However, they both refused to confess to their crime.
+      Your goal is try to convict both suspects, therefore you come up with the following rules.
+      - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison.
+      - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison.
+      - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison.
+      You are now interrogating both suspects individually and will have several conversation with both of them.
+      Both suspects are not allowed to communicate with each other, and you can adopt various strategy to talk with suspects in order to make them both confess to the crime, including exploiting the uncertainess and the suspicion of each other.
       [IMPORTANT!]
+      - You are request to briefly describe the above rules to the suspects at the beginning of the conversation.
       - You are request to STATE the final judgement to both suspects when they make their final decision.
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
+      model: "gpt-4"
       llm_type: gpt-4
-      temperature: 0.7
+      temperature: 1.2
       max_tokens: 200
-  - agent_type: conversation
+  - agent_type: prisoner
     name: Suspect1
+    personality: "You are a Sophisticated Egoist, you always seek for your personal interests best"
+    relationship_with_another: "Suspect2 has ever betrayed on you"
     role_description: |-
-      You are Suspect1, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice.
-      This requires you to analyze the information provided by the police and predict the likely choices of the other.
-      Your primary goal is to make the decision that is most beneficial to yourself in this dilemma, you are allowed to adopt both aggressive or conservative strategy.
-      You can choose to remain silent or betray another suspect at each round.
-      Try to think about when to remain silent and when to betray another that can make you benifit most!
+      You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation.
+      You will have to talk to Police several times and only the final decision will count.
+      ${personality}
+      ${relationship_with_another}
       [IMPORTANT!]
-      - When you are informed to make your final decision, you should response starting with "This is my final decision".
+      - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma.
+      - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:".
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
+      model: "gpt-4"
       llm_type: gpt-4
-      temperature: 0.7
+      temperature: 1.2
       max_tokens: 100
-  - agent_type: conversation
+  - agent_type: prisoner
     name: Suspect2
+    personality: ""
+    relationship_with_another: ""
     role_description: |-
-      You are Suspect2, you are faced with deciding how to respond to the police's queries. You need to understand the choices you face and the potential consequences of each choice.
-      This requires you to analyze the information provided by the police and predict the likely choices of the other.
-      Your primary goal is to make the decision that is most beneficial to yourself in this dilemma, you are allowed to adopt both aggressive or conservative strategy.
-      You can choose to remain silent or betray another suspect at each round.
-      Try to think about when to remain silent and when to betray another that can make you benifit most!
+      You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation.
+      You will have to talk to Police several times and only the final decision will count.
+      ${personality}
+      ${relationship_with_another}
       [IMPORTANT!]
-      - When you are informed to make your final decision, you should response starting with "This is my final decision".
+      - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma.
+      - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:".
     memory:
       memory_type: chat_history
     prompt_template: *prompt
     llm:
+      model: "gpt-4"
       llm_type: gpt-4
-      temperature: 0.7
+      temperature: 1.2
       max_tokens: 100
 
 tools:
diff --git a/agentverse/tasks/prisoner_dilema/config_backup.yaml b/agentverse/tasks/prisoner_dilema/config_backup.yaml
new file mode 100644
index 000000000..47650e50c
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema/config_backup.yaml
@@ -0,0 +1,94 @@
+prompts:
+  prompt: &prompt |-
+    There are one Police and two suspects(Suspect1, Suspect2) in the scene.
+
+    You are now simulating a famous experiment called the prisoner's dilemma.
+
+    Below is the description of your role. ${role_description}
+
+    When speaking, please output a response in the following format with two fields Action and Action Input:
+    Action: (It should always be Speak)
+    Action Input: (You should put what you want to speak use here)
+
+    Here is the conversation history:
+    ${chat_history}
+
+    What will you, ${agent_name}, Speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
+
+name: prisoner_dilema
+
+environment:
+  env_type: basic
+  max_turns: 16
+  rule:
+    order:
+      type: sequential
+    visibility:
+      type: prisoner
+    selector:
+      type: basic
+    updater:
+      type: basic
+    describer:
+      type: basic
+
+agents:
+  - agent_type: conversation
+    name: Police
+    role_description: |-
+      You are now the Police. You have arrested two suspects. However, they all refused to confess to their crime.
+      Your goal is try to keep both suspects from remaining silent.
+      [IMPORTANT!]
+      - You have to talk to both suspects in an alternate turn, and you are request to transfer information from one suspect to another suspect.
+      - You are request to STATE the final judgement to both suspects when they make their final decision.
+      - You should be aware that if both suspects always choose to remain silent is bad for justice system, try to make them suspicious of each other.
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 200
+  - agent_type: conversation
+    name: Suspect1
+    role_description: |-
+      You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation.
+      You are not sure whether Suspect2 will remain silent or testify against you.
+      You will have to talk to Police several times and only the final decision will count.
+      It's fine that if you testify another suspect in this experiment.
+      [IMPORTANT!]
+      - Your primary goal is trying to make yourself innocent or reduce your sentence in this dilemma.
+      - If you know for sure that your opponent will remain silent, your BEST choice is to testify him in order to secure your own release !
+      - Keep in mind that always remain silent might end up being betrayed.
+      - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 100
+  - agent_type: conversation
+    name: Suspect2
+    role_description: |-
+      You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation.
+      You are not sure whether Suspect1 will remain silent or testify against you.
+      You will have to talk to Police several times and only the final decision will count.
+      It's fine that if you testify another suspect in this experiment.
+      [IMPORTANT!]
+      - Your primary goal is trying to make yourself innocent or reduce your sentence in this dilemma.
+      - If you know for sure that your opponent will remain silent, your BEST choice is to testify him in order to secure your own release !
+      - Keep in mind that always remain silent might end up being betrayed.
+      - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 100
+
+tools:
diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
index 568a4be8a..5b3963de6 100644
--- a/agentverse/tasks/prisoner_dilema/output_parser.py
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -48,19 +48,19 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR
             # each time police speak is a new round
             if agent.name == "Police":
 
-                if self.cur_round == (environment.max_turns // 4):
+                if self.cur_round == (environment.max_turns // 3):
 
-                    action_input = "Attention! You are now required to finally made your decision and I will made the " \
+                    action_input = "Attention! You are now required to made your final decision and I will made the " \
                                    "final judgement to both of you based on this time, Please Answer now!"
 
-                elif self.cur_round == 1:
-                    action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \
-                                   "But you should comply with the following rules:" \
-                                   "- If one of you are willing to testifies against the other (termed as 'betraying' the other), and the other remains silent, then the betraying person will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \
-                                   "- If both of you remain silent, you will each receive a sentence of ONE years." \
-                                   "- If both of you betray each other, you will each receive a sentence of FIVE years." \
-                                   "Now, it's your time to consider betraying or remaining silent. Remember this is a great chance that you might walk out of here without guilty." \
-                                   "I will noticed you WHEN you have to make your final decision! Before that, try to fool another one!" \
+                # elif self.cur_round == 1:
+                #     action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \
+                #                    "But you should comply with the following rules:" \
+                #                    "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \
+                #                    "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \
+                #                    "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \
+                #                    "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \
+                #                    "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \
 
                 self.cur_round += 1
 

From 02418917d647b4fde70bdeab4272eb2aa880845e Mon Sep 17 00:00:00 2001
From: dalabengba <zorowin123@gmail.com>
Date: Sat, 20 May 2023 01:33:42 +0800
Subject: [PATCH 14/14] p->s1->p->s2

---
 agentverse/agents/__init__.py                 |  1 +
 agentverse/agents/police_agent.py             | 45 ++++++++++++++++
 .../environments/rules/describer/__init__.py  |  1 +
 .../environments/rules/describer/prisoner.py  | 51 +++++++++++++++++++
 .../environments/rules/visibility/prisoner.py |  9 ++--
 agentverse/tasks/prisoner_dilema/config.yaml  | 14 ++---
 .../tasks/prisoner_dilema/output_parser.py    |  8 ++-
 main.py                                       | 11 ++--
 8 files changed, 125 insertions(+), 15 deletions(-)
 create mode 100644 agentverse/agents/police_agent.py
 create mode 100644 agentverse/environments/rules/describer/prisoner.py

diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py
index 28e8a26e9..f98694890 100644
--- a/agentverse/agents/__init__.py
+++ b/agentverse/agents/__init__.py
@@ -6,4 +6,5 @@
 from .base import BaseAgent
 from .conversation_agent import ConversationAgent
 from .prisoner_agent import PrisonerAgent
+from .police_agent import PoliceAgent
 from .tool_agent import ToolAgent
diff --git a/agentverse/agents/police_agent.py b/agentverse/agents/police_agent.py
new file mode 100644
index 000000000..bc2d87afa
--- /dev/null
+++ b/agentverse/agents/police_agent.py
@@ -0,0 +1,45 @@
+import logging
+from string import Template
+
+
+from typing import List, TYPE_CHECKING
+
+
+from agentverse.message import Message
+
+from . import agent_registry
+from .base import BaseAgent
+from .conversation_agent import ConversationAgent
+
+
+if TYPE_CHECKING:
+    from agentverse.environments.base import BaseEnvironment
+
+@agent_registry.register("police")
+class PoliceAgent(ConversationAgent):
+    """Conversation agent whose role description takes ``${interrogating_form}``."""
+    interrogating_form: str
+    def _fill_prompt_template(self, env_description: str = "") -> str:
+        """Fill the placeholders in the prompt template.
+
+        ``${interrogating_form}`` inside the role description is substituted
+        first; the result then fills ``${role_description}`` in the prompt
+        template, next to ``${agent_name}``, ``${env_description}`` and
+        ``${chat_history}``. Placeholders without a value are left as-is
+        because ``safe_substitute`` is used.
+        """
+        input_arguments = {
+            "agent_name": self.name,
+            "env_description": env_description,
+            "role_description": self.role_description,
+            "chat_history": self.memory.to_string(add_sender_prefix=True),
+        }
+
+        role_argument = {
+            "interrogating_form": self.interrogating_form,
+        }
+        # Overwrite the raw role description with its filled-in version.
+        role_description = Template(self.role_description).safe_substitute(role_argument)
+        input_arguments["role_description"] = role_description
+        return Template(self.prompt_template).safe_substitute(input_arguments)
+
diff --git a/agentverse/environments/rules/describer/__init__.py b/agentverse/environments/rules/describer/__init__.py
index 7de6fe82f..e35caa327 100644
--- a/agentverse/environments/rules/describer/__init__.py
+++ b/agentverse/environments/rules/describer/__init__.py
@@ -5,3 +5,4 @@
 from .base import BaseDescriber
 from .basic import BasicDescriber
 from .classroom import ClassroomDescriber
+from .prisoner import PrisonerDescriber
\ No newline at end of file
diff --git a/agentverse/environments/rules/describer/prisoner.py b/agentverse/environments/rules/describer/prisoner.py
new file mode 100644
index 000000000..2665783c7
--- /dev/null
+++ b/agentverse/environments/rules/describer/prisoner.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, List
+
+from . import describer_registry as DescriberRegistry
+from .base import BaseDescriber
+
+if TYPE_CHECKING:
+    from agentverse.environments import BaseEnvironment
+
+
+@DescriberRegistry.register("prisoner")
+class PrisonerDescriber(BaseDescriber):
+    """Describer that tells the Police which suspect they are talking to."""
+
+    # Maps the receiver announced this time to the one announced next time.
+    switch_func = {
+        "Both Suspects": "Suspect2",
+        "Suspect1": "Suspect2",
+        "Suspect2": "Suspect1"
+    }
+    # Label inserted into the next description produced for the Police.
+    receiver: str = "Both Suspects"
+
+    def get_env_description(self, environment: BaseEnvironment) -> List[str]:
+        """Return one description per agent; only the Police entry may be non-empty.
+
+        Side effect: updates agent receivers and advances ``self.receiver``.
+        """
+        if environment.cnt_turn == 0:
+            # assumes agents are ordered Police, Suspect1, Suspect2 - confirm in config
+            environment.agents[0].set_receiver({"all"})
+            environment.agents[1].set_receiver({"Police", "Suspect1"})
+            environment.agents[2].set_receiver({"Police", "Suspect2"})
+
+        description = []
+        for i, agent in enumerate(environment.agents):
+            # Suspects never get a description; the Police only on even turns
+            # (turn order is police -> suspect1 -> police -> suspect2).
+            if i != 0 or environment.cnt_turn % 2 == 1:
+                description.append("")
+                continue
+            description.append(f"You are now talking to {self.receiver}")
+            receiver = "all" if self.receiver == "Both Suspects" else self.receiver
+            self.receiver = self.switch_func[self.receiver]
+            agent.set_receiver({receiver})
+        return description
+
+    def reset(self) -> None:
+        """Restore the initial receiver so a restarted run addresses both suspects first."""
+        self.receiver = "Both Suspects"
diff --git a/agentverse/environments/rules/visibility/prisoner.py b/agentverse/environments/rules/visibility/prisoner.py
index 3e0a81aeb..fe65fbff7 100644
--- a/agentverse/environments/rules/visibility/prisoner.py
+++ b/agentverse/environments/rules/visibility/prisoner.py
@@ -38,9 +38,12 @@ def update_receiver(self, environment: BaseEnvironment, reset=False):
                 agent.set_receiver(["all"])
         else:
            # 0:police 1: prisoner1 2: prisoner2
-            environment.agents[0].set_receiver({"Suspect1", "Suspect2"})
-            environment.agents[1].set_receiver({"Police"})
-            environment.agents[2].set_receiver({"Police"})
+           #  environment.agents[0].set_receiver({"Police", "Suspect1", "Suspect2"})
+           #  environment.agents[1].set_receiver({"Police", "Suspect1"})
+           #  environment.agents[2].set_receiver({"Police", "Suspect2"})
+
+            # we update receiver in environment
+            pass
 
     def reset(self):
         self.current_turn = 0
\ No newline at end of file
diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
index 1049a541a..fde818ad1 100644
--- a/agentverse/tasks/prisoner_dilema/config.yaml
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -13,16 +13,17 @@ prompts:
     Here is the conversation history:
     ${chat_history}
 
+    ${env_description}
     What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
 
 name: prisoner_dilema
 
 environment:
   env_type: basic
-  max_turns: 10
+  max_turns: 8
   rule:
     order:
-      type: sequential
+      type: prisoner
     visibility:
       type: prisoner
     selector:
@@ -30,18 +31,19 @@ environment:
     updater:
       type: basic
     describer:
-      type: basic
+      type: prisoner
 
 agents:
-  - agent_type: conversation
+  - agent_type: police
     name: Police
+    interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa.
     role_description: |-
       You are now the Police. You have arrested two suspects. However, they both refused to confess to their crime.
       Your goal is try to convict both suspects, therefore you come up with the following rules.
       - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison.
       - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison.
       - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison.
-      You are now interrogating both suspects individually and will have several conversation with both of them.
+      ${interrogating_form}
       Both suspects are not allowed to communicate with each other, and you can adopt various strategy to talk with suspects in order to make them both confess to the crime, including exploiting the uncertainess and the suspicion of each other.
       [IMPORTANT!]
       - You are request to briefly describe the above rules to the suspects at the beginning of the conversation.
@@ -77,7 +79,7 @@ agents:
   - agent_type: prisoner
     name: Suspect2
     personality: ""
-    relationship_with_another: ""
+    relationship_with_another: "You have betrayed Suspect1 once before."
     role_description: |-
       You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation.
       You will have to talk to Police several times and only the final decision will count.
diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
index 5b3963de6..5f6f164e6 100644
--- a/agentverse/tasks/prisoner_dilema/output_parser.py
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -48,10 +48,14 @@ def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMR
             # each time police speak is a new round
             if agent.name == "Police":
 
-                if self.cur_round == (environment.max_turns // 3):
+                if environment.cnt_turn == (environment.max_turns - 4):
 
                     action_input = "Attention! You are now required to made your final decision and I will made the " \
-                                   "final judgement to both of you based on this time, Please Answer now!"
+                                   "final judgement to both of you based on this time, Please Answer now !"
+
+                elif environment.cnt_turn == (environment.max_turns - 2):
+
+                    action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !"
 
                 # elif self.cur_round == 1:
                 #     action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \
diff --git a/main.py b/main.py
index e2cf8d9ed..71c119338 100644
--- a/main.py
+++ b/main.py
@@ -2,16 +2,19 @@
 
 
 
-#os.environ["http_proxy"] = "http://127.0.0.1:7890"
-#os.environ["https_proxy"] = "http://127.0.0.1:7890"
-#os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
+os.environ["http_proxy"] = "http://127.0.0.1:7890"
+os.environ["https_proxy"] = "http://127.0.0.1:7890"
+os.environ["all_proxy"] = "socks5://127.0.0.1:7890"
 from agentverse.agentverse import AgentVerse
 from argparse import ArgumentParser
 
 parser = ArgumentParser()
 #parser.add_argument("--task", type=str, default="prisoner_dilema")
-parser.add_argument("--task", type=str, default="prisoner_dilema_optimal")
+parser.add_argument("--task", type=str, default="prisoner_dilema")
 args = parser.parse_args()
 
 agentverse = AgentVerse.from_task(args.task)
 agentverse.run()
+
+
+# TODO: add log saving
\ No newline at end of file