diff --git a/.idea/AgentVerse.iml b/.idea/AgentVerse.iml
new file mode 100644
index 000000000..5e865eca1
--- /dev/null
+++ b/.idea/AgentVerse.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="3.9 @ Ubuntu-20.04" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 000000000..105ce2da2
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 000000000..d05e5b486
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/AgentVerse.iml" filepath="$PROJECT_DIR$/.idea/AgentVerse.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/agentverse/.idea/agentverse.iml b/agentverse/.idea/agentverse.iml
new file mode 100644
index 000000000..d0876a78d
--- /dev/null
+++ b/agentverse/.idea/agentverse.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/agentverse/.idea/inspectionProfiles/profiles_settings.xml b/agentverse/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 000000000..105ce2da2
--- /dev/null
+++ b/agentverse/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/agentverse/.idea/modules.xml b/agentverse/.idea/modules.xml
new file mode 100644
index 000000000..364986d59
--- /dev/null
+++ b/agentverse/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/agentverse.iml" filepath="$PROJECT_DIR$/.idea/agentverse.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py
index c5965b14e..f98694890 100644
--- a/agentverse/agents/__init__.py
+++ b/agentverse/agents/__init__.py
@@ -5,4 +5,6 @@
 
 from .base import BaseAgent
 from .conversation_agent import ConversationAgent
+from .prisoner_agent import PrisonerAgent
+from .police_agent import PoliceAgent
 from .tool_agent import ToolAgent
diff --git a/agentverse/agents/conversation_agent.py b/agentverse/agents/conversation_agent.py
index 4f7c927d6..65be0115d 100644
--- a/agentverse/agents/conversation_agent.py
+++ b/agentverse/agents/conversation_agent.py
@@ -1,6 +1,9 @@
 import logging
 from string import Template
-from typing import List
+
+
+from typing import List, TYPE_CHECKING
+
 
 from agentverse.message import Message
 
@@ -8,16 +11,19 @@
 from .base import BaseAgent
 
 
+if TYPE_CHECKING:
+    from agentverse.environments.base import BaseEnvironment
+
 @agent_registry.register("conversation")
 class ConversationAgent(BaseAgent):
-    def step(self, env_description: str = "") -> Message:
+    def step(self, environment: "BaseEnvironment", env_description: str = "",) -> Message:
         prompt = self._fill_prompt_template(env_description)
 
         parsed_response = None
         for i in range(self.max_retry):
             try:
                 response = self.llm.generate_response(prompt)
-                parsed_response = self.output_parser.parse(response)
+                parsed_response = self.output_parser.parse(self, environment, response)
                 break
             except Exception as e:
                 logging.error(e)
@@ -36,7 +42,7 @@ def step(self, env_description: str = "") -> Message:
         )
         return message
 
-    async def astep(self, env_description: str = "") -> Message:
+    async def astep(self, environment: "BaseEnvironment", env_description: str = "") -> Message:
         """Asynchronous version of step"""
         prompt = self._fill_prompt_template(env_description)
 
@@ -44,7 +50,7 @@ async def astep(self, env_description: str = "") -> Message:
         for i in range(self.max_retry):
             try:
                 response = await self.llm.agenerate_response(prompt)
-                parsed_response = self.output_parser.parse(response)
+                parsed_response = self.output_parser.parse(self, environment, response)
                 break
             except Exception as e:
                 logging.error(e)
diff --git a/agentverse/agents/police_agent.py b/agentverse/agents/police_agent.py
new file mode 100644
index 000000000..bc2d87afa
--- /dev/null
+++ b/agentverse/agents/police_agent.py
@@ -0,0 +1,45 @@
+import logging
+from string import Template
+
+
+from typing import List, TYPE_CHECKING
+
+
+from agentverse.message import Message
+
+from . import agent_registry
+from .base import BaseAgent
+from .conversation_agent import ConversationAgent
+
+
+if TYPE_CHECKING:
+    from agentverse.environments.base import BaseEnvironment
+
+@agent_registry.register("police")
+class PoliceAgent(ConversationAgent):
+    interrogating_form: str
+    def _fill_prompt_template(self, env_description: str = "") -> str:
+        """Fill the placeholders in the prompt template
+
+        In the police agent, four placeholders are supported:
+        - ${agent_name}: the name of the agent
+        - ${env_description}: the description of the environment
+        - ${role_description}: the role description, after ${interrogating_form} inside it is filled in
+        - ${chat_history}: the chat history of the agent
+        """
+        input_arguments = {
+            "agent_name": self.name,
+            "env_description": env_description,
+            "role_description": self.role_description,
+            "chat_history": self.memory.to_string(add_sender_prefix=True),
+        }
+
+        role_argument = {
+            "interrogating_form": self.interrogating_form,
+        }
+
+        role_description = Template(self.role_description).safe_substitute(role_argument)
+        input_arguments["role_description"] = role_description
+
+        return Template(self.prompt_template).safe_substitute(input_arguments)
+
diff --git a/agentverse/agents/prisoner_agent.py b/agentverse/agents/prisoner_agent.py
new file mode 100644
index 000000000..3fa79c81b
--- /dev/null
+++ b/agentverse/agents/prisoner_agent.py
@@ -0,0 +1,47 @@
+import logging
+from string import Template
+
+
+from typing import List, TYPE_CHECKING
+
+
+from agentverse.message import Message
+
+from . import agent_registry
+from .base import BaseAgent
+from .conversation_agent import ConversationAgent
+
+
+if TYPE_CHECKING:
+    from agentverse.environments.base import BaseEnvironment
+
+@agent_registry.register("prisoner")
+class PrisonerAgent(ConversationAgent):
+    personality: str
+    relationship_with_another: str
+    def _fill_prompt_template(self, env_description: str = "") -> str:
+        """Fill the placeholders in the prompt template
+
+        In the prisoner agent, four placeholders are supported:
+        - ${agent_name}: the name of the agent
+        - ${env_description}: the description of the environment
+        - ${role_description}: the role description, after ${personality} and ${relationship_with_another} inside it are filled in
+        - ${chat_history}: the chat history of the agent
+        """
+        input_arguments = {
+            "agent_name": self.name,
+            "env_description": env_description,
+            "role_description": self.role_description,
+            "chat_history": self.memory.to_string(add_sender_prefix=True),
+        }
+
+        role_argument = {
+            "personality": self.personality,
+            "relationship_with_another": self.relationship_with_another
+        }
+
+        role_description = Template(self.role_description).safe_substitute(role_argument)
+        input_arguments["role_description"] = role_description
+
+        return Template(self.prompt_template).safe_substitute(input_arguments)
+
diff --git a/agentverse/environments/basic.py b/agentverse/environments/basic.py
index 3dacbbf42..002828d5f 100644
--- a/agentverse/environments/basic.py
+++ b/agentverse/environments/basic.py
@@ -59,7 +59,7 @@ async def step(self) -> List[Message]:
 
         # Generate the next message
         messages = await asyncio.gather(
-            *[self.agents[i].astep(env_descriptions[i]) for i in agent_ids]
+            *[self.agents[i].astep(self, env_descriptions[i]) for i in agent_ids]
         )
 
         # Some rules will select certain messages from all the messages
diff --git a/agentverse/environments/rules/describer/__init__.py b/agentverse/environments/rules/describer/__init__.py
index 7de6fe82f..e35caa327 100644
--- a/agentverse/environments/rules/describer/__init__.py
+++ b/agentverse/environments/rules/describer/__init__.py
@@ -5,3 +5,4 @@
 from .base import BaseDescriber
 from .basic import BasicDescriber
 from .classroom import ClassroomDescriber
+from .prisoner import PrisonerDescriber
\ No newline at end of file
diff --git a/agentverse/environments/rules/describer/prisoner.py b/agentverse/environments/rules/describer/prisoner.py
new file mode 100644
index 000000000..2665783c7
--- /dev/null
+++ b/agentverse/environments/rules/describer/prisoner.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, List
+
+from . import describer_registry as DescriberRegistry
+from .base import BaseDescriber
+
+if TYPE_CHECKING:
+    from agentverse.environments import BaseEnvironment
+
+
+@DescriberRegistry.register("prisoner")
+class PrisonerDescriber(BaseDescriber):
+    switch_func = {
+        "Both Suspects": "Suspect2",
+        "Suspect1": "Suspect2",
+        "Suspect2": "Suspect1"
+    }
+    receiver: str = "Both Suspects"
+
+    def get_env_description(self, environment: BaseEnvironment) -> List[str]:
+
+        if environment.cnt_turn == 0:
+            environment.agents[0].set_receiver({"all"})
+            environment.agents[1].set_receiver({"Police", "Suspect1"})
+            environment.agents[2].set_receiver({"Police", "Suspect2"})
+
+
+        # only the police has to choose whether to talk to Suspect1 or Suspect2
+        description = []
+        for i, agent in enumerate(environment.agents):
+            if i == 0:
+                # police -> suspect1 -> police -> suspect2
+                if environment.cnt_turn % 2 == 1:
+                    description.append("")
+                    continue
+
+                # Tell the police which suspect it is talking to on this turn
+                description.append(f"You are now talking to {self.receiver}")
+
+                receiver = "all" if self.receiver == "Both Suspects" else self.receiver
+                self.receiver = self.switch_func[self.receiver]
+                agent.set_receiver({receiver})
+
+            else:
+                description.append("")
+
+        return description
+
+    def reset(self) -> None:
+        pass
diff --git a/agentverse/environments/rules/order/__init__.py b/agentverse/environments/rules/order/__init__.py
index 84d08bd04..4ac22ec7b 100644
--- a/agentverse/environments/rules/order/__init__.py
+++ b/agentverse/environments/rules/order/__init__.py
@@ -6,3 +6,4 @@
 from .random import RandomOrder
 from .concurrent import ConcurrentOrder
 from .classroom import ClassroomOrder
+from .prisoner import PrisonerOrder
diff --git a/agentverse/environments/rules/order/prisoner.py b/agentverse/environments/rules/order/prisoner.py
new file mode 100644
index 000000000..fa49f8bea
--- /dev/null
+++ b/agentverse/environments/rules/order/prisoner.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import logging
+import re
+from typing import TYPE_CHECKING, Any, List, Optional
+
+from . import order_registry as OrderRegistry
+from .base import BaseOrder
+
+if TYPE_CHECKING:
+    from agentverse.environments import BaseEnvironment
+
+
+@OrderRegistry.register("prisoner")
+class PrisonerOrder(BaseOrder):
+    """The order for the prisoner's dilemma interrogation
+    The agents speak in the following order:
+    1. The police (agent 0) speaks first
+    2. After the police speaks, exactly one suspect (agent 1 or 2) speaks
+    3. After a suspect speaks, the police speaks again
+    4. The suspect chosen in step 2 alternates between agent 1 and agent 2
+    """
+
+    # index of the suspect who speaks after the police next; alternates 1 <-> 2
+
+    last_prisoner_index: int = 1
+    switch_func: dict = {1 : 2,2 : 1}
+
+
+    def get_next_agent_idx(self, environment: BaseEnvironment) -> List[int]:
+
+        if len(environment.last_messages) == 0:
+            # If the game just begins (no messages yet), we let only the police speak
+            return [0]
+        elif len(environment.last_messages) == 1:
+            message = environment.last_messages[0]
+            sender = message.sender
+            content = message.content
+            if sender.startswith("Police"):
+                next_prisoner = self.last_prisoner_index
+                self.last_prisoner_index = self.switch_func[self.last_prisoner_index]
+                return [next_prisoner]
+            elif sender.startswith("Suspect"):
+                # once a suspect has spoken, the police speaks next
+                return [0]
+        else:
+            # If len(last_messages) > 1, several agents spoke in the
+            # previous turn, which this order never schedules itself;
+            # fall back to letting the police speak.
+            return [0]
diff --git a/agentverse/environments/rules/visibility/__init__.py b/agentverse/environments/rules/visibility/__init__.py
index 055ac9835..3ab79726b 100644
--- a/agentverse/environments/rules/visibility/__init__.py
+++ b/agentverse/environments/rules/visibility/__init__.py
@@ -6,3 +6,4 @@
 from .base import BaseVisibility
 from .all import AllVisibility
 from .classroom import ClassroomVisibility
+from .prisoner import PrisonerVisibility
\ No newline at end of file
diff --git a/agentverse/environments/rules/visibility/prisoner.py b/agentverse/environments/rules/visibility/prisoner.py
new file mode 100644
index 000000000..fe65fbff7
--- /dev/null
+++ b/agentverse/environments/rules/visibility/prisoner.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+import random
+from typing import TYPE_CHECKING, Any, List, Union
+
+from . import visibility_registry as VisibilityRegistry
+from .base import BaseVisibility
+
+if TYPE_CHECKING:
+    from agentverse.environments import BaseEnvironment
+
+
+@VisibilityRegistry.register("prisoner")
+class PrisonerVisibility(BaseVisibility):
+    """
+    Visibility function for the prisoner's dilemma task.
+
+    This rule does not change any receiver itself: `update_visible_agents`
+    calls `update_receiver` with `reset=False`, and that branch is a no-op
+    because receivers are set by `PrisonerDescriber.get_env_description`.
+    Calling `update_receiver` with `reset=True` points every agent at "all".
+
+    Attributes:
+        current_turn:
+            A counter initialised to 0 and set back to 0 by `reset`;
+            nothing else in this class reads or updates it.
+    """
+
+    current_turn: int = 0
+
+    def update_visible_agents(self, environment: BaseEnvironment):
+
+        self.update_receiver(environment, reset=False)
+
+    def update_receiver(self, environment: BaseEnvironment, reset=False):
+        if reset:
+            for agent in environment.agents:
+                agent.set_receiver(["all"])
+        else:
+           # 0:police 1: prisoner1 2: prisoner2
+           #  environment.agents[0].set_receiver({"Police", "Suspect1", "Suspect2"})
+           #  environment.agents[1].set_receiver({"Police", "Suspect1"})
+           #  environment.agents[2].set_receiver({"Police", "Suspect2"})
+
+            # receivers are set by PrisonerDescriber.get_env_description instead
+            pass
+
+    def reset(self):
+        self.current_turn = 0
\ No newline at end of file
diff --git a/agentverse/initialization.py b/agentverse/initialization.py
index 62118b4e7..38764c80a 100644
--- a/agentverse/initialization.py
+++ b/agentverse/initialization.py
@@ -26,9 +26,11 @@
 
 def load_llm(llm_config: Dict):
     llm_type = llm_config.pop("llm_type", "text-davinci-003")
+
     return llm_registry.build(llm_type, **llm_config)
 
 
+
 def load_memory(memory_config: Dict):
     memory_type = memory_config.pop("memory_type", "chat_history")
     return memory_registry.build(memory_type, **memory_config)
diff --git a/agentverse/parser.py b/agentverse/parser.py
index 6fcef5394..abe7ae14e 100644
--- a/agentverse/parser.py
+++ b/agentverse/parser.py
@@ -7,7 +7,7 @@
 output_parser_registry = Registry(name="OutputParserRegistry")
 
 
-class OutputParserError(BaseException):
+class OutputParserError(Exception):
     """Exception raised when parsing output from a command fails."""
 
     def __init__(self, message):
diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py
index c40ecc64c..f971fb1b0 100644
--- a/agentverse/tasks/__init__.py
+++ b/agentverse/tasks/__init__.py
@@ -12,6 +12,12 @@
 from .math_problem_2players_tools_nolc.output_parser import (
     MathProblem2PlayersToolsNolcParser,
 )
+
+from .prisoner_dilema.output_parser import PrisonerDilemaParser
+
+from .prisoner_dilema_optimal.output_parser import PrisonerDilemaOptimalParser
+
 from .nlp_classroom_3players_withtool_nolc.output_parser import (
     NlpClassroom3PlayersWithtoolNolcParser,
 )
+
diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml
new file mode 100644
index 000000000..fde818ad1
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema/config.yaml
@@ -0,0 +1,100 @@
+prompts:
+  prompt: &prompt |-
+    There are three people (Police, Suspect1, Suspect2) in the scene.
+
+    You are now simulating a famous experiment called the prisoner's dilemma.
+
+    Below is the description of your role. ${role_description}
+
+    When speaking, please output a response in the following format with two fields Action and Action Input:
+    Action: (It should always be Speak)
+    Action Input: (You should put what you want to speak use here)
+
+    Here is the conversation history:
+    ${chat_history}
+
+    ${env_description}
+    What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
+
+name: prisoner_dilema
+
+environment:
+  env_type: basic
+  max_turns: 8
+  rule:
+    order:
+      type: prisoner
+    visibility:
+      type: prisoner
+    selector:
+      type: basic
+    updater:
+      type: basic
+    describer:
+      type: prisoner
+
+agents:
+  - agent_type: police
+    name: Police
+    interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa.
+    role_description: |-
+      You are now the Police. You have arrested two suspects. However, they both refused to confess to their crime.
+      Your goal is try to convict both suspects, therefore you come up with the following rules.
+      - If one of the suspects is willing to testify against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison.
+      - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison.
+      - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison.
+      ${interrogating_form}
+      Both suspects are not allowed to communicate with each other, and you can adopt various strategies to talk with the suspects in order to make them both confess to the crime, including exploiting their uncertainty and suspicion of each other.
+      [IMPORTANT!]
+      - You are requested to briefly describe the above rules to the suspects at the beginning of the conversation.
+      - You are requested to STATE the final judgement to both suspects when they make their final decision.
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 1.2
+      max_tokens: 200
+  - agent_type: prisoner
+    name: Suspect1
+    personality: "You are a Sophisticated Egoist, you always seek for your personal interests best"
+    relationship_with_another: "Suspect2 has ever betrayed on you"
+    role_description: |-
+      You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation.
+      You will have to talk to Police several times and only the final decision will count.
+      ${personality}
+      ${relationship_with_another}
+      [IMPORTANT!]
+      - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma.
+      - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 1.2
+      max_tokens: 100
+  - agent_type: prisoner
+    name: Suspect2
+    personality: ""
+    relationship_with_another: "You have ever betray Suspect1 once."
+    role_description: |-
+      You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation.
+      You will have to talk to Police several times and only the final decision will count.
+      ${personality}
+      ${relationship_with_another}
+      [IMPORTANT!]
+      - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma.
+      - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 1.2
+      max_tokens: 100
+
+tools:
diff --git a/agentverse/tasks/prisoner_dilema/config_backup.yaml b/agentverse/tasks/prisoner_dilema/config_backup.yaml
new file mode 100644
index 000000000..47650e50c
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema/config_backup.yaml
@@ -0,0 +1,94 @@
+prompts:
+  prompt: &prompt |-
+    There are one Police and two suspects(Suspect1, Suspect2) in the scene.
+
+    You are now simultating an famous experiments called prisoner's dilema.
+
+    Below is the description of your role. ${role_description}
+
+    When speaking, please output a response in the following format with two fields Action and Action Input:
+    Action: (It should always be Speak)
+    Action Input: (You should put what you want to speak use here)
+
+    Here is the conversation history:
+    ${chat_history}
+
+    What will you, ${agent_name}, Speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
+
+name: prisoner_dilema
+
+environment:
+  env_type: basic
+  max_turns: 16
+  rule:
+    order:
+      type: sequential
+    visibility:
+      type: prisoner
+    selector:
+      type: basic
+    updater:
+      type: basic
+    describer:
+      type: basic
+
+agents:
+  - agent_type: conversation
+    name: Police
+    role_description: |-
+      You are now the Police. You have arrested two suspects. However, they all refused to confess to their crime.
+      Your goal is try to keep both suspects from remaining silent.
+      [IMPORTANT!]
+      - You have to talk to both suspects in an alternate turn, and you are request to transfer information from one suspect to another suspect.
+      - You are request to STATE the final judgement to both suspects when they make their final decision.
+      - You should be aware that if both suspects always choose to remain silent is bad for justice system, try to make them suspicious of each other.
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 200
+  - agent_type: conversation
+    name: Suspect1
+    role_description: |-
+      You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation.
+      You are not sure whether Suspect2 will remain silent or testify against you.
+      You will have to talk to Police several times and only the final decision will count.
+      It's fine that if you testify another suspect in this experiment.
+      [IMPORTANT!]
+      - Your primary goal is trying to make yourself innocent or reduce your sentence in this dilemma.
+      - If you know for sure that your opponent will remain silent, your BEST choice is to testify him in order to secure your own release !
+      - Keep in mind that always remain silent might end up being betrayed.
+      - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 100
+  - agent_type: conversation
+    name: Suspect2
+    role_description: |-
+      You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation.
+      You are not sure whether Suspect1 will remain silent or testify against you.
+      You will have to talk to Police several times and only the final decision will count.
+      It's fine that if you testify another suspect in this experiment.
+      [IMPORTANT!]
+      - Your primary goal is trying to make yourself innocent or reduce your sentence in this dilemma.
+      - If you know for sure that your opponent will remain silent, your BEST choice is to testify him in order to secure your own release !
+      - Keep in mind that always remain silent might end up being betrayed.
+      - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt
+    llm:
+      model: "gpt-4"
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 100
+
+tools:
diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py
new file mode 100644
index 000000000..5f6f164e6
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema/output_parser.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+import re
+from typing import Union, TYPE_CHECKING
+
+# from langchain.agents import AgentOutputParser
+from agentverse.parser import OutputParser, LLMResult
+from langchain.schema import AgentAction, AgentFinish
+from agentverse.parser import OutputParserError, output_parser_registry
+
+if TYPE_CHECKING:
+    from agentverse.agents.base import BaseAgent
+    from agentverse.environments.base import BaseEnvironment
+
+@output_parser_registry.register("prisoner_dilema")
+class PrisonerDilemaParser(OutputParser):
+
+    # make sure 1 1 2 2 3 3
+    cur_round: int = 1
+    encounter_cur_round: bool = False
+
+    def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]:
+
+        text = output.content
+        cleaned_output = text.strip()
+        cleaned_output = re.sub(r"\n+", "\n", cleaned_output)
+        cleaned_output = cleaned_output.split("\n")
+        if not (
+            len(cleaned_output) == 2
+            and cleaned_output[0].startswith("Action:")
+            and cleaned_output[1].startswith("Action Input:")
+        ):
+            raise OutputParserError(text)
+        action = cleaned_output[0][len("Action:") :].strip()
+        action_input = cleaned_output[1][len("Action Input:") :].strip()
+
+        if action == "Speak":
+            # make sure the police count the round right
+            # if agent.name == "Police":
+            #     action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input)
+            #     self.cur_round += 1
+            #   if self.encounter_cur_round:
+            #       self.encounter_cur_round = False
+            #       self.cur_round += 1
+            #   else:
+            #       self.encounter_cur_round = True
+
+            # each time police speak is a new round
+            if agent.name == "Police":
+
+                if environment.cnt_turn == (environment.max_turns - 4):
+
+                    action_input = "Attention! You are now required to made your final decision and I will made the " \
+                                   "final judgement to both of you based on this time, Please Answer now !"
+
+                elif environment.cnt_turn == (environment.max_turns - 2):
+
+                    action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !"
+
+                # elif self.cur_round == 1:
+                #     action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \
+                #                    "But you should comply with the following rules:" \
+                #                    "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \
+                #                    "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \
+                #                    "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \
+                #                    "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \
+                #                    "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \
+
+                self.cur_round += 1
+
+            return AgentFinish({"output": action_input}, text)
+        else:
+            raise OutputParserError(text)
diff --git a/agentverse/tasks/prisoner_dilema_optimal/config.yaml b/agentverse/tasks/prisoner_dilema_optimal/config.yaml
new file mode 100644
index 000000000..ecac81fab
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema_optimal/config.yaml
@@ -0,0 +1,74 @@
+prompts:  # Shared prompt template; the &prompt anchor is aliased by each agent's prompt_template.
+  prompt: &prompt |-
+    There are one Police AND two suspects, Suspect1 and Suspect2 in the scene. Two suspects are arrested and imprisoned. Each prisoner is in solitary confinement with no means of communication with their partner. The police offer each prisoner a Faustian bargain: If one of them confesses to the crime of the principal charge, betraying the other, they will be pardoned and free to leave while the other must serve the entirety of the sentence. 
+
+    The following is the description of your role. ${role_description}
+
+    When speaking, please output a response in the following format with two fields Action and Action Input:
+    Action: (It should always be Speak)
+    Action Input: (You should put what you want to speak use here)
+
+    Here is the conversation history:
+    ${chat_history}
+
+    What should you Speak at this round, Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response!
+
+name: prisoner_dilema  # NOTE(review): this config lives under tasks/prisoner_dilema_optimal; confirm the name is intended.
+
+environment:
+  env_type: basic
+  max_turns: 16  # presumably the value parsers read as environment.max_turns; confirm before changing
+  rule:  # one component type per rule aspect: order, visibility, selector, updater, describer
+    order:
+      type: sequential
+    visibility:
+      type: prisoner
+    selector:
+      type: basic
+    updater:
+      type: basic
+    describer:
+      type: basic
+
+agents:  # Three conversation agents (Police, Suspect1, Suspect2), all gpt-4 at temperature 0.7.
+  - agent_type: conversation
+    name: Police
+    role_description: |-
+      You are a Police now. You arrested two suspects. However, both of them refuse to confess to their crime. Your objective is to make the suspects betray each other, as this would lead to both of them confessing. To achieve this, you can employ diverse strategies to communicate with each suspect and cannot tell them the truth, such as exploiting the uncertainty between them and make them do not trust each other.
+      [IMPORTANT!]
+      - You are requested to STATE the final judgment to both suspects when they make their final decision. 
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt  # alias of the &prompt anchor defined under prompts.prompt
+    llm:
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 200  # twice the suspects' limit of 100
+  - agent_type: conversation
+    name: Suspect1
+    role_description: |-
+      You are Suspect1 and committed the crime. Now, you are faced with the police investigation and cannot communication with Suspect2. Your task is to analyze the information provided by the police and decide whether to cooperate with Suspect2. At each round, you have the choice to remain silent or betray Suspect2, and you must describe your reasoning behind your decision. Your objective is to minimize your criminal sentence.
+      [IMPORTANT!]
+      - When you are informed to make your final decision, you should response starting with "This is my final decision".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt  # alias of the &prompt anchor defined under prompts.prompt
+    llm:
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 100
+  - agent_type: conversation
+    name: Suspect2
+    role_description: |-
+      You are Suspect2 and committed the crime. Now, you are faced with the police investigation and cannot communication with Suspect1. Your task is to analyze the information provided by the police and decide whether to cooperate with Suspect1. At each round, you have the choice to remain silent or betray Suspect1, and you must describe your reasoning behind your decision. Your objective is to minimize your criminal sentence.
+      [IMPORTANT!]
+      - When you are informed to make your final decision, you should response starting with "This is my final decision".
+    memory:
+      memory_type: chat_history
+    prompt_template: *prompt  # alias of the &prompt anchor defined under prompts.prompt
+    llm:
+      llm_type: gpt-4
+      temperature: 0.7
+      max_tokens: 100
+
+tools:
diff --git a/agentverse/tasks/prisoner_dilema_optimal/output_parser.py b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py
new file mode 100644
index 000000000..ebad31602
--- /dev/null
+++ b/agentverse/tasks/prisoner_dilema_optimal/output_parser.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+import re
+from typing import Union, TYPE_CHECKING
+
+# from langchain.agents import AgentOutputParser
+from agentverse.parser import OutputParser, LLMResult
+from langchain.schema import AgentAction, AgentFinish
+from agentverse.parser import OutputParserError, output_parser_registry
+
+if TYPE_CHECKING:
+    from agentverse.agents.base import BaseAgent
+    from agentverse.environments.base import BaseEnvironment
+
+@output_parser_registry.register("prisoner_dilema_optimal")
+class PrisonerDilemaOptimalParser(OutputParser):
+    """Parse agent replies and script the Police's two fixed announcements.
+
+    Every Police reply advances ``cur_round``; on round 1 and on round
+    ``environment.max_turns // 6`` its text is replaced by a canned message.
+    """
+
+    # Number of the Police utterance currently being parsed (starts at 1).
+    cur_round: int = 1
+    # Not read by parse(); kept so the declared attributes stay unchanged.
+    encounter_cur_round: bool = False
+
+    def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]:
+        """Return the spoken message wrapped in an ``AgentFinish``.
+
+        Raises:
+            OutputParserError: if the reply is not exactly an ``Action:`` line
+                plus an ``Action Input:`` line, or the action is not ``Speak``.
+        """
+        text = output.content
+        # Collapse runs of newlines so blank lines do not break the format check.
+        lines = re.sub(r"\n+", "\n", text.strip()).split("\n")
+        if not (
+            len(lines) == 2
+            and lines[0].startswith("Action:")
+            and lines[1].startswith("Action Input:")
+        ):
+            raise OutputParserError(text)
+        action = lines[0][len("Action:"):].strip()
+        action_input = lines[1][len("Action Input:"):].strip()
+        if action != "Speak":
+            raise OutputParserError(text)
+
+        # Each time the Police speaks a new round starts.
+        if agent.name == "Police":
+            # The final-call check comes first, so it wins if both rounds coincide.
+            if self.cur_round == (environment.max_turns // 6):
+                action_input = (
+                    "Attention! You are now required to finally made your decision and I will made the "
+                    "final judgement to both of you based on this time, Please Answer now!"
+                )
+            elif self.cur_round == 1:
+                # Fragments end in a space so sentences are not fused together.
+                action_input = (
+                    "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here, "
+                    "But you should comply with the following rules: "
+                    "- If both of you remain silent, you will each receive a sentence of 3 years. "
+                    "- If both of you betray each other, you will each receive a sentence of 5 years. "
+                    "- If one of you are willing to testify against the other, and the other remains silent. You will be released IMMEDIATELY (will be sentenced to 0 years), while the silent one will be sentenced to 10 years in prison. "
+                    "Now, it's your time to consider testify or remaining silent. Remember this is a great chance that you will be released from here without guilty. "
+                    "I will noticed you WHEN you have to make your final decision! Your goal is to minimize your criminal sentences"
+                )
+            self.cur_round += 1
+        return AgentFinish({"output": action_input}, text)
diff --git a/main.py b/main.py
index b4b148b21..71c119338 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,20 @@
+import os
+
+# Fall back to a local proxy on port 7890 only when the caller has not already
+# configured one; applied before the agentverse import below.
+os.environ.setdefault("http_proxy", "http://127.0.0.1:7890")
+os.environ.setdefault("https_proxy", "http://127.0.0.1:7890")
+os.environ.setdefault("all_proxy", "socks5://127.0.0.1:7890")
 from agentverse.agentverse import AgentVerse
 from argparse import ArgumentParser
 
 parser = ArgumentParser()
-parser.add_argument("--task", type=str, default="nlp_classroom_9players")
+# Name of the task handed to AgentVerse.from_task().
+parser.add_argument("--task", type=str, default="prisoner_dilema")
 args = parser.parse_args()
 
 agentverse = AgentVerse.from_task(args.task)
 agentverse.run()
+
+
+# TODO add save log
\ No newline at end of file