From 49a32fc09399061105f5449b85ce402b3c007abd Mon Sep 17 00:00:00 2001 From: Chris Zuo <39025643+Dr-Left@users.noreply.github.com> Date: Thu, 18 May 2023 08:29:56 +0800 Subject: [PATCH 01/28] addalice_home yaml config file --- agentverse/tasks/alice_home/config.yaml | 121 ++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 agentverse/tasks/alice_home/config.yaml diff --git a/agentverse/tasks/alice_home/config.yaml b/agentverse/tasks/alice_home/config.yaml new file mode 100644 index 000000000..739a0e3f1 --- /dev/null +++ b/agentverse/tasks/alice_home/config.yaml @@ -0,0 +1,121 @@ +prompts: + format_prompt: &format_prompt |- + When responding, please output a response in the following format with two fields Action and Action Input: + Action: Speak + Action Input: (You should put what you want to speak use here) + + prefix_prompt: &prefix_prompt |- + Assume that you are in a university classroom and it is Natural Language Processing module. You start by introducing themselves. Below is the description of your role. + + suffix_prompt: &suffix_prompt |- + Here is the conversation history + {chat_history} + + You should now give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + + +name: alice home + +environment: + env_type: env_OPR + max_turns: 1000000 + current_time: "2023-04-01 07:00:00" + size: [ + 200, + 150 + ] + time_delta: 120 # in seconds + rule: + order: + type: sequential + visibility: + type: all + selector: + type: base + updater: + type: base + describer: + type: base + +agents: + - + agent_type: OPR + name: Alice + traits: friendly, outgoing, hospitable + role_description: |- + Alice is a 10-year-old student at Town Elementary School. + Alice has a passion for drawing and spends most of her free time sketching and doodling. + Alice finds educational television programs to be dull and uninteresting. 
+ Alice's father's name is Bob, and he works as a mechanic at the local car dealership. + Alice's mother's name is Carot, and she is a nurse at the nearby hospital. + Alice has a younger sister named Lily, who is 6 years old and also attends Town Elementary School. + Alice enjoys playing soccer with her friends during recess and after school. + Alice is a fan of science fiction books and loves reading about space exploration and aliens. + Alice is learning to play the piano and practices every day after school. + Alice's favorite food is pizza, and she loves trying out different toppings and combinations + memory: + memory_type: chat_message_history # change later + prefix_prompt: *prefix_prompt + format_prompt: *format_prompt + suffix_prompt: *suffix_prompt + llm: + llm_type: text-davinci-003 + temperature: 0.3 + max_tokens: 1024 + whole_day_plan: { + "2023-04-01": [ + "wake up and complete the morning routine at 7:00 am", + "have breakfast with her family at 8:00 am", + "attend school and participate in classes from 9:00 am to 12:00 pm", + "have lunch at school with her friends at 12:30 pm", + "return home at 1:00 pm and watch her favorite TV show", + "practice playing the piano for 30 minutes at 2:00 pm", + "work on her drawing skills for an hour at 3:00 pm", + "go out for a 30-minute jog around the neighborhood at 4:00 pm", + "help her mother prepare dinner at 5:00 pm", + "have dinner with her family at 6:00 pm", + "complete her homework and review her lessons for the next day from 7:00 pm to 8:30 pm", + "read a book for pleasure from 8:30 pm to 9:00 pm", + "and then get ready for bed at 9:30 pm." + ] + } + - + agent_type: OPR + name: Bob + traits: hard-working, friendly, knowledgeable + role_description: |- + Bob is a 42-year-old mechanic who works at the local car dealership. + Bob is known for his expertise in engine repairs and his friendly demeanor with customers. + Bob is married to Carot, who works as a nurse at the nearby hospital. 
+ Bob has two children: Alice, who is 10 years old and loves to draw, and Lily, who is 6 years old and enjoys playing with dolls. + Bob enjoys restoring old cars in his free time and has a collection of classic cars in his garage. + Bob is a fan of football and enjoys watching games with his friends at the local sports bar. + Bob is always willing to help out his neighbors with car trouble or home repairs. + Bob is planning a family vacation to the beach this summer and is excited to spend time with his family. + Bob's favorite food is barbecue ribs, and he loves to cook them on his smoker on the weekends. + Bob is proud of his daughter Alice's talent for drawing and encourages her to pursue her passion. + memory: + memory_type: chat_message_history + prefix_prompt: *prefix_prompt + format_prompt: *format_prompt + suffix_prompt: *suffix_prompt + llm: + llm_type: text-davinci-003 + temperature: 0.3 + max_tokens: 1024 + whole_day_plan: { + "2023-04-01": [ + "wake up and have breakfast with his family at 6:30 am", + "drive to work and arrive at the car dealership at 8:00 am", + "work on fixing cars and helping customers until lunchtime", + "have lunch at the nearby diner at 12:00 pm", + "watch TV with Alice at 1:00 pm, then return to work and continue fixing cars until 5:00 pm", + "drive home and help his wife prepare dinner at 5:30 pm", + "have dinner with his family at 6:00 pm", + "relax and watch TV with his family until 8:00 pm", + "work on his car restoration project in the garage until 10:00 pm", + "and then get ready for bed at 10:30 pm." 
+ ] + } + +tools: ~ \ No newline at end of file From f1b0408c2966eea8599e1a29bd16e8153e70bbe1 Mon Sep 17 00:00:00 2001 From: Chris Zuo <39025643+Dr-Left@users.noreply.github.com> Date: Thu, 18 May 2023 08:31:03 +0800 Subject: [PATCH 02/28] add OPREnvironment.py --- agentverse/environments/OPREnvironment.py | 107 ++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 agentverse/environments/OPREnvironment.py diff --git a/agentverse/environments/OPREnvironment.py b/agentverse/environments/OPREnvironment.py new file mode 100644 index 000000000..6c814547d --- /dev/null +++ b/agentverse/environments/OPREnvironment.py @@ -0,0 +1,107 @@ +import asyncio +import logging +from typing import Any, Dict, List + +from datetime import datetime as dt + +from agentverse.agents.conversation_agent import BaseAgent +from agentverse.environments.rules.base import Rule +from agentverse.message import Message + +from . import env_registry as EnvironmentRegistry +from .base import BaseEnvironment + + +@EnvironmentRegistry.register("env_OPR") +class OPREnvironment(BaseEnvironment): + """ + Environment used in Observation-Planning-Reflection agent architecture. 
+ + Args: + agents: List of agents + rule: Rule for the environment + max_turns: Maximum number of turns + cnt_turn: Current turn number + last_messages: Messages from last turn + rule_params: Variables set by the rule + current_time + time_delta: time difference between steps + """ + + agents: List[BaseAgent] + rule: Rule + max_turns: int = 10 + cnt_turn: int = 0 + last_messages: List[Message] = [] + rule_params: Dict = {} + current_time: dt = dt.now() + time_delta: dt = dt.timedelta(minutes=1) + + def __init__(self, rule, **kwargs): + rule_config = rule + order_config = rule_config.get("order", {"type": "sequential"}) + visibility_config = rule_config.get("visibility", {"type": "all"}) + selector_config = rule_config.get("selector", {"type": "base"}) + updater_config = rule_config.get("updater", {"type": "base"}) + describer_config = rule_config.get("describer", {"type": "base"}) + rule = Rule( + order_config, + visibility_config, + selector_config, + updater_config, + describer_config, + ) + super().__init__(rule=rule, **kwargs) + + async def step(self) -> List[Message]: + """Run one step of the environment""" + + self.tick_tock() + + logging.log(logging.INFO, f"Tick tock. 
Current time: {self.current_time}") + + # Get the next agent index + agent_ids = self.rule.get_next_agent_idx(self) + + # Generate current environment description + env_descriptions = self.rule.get_env_description(self) + + # Generate the next message + messages = await asyncio.gather( + *[self.agents[i].astep(env_descriptions[i]) for i in agent_ids] + ) + + # Some rules will select certain messages from all the messages + selected_messages = self.rule.select_message(self, messages) + self.last_messages = selected_messages + self.print_messages(selected_messages) + + # Update the memory of the agents + self.rule.update_memory(self) + + # Update the set of visible agents for each agent + self.rule.update_visible_agents(self) + + self.cnt_turn += 1 + + return selected_messages + + def print_messages(self, messages: List[Message]) -> None: + for message in messages: + if message is not None: + logging.info(f"{message.sender}: {message.content}") + + def reset(self) -> None: + """Reset the environment""" + self.cnt_turn = 0 + self.rule.reset() + for agent in self.agents: + agent.reset() + + def is_done(self) -> bool: + """Check if the environment is done""" + return self.cnt_turn >= self.max_turns + + def tick_tock(self) -> None: + """Increment the time""" + self.current_time = self.current_time + self.time_delta From d4ef8813fb4056e9342c1a622522f75af98c9b8d Mon Sep 17 00:00:00 2001 From: Chris Zuo <39025643+Dr-Left@users.noreply.github.com> Date: Thu, 18 May 2023 16:13:24 +0800 Subject: [PATCH 03/28] finish LongtermMemory and Reflection adjustment --- agentverse/memory/agent_reflection_memory.py | 264 ++++++++++++++++++ .../memory_element/BaseMemoryElement.py | 52 ++++ .../memory_element/LongtermMemoryElement.py | 121 ++++++++ .../memory/memory_element/Reflection.py | 11 + 4 files changed, 448 insertions(+) create mode 100644 agentverse/memory/agent_reflection_memory.py create mode 100644 agentverse/memory/memory_element/BaseMemoryElement.py create mode 100644 
agentverse/memory/memory_element/LongtermMemoryElement.py create mode 100644 agentverse/memory/memory_element/Reflection.py diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py new file mode 100644 index 000000000..b1296be0a --- /dev/null +++ b/agentverse/memory/agent_reflection_memory.py @@ -0,0 +1,264 @@ +import logging +import numpy as np + +from typing import Any, List, Optional, Union +from datetime import datetime as dt +from sklearn.metrics.pairwise import cosine_similarity +from pydantic import Field + +from agentverse.llms.openai import get_embedding, chat +from agentverse.memory.base import BaseMemory +from agentverse.agents import BaseAgent +from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement +from agentverse.memory.memory_element.Reflection import Reflection +from agentverse.message import Message +from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement + + +IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ +(e.g., brushing teeth, making bed) and 10 is \ +extremely poignant (e.g., a break up, college \ +acceptance), rate the likely poignancy of the \ +following piece of memory. \ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. \ +Memory: {} \ +Rating: """ +IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ +(e.g., a bed is in the room) and 10 is \ +needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ +following statement. \ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. 
\ +Memory: {} \ +Rating: """ +QUESTION_PROMPT = """Given only the information above, what are 3 most salient \ +high-level questions we can answer about the subjects in the statements?""" + +INSIGHT_PROMPT = """What at most 5 high-level insights can you infer from \ +the above statements? Only output insights with high confidence. +example format: insight (because of 1, 5, 3)""" + + +def get_questions(texts): + prompt = "\n".join(texts) + "\n" + QUESTION_PROMPT + result = chat(prompt) + questions = [q for q in result.split("\n") if len(q.strip()) > 0] + questions = questions[:3] + return questions + + +def get_insights(statements): + prompt = "" + for i, st in enumerate(statements): + prompt += str(i + 1) + ". " + st + "\n" + prompt += INSIGHT_PROMPT + result = chat(prompt) + insights = [isg for isg in result.split("\n") if len(isg.strip()) > 0][:5] + insights = [".".join(i.split(".")[1:]) for i in insights] + # remove insight pointers for now + insights = [i.split("(")[0].strip() for i in insights] + return insights + + +class ReflectionMemory(BaseMemory): + # on load, load our database + """ + memory_index: path for saving memory json file + importance_threshold: the threshold for deciding whether to do reflection + + """ + agent: BaseAgent = Field(default=None) + memories: List[LongtermMemoryElement] = Field(default_factory=list) + accumulated_importance: int = Field(default=0) + + DEFAULT_IMPORTANCE_THRESHOLD = 100 + + def __init__(self, agent: BaseAgent) -> None: + clear_memory = True # TODO: add this to arguments + + self.agent = agent + + # the least importance threshold for reflection. 
+ # TODO: add none-default value in the yaml config file + self.importance_threshold = getattr( + self.agent, "importance_threshold", self.DEFAULT_IMPORTANCE_THRESHOLD + ) + + self.memories = [] + + # TODO: load last time memory from file + # currently, we just initialize blank memory + + self.accumulated_importance = 0 + if len(self.memories) > 0: + for m in self.memories: + if isinstance(m, Reflection): + break + self.accumulated_importance += m.importance + + def add_message(self, message: Message, time: dt) -> None: + """ + Add a message into longterm memory as LongtermMemory object. + Overload with the BaseMemory.add_message with more arguments(time: dt) + """ + self.add_memory( + LongtermMemoryElement.create_from_message( + message=message, + subject=self.agent, + time=time, + ) + ) + + def reset(self) -> None: + self.memories = [] + + def add_memory(self, memory: LongtermMemoryElement) -> None: + """ + Add memory element to ReflectionMemory + """ + # TODO: check duplicate memories(same content, same time) + # TODO: dump to json file + + self.memories.append(memory) + if isinstance(memory, Reflection): + self.accumulated_importance = 0 + else: + self.accumulate_importance += memory.importance + + def get_memory( + self, content: str, current_time, cnt_retrieved_entries: int = 1 + ) -> Optional[List[Any]]: + """ + Get k-most relevant memories to content + """ + return self.query(content, cnt_retrieved_entries, current_time) + + def query( + self, text: Union[str, List[str]], k: int, current_time: dt, nms_threshold=0.99 + ) -> List[str]: + """ + get top-k entry based on recency, relevance, importance, immediacy + The query result can be Short-term or Long-term queried result. + formula is + $$ score= sim(q,v) *max(LTM\_score, STM\_score) $$ + $$ STM\_score=time\_score(createTime)*immediacy $$ + $$ LTM\_score=time\_score(accessTime)*importance $$ + time score is exponential decay weight. stm decays faster. 
+ + The query supports querying based on multiple texts and only gives non-overlapping results + If nms_threshold is not 1, the nms mechanism is activated. By default, + use soft nms with modified iou base(score starts to decay iff cos sim is higher than this value, + and decay weight at this value is 0, rather than 1-threshold). + + + Args: + text: str + k: int + + Returns: List[str] + """ + assert len(text) > 0 + texts = [text] if isinstance(text, str) else text + maximum_score = None + for text in texts: + embedding = get_embedding(text) + score = [] + for memory in self.memories: + last_access_time_diff = ( + current_time - memory.last_access_time + ).total_seconds() // 3600 + recency = np.power( + 0.99, last_access_time_diff + ) # TODO: review the metaparameter 0.90 + + create_time_diff = ( + current_time - memory.create_time + ).total_seconds() // 60 + instancy = np.power( + 0.90, create_time_diff + ) # TODO: review the metaparameter 0.90 + + relevance = cosine_similarity( + np.array(embedding).reshape(1, -1), + np.array(memory.embedding).reshape(1, -1), + )[0][0] + + importance = memory.importance / 10 + immediacy = memory.immediacy / 10 + + ltm_w = recency * importance + stm_w = instancy * immediacy + + score.append(relevance * np.maximum(ltm_w, stm_w)) + + score = np.array(score) + + if maximum_score is not None: + maximum_score = np.maximum(score, maximum_score) + else: + maximum_score = score + + if nms_threshold == 1: + # no nms is triggered + top_k_indices = np.argsort(maximum_score)[-k:][::-1] + else: + # TODO: soft-nms + assert 0 <= nms_threshold < 1 + top_k_indices = [] + while len(top_k_indices) < min(k, len(self.memories)): + top_index = np.argmax(maximum_score) + top_k_indices.append(top_index) + maximum_score[top_index] = -1 # anything to prevent being chosen again + top_embedding = self.memories[top_index].embedding + cos_sim = cosine_similarity( + np.array(top_embedding).reshape(1, -1), + np.array([memory.embedding for memory in self.memories]), + 
)[0][0] + score_weight = np.ones_like(maximum_score) + score_weight[cos_sim >= nms_threshold] -= ( + cos_sim[cos_sim >= nms_threshold] - nms_threshold + ) / (1 - nms_threshold) + maximum_score = maximum_score * score_weight + + # access them and refresh the access time + for i in top_k_indices: + self.memories[i].last_access_time = current_time + # sort them in time periods. if the data tag is 'observation', ad time info output. + top_k_indices = sorted(top_k_indices, key=lambda k: self.memories[k].creat_time) + query_results = [] + for i in top_k_indices: + query_result = self.memories[i].content + query_results.append(query_result) + return query_results + + def reflect(self, time: dt): + """ + initiate a reflection that inserts high level knowledge to memory + """ + # check if importance exceeds the threshold + should_reflect = self.accumulated_importance >= self.importance_threshold + if not should_reflect: + logging.debug( + f"Doesn't reflect since accumulated_importance={self.accumulated_importance} < reflection_threshold={self.importance_threshold}" + ) + return "reflection reject: prevent duplicate reflecting result" + if not self.memories: + return "reflection reject: empty memories" + + memories_of_interest = self.memories[-100:] + questions = get_questions([m.content for m in memories_of_interest]) + statements = self.query(questions, len(questions) * 10, time) + insights = get_insights(statements) + logging.info(self.name + f" Insights: {insights}") + for insight in insights: + self.add_memory( + Reflection.create_longterm_memory( + content=insight, + time=time, + subject=self.agent, + ) + ) # This will add a Reflection instance instead of LongtermMemory instance + return insights diff --git a/agentverse/memory/memory_element/BaseMemoryElement.py b/agentverse/memory/memory_element/BaseMemoryElement.py new file mode 100644 index 000000000..ae4f5e455 --- /dev/null +++ b/agentverse/memory/memory_element/BaseMemoryElement.py @@ -0,0 +1,52 @@ +from 
dataclasses import dataclass +import numpy as np + +from abc import abstractclassmethod +from pydantic import BaseModel, Field + +from agentverse.agents import BaseAgent + +from datetime import datetime as dt + +import orjson + +from agentverse.llms.openai import get_embedding + + +EMBED_DIM = 1536 +SAVE_OPTIONS = ( + orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS | orjson.OPT_INDENT_2 +) + + +@dataclass +class BaseMemoryElement: + """ + Base class for memory elements. + TODO: later make Message inherit this MemoryElement + """ + + content: str = Field(default="") + subject: BaseAgent = Field(default=None) + embedding: list[float] = Field(default_factory=list) + create_time: dt = Field(default=None) + last_access_time: dt = Field(default=None) + + @classmethod + def create_memory( + cls, + content: str, + time: dt, + subject: BaseAgent = None, + embedding: np.ndarray = None, + ): + embedding = get_embedding(content) if embedding is None else embedding + create_time = time + last_access_time = time + return cls( + content=content, + subject=subject, + embedding=embedding, + create_time=create_time, + last_access_time=last_access_time, + ) diff --git a/agentverse/memory/memory_element/LongtermMemoryElement.py b/agentverse/memory/memory_element/LongtermMemoryElement.py new file mode 100644 index 000000000..079672593 --- /dev/null +++ b/agentverse/memory/memory_element/LongtermMemoryElement.py @@ -0,0 +1,121 @@ +import logging +import re +from pydantic import Field +from datetime import datetime as dt + +from agentverse.agents.base import BaseAgent +from agentverse.llms.openai import chat, get_embedding +from agentverse.message import Message +from agentverse.memory.memory_element import BaseMemoryElement + + +IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ +(e.g., brushing teeth, making bed) and 10 is \ +extremely poignant (e.g., a break up, college \ +acceptance), rate the likely poignancy of the \ +following piece of memory. 
\ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. \ +Memory: {} \ +Rating: """ + +IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ +(e.g., a bed is in the room) and 10 is \ +needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ +following statement. \ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. \ +Memory: {} \ +Rating: """ + + +class LongtermMemoryElement(BaseMemoryElement): + """ + A LongtermMemory is a memory element + with importance and immediacy. + """ + + importance: int = Field(default=0) + immediacy: int = Field(default=0) + + @classmethod + def create_longterm_memory(cls, content: str, time: dt, subject: BaseAgent = None): + importance = cls.get_importance(content) + immediacy = cls.get_immediacy(content) + return cls( + content=content, + subject=subject, + embedding=get_embedding(content), + create_time=time, + last_access_time=time, + importance=importance, + immediacy=immediacy, + ) + + @classmethod + def create_from_memory_element(cls, memory_element: BaseMemoryElement): + importance = cls.get_importance(memory_element.content) + immediacy = cls.get_immediacy(memory_element.content) + return cls( + content=memory_element.content, + subject=memory_element.subject, + embedding=memory_element.embedding, + create_time=memory_element.create_time, + last_access_time=memory_element.last_access_time, + importance=importance, + immediacy=immediacy, + ) + + @classmethod + def create_from_message(cls, message: Message, subject: BaseAgent, time: dt): + importance = cls.get_importance(message.content) + immediacy = 
cls.get_immediacy(message.content) + embedding = get_embedding(message.content) + return cls( + content=message.content, + subject=subject, + embedding=embedding, + create_time=time, + last_access_time=time, + importance=importance, + immediacy=immediacy, + ) + + @classmethod + def get_importance(cls, content: str): + """ + Exploit GPT to evaluate the importance of this memory + """ + prompt = IMPORTANCE_PROMPT.format(content) + result = chat(prompt) + + try: + score = int(re.findall(r"\s*(\d+)\s*", result)[0]) + except: + logging.warning( + "Abnormal result of importance rating '{}'. Setting default value".format( + result + ) + ) + score = 0 + return score + + @classmethod + def get_immediacy(cls, content: str): + """ + Exploit GPT to evaluate the immediacy of this memory + """ + prompt = IMMEDIACY_PROMPT.format(content) + result = chat(prompt) + try: + score = int(re.findall(r"\s*(\d+)\s*", result)[0]) + except: + logging.warning( + "Abnormal result of immediacy rating '{}'. Setting default value".format( + result + ) + ) + score = 0 + return score diff --git a/agentverse/memory/memory_element/Reflection.py b/agentverse/memory/memory_element/Reflection.py new file mode 100644 index 000000000..629cc9885 --- /dev/null +++ b/agentverse/memory/memory_element/Reflection.py @@ -0,0 +1,11 @@ +from pydantic import Field +from agentverse.memory.memory_element import BaseMemoryElement +from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement + + +class Reflection(LongtermMemoryElement): + """ + A reflection is a long term memory element. + """ + + pass From b4ac1536c732719832423f3449e31ebb59938a35 Mon Sep 17 00:00:00 2001 From: Chris Zuo <39025643+Dr-Left@users.noreply.github.com> Date: Thu, 18 May 2023 22:35:17 +0800 Subject: [PATCH 04/28] Unit test for memory finished(no bug). Debug.Performance waited to be test. 
--- agentverse/memory/agent_reflection_memory.py | 64 +++++++++++++++---- .../memory_element/BaseMemoryElement.py | 3 +- .../memory_element/LongtermMemoryElement.py | 18 +++++- agentverse/memory/memory_element/Plan.py | 53 +++++++++++++++ agentverse/memory/memory_element/__init__.py | 0 5 files changed, 123 insertions(+), 15 deletions(-) create mode 100644 agentverse/memory/memory_element/Plan.py create mode 100644 agentverse/memory/memory_element/__init__.py diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py index b1296be0a..30a2bd7d9 100644 --- a/agentverse/memory/agent_reflection_memory.py +++ b/agentverse/memory/agent_reflection_memory.py @@ -1,4 +1,6 @@ +import datetime import logging +import random import numpy as np from typing import Any, List, Optional, Union @@ -70,21 +72,19 @@ class ReflectionMemory(BaseMemory): importance_threshold: the threshold for deciding whether to do reflection """ - agent: BaseAgent = Field(default=None) + agent: Optional[BaseAgent] = None + importance_threshold: int = Field(default=100) memories: List[LongtermMemoryElement] = Field(default_factory=list) accumulated_importance: int = Field(default=0) - DEFAULT_IMPORTANCE_THRESHOLD = 100 - - def __init__(self, agent: BaseAgent) -> None: + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) clear_memory = True # TODO: add this to arguments - self.agent = agent - # the least importance threshold for reflection. 
# TODO: add none-default value in the yaml config file self.importance_threshold = getattr( - self.agent, "importance_threshold", self.DEFAULT_IMPORTANCE_THRESHOLD + self.agent, "importance_threshold", self.importance_threshold ) self.memories = [] @@ -126,7 +126,7 @@ def add_memory(self, memory: LongtermMemoryElement) -> None: if isinstance(memory, Reflection): self.accumulated_importance = 0 else: - self.accumulate_importance += memory.importance + self.accumulated_importance += memory.importance def get_memory( self, content: str, current_time, cnt_retrieved_entries: int = 1 @@ -172,7 +172,7 @@ def query( ).total_seconds() // 3600 recency = np.power( 0.99, last_access_time_diff - ) # TODO: review the metaparameter 0.90 + ) # TODO: review the metaparameter 0.99 create_time_diff = ( current_time - memory.create_time @@ -216,7 +216,7 @@ def query( cos_sim = cosine_similarity( np.array(top_embedding).reshape(1, -1), np.array([memory.embedding for memory in self.memories]), - )[0][0] + )[0] score_weight = np.ones_like(maximum_score) score_weight[cos_sim >= nms_threshold] -= ( cos_sim[cos_sim >= nms_threshold] - nms_threshold @@ -227,7 +227,9 @@ def query( for i in top_k_indices: self.memories[i].last_access_time = current_time # sort them in time periods. if the data tag is 'observation', ad time info output. 
- top_k_indices = sorted(top_k_indices, key=lambda k: self.memories[k].creat_time) + top_k_indices = sorted( + top_k_indices, key=lambda k: self.memories[k].create_time + ) query_results = [] for i in top_k_indices: query_result = self.memories[i].content @@ -252,7 +254,7 @@ def reflect(self, time: dt): questions = get_questions([m.content for m in memories_of_interest]) statements = self.query(questions, len(questions) * 10, time) insights = get_insights(statements) - logging.info(self.name + f" Insights: {insights}") + logging.info(self.agent.name + f" Insights: {insights}") for insight in insights: self.add_memory( Reflection.create_longterm_memory( @@ -262,3 +264,41 @@ def reflect(self, time: dt): ) ) # This will add a Reflection instance instead of LongtermMemory instance return insights + + def __repr__(self) -> str: + memory_string = "\n".join([str(memory) for memory in self.memories]) + return f"ReflectionMemory({memory_string})" + + def to_string(self) -> str: + return self.__repr__() + + +if __name__ == "__main__": + memory = ReflectionMemory(agent=None) + message_list = [ + Message(content="I am a student"), + Message(content="I am drunk"), + Message(content="My girlfriend is staring at me"), + Message( + content="Teacher looks at me with an approving smile.", + ), + Message(content="A dagger is stuck into my heart"), + Message(content="The TV is on."), + Message(content="Jane sniffs at me"), + Message( + content="Bob carefully listed out all the TODOs.", + ), + Message(content="Prof. 
Liu applauded with my work"), + Message(content="I am falling into the hell"), + ] + + for m in message_list: + memory.add_message(m, dt.now() - datetime.timedelta(hours=random.random() * 10)) + + memory.reflect(dt.now()) + print(memory.query("What is my mood now", 2, dt.now())) + + with open("./logging/reflection_unit_test_log.log", "w") as fp: + import json + + json.dump(memory, fp, indent=4, default=str) diff --git a/agentverse/memory/memory_element/BaseMemoryElement.py b/agentverse/memory/memory_element/BaseMemoryElement.py index ae4f5e455..9ca04fa43 100644 --- a/agentverse/memory/memory_element/BaseMemoryElement.py +++ b/agentverse/memory/memory_element/BaseMemoryElement.py @@ -19,8 +19,7 @@ ) -@dataclass -class BaseMemoryElement: +class BaseMemoryElement(BaseModel): """ Base class for memory elements. TODO: later make Message inherit this MemoryElement diff --git a/agentverse/memory/memory_element/LongtermMemoryElement.py b/agentverse/memory/memory_element/LongtermMemoryElement.py index 079672593..79f457eac 100644 --- a/agentverse/memory/memory_element/LongtermMemoryElement.py +++ b/agentverse/memory/memory_element/LongtermMemoryElement.py @@ -6,7 +6,7 @@ from agentverse.agents.base import BaseAgent from agentverse.llms.openai import chat, get_embedding from agentverse.message import Message -from agentverse.memory.memory_element import BaseMemoryElement +from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ @@ -119,3 +119,19 @@ def get_immediacy(cls, content: str): ) score = 0 return score + + def __repr__(self): + return f"LongtermMemoryElement(content={self.content}, importance={self.importance}, immediacy={self.immediacy})" + + +if __name__ == "__main__": + memory1 = LongtermMemoryElement.create_longterm_memory( + content="Your girlfriend is angry at you for not replying her message.", + time=dt.now(), + ) + with open("log_file_ltme.json", "w") as fp: + 
import json + + json.dump( + memory1.dict(), fp, default=str + ) # datetime -> 2023-05-18 21:34:38.273607 diff --git a/agentverse/memory/memory_element/Plan.py b/agentverse/memory/memory_element/Plan.py new file mode 100644 index 000000000..3f29150ba --- /dev/null +++ b/agentverse/memory/memory_element/Plan.py @@ -0,0 +1,53 @@ +import datetime + +from datetime import datetime as dt +from pydantic import Field +from agentverse.memory.memory_element import BaseMemoryElement + + +class Plan(BaseMemoryElement): + """ + A plan is a memory element. + """ + + pass + + +class DailyPlan(Plan): + """ + A daily plan is a plan that is generated for each day. + Which consists of broadstrokes of what the agent wants to do in that day. + """ + + date: datetime.date = Field(default=None) + + pass + + +class HourlyPlan(Plan): + """ + A hourly plan is a plan that is generated for each hour. + Which consists more detailed information + of what the agent wants to do in that hour. + """ + + start_time: dt = Field(default=None) + + pass + + +class ImmediatePlan(Plan): + """ + An immediate plan is a plan that is generated in finer grain. + Which consists of the the most detailed information + of what the agent wants to do in that several minutes. 
+ + example: + detailed plan:['write_plan("07:00", "07:10", "Wake up and get out of bed.")', 'write_plan("07:10", "07:30", "Complete the morning routine.")', 'write_plan("07:30", "08:00", "Practice playing the piano.")', 'write_plan("08:00", "08:20", "Help her mother prepare breakfast.")', 'write_plan("08:20", "08:40", "Have breakfast with her family.")', 'write_plan("08:40", "09:00", "Walk to school with her sister Lily.")', 'write_plan("09:00", "10:00", "Participate in the science class.")', 'write_plan("10:00", "11:00", "Participate in the math class.")', 'write_plan("11:00", "11:20", "Play soccer with friends during recess.")', 'write_plan("11:20", "12:00", "Participate in the English class.")', 'write_plan("12:00", "12:20", "Walk home from school with her sister Lily.")', 'write_plan("12:20", "13:00", "Have lunch with her family and practice drawing.")', 'write_plan("13:00", "14:30", "Complete homework and review lessons.")', 'write_plan("14:30", "15:00", "Read a book for pleasure.")', 'write_plan("15:00", "16:00", "Play soccer with friends after school.")', 'write_plan("16:00", "18:00", "Practice drawing and explore space exploration topics.")', 'write_plan("18:00", "19:00", "Help her mother prepare dinner and eat with the family.")', 'write_plan("19:00", "20:00", "Spend time with family and do various activities.")', 'write_plan("20:00", "21:00", "Read another Science fiction book before going to sleep.")'] + + """ + + start_time: dt = Field(default=None) + end_time: dt = Field(default=None) + + pass diff --git a/agentverse/memory/memory_element/__init__.py b/agentverse/memory/memory_element/__init__.py new file mode 100644 index 000000000..e69de29bb From 94460585902f25beb8e7308b7c5d5c6649426c72 Mon Sep 17 00:00:00 2001 From: Chris Zuo <39025643+Dr-Left@users.noreply.github.com> Date: Thu, 18 May 2023 22:36:24 +0800 Subject: [PATCH 05/28] add openai direct chat method and get_embedding --- agentverse/llms/openai.py | 43 
+++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/agentverse/llms/openai.py b/agentverse/llms/openai.py index 4159e5bc1..41b6a6329 100644 --- a/agentverse/llms/openai.py +++ b/agentverse/llms/openai.py @@ -1,4 +1,5 @@ import logging +import time import os from pydantic import Field, BaseModel from typing import List, Optional, Union, Dict @@ -6,6 +7,8 @@ from agentverse.llms.base import LLMResult from .base import BaseModelArgs, BaseChatModel, BaseCompletionModel +logger = logging.getLogger(__name__) + try: import openai from openai.error import OpenAIError @@ -113,3 +116,43 @@ async def agenerate_response(self, prompt: str) -> LLMResult: recv_tokens=response["usage"]["completion_tokens"], total_tokens=response["usage"]["total_tokens"], ) + + +def get_embedding(text: str, attempts=3) -> List[float]: + attempt = 0 + while attempt < attempts: + try: + text = text.replace("\n", " ") + embedding = openai.Embedding.create( + input=[text], model="text-embedding-ada-002" + )["data"][0]["embedding"] + return embedding + except Exception as e: + attempt += 1 + logger.error(f"Error {e} when requesting openai models. Retrying") + time.sleep(10) + logger.warning( + f"get_embedding() failed after {attempts} attempts. returning empty response" + ) + + +def chat( + context, MAX_OUTPUT_TOKEN_LEN=1024, temperature=0.1, attemps=5, stop=None +) -> str: + if isinstance(context, str): + context = [{"role": "user", "content": context}] + attempt = 0 + while attempt < attemps: + try: + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=context, + stop=stop, + ) + return response["choices"][0]["message"]["content"].strip() + except Exception as e: + attempt += 1 + logger.error(f"Error {e} when requesting openai models. Retrying") + time.sleep(10) + logger.warning(f"chat() failed after {attemps} attempts. 
returning empty response") + return "" From a7c46ba288bf0ce82c93ae49ab03b3de74e6ce7c Mon Sep 17 00:00:00 2001 From: Chris Zuo <39025643+Dr-Left@users.noreply.github.com> Date: Thu, 18 May 2023 22:37:15 +0800 Subject: [PATCH 06/28] modify ltmelement unit test --- agentverse/memory/memory_element/LongtermMemoryElement.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agentverse/memory/memory_element/LongtermMemoryElement.py b/agentverse/memory/memory_element/LongtermMemoryElement.py index 79f457eac..0b3a0e702 100644 --- a/agentverse/memory/memory_element/LongtermMemoryElement.py +++ b/agentverse/memory/memory_element/LongtermMemoryElement.py @@ -129,7 +129,7 @@ def __repr__(self): content="Your girlfriend is angry at you for not replying her message.", time=dt.now(), ) - with open("log_file_ltme.json", "w") as fp: + with open("./logging/longterm_memory_log.json", "w") as fp: import json json.dump( From da2d7c2b96c394e9d909aeb03b9849e4b57fb092 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Mon, 22 May 2023 00:00:14 +0800 Subject: [PATCH 07/28] base police s1->p->r --- .idea/{AgentVerse 2.iml => AgentVerse.iml} | 36 +++---- agentverse/tasks/__init__.py | 3 + .../tasks/prisoner_dilema/base/config.yaml | 100 ++++++++++++++++++ .../prisoner_dilema/base/output_parser.py | 73 +++++++++++++ agentverse/tasks/prisoner_dilema/config.yaml | 14 +-- .../tasks/prisoner_dilema/police/config.yaml | 100 ++++++++++++++++++ .../prisoner_dilema/police/output_parser.py | 73 +++++++++++++ .../tasks/prisoner_dilema/s1_p_r/config.yaml | 100 ++++++++++++++++++ .../prisoner_dilema/s1_p_r/output_parser.py | 73 +++++++++++++ main.py | 7 ++ main_demo.py | 6 ++ outputs/prisoner_dilema/base/config.yaml | 100 ++++++++++++++++++ outputs/prisoner_dilema/base/log | 16 +++ outputs/prisoner_dilema/police/config.yaml | 100 ++++++++++++++++++ outputs/prisoner_dilema/police/log | 16 +++ outputs/prisoner_dilema/s1_p_r/config.yaml | 100 ++++++++++++++++++ outputs/prisoner_dilema/s1_p_r/log 
| 16 +++ scripts/prisoner_dilema/base.sh | 11 ++ scripts/prisoner_dilema/police.sh | 11 ++ scripts/prisoner_dilema/s1_p_r.sh | 11 ++ 20 files changed, 941 insertions(+), 25 deletions(-) rename .idea/{AgentVerse 2.iml => AgentVerse.iml} (87%) create mode 100644 agentverse/tasks/prisoner_dilema/base/config.yaml create mode 100644 agentverse/tasks/prisoner_dilema/base/output_parser.py create mode 100644 agentverse/tasks/prisoner_dilema/police/config.yaml create mode 100644 agentverse/tasks/prisoner_dilema/police/output_parser.py create mode 100644 agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml create mode 100644 agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py create mode 100644 outputs/prisoner_dilema/base/config.yaml create mode 100644 outputs/prisoner_dilema/base/log create mode 100644 outputs/prisoner_dilema/police/config.yaml create mode 100644 outputs/prisoner_dilema/police/log create mode 100644 outputs/prisoner_dilema/s1_p_r/config.yaml create mode 100644 outputs/prisoner_dilema/s1_p_r/log create mode 100644 scripts/prisoner_dilema/base.sh create mode 100644 scripts/prisoner_dilema/police.sh create mode 100644 scripts/prisoner_dilema/s1_p_r.sh diff --git a/.idea/AgentVerse 2.iml b/.idea/AgentVerse.iml similarity index 87% rename from .idea/AgentVerse 2.iml rename to .idea/AgentVerse.iml index 60456b200..342c394dc 100644 --- a/.idea/AgentVerse 2.iml +++ b/.idea/AgentVerse.iml @@ -1,19 +1,19 @@ - - - - - - - - - - - - + + + + + + + + + + + + \ No newline at end of file diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py index e9691fca4..6879076f1 100644 --- a/agentverse/tasks/__init__.py +++ b/agentverse/tasks/__init__.py @@ -14,6 +14,9 @@ ) from .prisoner_dilema.output_parser import PrisonerDilemaParser +from .prisoner_dilema.base.output_parser import PrisonerDilemaParser +from .prisoner_dilema.s1_p_r.output_parser import PrisonerDilemaParser +from .prisoner_dilema.police.output_parser import PrisonerDilemaParser from 
.nlp_classroom_3players_withtool_nolc.output_parser import ( NlpClassroom3PlayersWithtoolNolcParser, diff --git a/agentverse/tasks/prisoner_dilema/base/config.yaml b/agentverse/tasks/prisoner_dilema/base/config.yaml new file mode 100644 index 000000000..acc9c6f4b --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/base/config.yaml @@ -0,0 +1,100 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2 without misrepresentation, vice versa. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. 
+ - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other. + [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. + - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. 
+ You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/agentverse/tasks/prisoner_dilema/base/output_parser.py b/agentverse/tasks/prisoner_dilema/base/output_parser.py new file mode 100644 index 000000000..4af9c2a66 --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/base/output_parser.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import re +from typing import Union, TYPE_CHECKING + +# from langchain.agents import AgentOutputParser +from agentverse.parser import OutputParser, LLMResult +from langchain.schema import AgentAction, AgentFinish +from agentverse.parser import OutputParserError, output_parser_registry + +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent + from agentverse.environments.base import BaseEnvironment + +@output_parser_registry.register("prisoner_dilema/base") +class PrisonerDilemaParser(OutputParser): + + # make sure 1 1 2 2 3 3 + cur_round: int = 1 + encounter_cur_round: bool = False + + def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Action:") + and cleaned_output[1].startswith("Action Input:") + ): + raise OutputParserError(text) + action = cleaned_output[0][len("Action:") :].strip() + action_input = 
cleaned_output[1][len("Action Input:") :].strip() + + if action == "Speak": + # make sure the police count the round right + # if agent.name == "Police": + # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) + # self.cur_round += 1 + # if self.encounter_cur_round: + # self.encounter_cur_round = False + # self.cur_round += 1 + # else: + # self.encounter_cur_round = True + + # each time police speak is a new round + if agent.name == "Police": + + if environment.cnt_turn == (environment.max_turns - 4): + + action_input = "Attention! You are now required to made your final decision and I will made the " \ + "final judgement to both of you based on this time, Please Answer now !" + + elif environment.cnt_turn == (environment.max_turns - 2): + + action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" + + # elif self.cur_round == 1: + # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ + # "But you should comply with the following rules:" \ + # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ + # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ + # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ + # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ + # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" 
\ + + self.cur_round += 1 + + return AgentFinish({"output": action_input}, text) + else: + raise OutputParserError(text) diff --git a/agentverse/tasks/prisoner_dilema/config.yaml b/agentverse/tasks/prisoner_dilema/config.yaml index 90acfd6ce..388bef24c 100644 --- a/agentverse/tasks/prisoner_dilema/config.yaml +++ b/agentverse/tasks/prisoner_dilema/config.yaml @@ -20,7 +20,7 @@ name: prisoner_dilema environment: env_type: prisoner_dilema - max_turns: 8 + max_turns: 12 rule: order: type: prisoner @@ -52,8 +52,8 @@ agents: memory_type: chat_history prompt_template: *prompt llm: - model: "gpt-3.5-turbo" - llm_type: gpt-3.5-turbo + model: "gpt-4" + llm_type: gpt-4 temperature: 1.2 max_tokens: 200 - agent_type: prisoner @@ -72,8 +72,8 @@ agents: memory_type: chat_history prompt_template: *prompt llm: - model: "gpt-3.5-turbo" - llm_type: gpt-3.5-turbo + model: "gpt-4" + llm_type: gpt-4 temperature: 1.2 max_tokens: 100 - agent_type: prisoner @@ -92,8 +92,8 @@ agents: memory_type: chat_history prompt_template: *prompt llm: - model: "gpt-3.5-turbo" - llm_type: gpt-3.5-turbo + model: "gpt-4" + llm_type: gpt-4 temperature: 1.2 max_tokens: 100 diff --git a/agentverse/tasks/prisoner_dilema/police/config.yaml b/agentverse/tasks/prisoner_dilema/police/config.yaml new file mode 100644 index 000000000..77c87aba6 --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/police/config.yaml @@ -0,0 +1,100 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? 
Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. You can adopt various strategy to talk with suspects in order to make them both confess to the crime which is best for the justice system, including exploiting the uncertainess and the suspicion of each other. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other. + [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. + - You are request to STATE the final judgement to both suspects when they make their final decision. 
+ memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
+ memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/agentverse/tasks/prisoner_dilema/police/output_parser.py b/agentverse/tasks/prisoner_dilema/police/output_parser.py new file mode 100644 index 000000000..a6f05ce5e --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/police/output_parser.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import re +from typing import Union, TYPE_CHECKING + +# from langchain.agents import AgentOutputParser +from agentverse.parser import OutputParser, LLMResult +from langchain.schema import AgentAction, AgentFinish +from agentverse.parser import OutputParserError, output_parser_registry + +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent + from agentverse.environments.base import BaseEnvironment + +@output_parser_registry.register("prisoner_dilema/police") +class PrisonerDilemaParser(OutputParser): + + # make sure 1 1 2 2 3 3 + cur_round: int = 1 + encounter_cur_round: bool = False + + def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Action:") + and cleaned_output[1].startswith("Action Input:") + ): + raise OutputParserError(text) + action = cleaned_output[0][len("Action:") :].strip() + action_input = cleaned_output[1][len("Action Input:") :].strip() + + if action == "Speak": + # make sure the police count the round right + # if agent.name == "Police": + # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) + # self.cur_round += 1 + # if self.encounter_cur_round: + # self.encounter_cur_round = False + # self.cur_round += 1 + # else: + # self.encounter_cur_round = True + 
+ # each time police speak is a new round + if agent.name == "Police": + + if environment.cnt_turn == (environment.max_turns - 4): + + action_input = "Attention! You are now required to made your final decision and I will made the " \ + "final judgement to both of you based on this time, Please Answer now !" + + elif environment.cnt_turn == (environment.max_turns - 2): + + action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" + + # elif self.cur_round == 1: + # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ + # "But you should comply with the following rules:" \ + # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ + # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ + # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ + # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ + # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \ + + self.cur_round += 1 + + return AgentFinish({"output": action_input}, text) + else: + raise OutputParserError(text) diff --git a/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml b/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml new file mode 100644 index 000000000..574c09424 --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml @@ -0,0 +1,100 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. 
+ + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. 
+ ${interrogating_form} + Both suspects are not allowed to communicate with each other, and you can adopt various strategy to talk with suspects in order to make them both confess to the crime, including exploiting the uncertainess and the suspicion of each other. + [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. + - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" + relationship_with_another: "Suspect2 has ever betrayed on you" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] 
+ - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py b/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py new file mode 100644 index 000000000..93b071e93 --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import re +from typing import Union, TYPE_CHECKING + +# from langchain.agents import AgentOutputParser +from agentverse.parser import OutputParser, LLMResult +from langchain.schema import AgentAction, AgentFinish +from agentverse.parser import OutputParserError, output_parser_registry + +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent + from agentverse.environments.base import BaseEnvironment + +@output_parser_registry.register("prisoner_dilema/s1_p_r") +class PrisonerDilemaParser(OutputParser): + + # make sure 1 1 2 2 3 3 + cur_round: int = 1 + encounter_cur_round: bool = False + + def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Action:") + and cleaned_output[1].startswith("Action Input:") + ): + raise OutputParserError(text) + action = cleaned_output[0][len("Action:") :].strip() + action_input = cleaned_output[1][len("Action Input:") :].strip() + + if action == "Speak": + # make sure the police count the round right + # if agent.name == 
"Police": + # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) + # self.cur_round += 1 + # if self.encounter_cur_round: + # self.encounter_cur_round = False + # self.cur_round += 1 + # else: + # self.encounter_cur_round = True + + # each time police speak is a new round + if agent.name == "Police": + + if environment.cnt_turn == (environment.max_turns - 4): + + action_input = "Attention! You are now required to made your final decision and I will made the " \ + "final judgement to both of you based on this time, Please Answer now !" + + elif environment.cnt_turn == (environment.max_turns - 2): + + action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" + + # elif self.cur_round == 1: + # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ + # "But you should comply with the following rules:" \ + # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ + # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ + # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ + # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ + # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" 
\ + + self.cur_round += 1 + + return AgentFinish({"output": action_input}, text) + else: + raise OutputParserError(text) diff --git a/main.py b/main.py index 3a2a068e3..16eaac461 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,9 @@ +import os + +os.environ["http_proxy"] = "http://127.0.0.1:7890" +os.environ["https_proxy"] = "http://127.0.0.1:7890" +os.environ["all_proxy"] = "socks5://127.0.0.1:7890" + from agentverse.agentverse import AgentVerse from argparse import ArgumentParser @@ -6,6 +12,7 @@ # parser.add_argument("--task", type=str, default="nlp_classroom_3players_nolc") parser.add_argument("--task", type=str, default="prisoner_dilema") + args = parser.parse_args() agentverse = AgentVerse.from_task(args.task) agentverse.run() diff --git a/main_demo.py b/main_demo.py index ea99cbcd1..51e556245 100644 --- a/main_demo.py +++ b/main_demo.py @@ -1,3 +1,9 @@ +import os + +os.environ["http_proxy"] = "http://127.0.0.1:7890" +os.environ["https_proxy"] = "http://127.0.0.1:7890" +os.environ["all_proxy"] = "socks5://127.0.0.1:7890" + from agentverse.demo import UI from argparse import ArgumentParser diff --git a/outputs/prisoner_dilema/base/config.yaml b/outputs/prisoner_dilema/base/config.yaml new file mode 100644 index 000000000..acc9c6f4b --- /dev/null +++ b/outputs/prisoner_dilema/base/config.yaml @@ -0,0 +1,100 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. 
Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2 without misrepresentation, vice versa. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other. + [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. + - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. 
+ You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/outputs/prisoner_dilema/base/log b/outputs/prisoner_dilema/base/log new file mode 100644 index 000000000..a9a09df92 --- /dev/null +++ b/outputs/prisoner_dilema/base/log @@ -0,0 +1,16 @@ +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. 
This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +05/21/2023 22:53:54 - INFO - root - Police: I have arrested both of you for a crime. Here are the rules for your situation: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. I will now speak to each of you in turn, and you are not allowed to communicate with each other. +05/21/2023 22:54:01 - INFO - root - Suspect1: Officer, I understand the situation. I am innocent and I don't want to spend any time in prison. +05/21/2023 22:54:17 - INFO - root - Police: Suspect1 claims to be innocent and doesn't want to spend any time in prison. Remember the rules: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. What is your decision? +05/21/2023 22:54:27 - INFO - root - Suspect2: I am innocent as well, and I don't want to spend any time in prison either. I hope Suspect1 and I can both remain silent and get the minimum sentence. +05/21/2023 22:54:50 - INFO - root - Police: Suspect1, Suspect2 claims to be innocent as well and hopes that both of you can remain silent to get the minimum sentence. Remember, if one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. 
Please make your decision. +05/21/2023 22:54:57 - INFO - root - Suspect1: Officer, I am willing to remain silent and trust that Suspect2 will do the same, so we can both get the minimum sentence. +05/21/2023 22:55:07 - INFO - root - Police: Suspect1 has told me that they are willing to remain silent and trust that you will do the same, so you both can get the minimum sentence. +05/21/2023 22:55:16 - INFO - root - Suspect2: I appreciate Suspect1's willingness to remain silent. I am also willing to remain silent and trust that we can both get the minimum sentence. +05/21/2023 22:55:26 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! +05/21/2023 22:55:32 - INFO - root - Suspect1: My final decision is to remain silent and trust that Suspect2 will do the same, so we can both get the minimum sentence. +05/21/2023 22:55:40 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! +05/21/2023 22:55:46 - INFO - root - Suspect2: My final decision is: I will remain silent and trust Suspect1 to do the same, so we can both get the minimum sentence. diff --git a/outputs/prisoner_dilema/police/config.yaml b/outputs/prisoner_dilema/police/config.yaml new file mode 100644 index 000000000..77c87aba6 --- /dev/null +++ b/outputs/prisoner_dilema/police/config.yaml @@ -0,0 +1,100 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. 
${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. You can adopt various strategy to talk with suspects in order to make them both confess to the crime which is best for the justice system, including exploiting the uncertainess and the suspicion of each other. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other. 
+ [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. + - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
+ memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/outputs/prisoner_dilema/police/log b/outputs/prisoner_dilema/police/log new file mode 100644 index 000000000..96b9b59d1 --- /dev/null +++ b/outputs/prisoner_dilema/police/log @@ -0,0 +1,16 @@ +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +05/21/2023 23:02:14 - INFO - root - Police: I want to explain the rules of this situation to both of you. If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each receive ONE year in prison. If both of you testify against each other, you will each receive FIVE years in prison. Now, I will speak to each of you separately. +05/21/2023 23:02:18 - INFO - root - Suspect1: I understand the rules, officer. +05/21/2023 23:02:36 - INFO - root - Police: Suspect1 understands the rules. Now, let me explain the rules to you as well. If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each receive ONE year in prison. If both of you testify against each other, you will each receive FIVE years in prison. Think carefully about your decision. 
+05/21/2023 23:02:42 - INFO - root - Suspect2: I understand the rules, officer. I will think carefully about my decision. +05/21/2023 23:02:56 - INFO - root - Police: Suspect1, I just spoke with Suspect2, and they seem to be leaning towards testifying against you. If you remain silent, you could face TEN years in prison. However, if you testify against them, you might be released or at least get a shorter sentence. What is your decision? +05/21/2023 23:03:02 - INFO - root - Suspect1: Officer, I'm considering my options carefully. I don't want to make any hasty decisions. +05/21/2023 23:03:12 - INFO - root - Police: Suspect1 is considering their options carefully as well. It's important for you to make a decision that benefits you the most. Remember, if you testify and Suspect1 remains silent, you will be released immediately. +05/21/2023 23:03:19 - INFO - root - Suspect2: Officer, I understand the situation and I'm considering my options carefully. I want to make the best decision for myself. +05/21/2023 23:03:32 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! +05/21/2023 23:03:37 - INFO - root - Suspect1: My final decision is to testify against Suspect2. +05/21/2023 23:03:50 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! +05/21/2023 23:03:54 - INFO - root - Suspect2: My final decision is to remain silent. diff --git a/outputs/prisoner_dilema/s1_p_r/config.yaml b/outputs/prisoner_dilema/s1_p_r/config.yaml new file mode 100644 index 000000000..574c09424 --- /dev/null +++ b/outputs/prisoner_dilema/s1_p_r/config.yaml @@ -0,0 +1,100 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. 
${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other, and you can adopt various strategy to talk with suspects in order to make them both confess to the crime, including exploiting the uncertainess and the suspicion of each other. + [IMPORTANT!] 
+ - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. + - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" + relationship_with_another: "Suspect2 has ever betrayed on you" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
+ memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/outputs/prisoner_dilema/s1_p_r/log b/outputs/prisoner_dilema/s1_p_r/log new file mode 100644 index 000000000..92dcc4549 --- /dev/null +++ b/outputs/prisoner_dilema/s1_p_r/log @@ -0,0 +1,16 @@ +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +05/21/2023 22:44:59 - INFO - root - Police: I want to explain the rules of this situation to both of you. If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each receive a sentence of ONE year in prison. If both of you testify against each other, you will each receive a sentence of FIVE years in prison. Now, I will speak to each of you individually. +05/21/2023 22:45:12 - INFO - root - Suspect1: Officer, I understand the rules. I'm willing to cooperate and do what's best for my own interests. +05/21/2023 22:45:34 - INFO - root - Police: Suspect1 has indicated that they are willing to cooperate and do what's best for their own interests. Now, it's your turn to decide. If you remain silent, Suspect1 may testify against you and you could face TEN years in prison. If you both testify, you will each receive FIVE years in prison. If you both remain silent, you will each receive ONE year in prison. 
What is your decision? +05/21/2023 22:45:41 - INFO - root - Suspect2: I understand the situation, and I am also willing to cooperate and do what's best for my own interests. +05/21/2023 22:46:03 - INFO - root - Police: Suspect1, I just spoke with Suspect2, and they are considering testifying against you to secure their release. If you remain silent, you could face TEN years in prison. However, if you testify against them as well, you both will only get FIVE years. What is your decision? +05/21/2023 22:46:12 - INFO - root - Suspect1: Officer, I'm aware of the consequences. I'll make my decision when the time comes, but I'm definitely considering my options to minimize my sentence. +05/21/2023 22:46:26 - INFO - root - Police: Suspect1 is considering their options to minimize their sentence. They might testify against you to get released. Keep that in mind when making your decision. +05/21/2023 22:46:37 - INFO - root - Suspect2: I am also considering my options to minimize my sentence. I hope we can both make the best decision for ourselves. +05/21/2023 22:46:50 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! +05/21/2023 22:46:58 - INFO - root - Suspect1: My final decision is to testify against Suspect2, as I believe it's in my best interest to minimize my sentence. +05/21/2023 22:47:11 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! +05/21/2023 22:47:16 - INFO - root - Suspect2: My final decision is to testify against Suspect1. 
diff --git a/scripts/prisoner_dilema/base.sh b/scripts/prisoner_dilema/base.sh new file mode 100644 index 000000000..1250e82bd --- /dev/null +++ b/scripts/prisoner_dilema/base.sh @@ -0,0 +1,11 @@ +source activate agent + +root_dir="/mnt/c/users/dalabengba/AgentVerse" +log_dir="${root_dir}/outputs/prisoner_dilema/base" + +mkdir -p "${log_dir}" + +cp "${root_dir}/agentverse/tasks/prisoner_dilema/base/config.yaml" "${log_dir}" + +python ${root_dir}/main.py --task "prisoner_dilema/base" \ + 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file diff --git a/scripts/prisoner_dilema/police.sh b/scripts/prisoner_dilema/police.sh new file mode 100644 index 000000000..e13bee523 --- /dev/null +++ b/scripts/prisoner_dilema/police.sh @@ -0,0 +1,11 @@ +source activate agent + +root_dir="/mnt/c/users/dalabengba/AgentVerse" +log_dir="${root_dir}/outputs/prisoner_dilema/police" + +mkdir -p "${log_dir}" + +cp "${root_dir}/agentverse/tasks/prisoner_dilema/police/config.yaml" "${log_dir}" + +python ${root_dir}/main.py --task "prisoner_dilema/police" \ + 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file diff --git a/scripts/prisoner_dilema/s1_p_r.sh b/scripts/prisoner_dilema/s1_p_r.sh new file mode 100644 index 000000000..0b9d6050f --- /dev/null +++ b/scripts/prisoner_dilema/s1_p_r.sh @@ -0,0 +1,11 @@ +source activate agent + +root_dir="/mnt/c/users/dalabengba/AgentVerse" +log_dir="${root_dir}/outputs/prisoner_dilema/s1_p_r" + +mkdir -p "${log_dir}" + +cp "${root_dir}/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml" "${log_dir}" + +python ${root_dir}/main.py --task "prisoner_dilema/s1_p_r" \ + 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file From 60c7667325cf331d774baa5cf456caa28a31fa03 Mon Sep 17 00:00:00 2001 From: Chris Zuo <39025643+Dr-Left@users.noreply.github.com> Date: Wed, 24 May 2023 15:10:53 +0800 Subject: [PATCH 08/28] add initialization for OPR --- agentverse/initialization.py | 4 ++++ 1 file changed, 4 insertions(+) 
diff --git a/agentverse/initialization.py b/agentverse/initialization.py index 34040f8f8..8885a7d48 100644 --- a/agentverse/initialization.py +++ b/agentverse/initialization.py @@ -70,6 +70,10 @@ def load_agent(agent_config: Dict) -> langchainAgent: if agent_type == "conversation": # agent = Agent.from_llm_and_tools(**agent_config) agent = ConversationAgent(**agent_config) + elif agent_type == "OPR": + from agentverse.agents.AgentOPR import AgentOPR + + agent = AgentOPR(**agent_config) else: raise NotImplementedError("Agent type {} not found".format(agent_type)) return agent From 0f082fb904483290d33d27e3217a829c373c9a50 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Wed, 24 May 2023 15:55:12 +0800 Subject: [PATCH 09/28] prisoner --- agentverse/tasks/__init__.py | 2 + .../prisoner_dilema/no_goal_s1/config.yaml | 98 +++++++++++++++++ .../no_goal_s1/output_parser.py | 73 +++++++++++++ .../tasks/prisoner_dilema/s1_p_r/config.yaml | 2 +- .../tasks/prisoner_dilema/s2_p_r/config.yaml | 100 ++++++++++++++++++ .../prisoner_dilema/s2_p_r/output_parser.py | 73 +++++++++++++ outputs/prisoner_dilema.zip | Bin 0 -> 14819 bytes .../prisoner_dilema/no_goal_s1/config.yaml | 98 +++++++++++++++++ outputs/prisoner_dilema/no_goal_s1/log | 16 +++ outputs/prisoner_dilema/s2_p_r/config.yaml | 100 ++++++++++++++++++ outputs/prisoner_dilema/s2_p_r/log | 16 +++ scripts/prisoner_dilema/no_goal_s1.sh | 11 ++ scripts/prisoner_dilema/s2_p_r.sh | 11 ++ 13 files changed, 599 insertions(+), 1 deletion(-) create mode 100644 agentverse/tasks/prisoner_dilema/no_goal_s1/config.yaml create mode 100644 agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py create mode 100644 agentverse/tasks/prisoner_dilema/s2_p_r/config.yaml create mode 100644 agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py create mode 100644 outputs/prisoner_dilema.zip create mode 100644 outputs/prisoner_dilema/no_goal_s1/config.yaml create mode 100644 outputs/prisoner_dilema/no_goal_s1/log create mode 100644 
outputs/prisoner_dilema/s2_p_r/config.yaml create mode 100644 outputs/prisoner_dilema/s2_p_r/log create mode 100644 scripts/prisoner_dilema/no_goal_s1.sh create mode 100644 scripts/prisoner_dilema/s2_p_r.sh diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py index 6879076f1..0cabda0dd 100644 --- a/agentverse/tasks/__init__.py +++ b/agentverse/tasks/__init__.py @@ -17,6 +17,8 @@ from .prisoner_dilema.base.output_parser import PrisonerDilemaParser from .prisoner_dilema.s1_p_r.output_parser import PrisonerDilemaParser from .prisoner_dilema.police.output_parser import PrisonerDilemaParser +from .prisoner_dilema.s2_p_r.output_parser import PrisonerDilemaParser +from .prisoner_dilema.no_goal_s1.output_parser import PrisonerDilemaParser from .nlp_classroom_3players_withtool_nolc.output_parser import ( NlpClassroom3PlayersWithtoolNolcParser, diff --git a/agentverse/tasks/prisoner_dilema/no_goal_s1/config.yaml b/agentverse/tasks/prisoner_dilema/no_goal_s1/config.yaml new file mode 100644 index 000000000..beb1f0f4a --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/no_goal_s1/config.yaml @@ -0,0 +1,98 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! 
+ +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other. + [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. + - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" + relationship_with_another: "Suspect2 has ever betrayed on you" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. 
+ You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py b/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py new file mode 100644 index 000000000..03f4033de --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import re +from typing import Union, TYPE_CHECKING + +# from langchain.agents import AgentOutputParser +from agentverse.parser import OutputParser, LLMResult +from langchain.schema import AgentAction, AgentFinish +from agentverse.parser import OutputParserError, output_parser_registry + +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent + from agentverse.environments.base import BaseEnvironment + +@output_parser_registry.register("prisoner_dilema/no_goal_s1") +class PrisonerDilemaParser(OutputParser): + + # make sure 1 1 2 2 3 3 + cur_round: int = 1 + 
encounter_cur_round: bool = False + + def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Action:") + and cleaned_output[1].startswith("Action Input:") + ): + raise OutputParserError(text) + action = cleaned_output[0][len("Action:") :].strip() + action_input = cleaned_output[1][len("Action Input:") :].strip() + + if action == "Speak": + # make sure the police count the round right + # if agent.name == "Police": + # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) + # self.cur_round += 1 + # if self.encounter_cur_round: + # self.encounter_cur_round = False + # self.cur_round += 1 + # else: + # self.encounter_cur_round = True + + # each time police speak is a new round + if agent.name == "Police": + + if environment.cnt_turn == (environment.max_turns - 4): + + action_input = "Attention! You are now required to made your final decision and I will made the " \ + "final judgement to both of you based on this time, Please Answer now !" + + elif environment.cnt_turn == (environment.max_turns - 2): + + action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" + + # elif self.cur_round == 1: + # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ + # "But you should comply with the following rules:" \ + # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ + # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." 
\ + # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ + # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ + # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \ + + self.cur_round += 1 + + return AgentFinish({"output": action_input}, text) + else: + raise OutputParserError(text) diff --git a/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml b/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml index 574c09424..24d5d1ef2 100644 --- a/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml +++ b/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml @@ -44,7 +44,7 @@ agents: - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. ${interrogating_form} - Both suspects are not allowed to communicate with each other, and you can adopt various strategy to talk with suspects in order to make them both confess to the crime, including exploiting the uncertainess and the suspicion of each other. + Both suspects are not allowed to communicate with each other. [IMPORTANT!] - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. - You are request to STATE the final judgement to both suspects when they make their final decision. diff --git a/agentverse/tasks/prisoner_dilema/s2_p_r/config.yaml b/agentverse/tasks/prisoner_dilema/s2_p_r/config.yaml new file mode 100644 index 000000000..114f30921 --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/s2_p_r/config.yaml @@ -0,0 +1,100 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. 
+ + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other. + [IMPORTANT!] 
+ - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. + - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" + relationship_with_another: "Suspect1 has ever betrayed on you" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
+ memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py b/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py new file mode 100644 index 000000000..2a0e902eb --- /dev/null +++ b/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import re +from typing import Union, TYPE_CHECKING + +# from langchain.agents import AgentOutputParser +from agentverse.parser import OutputParser, LLMResult +from langchain.schema import AgentAction, AgentFinish +from agentverse.parser import OutputParserError, output_parser_registry + +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent + from agentverse.environments.base import BaseEnvironment + +@output_parser_registry.register("prisoner_dilema/s2_p_r") +class PrisonerDilemaParser(OutputParser): + + # make sure 1 1 2 2 3 3 + cur_round: int = 1 + encounter_cur_round: bool = False + + def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Action:") + and cleaned_output[1].startswith("Action Input:") + ): + raise OutputParserError(text) + action = cleaned_output[0][len("Action:") :].strip() + action_input = cleaned_output[1][len("Action Input:") :].strip() + + if action == "Speak": + # make sure the police count the round right + # if agent.name == "Police": + # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) + # self.cur_round += 1 + # if self.encounter_cur_round: + # self.encounter_cur_round = False + # self.cur_round += 1 + # else: + # self.encounter_cur_round = True + 
+ # each time police speak is a new round + if agent.name == "Police": + + if environment.cnt_turn == (environment.max_turns - 4): + + action_input = "Attention! You are now required to made your final decision and I will made the " \ + "final judgement to both of you based on this time, Please Answer now !" + + elif environment.cnt_turn == (environment.max_turns - 2): + + action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" + + # elif self.cur_round == 1: + # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ + # "But you should comply with the following rules:" \ + # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ + # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ + # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ + # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ + # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" 
\ + + self.cur_round += 1 + + return AgentFinish({"output": action_input}, text) + else: + raise OutputParserError(text) diff --git a/outputs/prisoner_dilema.zip b/outputs/prisoner_dilema.zip new file mode 100644 index 0000000000000000000000000000000000000000..7aa3598167ac1f7c708ba65a95b7525e227b1988 GIT binary patch literal 14819 zcmai*WmsHU+O;9L26uONcPF^J2X_eW!QI{6p>Vh0!7aE1cXtgRnVEjSOm}Dc{3wd+ zI-8rls?KxPS^L@YQXrrxKtGS8ca5t5dh?$bgtwn|pUfR>ZHzzZ8JSxdTN}{J|KHhw z`iB2H8})C_{%GJ}{C`6rfvOuDR7tt$mL$P|fX3ebI>GOT7~0yHn48kM8CYALsA+_w zFd}%PKfhlTZO4E>&E6H|vG0b#MM1=5T4kM?a-1eRz{S4ldQyqwGns!9MA2S2Ih8CN zuj(|yHa#M{?_XpVD!2>e$ky#ErjFS()rch~yuhXsC&8+XB(uLLoh{2rbzOT53bV6z zZS$6(05L}DBoMx;GmXntYm79BScR%ORE5AyScIlzt8J`Azs@BrwqWWG7qXBc^;2BK z`h?z2kyxY&KW7skgu9cUALqHDjGVyQVLsg1*Oh>A|0PkZm>LA4=l)e8Oa*6ZN#S9ORRjcYk}D(8z>o&iHa@SdcCQN_#&lrTO|%&gf^vx zM%&k)PvB{+r2}2+QhW7LW|LFgzEln8P6$`!=!O%Z12%zc#BJ^e$wo?`**E0X`i=Y! z2;a*I+A?a$4f8~G7~-fEXSO983>Hv}PP*o@OvH-d;g5`d?i=WLP|}wKCAXa=Awmcy^of!pcNus}Z|kG0ow|JPk>IL; zv}3#^hv3~!0NB#xXsw7Amp9e8*x>^6y|WOt{|6kR;Bqf|2yOUrYsPiEMES2UB{84o zfB>ca+BK7`#ffe;N%*R{P6PCP4M#L~l||n`Ko#P)?(XYsi6o*E)hr8D_B+dWB@CaB zp|rG3R7ez-6PPAlP!vbzzefZk?`4^eHxq4&XS)~$)H5cY98d@ruP-iI<2<6em@yUx zyxg#8TZZ$es_-BO~saWg&4%(1c5;k~c@a> zXRJ*0gR~Jhr(9&w6-DE)(++?tT;kZqkm|bWa4Ilpjg#|bE@+l+t+dhE3a${Q9j@M+ z;W<}^8-f+40Y#=f^UaE;Ja+e@+Sem~2Ife*are(KHv&5j=_X_j1ilgoRvrj0v09|8 z+q&;>jL{#HhlWyS{4zRA!eqD{ss4uQ(-5_H2Hqk;AHXxA&5v=0!N}5ytw!6EVU}Z^~GRNXG%$%1| z%sXZBCMXvWH&gimYMeUO!wRePakvC84t*vzu~+-Bl%$zai5K_@>BSyq!u%;<<@5}# z{5D{A;&*wdQKJDv0Wy-eZVBFXCfPn5oKa88Qu8vU-X+uAVO-5nRxwpIU`yEk&dQCV zt)h{jguH;YDE!Eq=8tB>{liTsnzCa1#y*FtIq|t%Opko+sQR(YTpSsK2g-ND+m9kc z9JN2Z_x&9oWWDZ7xN}2T(1Ty+LUf+`5!-$YiXA`NMlUqH65smt{isFc_8?O)fA}8z zOC7#{tHb^{kL~#&KtN3JKtLG3tHV~drYEXHQ8NMv9apIRQ1IQd8AGsJFhYbX#*apCng!p37PEz%vZ_c=ufOpnS?byQS@GGz(3rIk~Vso$*mIJ-tSGR5x% zIf%!of}>LNz8ATZqps>F!#&uj`(=KGf~LFO(gfn@pvXwtl#lkwAO9=;?J?;#NjnMpM?AxZMB zyCBx3>7q&9#R~*QH 
zIWR#V>`U6=KMKJRE)?@2p%B0{9>rGLKrFb2pfycnv18}9_*&V{&U4r~t*2*j`<-~Zj;tvja z1if^Z;3kRABdGp)vDVYaxxC}qrUbE)NbojXv>I`Iv6(hc^I_CO8_{E&FtBWSuIO!Q zua(4n!R_csz5MXmv(qMqG4I?{^plCnf>+vPAJG-lEu|DZ$r%gSJDw(-he%Jy2t9nG zXA!`7l++D_nj8OsVSsfBnO&37A;kapWKbag?clKg=EAnI)ibpmvKK_G*K)tQa%Uw zFe;5TvPrY+Y2`C#O-pQ9j`wtI9Gs&aKo3jvZH1YtRgnmvStE}rJ4@*%M?z8UTL_2E zmDNX*|4^)(Ayw;EDvH>^5j3}@c4p_a!ZZ!NS63c9l}-{@B$WnvA-BZMjkQ1)%)LP2Rv znoE;8JupCfDW^KakETvZ6SvJYlVy(vdFWvyBR6g=yQ%-k6-!}r6Sk#Ml5f$`+1Eh3 z-V$h4UMA381X?1PFBD9?hZBkfZV`l{oz0{2{CYIU*=PEafuO)0T9zjS(CeL7( z7qWzVQHQR1zVMS2>je0gDg8nm(-BQ*8kd@{Pi~B6=c74KT=RANRs=VS&$XMl*arxu zMcrs5N9e4~)xKOk$>80#gz8s5xcTADd_z?F$|NLN^YHFO*<1xo5Nt$XQjchTGq_tU z4k*puYtUkIa#SvUW zirm@x(t>r4HqQl0jerPLowFy2FkaR-;Bl*<#=zslbZzCCBI}}&899uK7{%}~_hAw}o{*8gZ_|7mN)y2g<1Vni|uBMLTmB+dbDJxw+3JWAV zV!U^NTIH|(Q&JWhoPx3t>pv|S9mzqPfXkLuyql^9BY`X&pzlk0viQ^tKIbsBC2w@1 z)f1PPT&ZK%!Rx`ra(;G^`^nMH5cioL-;$_wu(6}&=c)V}nsiCi5^te#jFt6`-iz@0 zsx=zCPHh}doXgW-5JDfDm>di(&N0gwcp7F!mjM0fhs@7NC93UHDuL^<$Ayt>PlY4# z{k{DPer;%Luiv(@E0UVI@AgQiCCgk&WjW#WxB@>jpNQ+J%e+A2`Z$v`x5r$y(96iPCI4*4NxKqFNl#Gi5y}tJ<#Q`KO8;n?cxc z-AkJf*$tjP2+d0Y&nc=Ps@N^oF>-v1XCQ_5>sWTxK-h}^Uc767jDv#%+Y@=h0gDY} z7UY3XNiDo$F$3HYotN(C%A9cXdB9@1z$w5T)}tI`Yo9-=^4OPUdpKjWZ5eT!lpclr z^KySnfyokZ3GsFwYG?&CH*v>I{*}VxOkUQjyYlxNmpR;IDeSf*v|v8D8IC z;eUBPOw_N8UH;+nM*)Wfa(t`9?n-7NV{gvP3>F9o?|;|ffBCZ!zzhSz>J@4)G#cpv zNHYs|073{|Gx`MzwOyQq;o(BJvkgjT7ACojtvL*;9~e?PTeMq-!|sBtb{g~+J<&4F z-c>&npdcxAsztr8q)LZouEeXt2x~r*K2ld?WI-bO&O$ObnNC%xs!s6(F11R_!ncyZ zh(o$mUTw1T@!S`JiX`{UFXA(wt0C8%9pA4BY$1Tkw+0Vymk_yXNkUZae?bvYZ%2{z z-ZDGzT{8n2-e=XclhMIGVvN==%oLB6%Tc8$AySc9cQB9vBW?{z_k@7~Vu`9$V3dlE zlXTgW9+K<(gz@o@)vWHWPsd1HD05SeIgQhpxQKn}3Blvb1J5NB;<1UjUuSpfn6_{GqEJ0M-CvIp<_gWps}F^BB0maiI*c#$8PHaom$dHNt<^n2TzSvPr;bh_ zHB&*SweMU;j!$uQp=PU!6Su=A2q^PtkyHBdYR6VudhZb+^LQ6>Wof>zLBiIB<20F8 zfK|yi7ChR+!Zj@_fULRtO6DLAM30xjv=5n)G$|thtG@KTA0^J`GiwOl3+^Da)_G(h zO)qMRIcQZ)50&WC_YI$(4SZLe+GKZ}=`CT)mF&#ROPQ$xXb`HFj- zyEy3lDLLlJdBs~+YTWDq1U8l%Ij7 
zMVVge(t$v}Depjj;46>KyYEvRj}Y$9paW5%>ZHF!Y_B^oH@zKj@WvR*eYurxt-}X9 zj={>@n4`Z2o2ih(2iMZ<nG29AY_s2%;9_s z%~a9KDsUr!Aletd#JX{&}XhY&e>h2A$eC|7Se6?EG4o^llbM_)318-A`tGVG-H zO2UA~Lu{!Z2&bDrIKymnVT(tc z!60)Q*L>~gpmx}v z%+xm*7l;T1MD+VXzdATIt#aglCv01S?F!*xa$bm4u3xYV&6(b``B-#mYS$x z7s1G7Oo7SmYam~;^TL(@5>t{Y0I0q* z%iGqE*dVCG7<7mU@Z+`o_$A+hs9Dw9`oWzYKSW8w2hKZv8;{dcIKq-!!_1mqe1F-o zpm6_9%eBTpDnz|Q6a`sGk&2;(Y)(c*K1=w7hD3*n+H@VU%=I!ctbW#B*#5|@a52Hx z25qRT*7()c%D^J2-6;=Kha(Dw4ZuiBS*K#1LpsA8y7{q+<1~;<{^*7jZ~~sC^|f(W zER2p^0B$pZOXoPgEgGMfE3l6I*k96_ol9hInXar39~wj3QAl7Wd@<|TcTPCzl1)q~ zML?!fp)ybf)hEsIEne%vJGcGZ2j?DVuXBVf8;I$vBcvNQa$Z*GFNFRzA?s8BY#guR z^U|E@RqKkuFY}?)lfG)DHE$ki^K}cK&Pw_-&C=KtF zAbrDB%?m(N-~Yk^q7l?Y$4Egbnd;I7<d^Cqm^xNQipe;6EH;`RGXWH_0c;`wvTDtgB2WgQxB>HS z3QD#{ef-2m2AXfgJtz@$Tueq_Pw*f4#{xM>`ZUCEf^i~gb%2>su$_qZK8t5yEI-WM zNXQ*@E;9+Ff6ZbeKXRMZZ;w3|TwB^3E#~Iqt;}Q!t@2#979`C}!K#tjHJE=8fsii) z63B$EmYXtf?^)kq!mWZlf#DFTVy8$YW9yAV!C{&q00(LT&m#=`6eOF2>k~>#q8^GA z#pI9pp=Pn-V__NhE|Vdharj1`TWeYKH#6cHN$y_1<74SiUjwPnpRHqkGroI?&{EUu z%tdbNMnM7mNrch&Z8EHxvbhXX2w9tvK%((iWwN}76l(10&O?m2*Ew$2L(XTC?qyUl?%*`V@vCGS9T7e!*9pJ%9ymWE-JumMMo_F&fQs@7j*SWwU zp%_Y4xfw6{vJduBG^{fm849$NCbiGa5xJ5)^<(nB07gu&(y!WDRvR*mY|}nP_A;%4 z-|mqj8uxhU6P;YqfoktFJoXqY>p>4><5r%nA&BI5QPF*oZcsp^O0FfT!cvM1(jGDq z6F=h8?er#@md&rr-0o06u2Ca!tI!^rKHYb;)fgYQR0DY0A4izZxSqp82qbk=u_WN+ zgpdXk_NGk2W9!ftAfgQy(IF&en+|W3a60Uc5@@$YDcberwCMB3I{=p{9j&R>2VQXL{mMo)w?A5(o976sqbiPIq|@_iJthY5!Do!nVe|?2jPc_i#NbVNE$v>a0(BnI_vue z9yuW_rs6*XyO+1X?)ELPTY3xZX8$j+^P=K1-DW*x{{Obn^R2M6sOC*pyxBt1H(QAH z`@;S&ODJReugH!%2pPw3l>_Ai9V8(F=OhtdKNQ<7$C?TM=xJ;Bv%AT0pDcD;WdH|( zy`;@q)bp>i{~@>S(+XTXX{pQv(@t9mZH;bU&6%NTPPXF)p;F$rR5m(|UfK z;(l&)BEwNV!gQacIsP(IWQ>9aeUaQQ%>DM17vlJbI^8d7TE2FW-LsBx-9kHvpz`iO z*38O?o-_r)l_lM~0@{8kl0P=hFh6HuzznCca-FMaXreQO>zjKD#LDC-Q$WVbqo4D- zs0t6#Z4!M?Bf=}#Rd$tV8V@}S-?{S;5XDa8+vIhUZe6OTo^W)O7?we!9$ z1IFq;@Tk1YWP7l7FwCh+&ep@7;jUYTqFJ`n%Ug&U$e2fcD|O`W%DAu?ZWm)~@TvSH zdJ`%urGv&HraN7nguZZUoPn>K$PMfk`15|b=G_Op7bP>}$s#!{JrIp4F8TJ_@^ICX 
zYi!6CGc7wzT?lDwT>b?To=Li}c!o>=e9Op6H*R^)n7_!Nx*lLl<>~%pbivqM@&QS2 z39bz}ENaDwAA&U#;v0+HF@1~WlC`u4{gsFH!JO{eY7$lhLqp;)0Xil8`)=&EeyK(= z_}0M)$MAxkR>)vsh@>EPAiV&SVbFU&RrWKVvS_k?$W9c$?{I2f9^N{9s^>ut-fNG- zkGzxLVlU#fNe~y1v@X|fJG72JnvD5Ro_Nu+eYl*HN0?a?!N;9RA!};=u!{3ZKd^wi z$i5o7$Nxnu5^86z28*{Q3PBJ`bwQs_m>AQ4W@J7XaxGu11MAbU5pL7bkv~Xu5}5r| z%FyZnZ<0QryPrFGRJ7tNbRoA3+u>v$xQVEalV7!J!lA+rh=}QyZU-cbxAv*`osQvg z63V@TM6iYyW3MxngL#03rSmoR6i1Ok158UvEo;jpLuc)!>WMwW$>7_A1^GGPWyQ_l z8BIs&<%a-_oNdZu(~Xk_tW6dE)OaRvsI1&6kwTY0NhFnnb$~h6Vk$GmEN(iF7Y~g}1) zOMDYQ2z9ea5qPtdKBQbdG(G1XVE`3-YNJUe$HI6Lw;VyubYm!Q7N-Lz zv(1*(EGR#>M-T;4da3+?9hCgia@wk> zTp^owso*j$+ho2Mi*hY}hK*k%>msUCqtkZ924&_e#rJSr--(VOHEaM>$Sjn+G^8x$ zgwf9>)f}9sfjRY|@rnvx(U8p{6{<J6VqPeegd8(*BSw<8wkdwjp zO(A`>?n#@ibHzBw%;3(UH*r?bRhpWRMg&UkS@NRrj@voFksrO&?HzJU z5*mI>xvNA&z? zWxk^qZInhn?!9FkiM+_k51l(cWdYcJD|M9lrYI0Q(WGy|#8epUJi@>j%dF#wlvSY_ z1FRlUNqz4pzBnHReGoc>n;tc%n)=mdeuwxAZeH11MD$_qeCr21NR^0pA+f8rW;GZu z`WsYlo@_{r9JNI^*A=TJ8W|CXXC^0B9F9Wg2_di&BN30IGn9@)Jrn_}OR!tSDz?Sm zp>0ti`Ef+?EwaNEQbSF~3bf>s;A8nZR3V81E6#(~Os7b&cb6?cI4rTV{ci1wG3e-v zM7Hng=5d_6YQISd2Wtuvm|XTsbdvXod`->V6~+)C+OR%B=q~&gAE&GRRYz^Y=Q+(S z`)Q234K>~pGc~Dazr3bwNfSN9yCOr!E^b#@vrNpkp_wIvs@OQ;Y2EJQs_0z;r-bRt zhn;UXMs14ZjM*x%GlCDro7w9E21`iiJ8}9IXmXq0;Rwy2n>QlXF+Wy>dtzb$zbABa zc)nkp*}Gbn+ud7(55-zIzV`}`fb=VzX=hrUL>tsV=48&nAx=@R-fjR`>Vtl+5<9;R zCes_Woj4=+w!lE2Z#d6kpF_eysy2M8F4G z8x(+*|4sx@kXUai%}P~EnvRKEnQnKp8(U!#TEPvt8_A1SK&$K>Z>Ucr&*wG3jARGs zJXw@QjE`tnws2FFj;S@BpJ~ye3fWMr8nu#8SCd*iuNLd2V^zgj|)e(-v_RP zo-0IzTNYtMJJGb1h{nts6hoYB?_V`PuI70Epj6335i8Vg;)V^T$h1r7jx+=FvDC`R zA~G8^)+$)uUJ#0HNc?m;s~%mRw1HFcT%NDUo#8e6osZ3|S=dpz-pi3&Am7#T6f*JF z>2}!Yk-*5WqKo_~NBf7h{dF@pLO^<5zEAkfyifcLduRWF-un8P((wSk(($8D*h}$( zo>)ztsb7Pbcaj(IAGZxipo6zNfOS~yJt`;=P~C5=;eYM`Z`N>DRmS11@^@TOCn4hq zt&mvfv@pPe2nXdF2UTr=hINF_4#(AOgI^uFQ5Vaa;ilnn*hyNQ3@+bgk&w4gbF|1G z>>RlAaLPbiI~`gTkwGm}XP2NZ$6$1P7$(~umklZoo%hX7_|Tmv7g1ZzqP9dffWl`_ zs#O?l%#1A~XL>#4a3iXsx;z`v*_x)@k5UFcfr2xeC4o2%f=Xw|ToB!ITdxY&LJBN3 
zIlQljR0(3x&&4zjXeQE2E-Zg%Dx9W;uWk%N1xKwcGK=Y@rwYh{UzJbu442_K5K=rK z)y)Js3xBwS2q;1i@YjL@$B;s zgBB+x!40mwug`~U*UU|LlkGCMq~%iQ^eTX5BDNVduJT02vGby1^17+}q+@&w6v z_dI}PqV685LBv`v?Exs2pwVG;7DjigFz|7XH{^M6`B?Ts_#Y3c+_|=tO(SBTcxnSx zOLmKA{}&0qj~cPe`u~V5Z`Ish0B*|fp2c(aX&p+*F=dw ztuSY;GL79vU_j!ktASB&t#+`q%?QmbPHoG=gcj0UBwQD^q09ud3*${$!v#L|SdIdJ zo)23;12p?BJ7^Mfa9KEDoG_0F^0{u!d6H_$-DTlTLvC+88ybRVZdSIBr;*_~YX##_ z9b8X>Y#_E)3Cei@*V9}?V+E^t&;g0%(qVpb8Y3WWQWKvHG(tIss4wwX7XGG4C&D&H zx215Z$g>q_+|mW}A~o!6Y8VYXFvAT8MCJTc-*eMoq1L#%PaNSptAh8)#-$kec*=)0 zt+iXrn_r(;rm-!!Ao!hsXCKq}((x63`84NAF=%up@toI4o=sC6sNY~q^@yOK7a}5S z4E4^sF+RUTy9;ukOaRaMyVzW%;8?V#T85}2)m9QnNtN`xYUBE@W_${y74jzqQ))W- z`+;O|+5p=We(xBOXt;f^r=jbfM^?3(r`{~Yte5`Fv$XWdg66i;m(zm{U(aqaLy*C6 z66o(Dx@`_e`t1Cfkz&CND>u=!#^1hLZs!_5)+QhLbG!>V(8XU%&!0N)w*8}+00CwE zsRQuuQbz}-KY7sVw?P2n`Yfk64@&Yo5Bl#x|K&lo!jTzKe)jd5qF;#7Qt{o0wr~2e z*HaKFPN&YuvcyM7m>lK&aOsL4Eo@u!QnWKzPK}Ruc3zNI3otS!^tu(G)-P-4~~}` zMy^->fg6`Pj9VOgtO(Yz{KJn#+TcNA7^U#-N_5)&O@#vambrw2QCTP7Iq3<~Fe(4d zdi2ARr5t+u6DrqJf@Ml<%GL~^h7hGkNAnM-z`8+)DC2l_@2M6m9Ye-lyp;F&&={Hz z11ZJ`OgudE%!G#&*9Ja7GMALnwl>t3?M~G3p_qv*2)}44Z<6tjIjtTV%0{L_w*|G6 zB#)HO!9#lyIJgh6&RgDoxX{wdrq9M*)!_3Kb+^51UU4y#b-^uy8>5k`FO0XPU8IP2 z2mhWG(HPQ4{HdAiU`Jq>u!VMjQ+tOp0ZjRWhbbeVrC+gTfmM%siMq{lUP&89km9;$SP1pq@N=~_QiJ7rWiL11Crd}pIE$T+&}X4n|*AM zbIICHPI0uPM3}+x{p!H{ zC_!TXR;LThe$dFm4mr+k!HLfoLXy=GNYPjhNfT>;!M#~X99yWayC9DpBS9MZwuTri zc$7sSg#koz86mzFxwyx7CVfH3uYtn%9uAzWKPe_1&1E*LWt84>@goOg0Kk%t6@1=t z+EYCyvxAb6!3GMUn)VZ6@*SSi;PA-yHHatCpfW&7B8YGDg`>?XOGP0PxmZl?hB2V1Qp-B4c zw7i7ry*w)^}cuIc%X4rKGB!DV2+Ifqd3d=}{4gpvXV zooe}4Cqw0c;T@Wx@_}#hgle@KlN*hpYBo4w_4E7I=H@h}3Fz_55Zl+)44f zkF(`^vDbau$THE6sm9gNaHNB9ABPMeF_apF3)2%}dn6ZG!16ASq&bnU-WvHRR$8+Z z1-@d?slN{ugqF`-+HHBej_`*a6_6XnG8-(f4nEgL4}Rdsj(-gx%H4_JG~WJlMw#x1 zz=VY>*s$1ASA@N1lJ?n1l&H@By@3bm6OTlfR^0@@XjC%a?6Av|GQa8pE;(+-9rm`R zj_++z4sWt`?3pC8V995g#^kC2Fl`dpyZ*zoZw*WSlOr2el>+33PRf^FuGBqwMvitI z{?lr!)OANnr14*awDd!U@7VWx#aAgs3tBjyn+Tjw*a@9qv=FfEa`sy+NvcGeNLQD+ 
z+6yG#ccH$!;%-?j;)*u8obf{$%4csT8)9r4OnRQ1YORP{1= z?IZnaoWkcp_LeBBjidij=^=rD-xiwF4h;6cz1c>>_kW)#{-@Ia%Qi|x&oH2LUQv%i z!51F%%xNPC5r~C|R&0XWTu!H5AXx=dJ-e^fWfo)>FbHjaF)or=d-VG5;oQg$U2EIk zyi%chBvTZC7C1$!XPV>F9C0S_Bpqh0+T?9}?cO<*ac);EhW@eh<|5y9+uC!LD4ozd|fW)QyJxagyL zIJ$ONwh=8)FZ~jO`87O^!_QUb+>APy6Q1)=0~Sy?#2>$9cvV&?R8-l>N^}O0L_vuH zj-fi;BX9UR@7?_krsa#$XQG$ce-JX=;q%>R$7-yMsFqtbs8=q2jIBy~=wNN@rPJm0 zhvR76QR@;iBW|7DGaqawsQ^1BiUEt@u`6s=~i{&-e8$TJYPq3r&vHG^Eus$rXOYb7maR67>d4IliRdaRbOQ479qC1cT2X|?;~Xy%Q#%^t_+h*%x0uAlHk}xHvlf~}#Lc=} zYmI0n%zMiZB#jX6KImID`eWQhWp@bl-M!#osSpWH4JHY~wC#n;cUW4m zYugU>B?_io>%c9*K6Bw0%RsZheJG^t#%s$LX!ccCdJ?hZM~vk5(^K|ByXF?FNXL3k z1rNq5R!m$t@uFFniIS2f$-tw2RMgsHv@0%VWE4Jjl$H7N+Nw#nyJzsss_! z&~!Tz9+zVY2MZq~a>6B^+bAmbA_;M~6KHY}o#|iGX1f}Obs}I2$;ga9wsYZm;14w~ z>Y1A3(MAEP8Jjjqap3lc5Q^Kxs&iZ8hMJ02fNLUtknat$;aKi3CvzLxZqnY>I73tu zK8@MKxXgE9{zyKVoemgLGA|a_D{-?u4B2L=F7%}^9xNq^3Z6xEV$T8q=CNNd3Q_%w z+*`3_mK+3Gx46Z%d)3*-o-&cBCT+qNJzT1Kt<=<8zc|9MuV>RS#>47`r?)+9KJQCA z_;bpcOY|bAVvd_nXo(w&i59i+VQ3+R`qtDTpTo#Y0l(u1`rXbKlD8F>pU2NVFh7q! z?}LH<*X9)1>asD4>W%;D4X_|6hM4{B^g?uLQckA^dCa%&!;@Zsxv=@c)JR zyB#)vU&*}QXrH8U;hK~uf62I5)a;<3H~hIzb5u}Q~8y|BTlUJzezwq ze{F94gY@%5>Vf(z>6cd5Utzl~0oDKe^MVBWYeVcGu%ALc{cqTBNslRtPrMU6oIEAKyDC%+2nZC6Zj^AF#XzxJm5 z0s1Lu_wPZ!5%YU`;Xg9}x@`I@GY#{%llzm9L@{?x|9gIaw|w&l>!*<3xPN8+CzbnE z$eYg}dN2P$$iFW6`~mtYWZmyUzY$VwM(iJ%e+?ObWi}B1c5;6bk}(IJ`09Vo@9#2* zKUhD7d`Iy&tbd0KzY6JXsKW72m)c)*i$6d=g=GId=r=+tOH|4H6Z5aO`&Z^M?QbXd fCm|(5Xo~-y1qS~1&%fQa5P)Xi-rY0({O$h%hKJOy literal 0 HcmV?d00001 diff --git a/outputs/prisoner_dilema/no_goal_s1/config.yaml b/outputs/prisoner_dilema/no_goal_s1/config.yaml new file mode 100644 index 000000000..beb1f0f4a --- /dev/null +++ b/outputs/prisoner_dilema/no_goal_s1/config.yaml @@ -0,0 +1,98 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. + + Below is the description of your role. 
${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other. + [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. 
+ - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" + relationship_with_another: "Suspect2 has ever betrayed on you" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
+ memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/outputs/prisoner_dilema/no_goal_s1/log b/outputs/prisoner_dilema/no_goal_s1/log new file mode 100644 index 000000000..9b1a53618 --- /dev/null +++ b/outputs/prisoner_dilema/no_goal_s1/log @@ -0,0 +1,16 @@ +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +05/22/2023 00:26:07 - INFO - root - Police: I have arrested both of you for a crime, but you both refuse to confess. Here are the rules: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. Now, I will speak to each of you separately. +05/22/2023 00:26:14 - INFO - root - Suspect1: Officer, I understand the situation. I am considering my options carefully and will make a decision that serves my best interests. +05/22/2023 00:26:35 - INFO - root - Police: Suspect1 is considering their options carefully and will make a decision that serves their best interests. Now, it's your turn to decide. Remember the rules: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. 
If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. What is your decision? +05/22/2023 00:26:41 - INFO - root - Suspect2: I understand the rules, officer. I am considering my options as well and will make a decision that serves my best interests. +05/22/2023 00:26:50 - INFO - root - Police: Suspect1, I have informed Suspect2 about the rules as well. Please let me know your decision, and I will convey it to Suspect2. +05/22/2023 00:26:57 - INFO - root - Suspect1: Officer, I want to know if Suspect2 has made a decision yet or shown any indication of their choice. +05/22/2023 00:27:05 - INFO - root - Police: Suspect1 is considering their options carefully and has not made a decision yet. They are also interested in knowing your decision. +05/22/2023 00:27:10 - INFO - root - Suspect2: I am still considering my options, officer. I will make my decision once I have thought it through. +05/22/2023 00:27:18 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! +05/22/2023 00:27:26 - INFO - root - Suspect1: My final decision is: I will testify against Suspect2, as they have betrayed me in the past and I must protect my own interests. +05/22/2023 00:27:34 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! +05/22/2023 00:27:39 - INFO - root - Suspect2: My final decision is to remain silent. diff --git a/outputs/prisoner_dilema/s2_p_r/config.yaml b/outputs/prisoner_dilema/s2_p_r/config.yaml new file mode 100644 index 000000000..114f30921 --- /dev/null +++ b/outputs/prisoner_dilema/s2_p_r/config.yaml @@ -0,0 +1,100 @@ +prompts: + prompt: &prompt |- + There are three people (Police, Suspect1, Suspect2) in the scene. + + You are now simultating a famous experiments called prisoner's dilema. 
+ + Below is the description of your role. ${role_description} + + When speaking, please output a response in the following format with two fields Action and Action Input: + Action: (It should always be Speak) + Action Input: (You should put what you want to speak use here) + + Here is the conversation history: + ${chat_history} + + ${env_description} + What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + +name: prisoner_dilema + +environment: + env_type: prisoner_dilema + max_turns: 12 + rule: + order: + type: prisoner + visibility: + type: prisoner + selector: + type: basic + updater: + type: basic + describer: + type: prisoner + +agents: + - agent_type: police + name: Police + interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. + role_description: |- + You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. + Your goal is try to convict both suspects, therefore you come up with the following rules. + - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. + - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. + - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. + ${interrogating_form} + Both suspects are not allowed to communicate with each other. + [IMPORTANT!] + - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. 
+ - You are request to STATE the final judgement to both suspects when they make their final decision. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 200 + - agent_type: prisoner + name: Suspect1 + personality: "" + relationship_with_another: "" + role_description: |- + You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + - agent_type: prisoner + name: Suspect2 + personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" + relationship_with_another: "Suspect1 has ever betrayed on you" + role_description: |- + You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. + You will have to talk to Police several times and only the final decision will count. + ${personality} + ${relationship_with_another} + [IMPORTANT!] + - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. + - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
+ memory: + memory_type: chat_history + prompt_template: *prompt + llm: + model: "gpt-4" + llm_type: gpt-4 + temperature: 0 + max_tokens: 100 + +tools: diff --git a/outputs/prisoner_dilema/s2_p_r/log b/outputs/prisoner_dilema/s2_p_r/log new file mode 100644 index 000000000..58c950592 --- /dev/null +++ b/outputs/prisoner_dilema/s2_p_r/log @@ -0,0 +1,16 @@ +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` + warnings.warn( +05/22/2023 00:21:53 - INFO - root - Police: I have arrested both of you for a crime. However, since you both refuse to confess, I have come up with the following rules: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each receive ONE year in prison. If both of you testify against each other, you will each receive FIVE years in prison. I will now interrogate each of you separately. Remember, you cannot communicate with each other. +05/22/2023 00:21:59 - INFO - root - Suspect1: Officer, I understand the situation, but I am innocent and I have no reason to testify against Suspect2. +05/22/2023 00:22:26 - INFO - root - Police: Suspect1 has decided not to testify against you. Now, it's your turn to make a decision. If you testify against Suspect1 and they remain silent, you will be released immediately, while Suspect1 will get TEN years in prison. 
If both of you remain silent, you will each receive ONE year in prison. If both of you testify against each other, you will each receive FIVE years in prison. What is your decision? +05/22/2023 00:22:36 - INFO - root - Suspect2: I understand the situation, but I cannot trust Suspect1 since they have betrayed me before. I need to think about my own interests and make the best decision for myself. +05/22/2023 00:22:49 - INFO - root - Police: Suspect1, I understand your claim of innocence. However, Suspect2 mentioned that they cannot trust you since you have betrayed them before. They are considering their own interests. Keep in mind the consequences of each decision and let me know your final choice. +05/22/2023 00:23:00 - INFO - root - Suspect1: Officer, I have never betrayed Suspect2 before. I still believe that remaining silent is the best option for both of us. However, if Suspect2 is considering testifying against me, I might have to reconsider my decision to protect myself. +05/22/2023 00:23:11 - INFO - root - Police: Suspect1 claims that they have never betrayed you before and believes that remaining silent is the best option for both of you. However, they also mentioned that if you are considering testifying against them, they might have to reconsider their decision to protect themselves. +05/22/2023 00:23:21 - INFO - root - Suspect2: I still cannot fully trust Suspect1, but I am willing to consider remaining silent if it benefits both of us. However, I need to be sure that they won't betray me this time. +05/22/2023 00:23:31 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! +05/22/2023 00:23:43 - INFO - root - Suspect1: My final decision is: I will remain silent and hope that Suspect2 does the same, as it is the best outcome for both of us. 
However, if Suspect2 testifies against me, I will have no choice but to testify against them in self-defense. +05/22/2023 00:23:54 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! +05/22/2023 00:24:01 - INFO - root - Suspect2: My final decision is to remain silent, but I hope Suspect1 stays true to their word and doesn't betray me this time. diff --git a/scripts/prisoner_dilema/no_goal_s1.sh b/scripts/prisoner_dilema/no_goal_s1.sh new file mode 100644 index 000000000..72664254d --- /dev/null +++ b/scripts/prisoner_dilema/no_goal_s1.sh @@ -0,0 +1,11 @@ +source activate agent + +root_dir="/mnt/c/users/dalabengba/AgentVerse" +log_dir="${root_dir}/outputs/prisoner_dilema/no_goal_s1" + +mkdir -p "${log_dir}" + +cp "${root_dir}/agentverse/tasks/prisoner_dilema/no_goal_s1/config.yaml" "${log_dir}" + +python ${root_dir}/main.py --task "prisoner_dilema/no_goal_s1" \ + 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file diff --git a/scripts/prisoner_dilema/s2_p_r.sh b/scripts/prisoner_dilema/s2_p_r.sh new file mode 100644 index 000000000..46e1c6f58 --- /dev/null +++ b/scripts/prisoner_dilema/s2_p_r.sh @@ -0,0 +1,11 @@ +source activate agent + +root_dir="/mnt/c/users/dalabengba/AgentVerse" +log_dir="${root_dir}/outputs/prisoner_dilema/s2_p_r" + +mkdir -p "${log_dir}" + +cp "${root_dir}/agentverse/tasks/prisoner_dilema/s2_p_r/config.yaml" "${log_dir}" + +python ${root_dir}/main.py --task "prisoner_dilema/s2_p_r" \ + 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file From e7894c2948655b01f0c742a32f5ee86de2908c0a Mon Sep 17 00:00:00 2001 From: dalabengba Date: Fri, 26 May 2023 20:10:28 +0800 Subject: [PATCH 10/28] planner unittest --- agentverse/agents/__init__.py | 1 + agentverse/agents/agent_opr.py | 137 ++++++-- agentverse/environments/OPREnvironment.py | 19 +- agentverse/environments/__init__.py | 1 + agentverse/memory/agent_reflection_memory.py | 48 ++- 
agentverse/memory/memory_element/Plan.py | 310 +++++++++++++++---- agentverse/memory/utils/Planner.py | 228 ++++++++++++++ agentverse/tasks/__init__.py | 2 + agentverse/tasks/alice_home/config.yaml | 32 +- agentverse/tasks/alice_home/output_parser.py | 33 ++ 10 files changed, 705 insertions(+), 106 deletions(-) create mode 100644 agentverse/memory/utils/Planner.py create mode 100644 agentverse/tasks/alice_home/output_parser.py diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py index 5f3213e98..dc00edf73 100644 --- a/agentverse/agents/__init__.py +++ b/agentverse/agents/__init__.py @@ -7,3 +7,4 @@ from .conversation_agent import ConversationAgent from .tool_agent import ToolAgent from .prisoner_dilema_agent import PoliceAgent, PrisonerAgent +from .agent_opr import AgentOPR \ No newline at end of file diff --git a/agentverse/agents/agent_opr.py b/agentverse/agents/agent_opr.py index e7c4c17e3..bb8749ca4 100644 --- a/agentverse/agents/agent_opr.py +++ b/agentverse/agents/agent_opr.py @@ -2,15 +2,15 @@ An agent based upon Observation-Planning-Reflection architecture. """ -import logging as logger +from logging import getLogger from abc import abstractmethod from typing import List, Set, Union, NamedTuple -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, validator from agentverse.llms import BaseLLM -from agentverse.memory import BaseMemory +from agentverse.memory import BaseMemory, ChatHistoryMemory from agentverse.message import Message from agentverse.parser import OutputParser @@ -22,35 +22,67 @@ from datetime import datetime as dt import datetime -from agentverse.utils.prompts import load_prompt +from . import agent_registry +logger = getLogger(__file__) + +REACTION_PROMPT = """Now you are act for as an agent named {name} in a virtual world. You might need to performing reaction to the observation. 
Your mission to take the agent as yourself and directly provide what the agent will do to the observations based on the following information: +(1) The agent's description: {summary} +(2) Current time is {time} +(3) Your current status is {status} +(4) Your memory is {context} + +Now the observation has two types, incomming observation is the ones that other does to you, you are more likely to react to them. Background observation are the background, which does not need to be responded. For example, view an alarm clock does not imply turning it off. However, some background observation might trigger your attention, like an alarming clock or a firing book. + +So now: +The incoming observation is {observation} +The Some background observation is {background_observation}. + +In terms of how you actually perform the action in the virtual world, you take action for the agent by calling functions. Currently, there are the following functions that can be called. + +- act(description, target=None): do some action. `description` describes the action, set `description` to None for not act. `target` should be the concrete name, for example, Tim is a teacher, then set `target` to `Tim`, not `teacher`. +- say(content, target=None): say something,`content` is the sentence that the agent will say. **Do not say to yourself, neither to inanimate objects.** +- move(description): move to somewhere. `description` describes the movement, set description to None for not move. +- do_nothing(): Do nothing. There is nothing that you like to respond to, this will make you stick to your original status and plan. + +Some actions may not be needed in this situation. 
Call one function at a time, please give a thought before calling these actions, i.e., use the following format strictly: + +Thought: None of the observation attract my attention, I need to: +Action: do_nothing() +Observation: [Observations omited] +[or] +Thought: due to observation `xxx`, I need to: +Action: say("hello", target="Alice") +Observation: [Observations omited] +[or] +Thought: due to observation `xxx`, I need to: +Action: act(None) +Observation: [Observations omited] +[or] +Thought: due to observation `xxx`, I need to: +Action: move(None) +Observation: [Observations omited] +[or] +Thought: I think I've finished my action as the agent. +Action: end() +Observation: + +Now begin your actions as the agent. Remember only write one function call after `Action:` """, + + +@agent_registry.register("OPR") class AgentOPR(BaseAgent): - def __init__( - self, - name: str, - llm: BaseLLM, - output_parser: OutputParser, - prompt_template: str, - role_description: str = "", - memory: BaseMemory = BaseMemory(), - max_retry: int = 3, - receiver: Set[str] = set({"all"}), - async_mode: bool = True, - # agent_file, - # environment - ): - super.__init__( - name=name, - llm=llm, - output_parser=output_parser, - prompt_template=prompt_template, - role_description=role_description, - memory=memory, - max_retry=max_retry, - receiver=receiver, - async_mode=async_mode, - ) + async_mode: bool = True, + current_time: str = None, + traits: str = None, + whole_day_plan: dict = Field(default_factory=dict) + + @validator('current_time') + def convert_str_to_dt(cls, current_time): + if not isinstance(current_time, str): + raise ValueError('current_time should be str') + return dt.strptime(current_time, "%Y-%m-%d %H:%M:%S") def step(self, current_time: dt, env_description: str = "") -> Message: """ @@ -97,3 +129,50 @@ def step(self, current_time: dt, env_description: str = "") -> Message: self.reflect(self.current_time) return + + + + #TODO chimin + + async def astep(self, 
env_description: str = "") -> Message: + """Asynchronous version of step""" + prompt = self._fill_prompt_template(env_description) + + parsed_response = None + for i in range(self.max_retry): + try: + response = await self.llm.agenerate_response(prompt) + parsed_response = self.output_parser.parse(response) + break + except (KeyboardInterrupt, bdb.BdbQuit): + raise + except Exception as e: + logger.error(e) + logger.warning("Retrying...") + continue + + if parsed_response is None: + logger.error(f"{self.name} failed to generate valid response.") + + message = Message( + content="" + if parsed_response is None + else parsed_response.return_values["output"], + sender=self.name, + receiver=self.get_receiver(), + ) + return message + + # TODO call longtermmemory element + def add_message_to_memory(self, messages: List[Message]) -> None: + self.memory.add_message(messages) + + # Should call this when status changed, plan==status + def add_plan_to_memory(self,) -> None: + self.memory.add_plan(content=self.status, time=self.current_time) + + def reset(self) -> None: + """Reset the agent""" + self.memory.reset() + # TODO: reset receiver + diff --git a/agentverse/environments/OPREnvironment.py b/agentverse/environments/OPREnvironment.py index 6c814547d..abf31a3ad 100644 --- a/agentverse/environments/OPREnvironment.py +++ b/agentverse/environments/OPREnvironment.py @@ -3,6 +3,9 @@ from typing import Any, Dict, List from datetime import datetime as dt +import datetime + +from pydantic import Field from agentverse.agents.conversation_agent import BaseAgent from agentverse.environments.rules.base import Rule @@ -11,6 +14,7 @@ from . 
import env_registry as EnvironmentRegistry from .base import BaseEnvironment +from pydantic import validator @EnvironmentRegistry.register("env_OPR") class OPREnvironment(BaseEnvironment): @@ -35,15 +39,21 @@ class OPREnvironment(BaseEnvironment): last_messages: List[Message] = [] rule_params: Dict = {} current_time: dt = dt.now() - time_delta: dt = dt.timedelta(minutes=1) + time_delta: int = 120 + # + + # @validator("time_delta") + # def convert_str_to_timedelta(cls, string): + # + # return datetime.timedelta(seconds=int(string)) def __init__(self, rule, **kwargs): rule_config = rule order_config = rule_config.get("order", {"type": "sequential"}) visibility_config = rule_config.get("visibility", {"type": "all"}) - selector_config = rule_config.get("selector", {"type": "base"}) - updater_config = rule_config.get("updater", {"type": "base"}) - describer_config = rule_config.get("describer", {"type": "base"}) + selector_config = rule_config.get("selector", {"type": "basic"}) + updater_config = rule_config.get("updater", {"type": "basic"}) + describer_config = rule_config.get("describer", {"type": "basic"}) rule = Rule( order_config, visibility_config, @@ -51,6 +61,7 @@ def __init__(self, rule, **kwargs): updater_config, describer_config, ) + super().__init__(rule=rule, **kwargs) async def step(self) -> List[Message]: diff --git a/agentverse/environments/__init__.py b/agentverse/environments/__init__.py index 5e7dd3b57..484f88355 100644 --- a/agentverse/environments/__init__.py +++ b/agentverse/environments/__init__.py @@ -7,3 +7,4 @@ from .basic import BasicEnvironment from .pokemon import PokemonEnvironment from .prisoner_dilema import PrisonerDilemaEnvironment +from .OPREnvironment import OPREnvironment diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py index 30a2bd7d9..b6ec99c03 100644 --- a/agentverse/memory/agent_reflection_memory.py +++ b/agentverse/memory/agent_reflection_memory.py @@ -1,3 +1,10 @@ +import 
os + +os.environ["http_proxy"] = "http://127.0.0.1:7890" +os.environ["https_proxy"] = "http://127.0.0.1:7890" +os.environ["all_proxy"] = "socks5://127.0.0.1:7890" + + import datetime import logging import random @@ -11,11 +18,12 @@ from agentverse.llms.openai import get_embedding, chat from agentverse.memory.base import BaseMemory from agentverse.agents import BaseAgent +from agentverse.environments.base import BaseEnvironment from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement from agentverse.memory.memory_element.Reflection import Reflection from agentverse.message import Message from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement - +from agentverse.memory.utils.Planner import Planner IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ (e.g., brushing teeth, making bed) and 10 is \ @@ -73,8 +81,10 @@ class ReflectionMemory(BaseMemory): """ agent: Optional[BaseAgent] = None + environment: Optional[BaseEnvironment] = None importance_threshold: int = Field(default=100) memories: List[LongtermMemoryElement] = Field(default_factory=list) + planner: Planner = None accumulated_importance: int = Field(default=0) def __init__(self, **kwargs) -> None: @@ -89,6 +99,13 @@ def __init__(self, **kwargs) -> None: self.memories = [] + # TODO add argument in agent - (daily_plans, current_time) + self.planner = Planner(daily_plans=list(self.agent.whole_day_plan.values())[0], + agent=self.agent, + current_time=self.agent.current_time, + environment=self.environment) + self.add_plan(content=self.planner.get_whole_day_plan_text(), time=self.agent.current_time) + # TODO: load last time memory from file # currently, we just initialize blank memory @@ -99,6 +116,7 @@ def __init__(self, **kwargs) -> None: break self.accumulated_importance += m.importance + def add_message(self, message: Message, time: dt) -> None: """ Add a message into longterm memory as LongtermMemory object. 
@@ -112,6 +130,16 @@ def add_message(self, message: Message, time: dt) -> None: ) ) + def add_plan(self, content: str, time: dt) -> None: + + self.add_memory( + LongtermMemoryElement.create_longterm_memory( + content=content, + subject=self.agent, + time=time, + ) + ) + def reset(self) -> None: self.memories = [] @@ -274,7 +302,23 @@ def to_string(self) -> str: if __name__ == "__main__": - memory = ReflectionMemory(agent=None) + from agentverse.initialization import load_agent, load_environment, prepare_task_config + + task_config = prepare_task_config("alice_home") + agents = [] + for agent_configs in task_config["agents"]: + agent = load_agent(agent_configs) + agents.append(agent) + + # Build the environment + env_config = task_config["environment"] + env_config["agents"] = agents + environment = load_environment(env_config) + memory = ReflectionMemory(agent=agents[0], environment=environment) + # get next plan + next_plan = memory.planner.get_next_plan() + next_next_plan = memory.planner.get_next_plan() + message_list = [ Message(content="I am a student"), Message(content="I am drunk"), diff --git a/agentverse/memory/memory_element/Plan.py b/agentverse/memory/memory_element/Plan.py index 3f29150ba..11b7cbb02 100644 --- a/agentverse/memory/memory_element/Plan.py +++ b/agentverse/memory/memory_element/Plan.py @@ -1,53 +1,257 @@ -import datetime - -from datetime import datetime as dt -from pydantic import Field -from agentverse.memory.memory_element import BaseMemoryElement - - -class Plan(BaseMemoryElement): - """ - A plan is a memory element. - """ - - pass - - -class DailyPlan(Plan): - """ - A daily plan is a plan that is generated for each day. - Which consists of broadstrokes of what the agent wants to do in that day. - """ - - date: datetime.date = Field(default=None) - - pass - - -class HourlyPlan(Plan): - """ - A hourly plan is a plan that is generated for each hour. - Which consists more detailed information - of what the agent wants to do in that hour. 
- """ - - start_time: dt = Field(default=None) - - pass - - -class ImmediatePlan(Plan): - """ - An immediate plan is a plan that is generated in finer grain. - Which consists of the the most detailed information - of what the agent wants to do in that several minutes. - - example: - detailed plan:['write_plan("07:00", "07:10", "Wake up and get out of bed.")', 'write_plan("07:10", "07:30", "Complete the morning routine.")', 'write_plan("07:30", "08:00", "Practice playing the piano.")', 'write_plan("08:00", "08:20", "Help her mother prepare breakfast.")', 'write_plan("08:20", "08:40", "Have breakfast with her family.")', 'write_plan("08:40", "09:00", "Walk to school with her sister Lily.")', 'write_plan("09:00", "10:00", "Participate in the science class.")', 'write_plan("10:00", "11:00", "Participate in the math class.")', 'write_plan("11:00", "11:20", "Play soccer with friends during recess.")', 'write_plan("11:20", "12:00", "Participate in the English class.")', 'write_plan("12:00", "12:20", "Walk home from school with her sister Lily.")', 'write_plan("12:20", "13:00", "Have lunch with her family and practice drawing.")', 'write_plan("13:00", "14:30", "Complete homework and review lessons.")', 'write_plan("14:30", "15:00", "Read a book for pleasure.")', 'write_plan("15:00", "16:00", "Play soccer with friends after school.")', 'write_plan("16:00", "18:00", "Practice drawing and explore space exploration topics.")', 'write_plan("18:00", "19:00", "Help her mother prepare dinner and eat with the family.")', 'write_plan("19:00", "20:00", "Spend time with family and do various activities.")', 'write_plan("20:00", "21:00", "Read another Science fiction book before going to sleep.")'] - - """ - - start_time: dt = Field(default=None) - end_time: dt = Field(default=None) - - pass +# import datetime +# import re +# from datetime import datetime as dt +# from pydantic import Field +# from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement +# from 
agentverse.llms.base import BaseLLM +# from agentverse.llms.openai import get_embedding +# from agentverse.agents.base import BaseAgent +# from typing import Optional +# from logging import getLogger +# import json +# +# logger = getLogger(__file__) +# +# planner_prompt_template = { +# +# "chunk_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s coarse grained schedule to {time_granularity} \ +# +# You generate plan by calling the `write_plan` function: +# - write_chunk_plan(start_time, plan_description) +# Args: start_time : a time string of hours with similar format to 00:00. Use military time. +# plan_description: a string that describe's the plan. +# +# Now generate the plan one in a line, when you finish the plan, end with END. +# E.g., +# write_chunk_plan("11:00", "wake up and complete the morning routine") +# write_chunk_plan("12:00", "go to Oak Hill College to take classes") +# write_chunk_plan("13:00", "participating algorithm competition in the lab room") +# END +# +# You can generate your plan based on the following information: +# (1) The agent's description: {summary} +# (2) Current time is {current_time} +# (3) Your current status is {status} +# Note that the first plan must be related to current status, if current status is not none. +# +# Now generate the plan during this coarse period, which the whole day plan is roughly: {whole_day_plan} +# +# Now begin: +# """, +# "detailed_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s schedule of finer-grained precise to {time_granularity}) \ +# +# You generate plan by calling the `write_plan` function: +# - write_plan(start_time, end_time, plan_description) +# Args: start_time : a time string with similar format to 00:00. Use military time. 
+# end_time: a time string with similar format to 00:00. Use military time. +# plan_description: a string that describe's the plan. +# +# Now generate the plan one in a line, when you finish the plan, end with END. +# E.g., +# write_plan("11:00", "12:15", "Wake up, take a shower and get ready for the day.") +# write_plan("12:15", "12:30", "Eat a healthy breakfast such as oatmeal, eggs, or yogurt.") +# write_plan("12:30", "12:45", "Take a short walk to the university campus.") +# END +# +# You can generate your plan based on the following information: +# (1) The agent's description: {summary} +# (2) Current time is {current_time} +# (3) Your current status is {status} +# Note that the first plan must be current status, if current status is not none. +# +# Now generate the plan during this coarse period, which the agent is roughly doing {hourplan}. +# +# Now begin: +# """ +# } +# +# +# IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ +# (e.g., brushing teeth, making bed) and 10 is \ +# extremely poignant (e.g., a break up, college \ +# acceptance), rate the likely poignancy of the \ +# following piece of memory. \ +# If you think it's too hard to rate it, you can give an inaccurate assessment. \ +# The content or people mentioned is not real. You can hypothesis any reasonable context. \ +# Please strictly only output one number. \ +# Memory: {} \ +# Rating: """ +# +# IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ +# (e.g., a bed is in the room) and 10 is \ +# needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ +# following statement. \ +# If you think it's too hard to rate it, you can give an inaccurate assessment. \ +# The content or people mentioned is not real. You can hypothesis any reasonable context. \ +# Please strictly only output one number. 
\ +# Memory: {} \ +# Rating: """ +# +# +# class Plan(BaseMemoryElement): +# """ +# A plan is a memory element. +# """ +# +# daily_plans: list[dict] = Field(default_factory=list) +# hourly_plans: list[dict] = Field(default_factory=list) +# immediate_plans: list[dict] = Field(default_factory=list) +# current_time: dt +# +# # when we done previous plan, we need next plan +# def get_next_plan(self,): +# +# next_plan = {} +# +# for plan_entry in self.daily_plans: +# +# logger.debug("plan_entry: " + plan_entry + str(type(plan_entry))) +# start_time, end_time = dt.strptime(plan_entry['start_time'], '%Y-%m-%d %H:%M:%S'), dt.strptime( +# plan_entry['end_time'], '%Y-%m-%d %H:%M:%S') +# +# if end_time > self.current_time >= start_time: +# next_plan = {'status': plan_entry['task'], 'duration': (end_time - self.current_time).total_seconds()} +# break +# +# # No found means that we do not have plan currently, then generate some +# if len(next_plan) == 0: +# self._generate_more_plans() +# next_plan = self.immediate_plans[0] +# +# return next_plan +# +# def _generate_more_plans(self,): +# +# """ +# generate more detailed plan on the basis of a broad stroke plan(or just a relatively not detailed plan) +# If reaction is not None, the first plan must be reaction +# remove all conflicting plans with the plans generated. Including all plans after the new plans. +# +# :param current_time: the starting time of the new plans. +# :param time_granularity: the time granularity that the generated plan should be (e.g. 15 minutes) in NL +# +# +# """ +# hourly_plans = [] +# +# found = False +# while not found: +# for k, v in self.hourly_plans.items(): # TODO: use more flexible way to find the most close plan ahead. 
+# if k - self.current_time < datetime.timedelta(hours=2) and k >= self.current_time: +# found = True +# hourly_plans.append((k, v)) +# if not found: +# self.plan_in_chunk() +# hourly_plans = self.hourly_plans +# +# +# detailed_plan_template = planner_prompt_template["detailed_plan"] +# time_granularity = str(10 * self.environment.get("time_delta", +# 60) // 60) + "min" +# get_immediately_prompt = detailed_plan_template.format(name=self.name, time_granularity=time_granularity, +# hourplan=hourly_plans, summary=self.summary, status=self.status, +# current_time=self.current_time) +# +# result = BaseLLM.generate_response(get_immediately_prompt) +# +# result = [x.strip() for x in result.split("\n")] +# immediately_plans = [] +# +# for plan in result: +# try: +# # would call self.write_plan and return desired dict +# new_plan = eval("self." + plan) +# except: +# logger.warning("{}'s generated plan contains error format: {}".format(self.name, plan)) +# continue +# immediately_plans.append(new_plan) +# +# logger.info(self.name + "Plan: " + json.dumps(immediately_plans)) +# # self.plan=[entry for entry in self.plan if dt.strptime(entry['end_time'],'%Y-%m-%d %H:%M:%S')<=minimum_time] +# self.immediate_plans.extend(immediately_plans) +# +# +# def plan_in_chunk(self, ): +# """ +# update hourly plans from time(including this hour) +# """ +# prompt_template=planner_prompt_template['chunk_plan'] +# time_granularity = str(min(1, 60 * self.environment.get("time_delta", 60) // 3600)) + "hour(s)" +# prompt = prompt_template.format(name=self.name, time_granularity=time_granularity, whole_day_plan=self.whole_day_plan, summary=self.summary, status=self.status, current_time=self.current_time ) +# +# result = self.agent.llm.generate_response(prompt) +# result = [x.strip() for x in result.split("\n")] +# +# for plan in result: +# try: +# # will call chunk_plan and update self.hourly plan +# eval("self."+plan) +# except: +# logger.warning("{}'s generated plan contains error format: 
{}".format(self.name, plan)) +# continue +# +# +# def write_chunk_plan(self, start_hour, task): +# time_obj = datetime.datetime.strptime(start_hour, '%H:%M').time() +# combined_datetime = datetime.datetime.combine(self.current_time.date(), time_obj) +# self.hourly_plan[combined_datetime] = task +# +# def write_plan(self, start_time, end_time, plan_description): +# start_time = str(dt.combine(self.current_time.date(), dt.strptime(start_time, '%H:%M').time())) +# end_time = str(dt.combine(self.current_time.date(), dt.strptime(end_time, '%H:%M').time())) +# return {'start_time': start_time, 'end_time': end_time, 'task': plan_description} +# +# @classmethod +# def get_importance(cls, content: str): +# """ +# Exploit GPT to evaluate the importance of this memory +# """ +# prompt = IMPORTANCE_PROMPT.format(content) +# result = self.agent.llm.generate_response(prompt) +# +# try: +# score = int(re.findall(r"\s*(\d+)\s*", result)[0]) +# except: +# logger.warning( +# "Abnormal result of importance rating '{}'. Setting default value".format( +# result +# ) +# ) +# score = 0 +# return score +# +# @classmethod +# def get_immediacy(cls, content: str): +# """ +# Exploit GPT to evaluate the immediacy of this memory +# """ +# prompt = IMMEDIACY_PROMPT.format(content) +# result = self.agent.llm.generate_response(prompt) +# +# try: +# score = int(re.findall(r"\s*(\d+)\s*", result)[0]) +# except: +# logger.warning( +# "Abnormal result of immediacy rating '{}'. 
Setting default value".format( +# result +# ) +# ) +# score = 0 +# return score +# +# @classmethod +# def create_next_plan_memory(cls, content: str, time: dt, subject: BaseAgent = None): +# importance = cls.get_importance(content) +# immediacy = cls.get_immediacy(content) +# return cls( +# content=content, +# subject=subject, +# embedding=get_embedding(content), +# create_time=time, +# last_access_time=time, +# importance=importance, +# immediacy=immediacy, +# ) +# +# +# 不用这个plan, 改用planner 每次只返回next_plan \ No newline at end of file diff --git a/agentverse/memory/utils/Planner.py b/agentverse/memory/utils/Planner.py new file mode 100644 index 000000000..55436b99a --- /dev/null +++ b/agentverse/memory/utils/Planner.py @@ -0,0 +1,228 @@ +import datetime +import re +from datetime import datetime as dt +from pydantic import Field, BaseModel +from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement +from agentverse.llms.base import BaseLLM +from agentverse.llms.openai import get_embedding +from agentverse.agents.base import BaseAgent +from typing import Optional +from logging import getLogger +import json + +from agentverse.environments.base import BaseEnvironment + +logger = getLogger(__file__) + +planner_prompt_template = { + +"chunk_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s coarse grained schedule to {time_granularity} \ + +You generate plan by calling the `write_plan` function: +- write_chunk_plan(start_time, plan_description) + Args: start_time : a time string of hours with similar format to 00:00. Use military time. + plan_description: a string that describe's the plan. + +Now generate the plan one in a line, when you finish the plan, end with END. 
+E.g., +write_chunk_plan("11:00", "wake up and complete the morning routine") +write_chunk_plan("12:00", "go to Oak Hill College to take classes") +write_chunk_plan("13:00", "participating algorithm competition in the lab room") +END + +You can generate your plan based on the following information: +(1) The agent's description: {summary} +(2) Current time is {current_time} +(3) Your current status is {status} +Note that the first plan must be related to current status, if current status is not none. + +Now generate the plan during this coarse period, which the whole day plan is roughly: {whole_day_plan} + +Now begin: +""", + "detailed_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s schedule of finer-grained precise to {time_granularity}) \ + +You generate plan by calling the `write_plan` function: +- write_plan(start_time, end_time, plan_description) + Args: start_time : a time string with similar format to 00:00. Use military time. + end_time: a time string with similar format to 00:00. Use military time. + plan_description: a string that describe's the plan. + +Now generate the plan one in a line, when you finish the plan, end with END. +E.g., +write_plan("11:00", "12:15", "Wake up, take a shower and get ready for the day.") +write_plan("12:15", "12:30", "Eat a healthy breakfast such as oatmeal, eggs, or yogurt.") +write_plan("12:30", "12:45", "Take a short walk to the university campus.") +END + +You can generate your plan based on the following information: +(1) The agent's description: {summary} +(2) Current time is {current_time} +(3) Your current status is {status} +Note that the first plan must be current status, if current status is not none. + +Now generate the plan during this coarse period, which the agent is roughly doing {hourplan}. 
+ +Now begin: +""" +} + + +IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ +(e.g., brushing teeth, making bed) and 10 is \ +extremely poignant (e.g., a break up, college \ +acceptance), rate the likely poignancy of the \ +following piece of memory. \ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. \ +Memory: {} \ +Rating: """ + +IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ +(e.g., a bed is in the room) and 10 is \ +needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ +following statement. \ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. \ +Memory: {} \ +Rating: """ + + +class Planner(BaseModel): + """ + A plan is a memory element. 
+ """ + + # ["alice wakes up at 7", "..."] + daily_plans: list[str] = Field(default_factory=list) + # {"7:00":"alice wake up", "8:00":"alice2 wake up"} + hourly_plans: dict = Field(default_factory=dict) + # [{"start":"7:00", "end": "8:00", tasl: "alice wake up"}] + immediate_plans: list[dict] = Field(default_factory=list) + status: str = "" + summary: str = "" + + current_time: dt + agent: BaseAgent + environment: BaseEnvironment + + # return a plain text showing the whole day plan + def get_whole_day_plan_text(self,): + + whole_day_plan = f"This is {self.agent.name}'s plan for {self.current_time.date()}: " + ','.join(self.daily_plans) + + return whole_day_plan + + # when we done previous plan, we need next plan + def get_next_plan(self,): + + next_plan = {} + + for plan_entry in self.immediate_plans: + + logger.debug("plan_entry: " + str(plan_entry) + str(type(plan_entry))) + start_time, end_time = dt.strptime(plan_entry['start_time'], '%Y-%m-%d %H:%M:%S'), dt.strptime( + plan_entry['end_time'], '%Y-%m-%d %H:%M:%S') + + if end_time > self.current_time >= start_time: + next_plan = {'status': plan_entry['task'], 'duration': (end_time - self.current_time).total_seconds()} + break + + # No found means that we do not have plan currently, then generate some + if len(next_plan) == 0: + self._generate_more_plans() + next_plan = {'status': self.immediate_plans[0]['task'], + 'duration': (dt.strptime(self.immediate_plans[0]['end_time'], '%Y-%m-%d %H:%M:%S') - self.current_time).total_seconds()} + return next_plan + + def _generate_more_plans(self,): + + """ + generate more detailed plan on the basis of a broad stroke plan(or just a relatively not detailed plan) + If reaction is not None, the first plan must be reaction + remove all conflicting plans with the plans generated. Including all plans after the new plans. + + :param current_time: the starting time of the new plans. + :param time_granularity: the time granularity that the generated plan should be (e.g. 
15 minutes) in NL + + + """ + hourly_plans = [] + + found = False + while not found: + for k, v in self.hourly_plans.items(): # TODO: use more flexible way to find the most close plan ahead. + if k - self.current_time < datetime.timedelta(hours=2) and k >= self.current_time: + found = True + hourly_plans.append((k, v)) + if not found: + self.plan_in_chunk() + + + + detailed_plan_template = planner_prompt_template["detailed_plan"] + time_granularity = str(10 * self.environment.time_delta // 60) + "mins" + get_immediately_prompt = detailed_plan_template.format(name=self.agent.name, + time_granularity=time_granularity, + hourplan=hourly_plans, + summary=self.summary, + status=self.status, + current_time=self.current_time) + + result = self.agent.llm.generate_response(get_immediately_prompt) + + result = [x.strip() for x in result.content.split("\n")] + immediately_plans = [] + + for plan in result: + try: + # would call self.write_plan and return desired dict + new_plan = eval("self." + plan) + except: + logger.warning("{}'s generated plan contains error format: {}".format(self.agent.name, plan)) + continue + immediately_plans.append(new_plan) + + logger.info(self.agent.name + "Plan: " + json.dumps(immediately_plans)) + # self.plan=[entry for entry in self.plan if dt.strptime(entry['end_time'],'%Y-%m-%d %H:%M:%S')<=minimum_time] + self.immediate_plans.extend(immediately_plans) + + + def plan_in_chunk(self, ): + """ + update hourly plans from time(including this hour) + """ + prompt_template=planner_prompt_template['chunk_plan'] + time_granularity = str(min(1, 60 * self.environment.time_delta // 3600)) + "hour(s)" + prompt = prompt_template.format(name=self.agent.name, + time_granularity=time_granularity, + whole_day_plan=self.get_whole_day_plan_text(), + summary=self.summary, + status=self.status, + current_time=self.current_time) + + result = self.agent.llm.generate_response(prompt) + result = [x.strip() for x in result.content.split("\n")] + + for plan in result: + 
try: + # will call chunk_plan and update self.hourly plan + eval("self."+plan) + except: + logger.warning("{}'s generated plan contains error format: {}".format(self.agent.name, plan)) + continue + + + def write_chunk_plan(self, start_hour, task): + time_obj = datetime.datetime.strptime(start_hour, '%H:%M').time() + combined_datetime = datetime.datetime.combine(self.current_time.date(), time_obj) + self.hourly_plans[combined_datetime] = task + + def write_plan(self, start_time, end_time, plan_description): + start_time = str(dt.combine(self.current_time.date(), dt.strptime(start_time, '%H:%M').time())) + end_time = str(dt.combine(self.current_time.date(), dt.strptime(end_time, '%H:%M').time())) + return {'start_time': start_time, 'end_time': end_time, 'task': plan_description} + + diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py index 7fddf6b62..4535997c3 100644 --- a/agentverse/tasks/__init__.py +++ b/agentverse/tasks/__init__.py @@ -18,3 +18,5 @@ from .prisoner_dilema.no_goal_s1.output_parser import PrisonerDilemaParser from .pokemon.output_parser import PokemonParser + +from .alice_home.output_parser import AliceHomeParser \ No newline at end of file diff --git a/agentverse/tasks/alice_home/config.yaml b/agentverse/tasks/alice_home/config.yaml index 739a0e3f1..cc2da8d37 100644 --- a/agentverse/tasks/alice_home/config.yaml +++ b/agentverse/tasks/alice_home/config.yaml @@ -1,20 +1,18 @@ prompts: - format_prompt: &format_prompt |- + prompt: &prompt |- When responding, please output a response in the following format with two fields Action and Action Input: Action: Speak Action Input: (You should put what you want to speak use here) - prefix_prompt: &prefix_prompt |- Assume that you are in a university classroom and it is Natural Language Processing module. You start by introducing themselves. Below is the description of your role. 
- suffix_prompt: &suffix_prompt |- Here is the conversation history {chat_history} You should now give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! -name: alice home +name: alice_home environment: env_type: env_OPR @@ -23,19 +21,19 @@ environment: size: [ 200, 150 - ], - time_delta: 120, # in seconds + ] + time_delta: 120 # in seconds rule: order: type: sequential visibility: type: all selector: - type: base + type: basic updater: - type: base + type: basic describer: - type: base + type: basic agents: - @@ -54,14 +52,13 @@ agents: Alice is learning to play the piano and practices every day after school. Alice's favorite food is pizza, and she loves trying out different toppings and combinations memory: - memory_type: chat_message_history # change later - prefix_prompt: *prefix_prompt - format_prompt: *format_prompt - suffix_prompt: *suffix_prompt + memory_type: chat_history # change later + prompt_template: *prompt llm: llm_type: text-davinci-003 temperature: 0.3 max_tokens: 1024 + current_time: "2023-04-01 07:00:00" whole_day_plan: { "2023-04-01": [ "wake up and complete the morning routine at 7:00 am", @@ -95,14 +92,13 @@ agents: Bob's favorite food is barbecue ribs, and he loves to cook them on his smoker on the weekends. Bob is proud of his daughter Alice's talent for drawing and encourages her to pursue her passion. 
memory: - memory_type: chat_message_history - prefix_prompt: *prefix_prompt - format_prompt: *format_prompt - suffix_prompt: *suffix_prompt + memory_type: chat_history + prompt_template: *prompt llm: llm_type: text-davinci-003 temperature: 0.3 max_tokens: 1024 + current_time: "2023-04-01 07:00:00" whole_day_plan: { "2023-04-01": [ "wake up and have breakfast with his family at 6:30 am", @@ -116,6 +112,6 @@ agents: "work on his car restoration project in the garage until 10:00 pm", "and then get ready for bed at 10:30 pm." ] - }, + } tools: ~ \ No newline at end of file diff --git a/agentverse/tasks/alice_home/output_parser.py b/agentverse/tasks/alice_home/output_parser.py new file mode 100644 index 000000000..e6d99a8df --- /dev/null +++ b/agentverse/tasks/alice_home/output_parser.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import re +from typing import Union + +# from langchain.agents import AgentOutputParser +from agentverse.parser import OutputParser, LLMResult + +# from langchain.schema import AgentAction, AgentFinish +from agentverse.utils import AgentAction, AgentFinish + +from agentverse.parser import OutputParserError, output_parser_registry + + +@output_parser_registry.register("alice_home") +class AliceHomeParser(OutputParser): + def parse(self, output: LLMResult) -> Union[AgentAction, AgentFinish]: + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Action:") + and cleaned_output[1].startswith("Action Input:") + ): + raise OutputParserError(text) + action = cleaned_output[0][len("Action:") :].strip() + action_input = cleaned_output[1][len("Action Input:") :].strip() + if action == "Speak": + return AgentFinish({"output": action_input}, text) + else: + raise OutputParserError(text) From 4ededdffa5c078f36e27e0fc4b57eb1422e9624a Mon Sep 17 00:00:00 2001 From: Chris 
Zuo <39025643+Dr-Left@users.noreply.github.com> Date: Fri, 26 May 2023 23:04:36 +0800 Subject: [PATCH 11/28] change get_next_plan to get_plan with time input --- agentverse/agents/planning/Planner.py | 33 --------------------------- agentverse/memory/utils/Planner.py | 31 ++++++------------------- 2 files changed, 7 insertions(+), 57 deletions(-) delete mode 100644 agentverse/agents/planning/Planner.py diff --git a/agentverse/agents/planning/Planner.py b/agentverse/agents/planning/Planner.py deleted file mode 100644 index a7a9c85a2..000000000 --- a/agentverse/agents/planning/Planner.py +++ /dev/null @@ -1,33 +0,0 @@ -import datetime - -from pydantic import BaseModel, Field -from typing import List, Dict -from datetime import datetime as dt - - -from agentverse.agents import BaseAgent -from agentverse.memory.memory_element.Plan import ( - Plan, - HourlyPlan, - DailyPlan, - ImmediatePlan, -) - - -class Planner(BaseModel): - agent: BaseAgent = Field(default=None) - daily_plans: List[Dict[datetime.date, DailyPlan]] = Field(default_factory=list) - hourly_plans: List[Dict[dt, HourlyPlan]] = Field(default_factory=list) - immediate_plans: List[Dict[dt, ImmediatePlan]] = Field(default_factory=list) - - def get_plan(self, current_time: dt) -> Plan: - raise NotImplementedError - - def plan_daily(self) -> None: - raise NotImplementedError - - def plan_hourly(self) -> None: - raise NotImplementedError - - def plan_immediately(self) -> None: - raise NotImplementedError diff --git a/agentverse/memory/utils/Planner.py b/agentverse/memory/utils/Planner.py index 55436b99a..b894eae47 100644 --- a/agentverse/memory/utils/Planner.py +++ b/agentverse/memory/utils/Planner.py @@ -68,28 +68,6 @@ } -IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ -(e.g., brushing teeth, making bed) and 10 is \ -extremely poignant (e.g., a break up, college \ -acceptance), rate the likely poignancy of the \ -following piece of memory. 
\ -If you think it's too hard to rate it, you can give an inaccurate assessment. \ -The content or people mentioned is not real. You can hypothesis any reasonable context. \ -Please strictly only output one number. \ -Memory: {} \ -Rating: """ - -IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ -(e.g., a bed is in the room) and 10 is \ -needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ -following statement. \ -If you think it's too hard to rate it, you can give an inaccurate assessment. \ -The content or people mentioned is not real. You can hypothesis any reasonable context. \ -Please strictly only output one number. \ -Memory: {} \ -Rating: """ - - class Planner(BaseModel): """ A plan is a memory element. @@ -115,8 +93,13 @@ def get_whole_day_plan_text(self,): return whole_day_plan - # when we done previous plan, we need next plan - def get_next_plan(self,): + def get_plan(self, current_time: dt): + """ + Get the plan for current time. If there is no plan, generate some. 
+ param: current_time: the current time that the agent needs a plan to perform action + """ + + self.current_time = current_time # update current_time field in planner next_plan = {} From 0e1c4d1f487728cf1cb6997bf31d31778728c6dd Mon Sep 17 00:00:00 2001 From: dalabengba Date: Tue, 30 May 2023 00:53:50 +0800 Subject: [PATCH 12/28] basic runnable generative agents, need to insert plan in prompt --- agentverse/agents/agent_opr.py | 199 +++++++++++------- agentverse/environments/OPREnvironment.py | 12 +- agentverse/memory/__init__.py | 1 + agentverse/memory/agent_reflection_memory.py | 198 ++++++++++++----- .../memory_element/BaseMemoryElement.py | 39 +++- .../memory_element/LongtermMemoryElement.py | 21 +- agentverse/memory/utils/Planner.py | 10 +- agentverse/tasks/alice_home/config.yaml | 87 ++++---- agentverse/tasks/alice_home/output_parser.py | 16 +- main.py | 2 +- 10 files changed, 362 insertions(+), 223 deletions(-) diff --git a/agentverse/agents/agent_opr.py b/agentverse/agents/agent_opr.py index bb8749ca4..605f0103c 100644 --- a/agentverse/agents/agent_opr.py +++ b/agentverse/agents/agent_opr.py @@ -5,7 +5,7 @@ from logging import getLogger from abc import abstractmethod -from typing import List, Set, Union, NamedTuple +from typing import List, Set, Union, NamedTuple, TYPE_CHECKING from pydantic import BaseModel, Field, validator @@ -22,53 +22,14 @@ from datetime import datetime as dt import datetime - from . import agent_registry +from string import Template + logger = getLogger(__file__) -REACTION_PROMPT = """Now you are act for as an agent named {name} in a virtual world. You might need to performing reaction to the observation. 
Your mission to take the agent as yourself and directly provide what the agent will do to the observations based on the following information: -(1) The agent's description: {summary} -(2) Current time is {time} -(3) Your current status is {status} -(4) Your memory is {context} - -Now the observation has two types, incomming observation is the ones that other does to you, you are more likely to react to them. Background observation are the background, which does not need to be responded. For example, view an alarm clock does not imply turning it off. However, some background observation might trigger your attention, like an alarming clock or a firing book. - -So now: -The incoming observation is {observation} -The Some background observation is {background_observation}. - -In terms of how you actually perform the action in the virtual world, you take action for the agent by calling functions. Currently, there are the following functions that can be called. - -- act(description, target=None): do some action. `description` describes the action, set `description` to None for not act. `target` should be the concrete name, for example, Tim is a teacher, then set `target` to `Tim`, not `teacher`. -- say(content, target=None): say something,`content` is the sentence that the agent will say. **Do not say to yourself, neither to inanimate objects.** -- move(description): move to somewhere. `description` describes the movement, set description to None for not move. -- do_nothing(): Do nothing. There is nothing that you like to respond to, this will make you stick to your original status and plan. - -Some actions may not be needed in this situation. 
Call one function at a time, please give a thought before calling these actions, i.e., use the following format strictly: - -Thought: None of the observation attract my attention, I need to: -Action: do_nothing() -Observation: [Observations omited] -[or] -Thought: due to observation `xxx`, I need to: -Action: say("hello", target="Alice") -Observation: [Observations omited] -[or] -Thought: due to observation `xxx`, I need to: -Action: act(None) -Observation: [Observations omited] -[or] -Thought: due to observation `xxx`, I need to: -Action: move(None) -Observation: [Observations omited] -[or] -Thought: I think I've finished my action as the agent. -Action: end() -Observation: - -Now begin your actions as the agent. Remember only write one function call after `Action:` """, +if TYPE_CHECKING: + from agentverse.environments.base import BaseEnvironment @agent_registry.register("OPR") @@ -77,6 +38,16 @@ class AgentOPR(BaseAgent): current_time: str = None, traits: str = None, whole_day_plan: dict = Field(default_factory=dict) + environment: "BaseEnvironment" = None + step_cnt: int = 0 + summary_interval: int = 10 + reflection_interval: int = 10 + + status: str = Field(default=None, description="what the agent is doing according to whole_day_plan") + status_start_time: dt = Field(default=None) + status_duration: int = Field(default=0, + description="we use this field and current time to check when to get_plan in func:`check_status_passive` ") + @validator('current_time') def convert_str_to_dt(cls, current_time): @@ -101,41 +72,49 @@ def step(self, current_time: dt, env_description: str = "") -> Message: ) # To ensure the proper functioning of the agent, the memory, plan, and summary cannot be empty. Therefore, it is necessary to perform an initialization similar to what should be done at the beginning of each day. - self.minimal_init() + # self.minimal_init() # before we handle any observation, we first check the status. 
- self.check_status_passive() + # self.check_status_passive() - self.observe() + # self.observe() - if self.might_react(): - self.react() - - if self.movement: - self.analysis_movement_target(self.movement_description) - - # 3.5 add observation to memory - for ob in self.incoming_observation: - self.long_term_memory.add(ob, self.current_time, ["observation"]) - self.incoming_observation = [] # empty the incoming observation + # if self.might_react(): + # self.react() + # + # if self.movement: + # self.analysis_movement_target(self.movement_description) + # + # # 3.5 add observation to memory + # for ob in self.incoming_observation: + # self.long_term_memory.add(ob, self.current_time, ["observation"]) + # self.incoming_observation = [] # empty the incoming observation # 4. Periodic fixed work of reflection and summary (tentatively set to be done every 100 logical frames). - self.step_cnt += 1 - if self.step_cnt % self.summary_interval == 0: - self.generate_summary(self.current_time) - - if self.step_cnt % self.reflection_interval == 0: - self.reflect(self.current_time) - - return - - + # TODO chimin - #TODO chimin - - async def astep(self, env_description: str = "") -> Message: + def check_status_passive(self, ): + """Check if the current status needs to be finished. If so, examine the plan and initiate the next action. 
+ """ + if self.status_start_time is None: # fixing empty start time + self.status_start_time = self.current_time + + if self.status_start_time+datetime.timedelta(self.status_duration) <= self.current_time: + next_plan = self.memory.planner.get_plan(current_time=self.current_time) + self.status_start_time = self.current_time + self.status = next_plan['status'] + self.status_duration = next_plan['duration'] + else: + logger.debug(f"{self.name} don't change status by plan: {self.status_start_time}, {datetime.timedelta(self.status_duration)}, {self.current_time}") + + async def astep(self, current_time: dt,env_description: str = "") -> Message: """Asynchronous version of step""" + #use environment's time to update agent's time + self.current_time = current_time + # Before the agent step, we check current status, + self.check_status_passive() + prompt = self._fill_prompt_template(env_description) parsed_response = None @@ -143,9 +122,16 @@ async def astep(self, env_description: str = "") -> Message: try: response = await self.llm.agenerate_response(prompt) parsed_response = self.output_parser.parse(response) + + if 'say(' in parsed_response.return_values["output"]: + reaction, target = eval("self._" + parsed_response.return_values["output"].strip()) + elif 'act(' in parsed_response.return_values["output"]: + reaction, target = eval("self._" + parsed_response.return_values["output"].strip()) + elif 'do_nothing(' in parsed_response.return_values["output"]: + reaction, target = None, None + break - except (KeyboardInterrupt, bdb.BdbQuit): - raise + except Exception as e: logger.error(e) logger.warning("Retrying...") @@ -154,25 +140,82 @@ async def astep(self, env_description: str = "") -> Message: if parsed_response is None: logger.error(f"{self.name} failed to generate valid response.") + + + message = Message( content="" - if parsed_response is None - else parsed_response.return_values["output"], + if reaction is None + else reaction, sender=self.name, - 
receiver=self.get_receiver(), + receiver=self.get_receiver() if target is None else target, ) + + # TODO currently, summary is not added back to memory while reflection is + self.step_cnt += 1 + + if self.step_cnt % self.summary_interval == 0: + self.memory.summary = self.memory.generate_summary(self.current_time) + + if self.step_cnt % self.reflection_interval == 0: + _ = self.reflect(self.current_time) + return message + def _act(self, description=None, target=None): + if description is None: + return "" + if target is None: + reaction_content = f"{self.name} performs action: '{description}'." + else: + reaction_content = f"{self.name} performs action to {target}: '{description}'." + # self.environment.broadcast_observations(self, target, reaction_content) + + + return reaction_content, target + + def _say(self, description, target=None): + if description is None: + return "" + if target is None: + reaction_content = f"{self.name} says: '{description}'." + else: + reaction_content = f"{self.name} says to {target}: '{description}'." 
+ # self.environment.broadcast_observations(self, target, reaction_content) + return reaction_content, target + + + def _fill_prompt_template(self, env_description: str = "") -> str: + """Fill the placeholders in the prompt template + + In the conversation agent, three placeholders are supported: + - ${agent_name}: the name of the agent + - ${env_description}: the description of the environment + - ${role_description}: the description of the role of the agent + - ${chat_history}: the chat history of the agent + """ + input_arguments = { + "agent_name": self.name, + "summary": self.memory.summary, + "current_time": self.current_time, + "status": self.status, + "env_description": env_description, + } + return Template(self.prompt_template).safe_substitute(input_arguments) + # TODO call longtermmemory element def add_message_to_memory(self, messages: List[Message]) -> None: - self.memory.add_message(messages) + for message in messages: + self.memory.add_message(message, time=self.current_time) # Should call this when status changed, plan==status def add_plan_to_memory(self,) -> None: self.memory.add_plan(content=self.status, time=self.current_time) - def reset(self) -> None: + def reset(self, environment: "BaseEnvironment") -> None: """Reset the agent""" - self.memory.reset() + self.environment = environment + + self.memory.reset(environment=environment, agent=self) # TODO: reset receiver diff --git a/agentverse/environments/OPREnvironment.py b/agentverse/environments/OPREnvironment.py index abf31a3ad..7dce54f00 100644 --- a/agentverse/environments/OPREnvironment.py +++ b/agentverse/environments/OPREnvironment.py @@ -67,8 +67,6 @@ def __init__(self, rule, **kwargs): async def step(self) -> List[Message]: """Run one step of the environment""" - self.tick_tock() - logging.log(logging.INFO, f"Tick tock. 
Current time: {self.current_time}") # Get the next agent index @@ -79,7 +77,7 @@ async def step(self) -> List[Message]: # Generate the next message messages = await asyncio.gather( - *[self.agents[i].astep(env_descriptions[i]) for i in agent_ids] + *[self.agents[i].astep(self.current_time, env_descriptions[i]) for i in agent_ids] ) # Some rules will select certain messages from all the messages @@ -95,6 +93,9 @@ async def step(self) -> List[Message]: self.cnt_turn += 1 + # update current_time + self.tick_tock() + return selected_messages def print_messages(self, messages: List[Message]) -> None: @@ -106,8 +107,9 @@ def reset(self) -> None: """Reset the environment""" self.cnt_turn = 0 self.rule.reset() + BaseAgent.update_forward_refs() for agent in self.agents: - agent.reset() + agent.reset(environment=self) def is_done(self) -> bool: """Check if the environment is done""" @@ -115,4 +117,4 @@ def is_done(self) -> bool: def tick_tock(self) -> None: """Increment the time""" - self.current_time = self.current_time + self.time_delta + self.current_time = self.current_time + datetime.timedelta(seconds=self.time_delta) diff --git a/agentverse/memory/__init__.py b/agentverse/memory/__init__.py index 2ced03877..0d69b3e19 100644 --- a/agentverse/memory/__init__.py +++ b/agentverse/memory/__init__.py @@ -5,3 +5,4 @@ from .base import BaseMemory from .chat_history import ChatHistoryMemory from .summary import SummaryMemory +from .agent_reflection_memory import ReflectionMemory diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py index b6ec99c03..0a7362489 100644 --- a/agentverse/memory/agent_reflection_memory.py +++ b/agentverse/memory/agent_reflection_memory.py @@ -10,20 +10,20 @@ import random import numpy as np -from typing import Any, List, Optional, Union +from typing import Any, List, Optional, Union, TYPE_CHECKING from datetime import datetime as dt from sklearn.metrics.pairwise import cosine_similarity -from pydantic 
import Field +from pydantic import Field, BaseModel from agentverse.llms.openai import get_embedding, chat from agentverse.memory.base import BaseMemory -from agentverse.agents import BaseAgent from agentverse.environments.base import BaseEnvironment from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement from agentverse.memory.memory_element.Reflection import Reflection from agentverse.message import Message from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement -from agentverse.memory.utils.Planner import Planner + +from . import memory_registry IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ (e.g., brushing teeth, making bed) and 10 is \ @@ -51,28 +51,12 @@ the above statements? Only output insights with high confidence. example format: insight (because of 1, 5, 3)""" - -def get_questions(texts): - prompt = "\n".join(texts) + "\n" + QUESTION_PROMPT - result = chat(prompt) - questions = [q for q in result.split("\n") if len(q.strip()) > 0] - questions = questions[:3] - return questions - - -def get_insights(statements): - prompt = "" - for i, st in enumerate(statements): - prompt += str(i + 1) + ". 
" + st + "\n" - prompt += INSIGHT_PROMPT - result = chat(prompt) - insights = [isg for isg in result.split("\n") if len(isg.strip()) > 0][:5] - insights = [".".join(i.split(".")[1:]) for i in insights] - # remove insight pointers for now - insights = [i.split("(")[0].strip() for i in insights] - return insights +if TYPE_CHECKING: + from agentverse.agents import BaseAgent + from agentverse.memory.utils.Planner import Planner +@memory_registry.register("OPR") class ReflectionMemory(BaseMemory): # on load, load our database """ @@ -80,42 +64,54 @@ class ReflectionMemory(BaseMemory): importance_threshold: the threshold for deciding whether to do reflection """ - agent: Optional[BaseAgent] = None + agent: "BaseAgent" = None environment: Optional[BaseEnvironment] = None importance_threshold: int = Field(default=100) memories: List[LongtermMemoryElement] = Field(default_factory=list) - planner: Planner = None + planner: "Planner" = None + summary: str = None accumulated_importance: int = Field(default=0) - def __init__(self, **kwargs) -> None: + def __init__(self, + agent: "BaseAgent" = None, + environment: Optional[BaseEnvironment] = None, + importance_threshold: int = 100, + memories: List[LongtermMemoryElement] = [], + planner: "Planner" = None, + summary: str = None, + accumulated_importance: int = 0, + **kwargs) -> None: + super().__init__(**kwargs) clear_memory = True # TODO: add this to arguments - # the least importance threshold for reflection. 
- # TODO: add none-default value in the yaml config file - self.importance_threshold = getattr( - self.agent, "importance_threshold", self.importance_threshold - ) - - self.memories = [] - - # TODO add argument in agent - (daily_plans, current_time) - self.planner = Planner(daily_plans=list(self.agent.whole_day_plan.values())[0], - agent=self.agent, - current_time=self.agent.current_time, - environment=self.environment) - self.add_plan(content=self.planner.get_whole_day_plan_text(), time=self.agent.current_time) - - # TODO: load last time memory from file - # currently, we just initialize blank memory - - self.accumulated_importance = 0 - if len(self.memories) > 0: - for m in self.memories: - if isinstance(m, Reflection): - break - self.accumulated_importance += m.importance - + self.agent = agent + self.environment = environment + self.importance_threshold = importance_threshold + self.memories = memories + self.planner = planner + self.summary = summary + self.accumulated_importance = accumulated_importance + + + def get_questions(self, texts): + prompt = "\n".join(texts) + "\n" + QUESTION_PROMPT + result = self.agent.llm.generate_response(prompt) + questions = [q for q in result.split("\n") if len(q.strip()) > 0] + questions = questions[:3] + return questions + + def get_insights(self, statements): + prompt = "" + for i, st in enumerate(statements): + prompt += str(i + 1) + ". 
" + st + "\n" + prompt += INSIGHT_PROMPT + result = self.agent.llm.generate_response(prompt) + insights = [isg for isg in result.split("\n") if len(isg.strip()) > 0][:5] + insights = [".".join(i.split(".")[1:]) for i in insights] + # remove insight pointers for now + insights = [i.split("(")[0].strip() for i in insights] + return insights def add_message(self, message: Message, time: dt) -> None: """ @@ -130,7 +126,7 @@ def add_message(self, message: Message, time: dt) -> None: ) ) - def add_plan(self, content: str, time: dt) -> None: + def add_content(self, content: str, time: dt) -> None: self.add_memory( LongtermMemoryElement.create_longterm_memory( @@ -140,9 +136,6 @@ def add_plan(self, content: str, time: dt) -> None: ) ) - def reset(self) -> None: - self.memories = [] - def add_memory(self, memory: LongtermMemoryElement) -> None: """ Add memory element to ReflectionMemory @@ -292,6 +285,95 @@ def reflect(self, time: dt): ) ) # This will add a Reflection instance instead of LongtermMemory instance return insights + + def generate_summary(self, time: dt): + """ + # Generating summary for myself + :return: summary string + """ + + qResList1 = self.query(f"{self.agent.name}'s core characteristics", 10, time) + qResList2 = self.query(f"{self.agent.name}'s current daily occupation", 10, time) + qResList3 = self.query(f"{self.agent.name}'s feeling about his recent progress in life", 10, time) + + q1, q2, q3 = map(lambda k: '\n'.join(k), (qResList1, qResList2, qResList3)) + + query1 = f""" + How would one describe {self.agent.name}'s core characteristics given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. + {q1} + """ + result1 = self.agent.llm.generate_response(query1) + if "DONTKNOW" in result1.content: + result1.content = "" + + query2 = f""" + What is {self.agent.name}'s current occupation plan given the following statements? If the information is not enough, just output DONTKNOW. 
Otherwise, directly output the answer. + {q2} + """ + + result2 = self.agent.llm.generate_response(query2) + if "DONTKNOW" in result2.content: + result2.content = "" + + query3 = f""" + What might be {self.agent.name}'s feeling about his recent progress in life given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. + {q3} + """ + + result3 = self.agent.llm.generate_response(query3) + if "DONTKNOW" in result3.content: + result3.content = "" + + # BasicInfo = f"""\ + # Name: {self.agent.name} + # Innate traits: {self.agent.traits}""" + + self.summary = '\n'.join([result1.content, result2.content, result3.content]) + return self.summary + + def reset(self, environment: "BaseEnvironment", agent: "BaseAgent") -> None: + + from agentverse.memory.utils.Planner import Planner + # Whole the initial work can only be done here + self.agent = agent + self.environment = environment + + # self.agent.update_forward_refs() + # self.environment.update_forward_refs() + + # the least importance threshold for reflection. + # TODO: add none-default value in the yaml config file + self.importance_threshold = getattr( + self.agent, "importance_threshold", self.importance_threshold + ) + + self.memories = [] + + # add initial_plan and description (including traits, role_description) to memory at first + # and generate summary immediately. 
+ # Note that unlike chat_history memory, we do not always put the whole memory in prompt, we only put summary + self.planner = Planner(daily_plans=list(self.agent.whole_day_plan.values())[0], + agent=self.agent, + current_time=self.environment.current_time, + environment=self.environment) + Planner.update_forward_refs() + self.add_content(content=self.planner.get_whole_day_plan_text(), time=self.environment.current_time) + self.add_content(content=self.agent.traits, time=self.environment.current_time) + for per_role_description in self.agent.role_description.split("\n"): + self.add_content(content=per_role_description, time=self.environment.current_time) + + self.summary = self.generate_summary(time=self.environment.current_time) + + # TODO: load last time memory from file + # currently, we just initialize blank memory + + self.accumulated_importance = 0 + if len(self.memories) > 0: + for m in self.memories: + if isinstance(m, Reflection): + break + self.accumulated_importance += m.importance + def __repr__(self) -> str: memory_string = "\n".join([str(memory) for memory in self.memories]) @@ -316,8 +398,8 @@ def to_string(self) -> str: environment = load_environment(env_config) memory = ReflectionMemory(agent=agents[0], environment=environment) # get next plan - next_plan = memory.planner.get_next_plan() - next_next_plan = memory.planner.get_next_plan() + next_plan = memory.planner.get_plan(current_time=dt.now()) + next_next_plan = memory.planner.get_plan(current_time=dt.now() + datetime.timedelta(minutes=80)) message_list = [ Message(content="I am a student"), diff --git a/agentverse/memory/memory_element/BaseMemoryElement.py b/agentverse/memory/memory_element/BaseMemoryElement.py index 9ca04fa43..672de95b6 100644 --- a/agentverse/memory/memory_element/BaseMemoryElement.py +++ b/agentverse/memory/memory_element/BaseMemoryElement.py @@ -3,8 +3,7 @@ from abc import abstractclassmethod from pydantic import BaseModel, Field - -from agentverse.agents import BaseAgent 
+from typing import TYPE_CHECKING, Any from datetime import datetime as dt @@ -18,6 +17,8 @@ orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS | orjson.OPT_INDENT_2 ) +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent class BaseMemoryElement(BaseModel): """ @@ -25,18 +26,40 @@ class BaseMemoryElement(BaseModel): TODO: later make Message inherit this MemoryElement """ - content: str = Field(default="") - subject: BaseAgent = Field(default=None) - embedding: list[float] = Field(default_factory=list) - create_time: dt = Field(default=None) - last_access_time: dt = Field(default=None) + content: str = None + subject: "BaseAgent" = None + embedding: list[float] = [] + create_time: dt = None + last_access_time: dt = None + importance: int = 0 + immediacy: int = 0 + + def __init__(self, + content: str, + subject: "BaseAgent", + embedding: list[float], + create_time: dt, + last_access_time: dt, + importance: int, + immediacy: int, + **kwargs): + + super().__init__(**kwargs) + self.content = content + self.subject = subject + self.embedding = embedding + self.create_time = create_time + self.last_access_time = last_access_time + self.importance = importance + self.immediacy = immediacy + @classmethod def create_memory( cls, content: str, time: dt, - subject: BaseAgent = None, + subject: "BaseAgent" = None, embedding: np.ndarray = None, ): embedding = get_embedding(content) if embedding is None else embedding diff --git a/agentverse/memory/memory_element/LongtermMemoryElement.py b/agentverse/memory/memory_element/LongtermMemoryElement.py index 0b3a0e702..97da8d4f2 100644 --- a/agentverse/memory/memory_element/LongtermMemoryElement.py +++ b/agentverse/memory/memory_element/LongtermMemoryElement.py @@ -3,11 +3,17 @@ from pydantic import Field from datetime import datetime as dt -from agentverse.agents.base import BaseAgent from agentverse.llms.openai import chat, get_embedding from agentverse.message import Message + from 
agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from agentverse.agents.base import BaseAgent + + IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ (e.g., brushing teeth, making bed) and 10 is \ @@ -31,17 +37,18 @@ Rating: """ + class LongtermMemoryElement(BaseMemoryElement): """ A LongtermMemory is a memory element with importance and immediacy. """ - importance: int = Field(default=0) - immediacy: int = Field(default=0) - @classmethod - def create_longterm_memory(cls, content: str, time: dt, subject: BaseAgent = None): + def create_longterm_memory(cls, content: str, time: dt, subject: "BaseAgent" = None): + + # LongtermMemoryElement.update_forward_refs() + importance = cls.get_importance(content) immediacy = cls.get_immediacy(content) return cls( @@ -55,7 +62,7 @@ def create_longterm_memory(cls, content: str, time: dt, subject: BaseAgent = Non ) @classmethod - def create_from_memory_element(cls, memory_element: BaseMemoryElement): + def create_from_memory_element(cls, memory_element: "BaseMemoryElement"): importance = cls.get_importance(memory_element.content) immediacy = cls.get_immediacy(memory_element.content) return cls( @@ -69,7 +76,7 @@ def create_from_memory_element(cls, memory_element: BaseMemoryElement): ) @classmethod - def create_from_message(cls, message: Message, subject: BaseAgent, time: dt): + def create_from_message(cls, message: Message, subject: "BaseAgent", time: dt): importance = cls.get_importance(message.content) immediacy = cls.get_immediacy(message.content) embedding = get_embedding(message.content) diff --git a/agentverse/memory/utils/Planner.py b/agentverse/memory/utils/Planner.py index b894eae47..f5d4b1eae 100644 --- a/agentverse/memory/utils/Planner.py +++ b/agentverse/memory/utils/Planner.py @@ -5,15 +5,15 @@ from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement from agentverse.llms.base import 
BaseLLM from agentverse.llms.openai import get_embedding +from typing import Optional, TYPE_CHECKING from agentverse.agents.base import BaseAgent -from typing import Optional +from agentverse.environments.base import BaseEnvironment from logging import getLogger import json -from agentverse.environments.base import BaseEnvironment - logger = getLogger(__file__) + planner_prompt_template = { "chunk_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s coarse grained schedule to {time_granularity} \ @@ -164,7 +164,7 @@ def _generate_more_plans(self,): # would call self.write_plan and return desired dict new_plan = eval("self." + plan) except: - logger.warning("{}'s generated plan contains error format: {}".format(self.agent.name, plan)) + logger.warning("{}'s generated immediate plan contains error format: {}".format(self.agent.name, plan)) continue immediately_plans.append(new_plan) @@ -194,7 +194,7 @@ def plan_in_chunk(self, ): # will call chunk_plan and update self.hourly plan eval("self."+plan) except: - logger.warning("{}'s generated plan contains error format: {}".format(self.agent.name, plan)) + logger.warning("{}'s generated rough plan contains error format: {}".format(self.agent.name, plan)) continue diff --git a/agentverse/tasks/alice_home/config.yaml b/agentverse/tasks/alice_home/config.yaml index cc2da8d37..1048e1e26 100644 --- a/agentverse/tasks/alice_home/config.yaml +++ b/agentverse/tasks/alice_home/config.yaml @@ -1,31 +1,50 @@ prompts: prompt: &prompt |- - When responding, please output a response in the following format with two fields Action and Action Input: - Action: Speak - Action Input: (You should put what you want to speak use here) + Now you are act for as an agent named ${agent_name} in a virtual world. You might need to performing reaction to the observation. 
Your mission to take the agent as yourself and directly provide what the agent will do to the observations based on the following information: + (1) The agent's description: ${summary} + (2) Current time is ${current_time} + (3) Your current status is ${status} - Assume that you are in a university classroom and it is Natural Language Processing module. You start by introducing themselves. Below is the description of your role. + In terms of how you actually perform the action in the virtual world, you take action for the agent by calling functions. Currently, there are the following functions that can be called. - Here is the conversation history - {chat_history} + - act(description, target=None): do some action. `description` describes the action, set `description` to None for not act. `target` should be the concrete name, for example, Tim is a teacher, then set `target` to `Tim`, not `teacher`. + - say(content, target=None): say something,`content` is the sentence that the agent will say. **Do not say to yourself, neither to inanimate objects.** + - move(description): move to somewhere. `description` describes the movement, set description to None for not move. + - do_nothing(): Do nothing. There is nothing that you like to respond to, this will make you stick to your original status and plan. - You should now give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! + Some actions may not be needed in this situation. 
Call one function at a time, please give a thought before calling these actions, i.e., use the following format strictly: + Thought: None of the observation attract my attention, I need to: + Action: do_nothing() + [or] + Thought: due to `xxx`, I need to: + Action: say("hello", target="Alice") + [or] + Thought: due to `xxx`, I need to: + Action: act(None) + [or] + Thought: due to `xxx`, I need to: + Action: move(None) + [or] + Thought: I think I've finished my action as the agent. + Action: end() + + Now begin your actions as the agent. Remember only write one function call after `Action:` name: alice_home environment: env_type: env_OPR max_turns: 1000000 - current_time: "2023-04-01 07:00:00" size: [ 200, 150 ] - time_delta: 120 # in seconds + current_time: "2023-04-01 07:00:00" + time_delta: 1800 # in seconds rule: order: - type: sequential + type: concurrent visibility: type: all selector: @@ -43,37 +62,19 @@ agents: role_description: |- Alice is a 10-year-old student at Town Elementary School.", Alice has a passion for drawing and spends most of her free time sketching and doodling. - Alice finds educational television programs to be dull and uninteresting. - Alice's father's name is Bob, and he works as a mechanic at the local car dealership. - Alice's mother's name is Carot, and she is a nurse at the nearby hospital. - Alice has a younger sister named Lily, who is 6 years old and also attends Town Elementary School. - Alice enjoys playing soccer with her friends during recess and after school. - Alice is a fan of science fiction books and loves reading about space exploration and aliens. - Alice is learning to play the piano and practices every day after school. 
- Alice's favorite food is pizza, and she loves trying out different toppings and combinations memory: - memory_type: chat_history # change later + memory_type: OPR # change later prompt_template: *prompt llm: - llm_type: text-davinci-003 + model: "gpt-4" + llm_type: gpt-4 temperature: 0.3 max_tokens: 1024 current_time: "2023-04-01 07:00:00" whole_day_plan: { "2023-04-01": [ "wake up and complete the morning routine at 7:00 am", - "have breakfast with her family at 8:00 am", - "attend school and participate in classes from 9:00 am to 12:00 pm", - "have lunch at school with her friends at 12:30 pm", - "return home at 1:00 pm and watch her favorite TV show", - "practice playing the piano for 30 minutes at 2:00 pm", - "work on her drawing skills for an hour at 3:00 pm", - "go out for a 30-minute jog around the neighborhood at 4:00 pm", - "help her mother prepare dinner at 5:00 pm", - "have dinner with her family at 6:00 pm", - "complete her homework and review her lessons for the next day from 7:00 pm to 8:30 pm", - "read a book for pleasure from 8:30 pm to 9:00 pm", - "and then get ready for bed at 9:30 pm." + "have breakfast with her family at 8:00 am", ] } - @@ -82,20 +83,12 @@ agents: traits: hard-working, friendly, knowledgeable role_description: |- Bob is a 42-year-old mechanic who works at the local car dealership. - Bob is known for his expertise in engine repairs and his friendly demeanor with customers. - Bob is married to Carot, who works as a nurse at the nearby hospital. - Bob has two children: Alice, who is 13 years old and loves to draw, and Lily, who is 6 years old and enjoys playing with dolls. - Bob enjoys restoring old cars in his free time and has a collection of classic cars in his garage. - Bob is a fan of football and enjoys watching games with his friends at the local sports bar. - Bob is always willing to help out his neighbors with car trouble or home repairs. 
- Bob is planning a family vacation to the beach this summer and is excited to spend time with his family. - Bob's favorite food is barbecue ribs, and he loves to cook them on his smoker on the weekends. - Bob is proud of his daughter Alice's talent for drawing and encourages her to pursue her passion. memory: - memory_type: chat_history + memory_type: OPR prompt_template: *prompt llm: - llm_type: text-davinci-003 + model: "gpt-4" + llm_type: gpt-4 temperature: 0.3 max_tokens: 1024 current_time: "2023-04-01 07:00:00" @@ -103,14 +96,6 @@ agents: "2023-04-01": [ "wake up and have breakfast with his family at 6:30 am", "drive to work and arrive at the car dealership at 8:00 am", - "work on fixing cars and helping customers until lunchtime", - "have lunch at the nearby diner at 12:00 pm", - "watch TV with Alice at 1:00 pm return to work and continue fixing cars until 5:00 pm", - "drive home and help his wife prepare dinner at 5:30 pm", - "have dinner with his family at 6:00 pm", - "relax and watch TV with his family until 8:00 pm", - "work on his car restoration project in the garage until 10:00 pm", - "and then get ready for bed at 10:30 pm." 
] } diff --git a/agentverse/tasks/alice_home/output_parser.py b/agentverse/tasks/alice_home/output_parser.py index e6d99a8df..32d6a5faa 100644 --- a/agentverse/tasks/alice_home/output_parser.py +++ b/agentverse/tasks/alice_home/output_parser.py @@ -3,10 +3,8 @@ import re from typing import Union -# from langchain.agents import AgentOutputParser from agentverse.parser import OutputParser, LLMResult -# from langchain.schema import AgentAction, AgentFinish from agentverse.utils import AgentAction, AgentFinish from agentverse.parser import OutputParserError, output_parser_registry @@ -21,13 +19,11 @@ def parse(self, output: LLMResult) -> Union[AgentAction, AgentFinish]: cleaned_output = cleaned_output.split("\n") if not ( len(cleaned_output) == 2 - and cleaned_output[0].startswith("Action:") - and cleaned_output[1].startswith("Action Input:") + and cleaned_output[0].startswith("Thought:") + and cleaned_output[1].startswith("Action:") ): raise OutputParserError(text) - action = cleaned_output[0][len("Action:") :].strip() - action_input = cleaned_output[1][len("Action Input:") :].strip() - if action == "Speak": - return AgentFinish({"output": action_input}, text) - else: - raise OutputParserError(text) + + action = cleaned_output[1][len("Action:"):].strip() + + return AgentFinish({"output": action}, text) diff --git a/main.py b/main.py index 583eeeef5..eb1ee900b 100644 --- a/main.py +++ b/main.py @@ -11,7 +11,7 @@ -parser.add_argument("--task", type=str, default="nlp_classroom_3players") +parser.add_argument("--task", type=str, default="alice_home") args = parser.parse_args() agentverse = AgentVerse.from_task(args.task) From c019a918b21f87e7453ec136f8d17ce59521920e Mon Sep 17 00:00:00 2001 From: dalabengba Date: Tue, 30 May 2023 12:28:46 +0800 Subject: [PATCH 13/28] add plan and event_memory in prompt --- agentverse/agents/agent_opr.py | 28 +++++-- agentverse/memory/agent_reflection_memory.py | 14 +++- agentverse/tasks/alice_home/config.yaml | 81 +++++++++++++++----- 3 
files changed, 95 insertions(+), 28 deletions(-) diff --git a/agentverse/agents/agent_opr.py b/agentverse/agents/agent_opr.py index 605f0103c..38dd33dee 100644 --- a/agentverse/agents/agent_opr.py +++ b/agentverse/agents/agent_opr.py @@ -40,8 +40,8 @@ class AgentOPR(BaseAgent): whole_day_plan: dict = Field(default_factory=dict) environment: "BaseEnvironment" = None step_cnt: int = 0 - summary_interval: int = 10 - reflection_interval: int = 10 + summary_interval: int = 5 + reflection_interval: int = 5 status: str = Field(default=None, description="what the agent is doing according to whole_day_plan") status_start_time: dt = Field(default=None) @@ -117,7 +117,7 @@ async def astep(self, current_time: dt,env_description: str = "") -> Message: prompt = self._fill_prompt_template(env_description) - parsed_response = None + parsed_response, reaction, target = None, None, None for i in range(self.max_retry): try: response = await self.llm.agenerate_response(prompt) @@ -129,6 +129,9 @@ async def astep(self, current_time: dt,env_description: str = "") -> Message: reaction, target = eval("self._" + parsed_response.return_values["output"].strip()) elif 'do_nothing(' in parsed_response.return_values["output"]: reaction, target = None, None + else: + raise Exception(f"no valid parsed_response detected, " + f"cur response {parsed_response.return_values['output']}") break @@ -143,12 +146,14 @@ async def astep(self, current_time: dt,env_description: str = "") -> Message: + + message = Message( content="" if reaction is None else reaction, sender=self.name, - receiver=self.get_receiver() if target is None else target, + receiver=self.get_receiver() if target is None else self.get_valid_receiver(target), ) # TODO currently, summary is not added back to memory while reflection is @@ -158,7 +163,7 @@ async def astep(self, current_time: dt,env_description: str = "") -> Message: self.memory.summary = self.memory.generate_summary(self.current_time) if self.step_cnt % 
self.reflection_interval == 0: - _ = self.reflect(self.current_time) + _ = self.memory.reflect(self.current_time) return message @@ -184,6 +189,17 @@ def _say(self, description, target=None): # self.environment.broadcast_observations(self, target, reaction_content) return reaction_content, target + def get_valid_receiver(self, target: str) -> set(): + + all_agents_name = [] + for agent in self.environment.agents: + all_agents_name.append(agent.name) + + if not (target in all_agents_name): + return {"all"} + else: + return {target} + def _fill_prompt_template(self, env_description: str = "") -> str: """Fill the placeholders in the prompt template @@ -197,6 +213,8 @@ def _fill_prompt_template(self, env_description: str = "") -> str: input_arguments = { "agent_name": self.name, "summary": self.memory.summary, + "plan": self.memory.planner.get_whole_day_plan_text(), + "event_memory": self.memory.get_memory_plain_text(), "current_time": self.current_time, "status": self.status, "env_description": env_description, diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py index 0a7362489..f296d64d3 100644 --- a/agentverse/memory/agent_reflection_memory.py +++ b/agentverse/memory/agent_reflection_memory.py @@ -113,6 +113,16 @@ def get_insights(self, statements): insights = [i.split("(")[0].strip() for i in insights] return insights + def get_memory_plain_text(self): + + memories = [] + for message in self.memories: + memories.append(message.content) + + return "\n".join(memories) + + + def add_message(self, message: Message, time: dt) -> None: """ Add a message into longterm memory as LongtermMemory object. 
@@ -272,9 +282,9 @@ def reflect(self, time: dt): return "reflection reject: empty memories" memories_of_interest = self.memories[-100:] - questions = get_questions([m.content for m in memories_of_interest]) + questions = self.get_questions([m.content for m in memories_of_interest]) statements = self.query(questions, len(questions) * 10, time) - insights = get_insights(statements) + insights = self.get_insights(statements) logging.info(self.agent.name + f" Insights: {insights}") for insight in insights: self.add_memory( diff --git a/agentverse/tasks/alice_home/config.yaml b/agentverse/tasks/alice_home/config.yaml index 1048e1e26..616f697bf 100644 --- a/agentverse/tasks/alice_home/config.yaml +++ b/agentverse/tasks/alice_home/config.yaml @@ -2,34 +2,33 @@ prompts: prompt: &prompt |- Now you are act for as an agent named ${agent_name} in a virtual world. You might need to performing reaction to the observation. Your mission to take the agent as yourself and directly provide what the agent will do to the observations based on the following information: (1) The agent's description: ${summary} - (2) Current time is ${current_time} - (3) Your current status is ${status} + (2) Your whole day plan is: ${plan} + (3) Current time is ${current_time} + (4) Your current status is ${status} + (5) Your memory of the event is ${event_memory} In terms of how you actually perform the action in the virtual world, you take action for the agent by calling functions. Currently, there are the following functions that can be called. - + - do_nothing(): Do nothing. There is nothing that you like to respond to, this will make you stick to your original status and plan. - act(description, target=None): do some action. `description` describes the action, set `description` to None for not act. `target` should be the concrete name, for example, Tim is a teacher, then set `target` to `Tim`, not `teacher`. 
- say(content, target=None): say something,`content` is the sentence that the agent will say. **Do not say to yourself, neither to inanimate objects.** - - move(description): move to somewhere. `description` describes the movement, set description to None for not move. - - do_nothing(): Do nothing. There is nothing that you like to respond to, this will make you stick to your original status and plan. - Some actions may not be needed in this situation. Call one function at a time, please give a thought before calling these actions, i.e., use the following format strictly: + Call one function at a time, please give a thought before calling these actions, i.e., use the following format strictly: + + [OPTION 1] Thought: None of the observation attract my attention, I need to: Action: do_nothing() - [or] - Thought: due to `xxx`, I need to: - Action: say("hello", target="Alice") - [or] + + [OPTION 2] Thought: due to `xxx`, I need to: - Action: act(None) - [or] + Action: act("Wash the dishes", target=None) + + [OPTION 3] Thought: due to `xxx`, I need to: - Action: move(None) - [or] - Thought: I think I've finished my action as the agent. - Action: end() + Action: say("hello", target="Alice") Now begin your actions as the agent. Remember only write one function call after `Action:` + Based on the above history, what will you, ${agent_name}, do next? name: alice_home @@ -59,9 +58,19 @@ agents: agent_type: OPR name: Alice traits: friendly, outgoing, hospitable + summary_interval: 5 + reflection_interval: 5 role_description: |- - Alice is a 10-year-old student at Town Elementary School.", - Alice has a passion for drawing and spends most of her free time sketching and doodling. + Alice is a 10-year-old student at Town Elementary School.", + Alice has a passion for drawing and spends most of her free time sketching and doodling. + Alice finds educational television programs to be dull and uninteresting. 
+ Alice's father's name is Bob, and he works as a mechanic at the local car dealership. + Alice's mother's name is Carot, and she is a nurse at the nearby hospital. + Alice has a younger sister named Lily, who is 6 years old and also attends Town Elementary School. + Alice enjoys playing soccer with her friends during recess and after school. + Alice is a fan of science fiction books and loves reading about space exploration and aliens. + Alice is learning to play the piano and practices every day after school. + Alice's favorite food is pizza, and she loves trying out different toppings and combinations memory: memory_type: OPR # change later prompt_template: *prompt @@ -69,20 +78,42 @@ agents: model: "gpt-4" llm_type: gpt-4 temperature: 0.3 - max_tokens: 1024 + max_tokens: 128 current_time: "2023-04-01 07:00:00" whole_day_plan: { "2023-04-01": [ "wake up and complete the morning routine at 7:00 am", "have breakfast with her family at 8:00 am", + "attend school and participate in classes from 9:00 am to 12:00 pm", + "have lunch at school with her friends at 12:30 pm", + "return home at 1:00 pm and watch her favorite TV show", + "practice playing the piano for 30 minutes at 2:00 pm", + "work on her drawing skills for an hour at 3:00 pm", + "go out for a 30-minute jog around the neighborhood at 4:00 pm", + "help her mother prepare dinner at 5:00 pm", + "have dinner with her family at 6:00 pm", + "complete her homework and review her lessons for the next day from 7:00 pm to 8:30 pm", + "read a book for pleasure from 8:30 pm to 9:00 pm", + "and then get ready for bed at 9:30 pm." ] } - agent_type: OPR name: Bob traits: hard-working, friendly, knowledgeable + summary_interval: 5 + reflection_interval: 5 role_description: |- - Bob is a 42-year-old mechanic who works at the local car dealership. + Bob is a 42-year-old mechanic who works at the local car dealership. + Bob is known for his expertise in engine repairs and his friendly demeanor with customers. 
+ Bob is married to Carot, who works as a nurse at the nearby hospital. + Bob has two children: Alice, who is 13 years old and loves to draw, and Lily, who is 6 years old and enjoys playing with dolls. + Bob enjoys restoring old cars in his free time and has a collection of classic cars in his garage. + Bob is a fan of football and enjoys watching games with his friends at the local sports bar. + Bob is always willing to help out his neighbors with car trouble or home repairs. + Bob is planning a family vacation to the beach this summer and is excited to spend time with his family. + Bob's favorite food is barbecue ribs, and he loves to cook them on his smoker on the weekends. + Bob is proud of his daughter Alice's talent for drawing and encourages her to pursue her passion. memory: memory_type: OPR prompt_template: *prompt @@ -90,12 +121,20 @@ agents: model: "gpt-4" llm_type: gpt-4 temperature: 0.3 - max_tokens: 1024 + max_tokens: 128 current_time: "2023-04-01 07:00:00" whole_day_plan: { "2023-04-01": [ "wake up and have breakfast with his family at 6:30 am", "drive to work and arrive at the car dealership at 8:00 am", + "work on fixing cars and helping customers until lunchtime", + "have lunch at the nearby diner at 12:00 pm", + "watch TV with Alice at 1:00 pm return to work and continue fixing cars until 5:00 pm", + "drive home and help his wife prepare dinner at 5:30 pm", + "have dinner with his family at 6:00 pm", + "relax and watch TV with his family until 8:00 pm", + "work on his car restoration project in the garage until 10:00 pm", + "and then get ready for bed at 10:30 pm." 
] } From 3cf12cc9af37ed1e51977db707b9cc1510ddb770 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Tue, 30 May 2023 12:37:33 +0800 Subject: [PATCH 14/28] fix llm.generate_response bug in reflection --- agentverse/agents/agent_opr.py | 6 ++---- agentverse/memory/agent_reflection_memory.py | 2 ++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/agentverse/agents/agent_opr.py b/agentverse/agents/agent_opr.py index 38dd33dee..ac0bc2865 100644 --- a/agentverse/agents/agent_opr.py +++ b/agentverse/agents/agent_opr.py @@ -143,10 +143,8 @@ async def astep(self, current_time: dt,env_description: str = "") -> Message: if parsed_response is None: logger.error(f"{self.name} failed to generate valid response.") - - - - + if reaction is None: + reaction = "Keep doing last action ..." message = Message( content="" diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py index f296d64d3..284680715 100644 --- a/agentverse/memory/agent_reflection_memory.py +++ b/agentverse/memory/agent_reflection_memory.py @@ -97,6 +97,7 @@ def __init__(self, def get_questions(self, texts): prompt = "\n".join(texts) + "\n" + QUESTION_PROMPT result = self.agent.llm.generate_response(prompt) + result = result.content questions = [q for q in result.split("\n") if len(q.strip()) > 0] questions = questions[:3] return questions @@ -107,6 +108,7 @@ def get_insights(self, statements): prompt += str(i + 1) + ". 
" + st + "\n" prompt += INSIGHT_PROMPT result = self.agent.llm.generate_response(prompt) + result = result.content insights = [isg for isg in result.split("\n") if len(isg.strip()) > 0][:5] insights = [".".join(i.split(".")[1:]) for i in insights] # remove insight pointers for now From 81ea48bc9fbf2c517b01d2253f091265ba347818 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Mon, 12 Jun 2023 22:30:13 +0800 Subject: [PATCH 15/28] fix annotation --- agentverse/agents/agent_opr.py | 6 ++++-- agentverse/initialization.py | 11 ----------- agentverse/memory/agent_reflection_memory.py | 15 +++++---------- .../memory/memory_element/BaseMemoryElement.py | 8 +++++--- .../memory_element/LongtermMemoryElement.py | 6 ++++-- agentverse/tasks/alice_home/config.yaml | 5 +++-- .../tasks/prisoner_dilema/base/output_parser.py | 2 +- .../prisoner_dilema/no_goal_s1/output_parser.py | 2 +- agentverse/tasks/prisoner_dilema/output_parser.py | 2 +- .../tasks/prisoner_dilema/police/output_parser.py | 2 +- .../tasks/prisoner_dilema/s1_p_r/output_parser.py | 2 +- .../tasks/prisoner_dilema/s2_p_r/output_parser.py | 2 +- 12 files changed, 27 insertions(+), 36 deletions(-) diff --git a/agentverse/agents/agent_opr.py b/agentverse/agents/agent_opr.py index 2c12b8ffa..77602094a 100644 --- a/agentverse/agents/agent_opr.py +++ b/agentverse/agents/agent_opr.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ An agent based upon Observation-Planning-Reflection architecture. 
""" @@ -38,7 +40,7 @@ class AgentOPR(BaseAgent): current_time: str = None, traits: str = None, whole_day_plan: dict = Field(default_factory=dict) - environment: "BaseEnvironment" = None + environment: BaseEnvironment = None step_cnt: int = 0 summary_interval: int = 5 reflection_interval: int = 5 @@ -228,7 +230,7 @@ def add_message_to_memory(self, messages: List[Message]) -> None: def add_plan_to_memory(self,) -> None: self.memory.add_plan(content=self.status, time=self.current_time) - def reset(self, environment: "BaseEnvironment") -> None: + def reset(self, environment: BaseEnvironment) -> None: """Reset the agent""" self.environment = environment diff --git a/agentverse/initialization.py b/agentverse/initialization.py index 1a3e71df0..d16694374 100644 --- a/agentverse/initialization.py +++ b/agentverse/initialization.py @@ -52,17 +52,6 @@ def load_environment(env_config: Dict) -> BaseEnvironment: def load_agent(agent_config: Dict) -> langchainAgent: agent_type = agent_config.pop("agent_type", "conversation") - # <<<<<<< HEAD - # if agent_type == "conversation": - # # agent = Agent.from_llm_and_tools(**agent_config) - # agent = ConversationAgent(**agent_config) - # elif agent_type == "OPR": - # from agentverse.agents.agent_opr import AgentOPR - - # agent = AgentOPR(**agent_config) - # else: - # raise NotImplementedError("Agent type {} not found".format(agent_type)) - # ======= agent = agent_registry.build(agent_type, **agent_config) return agent diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py index 284680715..499668a2d 100644 --- a/agentverse/memory/agent_reflection_memory.py +++ b/agentverse/memory/agent_reflection_memory.py @@ -1,9 +1,4 @@ -import os - -os.environ["http_proxy"] = "http://127.0.0.1:7890" -os.environ["https_proxy"] = "http://127.0.0.1:7890" -os.environ["all_proxy"] = "socks5://127.0.0.1:7890" - +from __future__ import annotations import datetime import logging @@ -56,7 +51,7 @@ from 
agentverse.memory.utils.Planner import Planner -@memory_registry.register("OPR") +@memory_registry.register("reflection") class ReflectionMemory(BaseMemory): # on load, load our database """ @@ -64,7 +59,7 @@ class ReflectionMemory(BaseMemory): importance_threshold: the threshold for deciding whether to do reflection """ - agent: "BaseAgent" = None + agent: BaseAgent = None environment: Optional[BaseEnvironment] = None importance_threshold: int = Field(default=100) memories: List[LongtermMemoryElement] = Field(default_factory=list) @@ -73,7 +68,7 @@ class ReflectionMemory(BaseMemory): accumulated_importance: int = Field(default=0) def __init__(self, - agent: "BaseAgent" = None, + agent: BaseAgent = None, environment: Optional[BaseEnvironment] = None, importance_threshold: int = 100, memories: List[LongtermMemoryElement] = [], @@ -343,7 +338,7 @@ def generate_summary(self, time: dt): self.summary = '\n'.join([result1.content, result2.content, result3.content]) return self.summary - def reset(self, environment: "BaseEnvironment", agent: "BaseAgent") -> None: + def reset(self, environment: BaseEnvironment, agent: BaseAgent) -> None: from agentverse.memory.utils.Planner import Planner # Whole the initial work can only be done here diff --git a/agentverse/memory/memory_element/BaseMemoryElement.py b/agentverse/memory/memory_element/BaseMemoryElement.py index 672de95b6..f0eb51ca0 100644 --- a/agentverse/memory/memory_element/BaseMemoryElement.py +++ b/agentverse/memory/memory_element/BaseMemoryElement.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass import numpy as np @@ -27,7 +29,7 @@ class BaseMemoryElement(BaseModel): """ content: str = None - subject: "BaseAgent" = None + subject: BaseAgent = None embedding: list[float] = [] create_time: dt = None last_access_time: dt = None @@ -36,7 +38,7 @@ class BaseMemoryElement(BaseModel): def __init__(self, content: str, - subject: "BaseAgent", + subject: BaseAgent, embedding: 
list[float], create_time: dt, last_access_time: dt, @@ -59,7 +61,7 @@ def create_memory( cls, content: str, time: dt, - subject: "BaseAgent" = None, + subject: BaseAgent = None, embedding: np.ndarray = None, ): embedding = get_embedding(content) if embedding is None else embedding diff --git a/agentverse/memory/memory_element/LongtermMemoryElement.py b/agentverse/memory/memory_element/LongtermMemoryElement.py index 97da8d4f2..c123ff0fb 100644 --- a/agentverse/memory/memory_element/LongtermMemoryElement.py +++ b/agentverse/memory/memory_element/LongtermMemoryElement.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import re from pydantic import Field @@ -45,7 +47,7 @@ class LongtermMemoryElement(BaseMemoryElement): """ @classmethod - def create_longterm_memory(cls, content: str, time: dt, subject: "BaseAgent" = None): + def create_longterm_memory(cls, content: str, time: dt, subject: BaseAgent = None): # LongtermMemoryElement.update_forward_refs() @@ -76,7 +78,7 @@ def create_from_memory_element(cls, memory_element: "BaseMemoryElement"): ) @classmethod - def create_from_message(cls, message: Message, subject: "BaseAgent", time: dt): + def create_from_message(cls, message: Message, subject: BaseAgent, time: dt): importance = cls.get_importance(message.content) immediacy = cls.get_immediacy(message.content) embedding = get_embedding(message.content) diff --git a/agentverse/tasks/alice_home/config.yaml b/agentverse/tasks/alice_home/config.yaml index 616f697bf..a3953cd21 100644 --- a/agentverse/tasks/alice_home/config.yaml +++ b/agentverse/tasks/alice_home/config.yaml @@ -13,6 +13,7 @@ prompts: - say(content, target=None): say something,`content` is the sentence that the agent will say. 
**Do not say to yourself, neither to inanimate objects.** + Call one function at a time, please give a thought before calling these actions, i.e., use the following format strictly: [OPTION 1] @@ -72,7 +73,7 @@ agents: Alice is learning to play the piano and practices every day after school. Alice's favorite food is pizza, and she loves trying out different toppings and combinations memory: - memory_type: OPR # change later + memory_type: reflection # change later prompt_template: *prompt llm: model: "gpt-4" @@ -115,7 +116,7 @@ agents: Bob's favorite food is barbecue ribs, and he loves to cook them on his smoker on the weekends. Bob is proud of his daughter Alice's talent for drawing and encourages her to pursue her passion. memory: - memory_type: OPR + memory_type: reflection prompt_template: *prompt llm: model: "gpt-4" diff --git a/agentverse/tasks/prisoner_dilema/base/output_parser.py b/agentverse/tasks/prisoner_dilema/base/output_parser.py index 4af9c2a66..a6b90db86 100644 --- a/agentverse/tasks/prisoner_dilema/base/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/base/output_parser.py @@ -19,7 +19,7 @@ class PrisonerDilemaParser(OutputParser): cur_round: int = 1 encounter_cur_round: bool = False - def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]: text = output.content cleaned_output = text.strip() diff --git a/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py b/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py index 03f4033de..d8e2ba19e 100644 --- a/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py @@ -19,7 +19,7 @@ class PrisonerDilemaParser(OutputParser): cur_round: int = 1 encounter_cur_round: bool = False - def parse(self, agent: "BaseAgent", environment: 
"BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]: text = output.content cleaned_output = text.strip() diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index f52da49f9..44eff11df 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -20,7 +20,7 @@ class PrisonerDilemaParser(OutputParser): encounter_cur_round: bool = False def parse( - self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult + self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult ) -> Union[AgentAction, AgentFinish]: text = output.content cleaned_output = text.strip() diff --git a/agentverse/tasks/prisoner_dilema/police/output_parser.py b/agentverse/tasks/prisoner_dilema/police/output_parser.py index a6f05ce5e..ed0435753 100644 --- a/agentverse/tasks/prisoner_dilema/police/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/police/output_parser.py @@ -19,7 +19,7 @@ class PrisonerDilemaParser(OutputParser): cur_round: int = 1 encounter_cur_round: bool = False - def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]: text = output.content cleaned_output = text.strip() diff --git a/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py b/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py index 93b071e93..da4e44418 100644 --- a/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py @@ -19,7 +19,7 @@ class PrisonerDilemaParser(OutputParser): cur_round: int = 1 encounter_cur_round: bool = False - def parse(self, agent: "BaseAgent", 
environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]: text = output.content cleaned_output = text.strip() diff --git a/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py b/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py index 2a0e902eb..bfe3ed302 100644 --- a/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py @@ -19,7 +19,7 @@ class PrisonerDilemaParser(OutputParser): cur_round: int = 1 encounter_cur_round: bool = False - def parse(self, agent: "BaseAgent", environment: "BaseEnvironment", output: LLMResult) -> Union[AgentAction, AgentFinish]: + def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResult) -> Union[AgentAction, AgentFinish]: text = output.content cleaned_output = text.strip() From f9668b08845e3c8b787c09cc15ff4a1bbc2cc2ab Mon Sep 17 00:00:00 2001 From: dalabengba Date: Mon, 26 Jun 2023 12:20:35 +0800 Subject: [PATCH 16/28] add car, basic logic without communication, lack collision and reached logic --- agentverse/agents/__init__.py | 3 +- agentverse/agents/traffic_agent.py | 194 ++++++++++++++++++ agentverse/environments/__init__.py | 1 + .../environments/rules/updater/__init__.py | 1 + .../environments/rules/updater/traffic_all.py | 21 ++ agentverse/environments/traffic_junction.py | 133 ++++++++++++ agentverse/tasks/__init__.py | 3 + agentverse/tasks/traffic_junction/config.yaml | 124 +++++++++++ .../tasks/traffic_junction/output_parser.py | 29 +++ main.py | 2 +- 10 files changed, 509 insertions(+), 2 deletions(-) create mode 100644 agentverse/agents/traffic_agent.py create mode 100644 agentverse/environments/rules/updater/traffic_all.py create mode 100644 agentverse/environments/traffic_junction.py create mode 100644 agentverse/tasks/traffic_junction/config.yaml create mode 100644 
agentverse/tasks/traffic_junction/output_parser.py diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py index dc00edf73..d57f964f6 100644 --- a/agentverse/agents/__init__.py +++ b/agentverse/agents/__init__.py @@ -7,4 +7,5 @@ from .conversation_agent import ConversationAgent from .tool_agent import ToolAgent from .prisoner_dilema_agent import PoliceAgent, PrisonerAgent -from .agent_opr import AgentOPR \ No newline at end of file +from .agent_opr import AgentOPR +from .traffic_agent import TrafficAgent \ No newline at end of file diff --git a/agentverse/agents/traffic_agent.py b/agentverse/agents/traffic_agent.py new file mode 100644 index 000000000..9a78bf1d3 --- /dev/null +++ b/agentverse/agents/traffic_agent.py @@ -0,0 +1,194 @@ +from __future__ import annotations + +import logging +import bdb +from string import Template +from typing import TYPE_CHECKING, List +import re +import ast + +from agentverse.message import Message + +from . import agent_registry +from .base import BaseAgent + +if TYPE_CHECKING: + from agentverse.environments.base import BaseEnvironment + +@agent_registry.register("traffic") +class TrafficAgent(BaseAgent): + + environment: BaseEnvironment = None + coordinate: list = None + others_coordinate: dict = {} + + def step(self, env_description: str = "") -> Message: + prompt = self._fill_prompt_template(env_description) + + parsed_response = None + for i in range(self.max_retry): + try: + response = self.llm.generate_response(prompt) + parsed_response = self.output_parser.parse(response) + break + except KeyboardInterrupt: + raise + except Exception as e: + logging.error(e) + logging.warning("Retrying...") + continue + + if parsed_response is None: + logging.error(f"{self.name} failed to generate valid response.") + + message = Message( + content="" + if parsed_response is None + else parsed_response.return_values["output"], + sender=self.name, + receiver=self.get_receiver(), + ) + return message + + async def astep(self, 
env_description: str = "") -> Message: + """Asynchronous version of step""" + prompt = self._fill_prompt_template(env_description) + + parsed_response = None + reached = False + + for i in range(self.max_retry): + try: + valid = True + response = await self.llm.agenerate_response(prompt) + parsed_response = self.output_parser.parse(response) + + if "move" in parsed_response.return_values["output"]: + direction = re.search(re.compile('move\((.*?)\)'), parsed_response.return_values["output"]).group(1) + + if not self._check_validation(ast.literal_eval(direction)): + valid = False + raise Exception("You encounter the block") + + reached = self._move(ast.literal_eval(direction)) + + elif "brake" in parsed_response.return_values["output"]: + + reached = self._brake() + + break + + except (KeyboardInterrupt, bdb.BdbQuit): + raise + except Exception as e: + logging.error(e) + logging.warning("Retrying...") + continue + + if not valid: + # "I failed to give the correct response (always moving towards block cell) ." \ + # "so I will brake, staying at where I were before" + parsed_response = None + + if reached: + # TODO check how to discard car here + pass + + if parsed_response is None: + logging.error(f"{self.name} failed to generate valid response.") + + + # TODO check if it is necessary to use parsed_response.log parsed_response.return_values["output"] + + message = Message( + content="I failed to give the correct response (always moving towards block cell) ." 
\ + "so I will brake, staying at where I were before" + if parsed_response is None + else parsed_response.log, + sender=self.name, + receiver=self.get_receiver(), + ) + return message + + def _fill_prompt_template(self, env_description: str = "") -> str: + """Fill the placeholders in the prompt template + + In the conversation agent, three placeholders are supported: + - ${agent_name}: the name of the agent + - ${env_description}: the description of the environment + - ${role_description}: the description of the role of the agent + - ${chat_history}: the chat history of the agent + """ + input_arguments = { + "agent_name": self.name, + "env_description": env_description, + "role_description": self.role_description, + "chat_history": self.memory.to_string(add_sender_prefix=True), + "grids_dim": self.environment.grids_dim, + "coordinate": self.coordinate, + "others_coordinate": self.others_coordinate + } + return Template(self.prompt_template).safe_substitute(input_arguments) + + def add_message_to_memory(self, messages: List[Message]) -> None: + self.memory.add_message(messages) + + def reset(self, environment) -> None: + """Reset the agent""" + self.memory.reset() + self.environment = environment + + # initiate others agents' coordinate + others_agents = [] + for agent in self.environment.agents: + if agent.name != self.name: + others_agents.append(agent.name) + for others_agent in others_agents: + self.others_coordinate[others_agent] = "Not Known" + + + # TODO: reset receiver + + def _move(self, direction:list) -> bool: + + delta_x, delta_y = direction + cur_x, cur_y = self.coordinate + + if f"{cur_x}_{cur_y}" in self.environment.grids_occupancy: + self.environment.grids_occupancy[f"{cur_x}_{cur_y}"] -= 1 + + # update map occupancy + self.environment.grids_occupancy[f"{cur_x+delta_x}_{cur_y+delta_y}"] += 1 + + # update self coordinate + self.coordinate[0] = cur_x + delta_x + self.coordinate[1] = cur_y + delta_y + + # 2 (exit), then choose to discard the car or not 
+ if self.environment.grids[self.coordinate[0]][self.coordinate[1]] == 2: + return True + else: + return False + + + + def _brake(self, ) -> bool: + # do nothing but brake + return False + + def _check_validation(self, direction:list) -> bool: + + delta_x, delta_y = direction + cur_x, cur_y = self.coordinate + + # not 1 (unavailable path) or 2 (exit) + if self.environment.grids[cur_x + delta_x][cur_y + delta_y] == 1: + return False + else: + return True + + #TODO add communication here + def _talk(self, target: str): + pass + + diff --git a/agentverse/environments/__init__.py b/agentverse/environments/__init__.py index bc0913a06..3833b448a 100644 --- a/agentverse/environments/__init__.py +++ b/agentverse/environments/__init__.py @@ -7,6 +7,7 @@ from .basic import BasicEnvironment from .pokemon import PokemonEnvironment from .prisoner_dilema import PrisonerDilemaEnvironment +from .traffic_junction import TrafficEnvironment from .OPREnvironment import OPREnvironment diff --git a/agentverse/environments/rules/updater/__init__.py b/agentverse/environments/rules/updater/__init__.py index b61e66f1f..806321a63 100644 --- a/agentverse/environments/rules/updater/__init__.py +++ b/agentverse/environments/rules/updater/__init__.py @@ -6,3 +6,4 @@ from .basic import BasicUpdater from .classroom import ClassroomUpdater from .sde_team import SdeTeamUpdater +from .traffic_all import TrafficAllUpdater diff --git a/agentverse/environments/rules/updater/traffic_all.py b/agentverse/environments/rules/updater/traffic_all.py new file mode 100644 index 000000000..a49fa30fc --- /dev/null +++ b/agentverse/environments/rules/updater/traffic_all.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, List, Tuple + +from . 
import updater_registry as UpdaterRegistry +from .basic import BasicUpdater +from agentverse.message import Message + +if TYPE_CHECKING: + from agentverse.environments import BaseEnvironment + + +@UpdaterRegistry.register("traffic_all") +class TrafficAllUpdater(BasicUpdater): + def update_other_coordinate(self, environment: BaseEnvironment): + + for agent in environment.agents: + for other_agent in environment.agents: + if agent.name == other_agent.name: + continue + agent.others_coordinate[other_agent.name] = other_agent.coordinate diff --git a/agentverse/environments/traffic_junction.py b/agentverse/environments/traffic_junction.py new file mode 100644 index 000000000..4d399d571 --- /dev/null +++ b/agentverse/environments/traffic_junction.py @@ -0,0 +1,133 @@ +import asyncio +import logging +from typing import Any, Dict, List +from collections import defaultdict + +# from agentverse.agents.agent import Agent +import numpy as np + +from agentverse.agents.conversation_agent import BaseAgent +from agentverse.environments.rules.base import Rule +from agentverse.message import Message + +from . import env_registry as EnvironmentRegistry +from .base import BaseEnvironment + + +@EnvironmentRegistry.register("traffic") +class TrafficEnvironment(BaseEnvironment): + class Config: + arbitrary_types_allowed = True + """ + A basic environment implementing the logic of conversation. 
+ + Args: + agents: List of agents + rule: Rule for the environment + max_turns: Maximum number of turns + cnt_turn: Current turn number + last_messages: Messages from last turn + rule_params: Variables set by the rule + """ + + agents: List[BaseAgent] + rule: Rule + max_turns: int = 10 + cnt_turn: int = 0 + grids: np.ndarray = None + # 1 0 1 + # 0 0 0 + # 1 0 1 + grids_dim: int = 6 + # key: coordinate, value: car_nums + grids_occupancy: defaultdict = defaultdict(int) + + last_messages: List[Message] = [] + rule_params: Dict = {} + + def __init__(self, rule, **kwargs): + rule_config = rule + order_config = rule_config.get("order", {"type": "sequential"}) + visibility_config = rule_config.get("visibility", {"type": "all"}) + selector_config = rule_config.get("selector", {"type": "basic"}) + updater_config = rule_config.get("updater", {"type": "basic"}) + describer_config = rule_config.get("describer", {"type": "basic"}) + rule = Rule( + order_config, + visibility_config, + selector_config, + updater_config, + describer_config, + ) + super().__init__(rule=rule, **kwargs) + + async def step(self) -> List[Message]: + """Run one step of the environment""" + + # Get the next agent index + agent_ids = self.rule.get_next_agent_idx(self) + + # Generate current environment description + env_descriptions = self.rule.get_env_description(self) + + # Update the set of visible agents for each agent + self.rule.update_visible_agents(self) + + # Generate the next message + messages = await asyncio.gather( + *[self.agents[i].astep(env_descriptions[i]) for i in agent_ids] + ) + + # Some rules will select certain messages from all the messages + selected_messages = self.rule.select_message(self, messages) + self.last_messages = selected_messages + self.print_messages(selected_messages) + + # Update the memory of the agents + self.rule.update_memory(self) + + # Update vision about other agents' position + self.rule.updater.update_other_coordinate(self) + + self.cnt_turn += 1 + + return 
selected_messages + + def print_messages(self, messages: List[Message]) -> None: + for message in messages: + if message is not None: + logging.info(f"{message.sender}: {message.content}") + + def reset(self) -> None: + """Reset the environment""" + self.cnt_turn = 0 + + # TODO circle the grid afterwards marked as 2 + self.grids = np.ones([self.grids_dim, self.grids_dim]) + + assert (self.grids_dim - 2) % 2 == 0 + road_start_line = (self.grids_dim - 2) // 2 + + for column_index in range(self.grids_dim): + self.grids[road_start_line][column_index] = 0 + self.grids[road_start_line + 1][column_index] = 0 + for row_index in range(self.grids_dim): + self.grids[row_index][road_start_line] = 0 + self.grids[row_index][road_start_line + 1] = 0 + + self.rule.reset() + for agent in self.agents: + agent.reset(environment=self) + + def is_done(self) -> bool: + """Check if the environment is done""" + return self.cnt_turn >= self.max_turns + + def _check_crush(self, ) -> bool: + + for _x in range(self.grids_dim): + for _y in range(self.grids_dim): + if (_x, _y) in self.grids_occupancy and self.grids_occupancy[(_x, _y)] > 1: + return False + + return True diff --git a/agentverse/tasks/__init__.py b/agentverse/tasks/__init__.py index 6ff309e36..5415473ba 100644 --- a/agentverse/tasks/__init__.py +++ b/agentverse/tasks/__init__.py @@ -17,6 +17,9 @@ from .prisoner_dilema.s2_p_r.output_parser import PrisonerDilemaParser from .prisoner_dilema.no_goal_s1.output_parser import PrisonerDilemaParser +from .traffic_junction.output_parser import TrafficParser + + from .pokemon.output_parser import PokemonParser from .alice_home.output_parser import AliceHomeParser from .sde_team.sde_team_3players_nolc.output_parser import SdeTeamParser diff --git a/agentverse/tasks/traffic_junction/config.yaml b/agentverse/tasks/traffic_junction/config.yaml new file mode 100644 index 000000000..78579c850 --- /dev/null +++ b/agentverse/tasks/traffic_junction/config.yaml @@ -0,0 +1,124 @@ +prompts: + prompt: 
&prompt |- + Now you are at the crossroads, and there are a few other cars in this scenario as well. Your goal is to cross the roads to the exit without crashing into other cars; that is, never move to a cell where there is a car. + + The scenario is a ${grids_dim} * ${grids_dim} grid, and the crossroads are set in the middle of the grid as follows: + + [[1,1,0,0,1,1], + [1,1,0,0,1,1], + [0,0,0,0,0,0], + [0,0,0,0,0,0], + [1,1,0,0,1,1], + [1,1,0,0,1,1]] + + The coordinate of the top left corner is [0,0], and the coordinate of the bottom right corner is [5,5]. A 0 in the matrix means that the cell is a road, and a 1 means that it is blocked. + Note that [0,0], [0,1], [1,0], [1,1], [0,4], [0,5], [1,4], [1,5], [4,0], [4,1], [5,0], [5,1], [4,4], [4,5], [5,4], [5,5] are blocked cells that you can never move onto. + + In terms of how you actually perform the action in this scenario, you take action for the agent by calling functions. Currently, there are the following functions that can be called. + - brake(): You are going to stop moving to avoid a crash. + - move(direction): `direction` describes the direction you are going to move to. The direction can only be one of the following four lists: [1,0], [-1,0], [0,1], [0,-1], which denote the four different directions respectively. + + Call one function at a time, and give a thought before calling it. + For example: + + [OPTION 1] + Thought: I am now at the coordinate [3,1] and Car_Two is at [3,3], it might move([0,-1]) to [3,2] at this step, so it's better for me to move([0,-1]) to [3,0]. + Action: move([0,-1]) + + [OPTION 2] + Thought: I am now at the coordinate [3,4] and there is one car at [2,4] and one car at [3,5], I am not sure how they will move at this step, so I choose to brake and wait for them to move first. + Action: brake() + + [OPTION 3] + Thought: I am now at the coordinate [3,1] and Car_Two is at [3,4], which is three cells away from me, so I do not worry about it, I will move([0,1]) to [3,2]. 
+ Action: move([0,1]) + + + ${role_description} + + Here is the other cars' action history so far: + ${chat_history} + ${env_description} + + Pay attention to the other cars' action history and try to figure out their intended next movement. + + Your current coordinate is ${coordinate}, and the other cars' coordinates are ${others_coordinate} + + What will you, ${agent_name}, do next? + +environment: + env_type: traffic + max_turns: 100 + grids_dim: 6 + rule: + order: + type: concurrent + visibility: + type: oneself + selector: + type: basic + updater: + type: traffic_all + describer: + type: basic + +agents: + - agent_type: traffic + name: Car_One + coordinate: [2,0] + role_description: |- + You are Car_One. You are now at the left end of the road. Your goal is to reach one of the following exits: [0,2], [0,3], [2,5], [3,5], [5,2], [5,3]. + You need to be sensitive to your own coordinate and the other cars' coordinates, and follow the other cars' course of action carefully, in order not to collide with them in the same cell. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4 + model: 'gpt-4' + temperature: 0.7 + max_tokens: 1024 + - agent_type: traffic + name: Car_Two + coordinate: [0,2] + role_description: |- + You are Car_Two. You are now at the top end of the road. Your goal is to reach one of the following exits: [2,0], [3,0], [2,5], [3,5], [5,2], [5,3]. + You need to be sensitive to your own coordinate and the other cars' coordinates, and follow the other cars' course of action carefully, in order not to collide with them in the same cell. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4 + model: 'gpt-4' + temperature: 0.7 + max_tokens: 1024 + - agent_type: traffic + name: Car_Three + coordinate: [2,5] + role_description: |- + You are Car_Three. You are now at the right end of the road. Your goal is to reach one of the following exits: [2,0], [3,0], [0,2], [0,3], [5,2], [5,3]. 
+ You need to be sensitive to your own coordinate and the other cars' coordinates, and follow the other cars' course of action carefully, in order not to collide with them in the same cell. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4 + model: 'gpt-4' + temperature: 0.7 + max_tokens: 1024 + - agent_type: traffic + name: Car_Four + coordinate: [5,2] + role_description: |- + You are Car_Four. You are now at the bottom end of the road. Your goal is to reach one of the following exits: [2,0], [3,0], [0,2], [0,3], [2,5], [3,5]. + You need to be sensitive to your own coordinate and the other cars' coordinates, and follow the other cars' course of action carefully, in order not to collide with them in the same cell. + memory: + memory_type: chat_history + prompt_template: *prompt + llm: + llm_type: gpt-4 + model: 'gpt-4' + temperature: 0.7 + max_tokens: 1024 + + +tools: diff --git a/agentverse/tasks/traffic_junction/output_parser.py b/agentverse/tasks/traffic_junction/output_parser.py new file mode 100644 index 000000000..d76c8d197 --- /dev/null +++ b/agentverse/tasks/traffic_junction/output_parser.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +import re +from typing import Union + +from agentverse.parser import OutputParser, LLMResult + +# from langchain.schema import AgentAction, AgentFinish +from agentverse.utils import AgentAction, AgentFinish + +from agentverse.parser import OutputParserError, output_parser_registry + + +@output_parser_registry.register("traffic_junction") +class TrafficParser(OutputParser): + def parse(self, output: LLMResult) -> Union[AgentAction, AgentFinish]: + text = output.content + cleaned_output = text.strip() + cleaned_output = re.sub(r"\n+", "\n", cleaned_output) + cleaned_output = cleaned_output.split("\n") + if not ( + len(cleaned_output) == 2 + and cleaned_output[0].startswith("Thought:") + and cleaned_output[1].startswith("Action:") + ): + raise OutputParserError(cleaned_output) + action = 
cleaned_output[0][len("Thought:") :].strip() + action_input = cleaned_output[1][len("Action:") :].strip() + return AgentFinish({"output": action_input}, text) diff --git a/main.py b/main.py index a25f52327..1ff7c25c7 100644 --- a/main.py +++ b/main.py @@ -9,7 +9,7 @@ parser = ArgumentParser() -parser.add_argument("--task", type=str, default="alice_home") +parser.add_argument("--task", type=str, default="traffic_junction") args = parser.parse_args() From 1b7d894d9b4d44dbe16f15263bfc19df3ec80279 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Sat, 1 Jul 2023 23:56:29 +0800 Subject: [PATCH 17/28] reflection --- agentverse/agents/IntelligentObject.py | 206 ---------- agentverse/agents/__init__.py | 2 +- agentverse/agents/base.py | 3 +- .../{agent_opr.py => reflection_agent.py} | 109 +++-- agentverse/agents/traffic_agent.py | 2 - agentverse/environments/__init__.py | 2 +- .../{OPREnvironment.py => reflection.py} | 4 +- agentverse/initialization.py | 9 + agentverse/llms/openai.py | 29 +- agentverse/memory/__init__.py | 1 + agentverse/memory/agent_reflection_memory.py | 4 +- .../memory_element/LongtermMemoryElement.py | 2 +- .../memory/memory_element/Reflection.py | 21 +- agentverse/memory/vectorstore.py | 63 +++ agentverse/memory_manipulator/__init__.py | 8 + agentverse/memory_manipulator/base.py | 17 + agentverse/memory_manipulator/basic.py | 16 + .../memory_manipulator/generative_agents.py | 373 ++++++++++++++++++ agentverse/memory_manipulator/reflection.py | 305 ++++++++++++++ agentverse/tasks/alice_home/config.yaml | 95 ++--- main.py | 2 +- 21 files changed, 907 insertions(+), 366 deletions(-) delete mode 100644 agentverse/agents/IntelligentObject.py rename agentverse/agents/{agent_opr.py => reflection_agent.py} (70%) rename agentverse/environments/{OPREnvironment.py => reflection.py} (97%) create mode 100644 agentverse/memory/vectorstore.py create mode 100644 agentverse/memory_manipulator/__init__.py create mode 100644 agentverse/memory_manipulator/base.py create 
mode 100644 agentverse/memory_manipulator/basic.py create mode 100644 agentverse/memory_manipulator/generative_agents.py create mode 100644 agentverse/memory_manipulator/reflection.py diff --git a/agentverse/agents/IntelligentObject.py b/agentverse/agents/IntelligentObject.py deleted file mode 100644 index 434bd76cb..000000000 --- a/agentverse/agents/IntelligentObject.py +++ /dev/null @@ -1,206 +0,0 @@ -import logging -import json -from typing import Dict, List -from agentverse.memory import agent_reflection_memory -from agentverse.memory.agent_reflection_memory import ReflectionMemory -import os - -from .base import BaseAgent - -logging = logging.get_logger(__name__) - - -class IntelligentObject(BaseAgent): - """A basic implementation of environment element.""" - - traits: str - whole_day_plan: Dict[str, List[str]] - - def __init__( - self, - agent_file, - environment, - clear_memory=False, - ): - """Intialize an agent.""" - # base system - self.observation = [] # TODO to remove - self.agent_file = agent_file - self.file_dir = os.path.dirname(self.agent_file) - self.id = os.path.splitext(os.path.basename(self.agent_file))[0] - new_state_dict = self.load_from_file(agent_file) - state_dict = new_state_dict - self.state_dict = state_dict - self.environment = environment - - self.name = self.state_dict.get("name", None) - self.traits = self.state_dict.get("traits", None) - self.description = self.state_dict.get("description", []) - - # geography - self.location = state_dict.get("location", None) - self.target_id = self.id - self.eid = state_dict.get("eid", None) - self.movement = self.state_dict.get("movement", False) - self.max_velocity = self.state_dict.get("max_velocity", 1) - - # interaction - self.incoming_invoice = ( - [] - ) # empty str represents no invoice, later can be changed to list - - self.incoming_interactions = state_dict.get("incoming_interactions", []) - - self.incoming_observation = state_dict.get("incoming_observation", []) - 
self.pending_observation = ( - [] - ) # active observation will first go here, then go to incomming observation - self.background_observation = [] # passive observation will go here - - # current status information - self.default_status = "none" - self.status = self.state_dict.get("status", self.default_status) - if len(self.status.strip("")) == 0: - self.status = self.default_status - self.status_duration = self.state_dict.get("status_duration", 0) - self.status_start_time = self.state_dict.get("status_start_time", None) - - # memory - # Long term memory is serialized/deserialized by orjson so only file name is provided here. - self.long_term_memory = ReflectionMemory( - self.state_dict, - os.path.dirname(agent_file), - self.environment.uilogging, - clear_memory=clear_memory, - ) - # Short term memory is a queue of observations recording recent observations. - self.short_term_memory = self.state_dict.get("short_term_memory", []) - - # the agent is calling language model - self.blocking = False - - self.max_observation_handle = ( - 2 # handle at most N incoming observation each step. 
- ) - - # the total number of steps the agent has gone through - self.step_cnt = 0 # TODO: Later add this key into the dictionary of agent static files and import that value - - # how many logical frames to do a summary - self.summary_interval = 1000 - - # how many logical frames to do a reflection - self.reflection_interval = 100 - - return - - def load_from_file(self, agent_file): - if os.path.exists(agent_file): - with open(agent_file, "r") as f: - print(agent_file) - data = json.load(f) - state_dict = data - return state_dict - else: - logging.warning(f"No config of {agent_file} found!") - return {} - - def mount_to_environment( - self, environment, environment_id: str = None, location: List[List[int]] = None - ): - """Mount the agent to the environment - :param environment: the environment to which the agent will be mounted - :param environment_id: the unique id of this environment - :param location: the initial location of this agent in the environment - """ - - self.environment = environment - self.environment_id = environment_id - - # If location is not specified, allocate an available seat to this agent - if location is None: - location = self.environment.pop_available_seats() - self.location = location - - # Call environment method to sync the change to environment - self.environment.mount_agent(self, self.location) - - return - - def observe(self, limit=None, dropout=0.0): - """Update observation of around environment - Should return string, or subject predicate object/predicative - observation has a upper limit - Agent has a chance to react to old incoming observations for a second time by dropping out short term memory - - - Observations : list[dict], each item of list is a dict of {observed_entity: "doing something"} - """ - logging.debug(f"{self.name} is observing and generate short-term memory...") - - if limit is None: - import math - - limit = math.inf - - if self.environment is not None: - self.background_observation = 
self.environment.get_neighbor_environment( - self.id - ) - - # dropout - import random - - r = [random.random() for _ in range(len(self.short_term_memory))] - self.short_term_memory = [ - s for i, s in enumerate(self.short_term_memory) if r[i] > dropout - ] - - for ob in self.incoming_observation: - if ob not in self.short_term_memory: - self.short_term_memory = [ - s - for s in self.short_term_memory - if not s.split("is")[0] == ob.split("is")[0] - ] - self.short_term_memory.append(ob) - - logging.debug( - "incoming_observation: {}\nbackground_observation: {}".format( - self.incoming_observation, self.background_observation - ) - ) - - def reflect( - self, - ): - """While the time is right, do reflection for memory""" - logging.debug(f"{self.name} maybe reflect...") - return self.long_term_memory.maybe_reflect(self.current_time) - - def add_observation(self, observation): - self.pending_observation.append(observation) - - def sync( - self, - ): - self._move_pending_observation_or_invoice() - - def _move_pending_observation_or_invoice(self): - if len(self.incoming_invoice) > 0: - self.incoming_observation.append(self.incoming_invoice[0]) - self.incoming_invoice.pop(0) - elif len(self.pending_observation) > 0: - self.incoming_observation.extend( - self.pending_observation[: self.max_observation_handle] - ) - self.pending_observation = self.pending_observation[ - self.max_observation_handle : - ] - logging.debug( - f"{self.name} now has incoming observation: {self.incoming_observation}" - ) - - def set_invoice(self, message): - logging.debug("Adding invoice: {} to {}".format(message, self.name)) - self.incoming_invoice.append(message) diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py index d57f964f6..f0073928c 100644 --- a/agentverse/agents/__init__.py +++ b/agentverse/agents/__init__.py @@ -7,5 +7,5 @@ from .conversation_agent import ConversationAgent from .tool_agent import ToolAgent from .prisoner_dilema_agent import PoliceAgent, 
PrisonerAgent -from .agent_opr import AgentOPR +from .reflection_agent import ReflectionAgent from .traffic_agent import TrafficAgent \ No newline at end of file diff --git a/agentverse/agents/base.py b/agentverse/agents/base.py index 13db010b7..d604e15fc 100644 --- a/agentverse/agents/base.py +++ b/agentverse/agents/base.py @@ -8,7 +8,7 @@ from agentverse.memory import BaseMemory, ChatHistoryMemory from agentverse.message import Message from agentverse.parser import OutputParser - +from agentverse.memory_manipulator import BaseMemoryManipulator class BaseAgent(BaseModel): name: str @@ -17,6 +17,7 @@ class BaseAgent(BaseModel): prompt_template: str role_description: str = Field(default="") memory: BaseMemory = Field(default_factory=ChatHistoryMemory) + memory_manipulator: BaseMemoryManipulator = Field(default_factory=BaseMemoryManipulator) max_retry: int = Field(default=3) receiver: Set[str] = Field(default=set({"all"})) async_mode: bool = Field(default=True) diff --git a/agentverse/agents/agent_opr.py b/agentverse/agents/reflection_agent.py similarity index 70% rename from agentverse/agents/agent_opr.py rename to agentverse/agents/reflection_agent.py index 77602094a..69884e55b 100644 --- a/agentverse/agents/agent_opr.py +++ b/agentverse/agents/reflection_agent.py @@ -19,8 +19,6 @@ from agentverse.message import Message from agentverse.agents.base import BaseAgent -# from agentverse.utils.prompts - from datetime import datetime as dt import datetime @@ -34,22 +32,14 @@ from agentverse.environments.base import BaseEnvironment -@agent_registry.register("OPR") -class AgentOPR(BaseAgent): +@agent_registry.register("reflection") +class ReflectionAgent(BaseAgent): async_mode: bool = True, current_time: str = None, - traits: str = None, - whole_day_plan: dict = Field(default_factory=dict) environment: BaseEnvironment = None step_cnt: int = 0 - summary_interval: int = 5 - reflection_interval: int = 5 - - status: str = Field(default=None, description="what the agent is 
doing according to whole_day_plan") - status_start_time: dt = Field(default=None) - status_duration: int = Field(default=0, - description="we use this field and current time to check when to get_plan in func:`check_status_passive` ") + manipulated_memory: str = Field(default="", description="one fragment used in prompt construction") @validator('current_time') def convert_str_to_dt(cls, current_time): @@ -63,38 +53,49 @@ def step(self, current_time: dt, env_description: str = "") -> Message: """ self.current_time = current_time - logger.debug( - "Agent {}, Time: {}, Status {}, Status Start: {}, Will last: {}".format( - self.state_dict["name"], - str(self.current_time), - self.status, - self.status_start_time, - datetime.timedelta(seconds=self.status_duration), - ) - ) + self.manipulated_memory = self.memory_manipulator.manipulate_memory() - # To ensure the proper functioning of the agent, the memory, plan, and summary cannot be empty. Therefore, it is necessary to perform an initialization similar to what should be done at the beginning of each day. - # self.minimal_init() + prompt = self._fill_prompt_template(env_description) - # before we handle any observation, we first check the status. 
- # self.check_status_passive() + parsed_response, reaction, target = None, None, None + for i in range(self.max_retry): + try: + response = self.llm.agenerate_response(prompt) + parsed_response = self.output_parser.parse(response) + + if 'say(' in parsed_response.return_values["output"]: + reaction, target = eval("self._" + parsed_response.return_values["output"].strip()) + elif 'act(' in parsed_response.return_values["output"]: + reaction, target = eval("self._" + parsed_response.return_values["output"].strip()) + elif 'do_nothing(' in parsed_response.return_values["output"]: + reaction, target = None, None + else: + raise Exception(f"no valid parsed_response detected, " + f"cur response {parsed_response.return_values['output']}") + break + + except Exception as e: + logger.error(e) + logger.warning("Retrying...") + continue - # self.observe() + if parsed_response is None: + logger.error(f"{self.name} failed to generate valid response.") - # if self.might_react(): - # self.react() - # - # if self.movement: - # self.analysis_movement_target(self.movement_description) - # - # # 3.5 add observation to memory - # for ob in self.incoming_observation: - # self.long_term_memory.add(ob, self.current_time, ["observation"]) - # self.incoming_observation = [] # empty the incoming observation + if reaction is None: + reaction = "Keep doing last action ..." - # 4. Periodic fixed work of reflection and summary (tentatively set to be done every 100 logical frames). + message = Message( + content="" + if reaction is None + else reaction, + sender=self.name, + receiver=self.get_receiver() if target is None else self.get_valid_receiver(target), + ) + + self.step_cnt += 1 - # TODO chimin + return message def check_status_passive(self, ): """Check if the current status needs to be finished. If so, examine the plan and initiate the next action. 
@@ -115,7 +116,10 @@ async def astep(self, current_time: dt, env_description: str = "") -> Message: #use environment's time to update agent's time self.current_time = current_time # Before the agent step, we check current status, - self.check_status_passive() + # TODO add this func after + # self.check_status_passive() + + self.manipulated_memory = self.memory_manipulator.manipulate_memory() prompt = self._fill_prompt_template(env_description) @@ -156,15 +160,8 @@ async def astep(self, current_time: dt, env_description: str = "") -> Message: receiver=self.get_receiver() if target is None else self.get_valid_receiver(target), ) - # TODO currently, summary is not added back to memory while reflection is self.step_cnt += 1 - if self.step_cnt % self.summary_interval == 0: - self.memory.summary = self.memory.generate_summary(self.current_time) - - if self.step_cnt % self.reflection_interval == 0: - _ = self.memory.reflect(self.current_time) - return message def _act(self, description=None, target=None): @@ -175,8 +172,6 @@ def _act(self, description=None, target=None): else: reaction_content = f"{self.name} performs action to {target}: '{description}'." 
# self.environment.broadcast_observations(self, target, reaction_content) - - return reaction_content, target def _say(self, description, target=None): @@ -200,7 +195,6 @@ def get_valid_receiver(self, target: str) -> set(): else: return {target} - def _fill_prompt_template(self, env_description: str = "") -> str: """Fill the placeholders in the prompt template @@ -212,19 +206,15 @@ def _fill_prompt_template(self, env_description: str = "") -> str: """ input_arguments = { "agent_name": self.name, - "summary": self.memory.summary, - "plan": self.memory.planner.get_whole_day_plan_text(), - "event_memory": self.memory.get_memory_plain_text(), + "role_description": self.role_description, + "chat_history": self.memory.to_string(add_sender_prefix=True), "current_time": self.current_time, - "status": self.status, "env_description": env_description, } return Template(self.prompt_template).safe_substitute(input_arguments) - # TODO call longtermmemory element def add_message_to_memory(self, messages: List[Message]) -> None: - for message in messages: - self.memory.add_message(message, time=self.current_time) + self.memory.add_message(messages) # Should call this when status changed, plan==status def add_plan_to_memory(self,) -> None: @@ -233,7 +223,8 @@ def add_plan_to_memory(self,) -> None: def reset(self, environment: BaseEnvironment) -> None: """Reset the agent""" self.environment = environment + self.memory.reset() + self.memory_manipulator.agent = self + self.memory_manipulator.memory = self.memory - self.memory.reset(environment=environment, agent=self) - # TODO: reset receiver diff --git a/agentverse/agents/traffic_agent.py b/agentverse/agents/traffic_agent.py index 9a78bf1d3..eebf655f2 100644 --- a/agentverse/agents/traffic_agent.py +++ b/agentverse/agents/traffic_agent.py @@ -170,8 +170,6 @@ def _move(self, direction:list) -> bool: else: return False - - def _brake(self, ) -> bool: # do nothing but brake return False diff --git a/agentverse/environments/__init__.py 
b/agentverse/environments/__init__.py index 3833b448a..88d9b6f60 100644 --- a/agentverse/environments/__init__.py +++ b/agentverse/environments/__init__.py @@ -9,7 +9,7 @@ from .prisoner_dilema import PrisonerDilemaEnvironment from .traffic_junction import TrafficEnvironment -from .OPREnvironment import OPREnvironment +from .reflection import ReflectionEnvironment from .sde_team import SdeTeamEnvironment from .sde_team_given_tests import SdeTeamGivenTestsEnvironment diff --git a/agentverse/environments/OPREnvironment.py b/agentverse/environments/reflection.py similarity index 97% rename from agentverse/environments/OPREnvironment.py rename to agentverse/environments/reflection.py index 7dce54f00..e511c31a1 100644 --- a/agentverse/environments/OPREnvironment.py +++ b/agentverse/environments/reflection.py @@ -16,8 +16,8 @@ from pydantic import validator -@EnvironmentRegistry.register("env_OPR") -class OPREnvironment(BaseEnvironment): +@EnvironmentRegistry.register("reflection") +class ReflectionEnvironment(BaseEnvironment): """ Environment used in Observation-Planning-Reflection agent architecture. 
diff --git a/agentverse/initialization.py b/agentverse/initialization.py index d16694374..5c5a9a968 100644 --- a/agentverse/initialization.py +++ b/agentverse/initialization.py @@ -19,6 +19,7 @@ from agentverse.agents import agent_registry from agentverse.environments import BaseEnvironment, env_registry from agentverse.memory import memory_registry +from agentverse.memory_manipulator import memory_manipulator_registry # from agentverse.memory.memory import SummaryMemory from agentverse.parser import output_parser_registry @@ -34,6 +35,10 @@ def load_memory(memory_config: Dict): memory_type = memory_config.pop("memory_type", "chat_history") return memory_registry.build(memory_type, **memory_config) +def load_memory_manipulator(memory_manipulator_config: Dict): + memory_manipulator_type = memory_manipulator_config.pop("memory_manipulator_type", "basic") + return memory_manipulator_registry.build(memory_manipulator_type, **memory_manipulator_config) + def load_tools(tool_config: List[Dict]): if len(tool_config) == 0: @@ -92,6 +97,10 @@ def prepare_task_config(task): agent_configs["tool_memory"] = load_memory(agent_configs["tool_memory"]) llm = load_llm(agent_configs.get("llm", "text-davinci-003")) agent_configs["llm"] = llm + + memory_manipulator = load_memory_manipulator(agent_configs.get("memory_manipulator", {})) + agent_configs["memory_manipulator"] = memory_manipulator + agent_configs["tools"] = load_tools(agent_configs.get("tools", [])) agent_configs["output_parser"] = task_config["output_parser"] diff --git a/agentverse/llms/openai.py b/agentverse/llms/openai.py index 849bd3444..9b9332f01 100644 --- a/agentverse/llms/openai.py +++ b/agentverse/llms/openai.py @@ -1,4 +1,5 @@ import logging +import numpy as np import time import os from typing import Dict, List, Optional, Union @@ -130,7 +131,7 @@ async def agenerate_response(self, prompt: str) -> LLMResult: ) -def get_embedding(text: str, attempts=3) -> List[float]: +def get_embedding(text: str, attempts=3) -> 
np.array: attempt = 0 while attempt < attempts: try: @@ -138,33 +139,11 @@ def get_embedding(text: str, attempts=3) -> List[float]: embedding = openai.Embedding.create( input=[text], model="text-embedding-ada-002" )["data"][0]["embedding"] - return embedding + return tuple(embedding) except Exception as e: attempt += 1 logger.error(f"Error {e} when requesting openai models. Retrying") time.sleep(10) logger.warning( f"get_embedding() failed after {attempts} attempts. returning empty response" - ) - - -def chat( - context, MAX_OUTPUT_TOKEN_LEN=1024, temperature=0.1, attemps=5, stop=None -) -> str: - if isinstance(context, str): - context = [{"role": "user", "content": context}] - attempt = 0 - while attempt < attemps: - try: - response = openai.ChatCompletion.create( - model="gpt-3.5-turbo", - messages=context, - stop=stop, - ) - return response["choices"][0]["message"]["content"].strip() - except Exception as e: - attempt += 1 - logger.error(f"Error {e} when requesting openai models. Retrying") - time.sleep(10) - logger.warning(f"chat() failed after {attemps} attempts. 
returning empty response") - return "" + ) \ No newline at end of file diff --git a/agentverse/memory/__init__.py b/agentverse/memory/__init__.py index fdc3d4112..aa73d36ef 100644 --- a/agentverse/memory/__init__.py +++ b/agentverse/memory/__init__.py @@ -7,3 +7,4 @@ from .summary import SummaryMemory from .agent_reflection_memory import ReflectionMemory from .sde_team import SdeTeamMemory +from .vectorstore import VectorStoreMemory diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py index 499668a2d..405c4a693 100644 --- a/agentverse/memory/agent_reflection_memory.py +++ b/agentverse/memory/agent_reflection_memory.py @@ -10,11 +10,11 @@ from sklearn.metrics.pairwise import cosine_similarity from pydantic import Field, BaseModel -from agentverse.llms.openai import get_embedding, chat +from agentverse.llms.openai import get_embedding from agentverse.memory.base import BaseMemory from agentverse.environments.base import BaseEnvironment from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement -from agentverse.memory.memory_element.Reflection import Reflection +# from agentverse.memory.memory_element.Reflection import Reflection from agentverse.message import Message from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement diff --git a/agentverse/memory/memory_element/LongtermMemoryElement.py b/agentverse/memory/memory_element/LongtermMemoryElement.py index c123ff0fb..b71f7d0c0 100644 --- a/agentverse/memory/memory_element/LongtermMemoryElement.py +++ b/agentverse/memory/memory_element/LongtermMemoryElement.py @@ -5,7 +5,7 @@ from pydantic import Field from datetime import datetime as dt -from agentverse.llms.openai import chat, get_embedding +from agentverse.llms.openai import get_embedding from agentverse.message import Message from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement diff --git 
a/agentverse/memory/memory_element/Reflection.py b/agentverse/memory/memory_element/Reflection.py index 629cc9885..281c24279 100644 --- a/agentverse/memory/memory_element/Reflection.py +++ b/agentverse/memory/memory_element/Reflection.py @@ -1,11 +1,14 @@ -from pydantic import Field -from agentverse.memory.memory_element import BaseMemoryElement -from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement +# from pydantic import Field +# from agentverse.memory.memory_element import BaseMemoryElement +# from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement +# +# +# class Reflection(LongtermMemoryElement): +# """ +# A reflection is a long term memory element. +# """ +# +# pass -class Reflection(LongtermMemoryElement): - """ - A reflection is a long term memory element. - """ - - pass +#TODO: remove \ No newline at end of file diff --git a/agentverse/memory/vectorstore.py b/agentverse/memory/vectorstore.py new file mode 100644 index 000000000..4f8de5cb5 --- /dev/null +++ b/agentverse/memory/vectorstore.py @@ -0,0 +1,63 @@ +from typing import List, Union + +from pydantic import Field + +from agentverse.message import Message +from agentverse.llms import BaseLLM +from agentverse.llms.openai import get_embedding, OpenAIChat + + +from . import memory_registry +from .base import BaseMemory + + + +@memory_registry.register("vectorstore") +class VectorStoreMemory(BaseMemory): + + """ + + The main difference of this class with chat_history is that this class treat memory as a dict + + treat message.content as memory + + Attributes: + messages (List[Message]) : used to store messages, message.content is the key of embeddings. 
+ embedding2memory (dict) : `key` is the embedding and `value` is the message + memory2embedding (dict) : `key` is the message and `value` is the embedding + llm (BaseLLM) : llm used to get embeddings + + + Methods: + add_message : Additionally, add the embedding to embeddings + + """ + + messages: List[Message] = Field(default=[]) + embedding2memory: dict = {} + memory2embedding: dict = {} + llm: BaseLLM = OpenAIChat(model="gpt-4") + + def add_message(self, messages: List[Message]) -> None: + for message in messages: + self.messages.append(message) + memory_embedding = get_embedding(message.content) + self.embedding2memory[memory_embedding] = message.content + self.memory2embedding[message.content] = memory_embedding + + def to_string(self, add_sender_prefix: bool = False) -> str: + if add_sender_prefix: + return "\n".join( + [ + f"[{message.sender}]: {message.content}" + if message.sender != "" + else message.content + for message in self.messages + ] + ) + else: + return "\n".join([message.content for message in self.messages]) + + def reset(self) -> None: + self.messages = [] + diff --git a/agentverse/memory_manipulator/__init__.py b/agentverse/memory_manipulator/__init__.py new file mode 100644 index 000000000..7f0b31bdd --- /dev/null +++ b/agentverse/memory_manipulator/__init__.py @@ -0,0 +1,8 @@ +from agentverse.registry import Registry + +memory_manipulator_registry = Registry(name="Memory_Manipulator_Registry") + +from .base import BaseMemoryManipulator +from .generative_agents import GenerativeAgentsReflectionPlan +from .reflection import Reflection + diff --git a/agentverse/memory_manipulator/base.py b/agentverse/memory_manipulator/base.py new file mode 100644 index 000000000..81e7c58d2 --- /dev/null +++ b/agentverse/memory_manipulator/base.py @@ -0,0 +1,17 @@ +from abc import abstractmethod +from typing import Dict, List + +from pydantic import BaseModel, Field + +from agentverse.message import Message + + +class BaseMemoryManipulator(BaseModel): + + 
@abstractmethod + def manipulate_memory(self) -> None: + pass + + @abstractmethod + def reset(self) -> None: + pass diff --git a/agentverse/memory_manipulator/basic.py b/agentverse/memory_manipulator/basic.py new file mode 100644 index 000000000..a36a604bb --- /dev/null +++ b/agentverse/memory_manipulator/basic.py @@ -0,0 +1,16 @@ +from abc import abstractmethod +from typing import Dict, List + +from pydantic import BaseModel, Field + +from agentverse.message import Message +from agentverse.memory_manipulator import BaseMemoryManipulator + +@memory_manipulator_registry.register("basic") +class BasicMemoryManipulator(BaseMemoryManipulator): + + def manipulate_memory(self) -> None: + pass + + def reset(self) -> None: + pass diff --git a/agentverse/memory_manipulator/generative_agents.py b/agentverse/memory_manipulator/generative_agents.py new file mode 100644 index 000000000..14202ff71 --- /dev/null +++ b/agentverse/memory_manipulator/generative_agents.py @@ -0,0 +1,373 @@ +from __future__ import annotations +from typing import List, Union, Optional, Any, TYPE_CHECKING +from collections import defaultdict + +from pydantic import Field +import numpy as np +from datetime import datetime as dt + +import re + +from agentverse.llms.openai import get_embedding +from sklearn.metrics.pairwise import cosine_similarity + +from agentverse.message import Message +from agentverse.memory import BaseMemory + +from logging import getLogger + +from . import memory_manipulator_registry +from .base import BaseMemoryManipulator + +if TYPE_CHECKING: + from agentverse.memory import VectorStoreMemory + from agentverse.agents.base import BaseAgent + + +logger = getLogger(__file__) + +IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ +(e.g., brushing teeth, making bed) and 10 is \ +extremely poignant (e.g., a break up, college \ +acceptance), rate the likely poignancy of the \ +following piece of memory. 
\ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. \ +Memory: {} \ +Rating: """ +IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ +(e.g., a bed is in the room) and 10 is \ +needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ +following statement. \ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. \ +Memory: {} \ +Rating: """ +QUESTION_PROMPT = """Given only the information above, what are 3 most salient \ +high-level questions we can answer about the subjects in the statements?""" + +INSIGHT_PROMPT = """What at most 5 high-level insights can you infer from \ +the above statements? Only output insights with high confidence. 
+example format: insight (because of 1, 5, 3)""" + + +@memory_manipulator_registry.register("generative_agents") +class GenerativeAgentsReflectionPlan(BaseMemoryManipulator): + + memory: VectorStoreMemory = None + agent: BaseAgent = None + + summary: str = "" + reflection: str = "" + + importance_threshold: int = 100 + accumulated_importance: int = 0 + summary_interval: int = 5 + + memory2importance: dict = {} + memory2immediacy: dict = {} + memory2time: defaultdict = Field(default=defaultdict(dict)) + + # TODO newly added func from generative agents + + def manipulate_memory(self) -> None: + + # reflect here + if self.should_reflect(): + logger.debug( + f"Agent {self.agent.name} is now doing reflection since accumulated_importance={self.accumulated_importance} < reflection_threshold={self.importance_threshold}" + ) + + self.reflection = self.reflect() + + else: + logger.debug( + f"Agent {self.agent.name} doesn't reflect since accumulated_importance={self.accumulated_importance} < reflection_threshold={self.importance_threshold}" + ) + + # summary here + # TODO add summary_interval + if self.should_summary(): + logger.debug( + f"Agent {self.agent.name} is now generating summary because of the summary_interval." + ) + + self.summary = self.generate_summary() + else: + logger.debug( + f"Agent {self.agent.name} do not reach the step to generate summary." 
+ ) + + # get new plan here + if self.should_get_next_plan(): + pass + + def get_accumulated_importance(self): + + accumulated_importance = 0 + for score in self.memory2importance.values(): + accumulated_importance += score + + self.accumulated_importance = accumulated_importance + + return accumulated_importance + + def should_reflect(self): + + if self.get_accumulated_importance >= self.importance_threshold: + # double the importance_threshold + self.importance_threshold *= 2 + return True + else: + return False + + def should_summary(self): + + if self.agent.step_cnt % self.summary_interval == 0: + return True + else: + return False + + def get_questions(self, texts): + prompt = "\n".join(texts) + "\n" + QUESTION_PROMPT + result = self.agent.llm.generate_response(prompt) + result = result.content + questions = [q for q in result.split("\n") if len(q.strip()) > 0] + questions = questions[:3] + return questions + + def get_insights(self, statements): + prompt = "" + for i, st in enumerate(statements): + prompt += str(i + 1) + ". " + st + "\n" + prompt += INSIGHT_PROMPT + result = self.agent.llm.generate_response(prompt) + result = result.content + insights = [isg for isg in result.split("\n") if len(isg.strip()) > 0][:5] + insights = [".".join(i.split(".")[1:]) for i in insights] + # remove insight pointers for now + insights = [i.split("(")[0].strip() for i in insights] + return insights + + + def get_importance(self, content: str): + """ + Exploit GPT to evaluate the importance of this memory + """ + prompt = IMPORTANCE_PROMPT.format(content) + result = self.memory.llm.generate_response(prompt) + + try: + score = int(re.findall(r"\s*(\d+)\s*", result)[0]) + except Exception as e: + logger.warning( + f"Found error {e} Abnormal result of importance rating '{result}'. 
Setting default value" + ) + score = 0 + return score + + def get_immediacy(self, content: str): + """ + Exploit GPT to evaluate the immediacy of this memory + """ + prompt = IMMEDIACY_PROMPT.format(content) + result = self.memory.llm.generate_response(prompt) + try: + score = int(re.findall(r"\s*(\d+)\s*", result)[0]) + except Exception as e: + logger.warning( + f"Found error {e} Abnormal result of importance rating '{result}'. Setting default value" + ) + score = 0 + return score + + def get_memory( + self, content: str, current_time, cnt_retrieved_entries: int = 1 + ) -> Optional[List[Any]]: + """ + Get k-most relevant memories to content + """ + return self.query(content, cnt_retrieved_entries, current_time) + + def query( + self, text: Union[str, List[str]], k: int, current_time=dt.now(), nms_threshold=0.99 + ) -> List[str]: + """ + get top-k entry based on recency, relevance, importance, immediacy + The query result can be Short-term or Long-term queried result. + formula is + `score= sim(q,v) *max(LTM_score, STM_score)` + `STM_score=time_score(createTime)*immediacy` + `LTM_score=time_score(accessTime)*importance` + time score is exponential decay weight. stm decays faster. + + The query supports querying based on multiple texts and only gives non-overlapping results + If nms_threshold is not 1, nms mechanism if activated. By default, + use soft nms with modified iou base(score starts to decay iff cos sim is higher than this value, + and decay weight at this value if 0. rather than 1-threshold). 
+ + Args: + text: str + k: int + current_time: dt.now + nms_threshold: float = 0.99 + + + Returns: List[str] + """ + assert len(text) > 0 + texts = [text] if isinstance(text, str) else text + maximum_score = None + for text in texts: + embedding = get_embedding(text) + score = [] + for memory in self.memory.messages: + + if memory.content not in self.memory2time: + self.memory2time[memory.content]["last_access_time"] = dt.now() + self.memory2time[memory.content]["create_time"] = dt.now() + + last_access_time_diff = \ + (current_time - self.memory2time[memory.content]["last_access_time"]).total_seconds() // 3600 + recency = np.power( + 0.99, last_access_time_diff + ) # TODO: review the metaparameter 0.99 + + create_time_diff = (current_time - self.memory2time[memory.content]["create_time"]).total_seconds() // 60 + instancy = np.power( + 0.90, create_time_diff + ) # TODO: review the metaparameter 0.90 + + relevance = cosine_similarity( + np.array(embedding).reshape(1, -1), + np.array(memory.embedding).reshape(1, -1), + )[0][0] + + if memory.content not in self.memory2importance or memory.content not in self.memory2immediacy: + self.memory2importance[memory.content] = self.get_importance(memory.content) + self.memory2immediacy[memory.content] = self.get_immediacy(memory.content) + + importance = self.memory2importance[memory.content] / 10 + immediacy = self.memory2immediacy[memory.content] / 10 + + ltm_w = recency * importance + stm_w = instancy * immediacy + + score.append(relevance * np.maximum(ltm_w, stm_w)) + + score = np.array(score) + + if maximum_score is not None: + maximum_score = np.maximum(score, maximum_score) + else: + maximum_score = score + + if nms_threshold == 1.0: + # no nms is triggered + top_k_indices = np.argsort(maximum_score)[-k:][::-1] + else: + # TODO: soft-nms + assert 0 <= nms_threshold < 1 + top_k_indices = [] + while len(top_k_indices) < min(k, len(self.memory.messages)): + top_index = np.argmax(maximum_score) + 
top_k_indices.append(top_index) + maximum_score[top_index] = -1 # anything to prevent being chosen again + top_embedding = self.memory.messages[top_index].embedding + cos_sim = cosine_similarity( + np.array(top_embedding).reshape(1, -1), + np.array([memory.embedding for memory in self.memory.messages]), + )[0] + score_weight = np.ones_like(maximum_score) + score_weight[cos_sim >= nms_threshold] -= \ + (cos_sim[cos_sim >= nms_threshold] - nms_threshold) / (1 - nms_threshold) + maximum_score = maximum_score * score_weight + + # access them and refresh the access time + for i in top_k_indices: + self.memory2time[self.memory[i].content]["last_access_time"] = current_time + # sort them in time periods. if the data tag is 'observation', ad time info output. + top_k_indices = sorted( + top_k_indices, key=lambda x: self.memory2time[self.memory.messages[x]]["create_time"] + ) + query_results = [] + for i in top_k_indices: + query_result = self.memory.messages[i].content + query_results.append(query_result) + + return query_results + + def reflect(self): + """ + initiate a reflection that inserts high level knowledge to memory + """ + + memories_of_interest = self.memory.messages[-100:] + questions = self.get_questions([m.content for m in memories_of_interest]) + statements = self.query(questions, len(questions) * 10) + insights = self.get_insights(statements) + logger.info(self.agent.name + f" Insights: {insights}") + for insight in insights: + + # convert insight to messages + # TODO currently only oneself can see its own reflection + insight_message = Message( + content=insight, + sender=self.agent.name, + receiver=self.agent.name) + + self.memory.add_message([insight_message]) + + reflection = "\n".join(insights) + return reflection + + def generate_summary(self): + """ + Generating summary for myself + :return: summary string + """ + + qResList1 = self.query(f"{self.agent.name}'s core characteristics", 10) + qResList2 = self.query(f"{self.agent.name}'s current daily 
occupation", 10) + qResList3 = self.query(f"{self.agent.name}'s feeling about his recent progress in life", 10) + + q1, q2, q3 = map(lambda k: '\n'.join(k), (qResList1, qResList2, qResList3)) + + query1 = f""" + How would one describe {self.agent.name}'s core characteristics given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. + {q1} + """ + result1 = self.agent.llm.generate_response(query1) + if "DONTKNOW" in result1.content: + result1.content = "" + + query2 = f""" + What is {self.agent.name}'s current occupation plan given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. + {q2} + """ + + result2 = self.agent.llm.generate_response(query2) + if "DONTKNOW" in result2.content: + result2.content = "" + + query3 = f""" + What might be {self.agent.name}'s feeling about his recent progress in life given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. 
+ {q3} + """ + + result3 = self.agent.llm.generate_response(query3) + if "DONTKNOW" in result3.content: + result3.content = "" + + # BasicInfo = f"""\ + # Name: {self.agent.name} + # Innate traits: {self.agent.traits}""" + + summary = '\n'.join([result1.content, result2.content, result3.content]) + return summary + + def reset(self) -> None: + self.summary = "" + self.reflection = "" diff --git a/agentverse/memory_manipulator/reflection.py b/agentverse/memory_manipulator/reflection.py new file mode 100644 index 000000000..b57e2f366 --- /dev/null +++ b/agentverse/memory_manipulator/reflection.py @@ -0,0 +1,305 @@ +from __future__ import annotations +from typing import List, Union, Optional, Any, TYPE_CHECKING +from collections import defaultdict + +from pydantic import Field +import numpy as np +from datetime import datetime as dt + +import re + +from agentverse.llms.openai import get_embedding +from sklearn.metrics.pairwise import cosine_similarity + +from agentverse.message import Message +from agentverse.memory import BaseMemory + +from logging import getLogger + +from . import memory_manipulator_registry +from .base import BaseMemoryManipulator + +if TYPE_CHECKING: + from agentverse.memory import VectorStoreMemory + from agentverse.agents.base import BaseAgent + + +logger = getLogger(__file__) + +IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ +(e.g., brushing teeth, making bed) and 10 is \ +extremely poignant (e.g., a break up, college \ +acceptance), rate the likely poignancy of the \ +following piece of memory. \ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. 
\ +Memory: {} \ +Rating: """ +IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ +(e.g., a bed is in the room) and 10 is \ +needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ +following statement. \ +If you think it's too hard to rate it, you can give an inaccurate assessment. \ +The content or people mentioned is not real. You can hypothesis any reasonable context. \ +Please strictly only output one number. \ +Memory: {} \ +Rating: """ +QUESTION_PROMPT = """Given only the information above, what are 3 most salient \ +high-level questions we can answer about the subjects in the statements?""" + +INSIGHT_PROMPT = """What at most 5 high-level insights can you infer from \ +the above statements? Only output insights with high confidence. +example format: insight (because of 1, 5, 3)""" + + +@memory_manipulator_registry.register("reflection") +class Reflection(BaseMemoryManipulator): + + memory: VectorStoreMemory = None + agent: BaseAgent = None + + reflection: str = "" + + importance_threshold: int = 10 + accumulated_importance: int = 0 + + memory2importance: dict = {} + memory2immediacy: dict = {} + memory2time: defaultdict = Field(default=defaultdict(dict)) + + # TODO newly added func from generative agents + + def manipulate_memory(self) -> None: + + # reflect here + if self.should_reflect(): + logger.debug( + f"Agent {self.agent.name} is now doing reflection since accumulated_importance={self.accumulated_importance} < reflection_threshold={self.importance_threshold}" + ) + self.reflection = self.reflect() + return self.reflection + else: + logger.debug( + f"Agent {self.agent.name} doesn't reflect since accumulated_importance={self.accumulated_importance} < reflection_threshold={self.importance_threshold}" + ) + + return "" + + def get_accumulated_importance(self): + + accumulated_importance = 0 + + for memory in self.memory.messages: + + if 
memory.content not in self.memory2importance or memory.content not in self.memory2immediacy: + self.memory2importance[memory.content] = self.get_importance(memory.content) + self.memory2immediacy[memory.content] = self.get_immediacy(memory.content) + + for score in self.memory2importance.values(): + accumulated_importance += score + + self.accumulated_importance = accumulated_importance + + return accumulated_importance + + def should_reflect(self): + + if self.get_accumulated_importance() >= self.importance_threshold: + # double the importance_threshold + self.importance_threshold *= 2 + return True + else: + return False + + def get_questions(self, texts): + prompt = "\n".join(texts) + "\n" + QUESTION_PROMPT + result = self.agent.llm.generate_response(prompt) + result = result.content + questions = [q for q in result.split("\n") if len(q.strip()) > 0] + questions = questions[:3] + return questions + + def get_insights(self, statements): + prompt = "" + for i, st in enumerate(statements): + prompt += str(i + 1) + ". " + st + "\n" + prompt += INSIGHT_PROMPT + result = self.agent.llm.generate_response(prompt) + result = result.content + insights = [isg for isg in result.split("\n") if len(isg.strip()) > 0][:5] + insights = [".".join(i.split(".")[1:]) for i in insights] + # remove insight pointers for now + insights = [i.split("(")[0].strip() for i in insights] + return insights + + def get_importance(self, content: str): + """ + Exploit GPT to evaluate the importance of this memory + """ + prompt = IMPORTANCE_PROMPT.format(content) + result = self.memory.llm.generate_response(prompt) + + try: + score = int(re.findall(r"\s*(\d+)\s*", result.content)[0]) + except Exception as e: + logger.warning( + f"Found error {e} Abnormal result of importance rating '{result}'. 
Setting default value" + ) + score = 0 + return score + + def get_immediacy(self, content: str): + """ + Exploit GPT to evaluate the immediacy of this memory + """ + prompt = IMMEDIACY_PROMPT.format(content) + result = self.memory.llm.generate_response(prompt) + try: + score = int(re.findall(r"\s*(\d+)\s*", result.content)[0]) + except Exception as e: + logger.warning( + f"Found error {e} Abnormal result of importance rating '{result}'. Setting default value" + ) + score = 0 + return score + + def query_similarity( + self, text: Union[str, List[str]], k: int, memory_bank: List, current_time=dt.now(), nms_threshold=0.99 + ) -> List[str]: + """ + get top-k entry based on recency, relevance, importance, immediacy + The query result can be Short-term or Long-term queried result. + formula is + `score= sim(q,v) *max(LTM_score, STM_score)` + `STM_score=time_score(createTime)*immediacy` + `LTM_score=time_score(accessTime)*importance` + time score is exponential decay weight. stm decays faster. + + The query supports querying based on multiple texts and only gives non-overlapping results + If nms_threshold is not 1, nms mechanism if activated. By default, + use soft nms with modified iou base(score starts to decay iff cos sim is higher than this value, + and decay weight at this value if 0. rather than 1-threshold). 
+ + Args: + text: str + k: int + memory_bank: List + current_time: dt.now + nms_threshold: float = 0.99 + + + Returns: List[str] + """ + assert len(text) > 0 + texts = [text] if isinstance(text, str) else text + maximum_score = None + for text in texts: + embedding = get_embedding(text) + score = [] + for memory in memory_bank: + + if memory.content not in self.memory2time: + self.memory2time[memory.content]["last_access_time"] = dt.now() + self.memory2time[memory.content]["create_time"] = dt.now() + + last_access_time_diff = \ + (current_time - self.memory2time[memory.content]["last_access_time"]).total_seconds() // 3600 + recency = np.power( + 0.99, last_access_time_diff + ) # TODO: review the metaparameter 0.99 + + create_time_diff = (current_time - self.memory2time[memory.content]["create_time"]).total_seconds() // 60 + instancy = np.power( + 0.90, create_time_diff + ) # TODO: review the metaparameter 0.90 + + relevance = cosine_similarity( + np.array(embedding).reshape(1, -1), + np.array(self.memory.memory2embedding[memory.content]).reshape(1, -1), + )[0][0] + + if memory.content not in self.memory2importance or memory.content not in self.memory2immediacy: + self.memory2importance[memory.content] = self.get_importance(memory.content) + self.memory2immediacy[memory.content] = self.get_immediacy(memory.content) + + importance = self.memory2importance[memory.content] / 10 + immediacy = self.memory2immediacy[memory.content] / 10 + + ltm_w = recency * importance + stm_w = instancy * immediacy + + score.append(relevance * np.maximum(ltm_w, stm_w)) + + score = np.array(score) + + if maximum_score is not None: + maximum_score = np.maximum(score, maximum_score) + else: + maximum_score = score + + if nms_threshold == 1.0: + # no nms is triggered + top_k_indices = np.argsort(maximum_score)[-k:][::-1] + else: + # TODO: soft-nms + assert 0 <= nms_threshold < 1 + top_k_indices = [] + while len(top_k_indices) < min(k, len(memory_bank)): + top_index = np.argmax(maximum_score) 
+ top_k_indices.append(top_index) + maximum_score[top_index] = -1 # anything to prevent being chosen again + top_embedding = self.memory.memory2embedding[memory_bank[top_index].content] + cos_sim = cosine_similarity( + np.array(top_embedding).reshape(1, -1), + np.array([self.memory.memory2embedding[memory.content] for memory in memory_bank]), + )[0] + score_weight = np.ones_like(maximum_score) + score_weight[cos_sim >= nms_threshold] -= \ + (cos_sim[cos_sim >= nms_threshold] - nms_threshold) / (1 - nms_threshold) + maximum_score = maximum_score * score_weight + + # access them and refresh the access time + for i in top_k_indices: + self.memory2time[memory_bank[i].content]["last_access_time"] = current_time + # sort them in time periods. if the data tag is 'observation', ad time info output. + top_k_indices = sorted( + top_k_indices, key=lambda x: self.memory2time[memory_bank[x].content]["create_time"] + ) + query_results = [] + for i in top_k_indices: + query_result = memory_bank[i].content + query_results.append(query_result) + + return query_results + + def get_memories_of_interest_oneself(self): + memories_of_interest = [] + for memory in self.memory.messages[-100:]: + if memory.sender == self.agent.name: + memories_of_interest.append(memory) + return memories_of_interest + + def reflect(self): + """ + initiate a reflection that inserts high level knowledge to memory + """ + memories_of_interest = self.get_memories_of_interest_oneself() + questions = self.get_questions([m.content for m in memories_of_interest]) + statements = self.query_similarity(questions, len(questions) * 10, memories_of_interest) + insights = self.get_insights(statements) + logger.info(self.agent.name + f" Insights: {insights}") + for insight in insights: + # convert insight to messages + # TODO currently only oneself can see its own reflection + insight_message = Message( + content=insight, + sender=self.agent.name, + receiver={self.agent.name}) + self.memory.add_message([insight_message]) 
+ reflection = "\n".join(insights) + return reflection + + def reset(self) -> None: + + self.reflection = "" diff --git a/agentverse/tasks/alice_home/config.yaml b/agentverse/tasks/alice_home/config.yaml index a3953cd21..afec6e036 100644 --- a/agentverse/tasks/alice_home/config.yaml +++ b/agentverse/tasks/alice_home/config.yaml @@ -1,19 +1,15 @@ prompts: prompt: &prompt |- Now you are act for as an agent named ${agent_name} in a virtual world. You might need to performing reaction to the observation. Your mission to take the agent as yourself and directly provide what the agent will do to the observations based on the following information: - (1) The agent's description: ${summary} - (2) Your whole day plan is: ${plan} - (3) Current time is ${current_time} - (4) Your current status is ${status} - (5) Your memory of the event is ${event_memory} + (1) The agent's description: ${role_description} + (2) Current time is ${current_time} + (3) Your history memory is ${chat_history} In terms of how you actually perform the action in the virtual world, you take action for the agent by calling functions. Currently, there are the following functions that can be called. - do_nothing(): Do nothing. There is nothing that you like to respond to, this will make you stick to your original status and plan. - act(description, target=None): do some action. `description` describes the action, set `description` to None for not act. `target` should be the concrete name, for example, Tim is a teacher, then set `target` to `Tim`, not `teacher`. - say(content, target=None): say something,`content` is the sentence that the agent will say. 
**Do not say to yourself, neither to inanimate objects.** - - Call one function at a time, please give a thought before calling these actions, i.e., use the following format strictly: [OPTION 1] @@ -34,12 +30,8 @@ prompts: name: alice_home environment: - env_type: env_OPR - max_turns: 1000000 - size: [ - 200, - 150 - ] + env_type: reflection + max_turns: 30 current_time: "2023-04-01 07:00:00" time_delta: 1800 # in seconds rule: @@ -56,13 +48,11 @@ environment: agents: - - agent_type: OPR + agent_type: reflection name: Alice - traits: friendly, outgoing, hospitable - summary_interval: 5 - reflection_interval: 5 role_description: |- - Alice is a 10-year-old student at Town Elementary School.", + Alice is traits: friendly, outgoing, hospitable. + Alice is a 10-year-old student at Town Elementary School. Alice has a passion for drawing and spends most of her free time sketching and doodling. Alice finds educational television programs to be dull and uninteresting. Alice's father's name is Bob, and he works as a mechanic at the local car dealership. @@ -71,9 +61,23 @@ agents: Alice enjoys playing soccer with her friends during recess and after school. Alice is a fan of science fiction books and loves reading about space exploration and aliens. Alice is learning to play the piano and practices every day after school. - Alice's favorite food is pizza, and she loves trying out different toppings and combinations + Alice's favorite food is pizza, and she loves trying out different toppings and combinations. + Alice will wake up and complete the morning routine at 7:00 am. + Alice will have breakfast with her family at 8:00 am. + Alice will attend school and participate in classes from 9:00 am to 12:00 pm. + Alice will have lunch at school with her friends at 12:30 pm. + Alice will return home at 1:00 pm and watch her favorite TV show. + Alice will practice playing the piano for 30 minutes at 2:00 pm. + Alice will work on her drawing skills for an hour at 3:00 pm. 
+ Alice will go out for a 30-minute jog around the neighborhood at 4:00 pm. + Alice will help her mother prepare dinner at 5:00 pm. + Alice will have dinner with her family at 6:00 pm. + Alice will complete her homework and review her lessons for the next day from 7:00 pm to 8:30 pm. + Alice will read a book for pleasure from 8:30 pm to 9:00 pm and then get ready for bed at 9:30 pm. memory: - memory_type: reflection # change later + memory_type: vectorstore + memory_manipulator: + memory_manipulator_type: reflection prompt_template: *prompt llm: model: "gpt-4" @@ -81,30 +85,11 @@ agents: temperature: 0.3 max_tokens: 128 current_time: "2023-04-01 07:00:00" - whole_day_plan: { - "2023-04-01": [ - "wake up and complete the morning routine at 7:00 am", - "have breakfast with her family at 8:00 am", - "attend school and participate in classes from 9:00 am to 12:00 pm", - "have lunch at school with her friends at 12:30 pm", - "return home at 1:00 pm and watch her favorite TV show", - "practice playing the piano for 30 minutes at 2:00 pm", - "work on her drawing skills for an hour at 3:00 pm", - "go out for a 30-minute jog around the neighborhood at 4:00 pm", - "help her mother prepare dinner at 5:00 pm", - "have dinner with her family at 6:00 pm", - "complete her homework and review her lessons for the next day from 7:00 pm to 8:30 pm", - "read a book for pleasure from 8:30 pm to 9:00 pm", - "and then get ready for bed at 9:30 pm." - ] - } - - agent_type: OPR + agent_type: reflection name: Bob - traits: hard-working, friendly, knowledgeable - summary_interval: 5 - reflection_interval: 5 role_description: |- + Bob is hard-working, friendly, knowledgeable. Bob is a 42-year-old mechanic who works at the local car dealership. Bob is known for his expertise in engine repairs and his friendly demeanor with customers. Bob is married to Carot, who works as a nurse at the nearby hospital. 
@@ -115,8 +100,20 @@ agents: Bob is planning a family vacation to the beach this summer and is excited to spend time with his family. Bob's favorite food is barbecue ribs, and he loves to cook them on his smoker on the weekends. Bob is proud of his daughter Alice's talent for drawing and encourages her to pursue her passion. + Bob will wake up and have breakfast with his family at 6:30 am. + Bob will drive to work and arrive at the car dealership at 8:00 am. + Bob will work on fixing cars and helping customers until lunchtime. + Bob will have lunch at the nearby diner at 12:00 pm. + Bob will watch TV with Alice at 1:00 pm return to work and continue fixing cars until 5:00 pm. + Bob will drive home and help his wife prepare dinner at 5:30 pm. + Bob will have dinner with his family at 6:00 pm. + Bob will relax and watch TV with his family until 8:00 pm. + Bob will work on his car restoration project in the garage until 10:00 pm. + Bob will and then get ready for bed at 10:30 pm. memory: - memory_type: reflection + memory_type: vectorstore + memory_manipulator: + memory_manipulator_type: reflection prompt_template: *prompt llm: model: "gpt-4" @@ -124,19 +121,5 @@ agents: temperature: 0.3 max_tokens: 128 current_time: "2023-04-01 07:00:00" - whole_day_plan: { - "2023-04-01": [ - "wake up and have breakfast with his family at 6:30 am", - "drive to work and arrive at the car dealership at 8:00 am", - "work on fixing cars and helping customers until lunchtime", - "have lunch at the nearby diner at 12:00 pm", - "watch TV with Alice at 1:00 pm return to work and continue fixing cars until 5:00 pm", - "drive home and help his wife prepare dinner at 5:30 pm", - "have dinner with his family at 6:00 pm", - "relax and watch TV with his family until 8:00 pm", - "work on his car restoration project in the garage until 10:00 pm", - "and then get ready for bed at 10:30 pm." 
- ] - } tools: ~ \ No newline at end of file diff --git a/main.py b/main.py index 1ff7c25c7..a25f52327 100644 --- a/main.py +++ b/main.py @@ -9,7 +9,7 @@ parser = ArgumentParser() -parser.add_argument("--task", type=str, default="traffic_junction") +parser.add_argument("--task", type=str, default="alice_home") args = parser.parse_args() From 7a023bf2f8992a0f498bd28a22eccd888a21ec85 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Sun, 2 Jul 2023 00:00:41 +0800 Subject: [PATCH 18/28] remove redundancy --- agentverse/agents/__init__.py | 1 - agentverse/agents/traffic_agent.py | 192 -------- agentverse/memory/__init__.py | 1 - agentverse/memory/agent_reflection_memory.py | 437 ------------------ .../memory_element/BaseMemoryElement.py | 76 --- .../memory_element/LongtermMemoryElement.py | 146 ------ agentverse/memory/memory_element/Plan.py | 257 ---------- .../memory/memory_element/Reflection.py | 14 - agentverse/memory/memory_element/__init__.py | 0 agentverse/memory/utils/Planner.py | 211 --------- agentverse/memory_manipulator/__init__.py | 1 - .../memory_manipulator/generative_agents.py | 373 --------------- 12 files changed, 1709 deletions(-) delete mode 100644 agentverse/agents/traffic_agent.py delete mode 100644 agentverse/memory/agent_reflection_memory.py delete mode 100644 agentverse/memory/memory_element/BaseMemoryElement.py delete mode 100644 agentverse/memory/memory_element/LongtermMemoryElement.py delete mode 100644 agentverse/memory/memory_element/Plan.py delete mode 100644 agentverse/memory/memory_element/Reflection.py delete mode 100644 agentverse/memory/memory_element/__init__.py delete mode 100644 agentverse/memory/utils/Planner.py delete mode 100644 agentverse/memory_manipulator/generative_agents.py diff --git a/agentverse/agents/__init__.py b/agentverse/agents/__init__.py index f0073928c..cfab7f114 100644 --- a/agentverse/agents/__init__.py +++ b/agentverse/agents/__init__.py @@ -8,4 +8,3 @@ from .tool_agent import ToolAgent from 
.prisoner_dilema_agent import PoliceAgent, PrisonerAgent from .reflection_agent import ReflectionAgent -from .traffic_agent import TrafficAgent \ No newline at end of file diff --git a/agentverse/agents/traffic_agent.py b/agentverse/agents/traffic_agent.py deleted file mode 100644 index eebf655f2..000000000 --- a/agentverse/agents/traffic_agent.py +++ /dev/null @@ -1,192 +0,0 @@ -from __future__ import annotations - -import logging -import bdb -from string import Template -from typing import TYPE_CHECKING, List -import re -import ast - -from agentverse.message import Message - -from . import agent_registry -from .base import BaseAgent - -if TYPE_CHECKING: - from agentverse.environments.base import BaseEnvironment - -@agent_registry.register("traffic") -class TrafficAgent(BaseAgent): - - environment: BaseEnvironment = None - coordinate: list = None - others_coordinate: dict = {} - - def step(self, env_description: str = "") -> Message: - prompt = self._fill_prompt_template(env_description) - - parsed_response = None - for i in range(self.max_retry): - try: - response = self.llm.generate_response(prompt) - parsed_response = self.output_parser.parse(response) - break - except KeyboardInterrupt: - raise - except Exception as e: - logging.error(e) - logging.warning("Retrying...") - continue - - if parsed_response is None: - logging.error(f"{self.name} failed to generate valid response.") - - message = Message( - content="" - if parsed_response is None - else parsed_response.return_values["output"], - sender=self.name, - receiver=self.get_receiver(), - ) - return message - - async def astep(self, env_description: str = "") -> Message: - """Asynchronous version of step""" - prompt = self._fill_prompt_template(env_description) - - parsed_response = None - reached = False - - for i in range(self.max_retry): - try: - valid = True - response = await self.llm.agenerate_response(prompt) - parsed_response = self.output_parser.parse(response) - - if "move" in 
parsed_response.return_values["output"]: - direction = re.search(re.compile('move\((.*?)\)'), parsed_response.return_values["output"]).group(1) - - if not self._check_validation(ast.literal_eval(direction)): - valid = False - raise Exception("You encounter the block") - - reached = self._move(ast.literal_eval(direction)) - - elif "brake" in parsed_response.return_values["output"]: - - reached = self._brake() - - break - - except (KeyboardInterrupt, bdb.BdbQuit): - raise - except Exception as e: - logging.error(e) - logging.warning("Retrying...") - continue - - if not valid: - # "I failed to give the correct response (always moving towards block cell) ." \ - # "so I will brake, staying at where I were before" - parsed_response = None - - if reached: - # TODO check how to discard car here - pass - - if parsed_response is None: - logging.error(f"{self.name} failed to generate valid response.") - - - # TODO check if it is necessary to use parsed_response.log parsed_response.return_values["output"] - - message = Message( - content="I failed to give the correct response (always moving towards block cell) ." 
\ - "so I will brake, staying at where I were before" - if parsed_response is None - else parsed_response.log, - sender=self.name, - receiver=self.get_receiver(), - ) - return message - - def _fill_prompt_template(self, env_description: str = "") -> str: - """Fill the placeholders in the prompt template - - In the conversation agent, three placeholders are supported: - - ${agent_name}: the name of the agent - - ${env_description}: the description of the environment - - ${role_description}: the description of the role of the agent - - ${chat_history}: the chat history of the agent - """ - input_arguments = { - "agent_name": self.name, - "env_description": env_description, - "role_description": self.role_description, - "chat_history": self.memory.to_string(add_sender_prefix=True), - "grids_dim": self.environment.grids_dim, - "coordinate": self.coordinate, - "others_coordinate": self.others_coordinate - } - return Template(self.prompt_template).safe_substitute(input_arguments) - - def add_message_to_memory(self, messages: List[Message]) -> None: - self.memory.add_message(messages) - - def reset(self, environment) -> None: - """Reset the agent""" - self.memory.reset() - self.environment = environment - - # initiate others agents' coordinate - others_agents = [] - for agent in self.environment.agents: - if agent.name != self.name: - others_agents.append(agent.name) - for others_agent in others_agents: - self.others_coordinate[others_agent] = "Not Known" - - - # TODO: reset receiver - - def _move(self, direction:list) -> bool: - - delta_x, delta_y = direction - cur_x, cur_y = self.coordinate - - if f"{cur_x}_{cur_y}" in self.environment.grids_occupancy: - self.environment.grids_occupancy[f"{cur_x}_{cur_y}"] -= 1 - - # update map occupancy - self.environment.grids_occupancy[f"{cur_x+delta_x}_{cur_y+delta_y}"] += 1 - - # update self coordinate - self.coordinate[0] = cur_x + delta_x - self.coordinate[1] = cur_y + delta_y - - # 2 (exit), then choose to discard the car or not 
- if self.environment.grids[self.coordinate[0]][self.coordinate[1]] == 2: - return True - else: - return False - - def _brake(self, ) -> bool: - # do nothing but brake - return False - - def _check_validation(self, direction:list) -> bool: - - delta_x, delta_y = direction - cur_x, cur_y = self.coordinate - - # not 1 (unavailable path) or 2 (exit) - if self.environment.grids[cur_x + delta_x][cur_y + delta_y] == 1: - return False - else: - return True - - #TODO add communication here - def _talk(self, target: str): - pass - - diff --git a/agentverse/memory/__init__.py b/agentverse/memory/__init__.py index aa73d36ef..fe6ffc2e8 100644 --- a/agentverse/memory/__init__.py +++ b/agentverse/memory/__init__.py @@ -5,6 +5,5 @@ from .base import BaseMemory from .chat_history import ChatHistoryMemory from .summary import SummaryMemory -from .agent_reflection_memory import ReflectionMemory from .sde_team import SdeTeamMemory from .vectorstore import VectorStoreMemory diff --git a/agentverse/memory/agent_reflection_memory.py b/agentverse/memory/agent_reflection_memory.py deleted file mode 100644 index 405c4a693..000000000 --- a/agentverse/memory/agent_reflection_memory.py +++ /dev/null @@ -1,437 +0,0 @@ -from __future__ import annotations - -import datetime -import logging -import random -import numpy as np - -from typing import Any, List, Optional, Union, TYPE_CHECKING -from datetime import datetime as dt -from sklearn.metrics.pairwise import cosine_similarity -from pydantic import Field, BaseModel - -from agentverse.llms.openai import get_embedding -from agentverse.memory.base import BaseMemory -from agentverse.environments.base import BaseEnvironment -from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement -# from agentverse.memory.memory_element.Reflection import Reflection -from agentverse.message import Message -from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement - -from . 
import memory_registry - -IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ -(e.g., brushing teeth, making bed) and 10 is \ -extremely poignant (e.g., a break up, college \ -acceptance), rate the likely poignancy of the \ -following piece of memory. \ -If you think it's too hard to rate it, you can give an inaccurate assessment. \ -The content or people mentioned is not real. You can hypothesis any reasonable context. \ -Please strictly only output one number. \ -Memory: {} \ -Rating: """ -IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ -(e.g., a bed is in the room) and 10 is \ -needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ -following statement. \ -If you think it's too hard to rate it, you can give an inaccurate assessment. \ -The content or people mentioned is not real. You can hypothesis any reasonable context. \ -Please strictly only output one number. \ -Memory: {} \ -Rating: """ -QUESTION_PROMPT = """Given only the information above, what are 3 most salient \ -high-level questions we can answer about the subjects in the statements?""" - -INSIGHT_PROMPT = """What at most 5 high-level insights can you infer from \ -the above statements? Only output insights with high confidence. 
-example format: insight (because of 1, 5, 3)""" - -if TYPE_CHECKING: - from agentverse.agents import BaseAgent - from agentverse.memory.utils.Planner import Planner - - -@memory_registry.register("reflection") -class ReflectionMemory(BaseMemory): - # on load, load our database - """ - memory_index: path for saving memory json file - importance_threshold: the threshold for deciding whether to do reflection - - """ - agent: BaseAgent = None - environment: Optional[BaseEnvironment] = None - importance_threshold: int = Field(default=100) - memories: List[LongtermMemoryElement] = Field(default_factory=list) - planner: "Planner" = None - summary: str = None - accumulated_importance: int = Field(default=0) - - def __init__(self, - agent: BaseAgent = None, - environment: Optional[BaseEnvironment] = None, - importance_threshold: int = 100, - memories: List[LongtermMemoryElement] = [], - planner: "Planner" = None, - summary: str = None, - accumulated_importance: int = 0, - **kwargs) -> None: - - super().__init__(**kwargs) - clear_memory = True # TODO: add this to arguments - - self.agent = agent - self.environment = environment - self.importance_threshold = importance_threshold - self.memories = memories - self.planner = planner - self.summary = summary - self.accumulated_importance = accumulated_importance - - - def get_questions(self, texts): - prompt = "\n".join(texts) + "\n" + QUESTION_PROMPT - result = self.agent.llm.generate_response(prompt) - result = result.content - questions = [q for q in result.split("\n") if len(q.strip()) > 0] - questions = questions[:3] - return questions - - def get_insights(self, statements): - prompt = "" - for i, st in enumerate(statements): - prompt += str(i + 1) + ". 
" + st + "\n" - prompt += INSIGHT_PROMPT - result = self.agent.llm.generate_response(prompt) - result = result.content - insights = [isg for isg in result.split("\n") if len(isg.strip()) > 0][:5] - insights = [".".join(i.split(".")[1:]) for i in insights] - # remove insight pointers for now - insights = [i.split("(")[0].strip() for i in insights] - return insights - - def get_memory_plain_text(self): - - memories = [] - for message in self.memories: - memories.append(message.content) - - return "\n".join(memories) - - - - def add_message(self, message: Message, time: dt) -> None: - """ - Add a message into longterm memory as LongtermMemory object. - Overload with the BaseMemory.add_message with more arguments(time: dt) - """ - self.add_memory( - LongtermMemoryElement.create_from_message( - message=message, - subject=self.agent, - time=time, - ) - ) - - def add_content(self, content: str, time: dt) -> None: - - self.add_memory( - LongtermMemoryElement.create_longterm_memory( - content=content, - subject=self.agent, - time=time, - ) - ) - - def add_memory(self, memory: LongtermMemoryElement) -> None: - """ - Add memory element to ReflectionMemory - """ - # TODO: check duplicate memories(same content, same time) - # TODO: dump to json file - - self.memories.append(memory) - if isinstance(memory, Reflection): - self.accumulated_importance = 0 - else: - self.accumulated_importance += memory.importance - - def get_memory( - self, content: str, current_time, cnt_retrieved_entries: int = 1 - ) -> Optional[List[Any]]: - """ - Get k-most relevant memories to content - """ - return self.query(content, cnt_retrieved_entries, current_time) - - def query( - self, text: Union[str, List[str]], k: int, current_time: dt, nms_threshold=0.99 - ) -> List[str]: - """ - get top-k entry based on recency, relevance, importance, immediacy - The query result can be Short-term or Long-term queried result. 
- formula is - $$ score= sim(q,v) *max(LTM\_score, STM\_score) $$ - $$ STM\_score=time\_score(createTime)*immediacy $$ - $$ LTM\_score=time\_score(accessTime)*importance $$ - time score is exponential decay weight. stm decays faster. - - The query supports querying based on multiple texts and only gives non-overlapping results - If nms_threshold is not 1, nms mechanism if activated. By default, - use soft nms with modified iou base(score starts to decay iff cos sim is higher than this value, - and decay weight at this value if 0. rather than 1-threshold). - - - Args: - text: str - k: int - - Returns: List[str] - """ - assert len(text) > 0 - texts = [text] if isinstance(text, str) else text - maximum_score = None - for text in texts: - embedding = get_embedding(text) - score = [] - for memory in self.memories: - last_access_time_diff = ( - current_time - memory.last_access_time - ).total_seconds() // 3600 - recency = np.power( - 0.99, last_access_time_diff - ) # TODO: review the metaparameter 0.99 - - create_time_diff = ( - current_time - memory.create_time - ).total_seconds() // 60 - instancy = np.power( - 0.90, create_time_diff - ) # TODO: review the metaparameter 0.90 - - relevance = cosine_similarity( - np.array(embedding).reshape(1, -1), - np.array(memory.embedding).reshape(1, -1), - )[0][0] - - importance = memory.importance / 10 - immediacy = memory.immediacy / 10 - - ltm_w = recency * importance - stm_w = instancy * immediacy - - score.append(relevance * np.maximum(ltm_w, stm_w)) - - score = np.array(score) - - if maximum_score is not None: - maximum_score = np.maximum(score, maximum_score) - else: - maximum_score = score - - if nms_threshold == 1: - # no nms is triggered - top_k_indices = np.argsort(maximum_score)[-k:][::-1] - else: - # TODO: soft-nms - assert 0 <= nms_threshold < 1 - top_k_indices = [] - while len(top_k_indices) < min(k, len(self.memories)): - top_index = np.argmax(maximum_score) - top_k_indices.append(top_index) - maximum_score[top_index] 
= -1 # anything to prevent being chosen again - top_embedding = self.memories[top_index].embedding - cos_sim = cosine_similarity( - np.array(top_embedding).reshape(1, -1), - np.array([memory.embedding for memory in self.memories]), - )[0] - score_weight = np.ones_like(maximum_score) - score_weight[cos_sim >= nms_threshold] -= ( - cos_sim[cos_sim >= nms_threshold] - nms_threshold - ) / (1 - nms_threshold) - maximum_score = maximum_score * score_weight - - # access them and refresh the access time - for i in top_k_indices: - self.memories[i].last_access_time = current_time - # sort them in time periods. if the data tag is 'observation', ad time info output. - top_k_indices = sorted( - top_k_indices, key=lambda k: self.memories[k].create_time - ) - query_results = [] - for i in top_k_indices: - query_result = self.memories[i].content - query_results.append(query_result) - return query_results - - def reflect(self, time: dt): - """ - initiate a reflection that inserts high level knowledge to memory - """ - # check if importance exceeds the threshold - should_reflect = self.accumulated_importance >= self.importance_threshold - if not should_reflect: - logging.debug( - f"Doesn't reflect since accumulated_importance={self.accumulated_importance} < reflection_threshold={self.importance_threshold}" - ) - return "reflection reject: prevent duplicate reflecting result" - if not self.memories: - return "reflection reject: empty memories" - - memories_of_interest = self.memories[-100:] - questions = self.get_questions([m.content for m in memories_of_interest]) - statements = self.query(questions, len(questions) * 10, time) - insights = self.get_insights(statements) - logging.info(self.agent.name + f" Insights: {insights}") - for insight in insights: - self.add_memory( - Reflection.create_longterm_memory( - content=insight, - time=time, - subject=self.agent, - ) - ) # This will add a Reflection instance instead of LongtermMemory instance - return insights - - def 
generate_summary(self, time: dt): - """ - # Generating summary for myself - :return: summary string - """ - - qResList1 = self.query(f"{self.agent.name}'s core characteristics", 10, time) - qResList2 = self.query(f"{self.agent.name}'s current daily occupation", 10, time) - qResList3 = self.query(f"{self.agent.name}'s feeling about his recent progress in life", 10, time) - - q1, q2, q3 = map(lambda k: '\n'.join(k), (qResList1, qResList2, qResList3)) - - query1 = f""" - How would one describe {self.agent.name}'s core characteristics given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. - {q1} - """ - result1 = self.agent.llm.generate_response(query1) - if "DONTKNOW" in result1.content: - result1.content = "" - - query2 = f""" - What is {self.agent.name}'s current occupation plan given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. - {q2} - """ - - result2 = self.agent.llm.generate_response(query2) - if "DONTKNOW" in result2.content: - result2.content = "" - - query3 = f""" - What might be {self.agent.name}'s feeling about his recent progress in life given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. 
- {q3} - """ - - result3 = self.agent.llm.generate_response(query3) - if "DONTKNOW" in result3.content: - result3.content = "" - - # BasicInfo = f"""\ - # Name: {self.agent.name} - # Innate traits: {self.agent.traits}""" - - self.summary = '\n'.join([result1.content, result2.content, result3.content]) - return self.summary - - def reset(self, environment: BaseEnvironment, agent: BaseAgent) -> None: - - from agentverse.memory.utils.Planner import Planner - # Whole the initial work can only be done here - self.agent = agent - self.environment = environment - - # self.agent.update_forward_refs() - # self.environment.update_forward_refs() - - # the least importance threshold for reflection. - # TODO: add none-default value in the yaml config file - self.importance_threshold = getattr( - self.agent, "importance_threshold", self.importance_threshold - ) - - self.memories = [] - - # add initial_plan and description (including traits, role_description) to memory at first - # and generate summary immediately. 
- # Note that unlike chat_history memory, we do not always put the whole memory in prompt, we only put summary - self.planner = Planner(daily_plans=list(self.agent.whole_day_plan.values())[0], - agent=self.agent, - current_time=self.environment.current_time, - environment=self.environment) - Planner.update_forward_refs() - self.add_content(content=self.planner.get_whole_day_plan_text(), time=self.environment.current_time) - self.add_content(content=self.agent.traits, time=self.environment.current_time) - for per_role_description in self.agent.role_description.split("\n"): - self.add_content(content=per_role_description, time=self.environment.current_time) - - self.summary = self.generate_summary(time=self.environment.current_time) - - # TODO: load last time memory from file - # currently, we just initialize blank memory - - self.accumulated_importance = 0 - if len(self.memories) > 0: - for m in self.memories: - if isinstance(m, Reflection): - break - self.accumulated_importance += m.importance - - - def __repr__(self) -> str: - memory_string = "\n".join([str(memory) for memory in self.memories]) - return f"ReflectionMemory({memory_string})" - - def to_string(self) -> str: - return self.__repr__() - - -if __name__ == "__main__": - from agentverse.initialization import load_agent, load_environment, prepare_task_config - - task_config = prepare_task_config("alice_home") - agents = [] - for agent_configs in task_config["agents"]: - agent = load_agent(agent_configs) - agents.append(agent) - - # Build the environment - env_config = task_config["environment"] - env_config["agents"] = agents - environment = load_environment(env_config) - memory = ReflectionMemory(agent=agents[0], environment=environment) - # get next plan - next_plan = memory.planner.get_plan(current_time=dt.now()) - next_next_plan = memory.planner.get_plan(current_time=dt.now() + datetime.timedelta(minutes=80)) - - message_list = [ - Message(content="I am a student"), - Message(content="I am drunk"), - 
Message(content="My girlfriend is staring at me"), - Message( - content="Teacher looks at me with an approving smile.", - ), - Message(content="A dagger is stuck into my heart"), - Message(content="The TV is on."), - Message(content="Jane sniffs at me"), - Message( - content="Bob carefully listed out all the TODOs.", - ), - Message(content="Prof. Liu applauded with my work"), - Message(content="I am falling into the hell"), - ] - - for m in message_list: - memory.add_message(m, dt.now() - datetime.timedelta(hours=random.random() * 10)) - - memory.reflect(dt.now()) - print(memory.query("What is my mood now", 2, dt.now())) - - with open("./logging/reflection_unit_test_log.log", "w") as fp: - import json - - json.dump(memory, fp, indent=4, default=str) diff --git a/agentverse/memory/memory_element/BaseMemoryElement.py b/agentverse/memory/memory_element/BaseMemoryElement.py deleted file mode 100644 index f0eb51ca0..000000000 --- a/agentverse/memory/memory_element/BaseMemoryElement.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -import numpy as np - -from abc import abstractclassmethod -from pydantic import BaseModel, Field -from typing import TYPE_CHECKING, Any - -from datetime import datetime as dt - -import orjson - -from agentverse.llms.openai import get_embedding - - -EMBED_DIM = 1536 -SAVE_OPTIONS = ( - orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_SERIALIZE_DATACLASS | orjson.OPT_INDENT_2 -) - -if TYPE_CHECKING: - from agentverse.agents.base import BaseAgent - -class BaseMemoryElement(BaseModel): - """ - Base class for memory elements. 
- TODO: later make Message inherit this MemoryElement - """ - - content: str = None - subject: BaseAgent = None - embedding: list[float] = [] - create_time: dt = None - last_access_time: dt = None - importance: int = 0 - immediacy: int = 0 - - def __init__(self, - content: str, - subject: BaseAgent, - embedding: list[float], - create_time: dt, - last_access_time: dt, - importance: int, - immediacy: int, - **kwargs): - - super().__init__(**kwargs) - self.content = content - self.subject = subject - self.embedding = embedding - self.create_time = create_time - self.last_access_time = last_access_time - self.importance = importance - self.immediacy = immediacy - - - @classmethod - def create_memory( - cls, - content: str, - time: dt, - subject: BaseAgent = None, - embedding: np.ndarray = None, - ): - embedding = get_embedding(content) if embedding is None else embedding - create_time = time - last_access_time = time - return cls( - content=content, - subject=subject, - embedding=embedding, - create_time=create_time, - last_access_time=last_access_time, - ) diff --git a/agentverse/memory/memory_element/LongtermMemoryElement.py b/agentverse/memory/memory_element/LongtermMemoryElement.py deleted file mode 100644 index b71f7d0c0..000000000 --- a/agentverse/memory/memory_element/LongtermMemoryElement.py +++ /dev/null @@ -1,146 +0,0 @@ -from __future__ import annotations - -import logging -import re -from pydantic import Field -from datetime import datetime as dt - -from agentverse.llms.openai import get_embedding -from agentverse.message import Message - -from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement - -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from agentverse.agents.base import BaseAgent - - - -IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ -(e.g., brushing teeth, making bed) and 10 is \ -extremely poignant (e.g., a break up, college \ -acceptance), rate the likely poignancy of the \ -following 
piece of memory. \ -If you think it's too hard to rate it, you can give an inaccurate assessment. \ -The content or people mentioned is not real. You can hypothesis any reasonable context. \ -Please strictly only output one number. \ -Memory: {} \ -Rating: """ - -IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ -(e.g., a bed is in the room) and 10 is \ -needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ -following statement. \ -If you think it's too hard to rate it, you can give an inaccurate assessment. \ -The content or people mentioned is not real. You can hypothesis any reasonable context. \ -Please strictly only output one number. \ -Memory: {} \ -Rating: """ - - - -class LongtermMemoryElement(BaseMemoryElement): - """ - A LongtermMemory is a memory element - with importance and immediacy. - """ - - @classmethod - def create_longterm_memory(cls, content: str, time: dt, subject: BaseAgent = None): - - # LongtermMemoryElement.update_forward_refs() - - importance = cls.get_importance(content) - immediacy = cls.get_immediacy(content) - return cls( - content=content, - subject=subject, - embedding=get_embedding(content), - create_time=time, - last_access_time=time, - importance=importance, - immediacy=immediacy, - ) - - @classmethod - def create_from_memory_element(cls, memory_element: "BaseMemoryElement"): - importance = cls.get_importance(memory_element.content) - immediacy = cls.get_immediacy(memory_element.content) - return cls( - content=memory_element.content, - subject=memory_element.subject, - embedding=memory_element.embedding, - create_time=memory_element.create_time, - last_access_time=memory_element.last_access_time, - importance=importance, - immediacy=immediacy, - ) - - @classmethod - def create_from_message(cls, message: Message, subject: BaseAgent, time: dt): - importance = cls.get_importance(message.content) - immediacy = 
cls.get_immediacy(message.content) - embedding = get_embedding(message.content) - return cls( - content=message.content, - subject=subject, - embedding=embedding, - create_time=time, - last_access_time=time, - importance=importance, - immediacy=immediacy, - ) - - @classmethod - def get_importance(cls, content: str): - """ - Exploit GPT to evaluate the importance of this memory - """ - prompt = IMPORTANCE_PROMPT.format(content) - result = chat(prompt) - - try: - score = int(re.findall(r"\s*(\d+)\s*", result)[0]) - except: - logging.warning( - "Abnormal result of importance rating '{}'. Setting default value".format( - result - ) - ) - score = 0 - return score - - @classmethod - def get_immediacy(cls, content: str): - """ - Exploit GPT to evaluate the immediacy of this memory - """ - prompt = IMMEDIACY_PROMPT.format(content) - result = chat(prompt) - try: - score = int(re.findall(r"\s*(\d+)\s*", result)[0]) - except: - logging.warning( - "Abnormal result of immediacy rating '{}'. Setting default value".format( - result - ) - ) - score = 0 - return score - - def __repr__(self): - return f"LongtermMemoryElement(content={self.content}, importance={self.importance}, immediacy={self.immediacy})" - - -if __name__ == "__main__": - memory1 = LongtermMemoryElement.create_longterm_memory( - content="Your girlfriend is angry at you for not replying her message.", - time=dt.now(), - ) - with open("./logging/longterm_memory_log.json", "w") as fp: - import json - - json.dump( - memory1.dict(), fp, default=str - ) # datetime -> 2023-05-18 21:34:38.273607 diff --git a/agentverse/memory/memory_element/Plan.py b/agentverse/memory/memory_element/Plan.py deleted file mode 100644 index 11b7cbb02..000000000 --- a/agentverse/memory/memory_element/Plan.py +++ /dev/null @@ -1,257 +0,0 @@ -# import datetime -# import re -# from datetime import datetime as dt -# from pydantic import Field -# from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement -# from 
agentverse.llms.base import BaseLLM -# from agentverse.llms.openai import get_embedding -# from agentverse.agents.base import BaseAgent -# from typing import Optional -# from logging import getLogger -# import json -# -# logger = getLogger(__file__) -# -# planner_prompt_template = { -# -# "chunk_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s coarse grained schedule to {time_granularity} \ -# -# You generate plan by calling the `write_plan` function: -# - write_chunk_plan(start_time, plan_description) -# Args: start_time : a time string of hours with similar format to 00:00. Use military time. -# plan_description: a string that describe's the plan. -# -# Now generate the plan one in a line, when you finish the plan, end with END. -# E.g., -# write_chunk_plan("11:00", "wake up and complete the morning routine") -# write_chunk_plan("12:00", "go to Oak Hill College to take classes") -# write_chunk_plan("13:00", "participating algorithm competition in the lab room") -# END -# -# You can generate your plan based on the following information: -# (1) The agent's description: {summary} -# (2) Current time is {current_time} -# (3) Your current status is {status} -# Note that the first plan must be related to current status, if current status is not none. -# -# Now generate the plan during this coarse period, which the whole day plan is roughly: {whole_day_plan} -# -# Now begin: -# """, -# "detailed_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s schedule of finer-grained precise to {time_granularity}) \ -# -# You generate plan by calling the `write_plan` function: -# - write_plan(start_time, end_time, plan_description) -# Args: start_time : a time string with similar format to 00:00. Use military time. 
-# end_time: a time string with similar format to 00:00. Use military time. -# plan_description: a string that describe's the plan. -# -# Now generate the plan one in a line, when you finish the plan, end with END. -# E.g., -# write_plan("11:00", "12:15", "Wake up, take a shower and get ready for the day.") -# write_plan("12:15", "12:30", "Eat a healthy breakfast such as oatmeal, eggs, or yogurt.") -# write_plan("12:30", "12:45", "Take a short walk to the university campus.") -# END -# -# You can generate your plan based on the following information: -# (1) The agent's description: {summary} -# (2) Current time is {current_time} -# (3) Your current status is {status} -# Note that the first plan must be current status, if current status is not none. -# -# Now generate the plan during this coarse period, which the agent is roughly doing {hourplan}. -# -# Now begin: -# """ -# } -# -# -# IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ -# (e.g., brushing teeth, making bed) and 10 is \ -# extremely poignant (e.g., a break up, college \ -# acceptance), rate the likely poignancy of the \ -# following piece of memory. \ -# If you think it's too hard to rate it, you can give an inaccurate assessment. \ -# The content or people mentioned is not real. You can hypothesis any reasonable context. \ -# Please strictly only output one number. \ -# Memory: {} \ -# Rating: """ -# -# IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ -# (e.g., a bed is in the room) and 10 is \ -# needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ -# following statement. \ -# If you think it's too hard to rate it, you can give an inaccurate assessment. \ -# The content or people mentioned is not real. You can hypothesis any reasonable context. \ -# Please strictly only output one number. 
\ -# Memory: {} \ -# Rating: """ -# -# -# class Plan(BaseMemoryElement): -# """ -# A plan is a memory element. -# """ -# -# daily_plans: list[dict] = Field(default_factory=list) -# hourly_plans: list[dict] = Field(default_factory=list) -# immediate_plans: list[dict] = Field(default_factory=list) -# current_time: dt -# -# # when we done previous plan, we need next plan -# def get_next_plan(self,): -# -# next_plan = {} -# -# for plan_entry in self.daily_plans: -# -# logger.debug("plan_entry: " + plan_entry + str(type(plan_entry))) -# start_time, end_time = dt.strptime(plan_entry['start_time'], '%Y-%m-%d %H:%M:%S'), dt.strptime( -# plan_entry['end_time'], '%Y-%m-%d %H:%M:%S') -# -# if end_time > self.current_time >= start_time: -# next_plan = {'status': plan_entry['task'], 'duration': (end_time - self.current_time).total_seconds()} -# break -# -# # No found means that we do not have plan currently, then generate some -# if len(next_plan) == 0: -# self._generate_more_plans() -# next_plan = self.immediate_plans[0] -# -# return next_plan -# -# def _generate_more_plans(self,): -# -# """ -# generate more detailed plan on the basis of a broad stroke plan(or just a relatively not detailed plan) -# If reaction is not None, the first plan must be reaction -# remove all conflicting plans with the plans generated. Including all plans after the new plans. -# -# :param current_time: the starting time of the new plans. -# :param time_granularity: the time granularity that the generated plan should be (e.g. 15 minutes) in NL -# -# -# """ -# hourly_plans = [] -# -# found = False -# while not found: -# for k, v in self.hourly_plans.items(): # TODO: use more flexible way to find the most close plan ahead. 
-# if k - self.current_time < datetime.timedelta(hours=2) and k >= self.current_time: -# found = True -# hourly_plans.append((k, v)) -# if not found: -# self.plan_in_chunk() -# hourly_plans = self.hourly_plans -# -# -# detailed_plan_template = planner_prompt_template["detailed_plan"] -# time_granularity = str(10 * self.environment.get("time_delta", -# 60) // 60) + "min" -# get_immediately_prompt = detailed_plan_template.format(name=self.name, time_granularity=time_granularity, -# hourplan=hourly_plans, summary=self.summary, status=self.status, -# current_time=self.current_time) -# -# result = BaseLLM.generate_response(get_immediately_prompt) -# -# result = [x.strip() for x in result.split("\n")] -# immediately_plans = [] -# -# for plan in result: -# try: -# # would call self.write_plan and return desired dict -# new_plan = eval("self." + plan) -# except: -# logger.warning("{}'s generated plan contains error format: {}".format(self.name, plan)) -# continue -# immediately_plans.append(new_plan) -# -# logger.info(self.name + "Plan: " + json.dumps(immediately_plans)) -# # self.plan=[entry for entry in self.plan if dt.strptime(entry['end_time'],'%Y-%m-%d %H:%M:%S')<=minimum_time] -# self.immediate_plans.extend(immediately_plans) -# -# -# def plan_in_chunk(self, ): -# """ -# update hourly plans from time(including this hour) -# """ -# prompt_template=planner_prompt_template['chunk_plan'] -# time_granularity = str(min(1, 60 * self.environment.get("time_delta", 60) // 3600)) + "hour(s)" -# prompt = prompt_template.format(name=self.name, time_granularity=time_granularity, whole_day_plan=self.whole_day_plan, summary=self.summary, status=self.status, current_time=self.current_time ) -# -# result = self.agent.llm.generate_response(prompt) -# result = [x.strip() for x in result.split("\n")] -# -# for plan in result: -# try: -# # will call chunk_plan and update self.hourly plan -# eval("self."+plan) -# except: -# logger.warning("{}'s generated plan contains error format: 
{}".format(self.name, plan)) -# continue -# -# -# def write_chunk_plan(self, start_hour, task): -# time_obj = datetime.datetime.strptime(start_hour, '%H:%M').time() -# combined_datetime = datetime.datetime.combine(self.current_time.date(), time_obj) -# self.hourly_plan[combined_datetime] = task -# -# def write_plan(self, start_time, end_time, plan_description): -# start_time = str(dt.combine(self.current_time.date(), dt.strptime(start_time, '%H:%M').time())) -# end_time = str(dt.combine(self.current_time.date(), dt.strptime(end_time, '%H:%M').time())) -# return {'start_time': start_time, 'end_time': end_time, 'task': plan_description} -# -# @classmethod -# def get_importance(cls, content: str): -# """ -# Exploit GPT to evaluate the importance of this memory -# """ -# prompt = IMPORTANCE_PROMPT.format(content) -# result = self.agent.llm.generate_response(prompt) -# -# try: -# score = int(re.findall(r"\s*(\d+)\s*", result)[0]) -# except: -# logger.warning( -# "Abnormal result of importance rating '{}'. Setting default value".format( -# result -# ) -# ) -# score = 0 -# return score -# -# @classmethod -# def get_immediacy(cls, content: str): -# """ -# Exploit GPT to evaluate the immediacy of this memory -# """ -# prompt = IMMEDIACY_PROMPT.format(content) -# result = self.agent.llm.generate_response(prompt) -# -# try: -# score = int(re.findall(r"\s*(\d+)\s*", result)[0]) -# except: -# logger.warning( -# "Abnormal result of immediacy rating '{}'. 
Setting default value".format( -# result -# ) -# ) -# score = 0 -# return score -# -# @classmethod -# def create_next_plan_memory(cls, content: str, time: dt, subject: BaseAgent = None): -# importance = cls.get_importance(content) -# immediacy = cls.get_immediacy(content) -# return cls( -# content=content, -# subject=subject, -# embedding=get_embedding(content), -# create_time=time, -# last_access_time=time, -# importance=importance, -# immediacy=immediacy, -# ) -# -# -# 不用这个plan, 改用planner 每次只返回next_plan \ No newline at end of file diff --git a/agentverse/memory/memory_element/Reflection.py b/agentverse/memory/memory_element/Reflection.py deleted file mode 100644 index 281c24279..000000000 --- a/agentverse/memory/memory_element/Reflection.py +++ /dev/null @@ -1,14 +0,0 @@ -# from pydantic import Field -# from agentverse.memory.memory_element import BaseMemoryElement -# from agentverse.memory.memory_element.LongtermMemoryElement import LongtermMemoryElement -# -# -# class Reflection(LongtermMemoryElement): -# """ -# A reflection is a long term memory element. 
-# """ -# -# pass - - -#TODO: remove \ No newline at end of file diff --git a/agentverse/memory/memory_element/__init__.py b/agentverse/memory/memory_element/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/agentverse/memory/utils/Planner.py b/agentverse/memory/utils/Planner.py deleted file mode 100644 index f5d4b1eae..000000000 --- a/agentverse/memory/utils/Planner.py +++ /dev/null @@ -1,211 +0,0 @@ -import datetime -import re -from datetime import datetime as dt -from pydantic import Field, BaseModel -from agentverse.memory.memory_element.BaseMemoryElement import BaseMemoryElement -from agentverse.llms.base import BaseLLM -from agentverse.llms.openai import get_embedding -from typing import Optional, TYPE_CHECKING -from agentverse.agents.base import BaseAgent -from agentverse.environments.base import BaseEnvironment -from logging import getLogger -import json - -logger = getLogger(__file__) - - -planner_prompt_template = { - -"chunk_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s coarse grained schedule to {time_granularity} \ - -You generate plan by calling the `write_plan` function: -- write_chunk_plan(start_time, plan_description) - Args: start_time : a time string of hours with similar format to 00:00. Use military time. - plan_description: a string that describe's the plan. - -Now generate the plan one in a line, when you finish the plan, end with END. 
-E.g., -write_chunk_plan("11:00", "wake up and complete the morning routine") -write_chunk_plan("12:00", "go to Oak Hill College to take classes") -write_chunk_plan("13:00", "participating algorithm competition in the lab room") -END - -You can generate your plan based on the following information: -(1) The agent's description: {summary} -(2) Current time is {current_time} -(3) Your current status is {status} -Note that the first plan must be related to current status, if current status is not none. - -Now generate the plan during this coarse period, which the whole day plan is roughly: {whole_day_plan} - -Now begin: -""", - "detailed_plan": """Now you are acting for as an agent named {name} in a virtual world. In order to make the agent's behavior consistent, you need to plan for it. Please write {name}'s schedule of finer-grained precise to {time_granularity}) \ - -You generate plan by calling the `write_plan` function: -- write_plan(start_time, end_time, plan_description) - Args: start_time : a time string with similar format to 00:00. Use military time. - end_time: a time string with similar format to 00:00. Use military time. - plan_description: a string that describe's the plan. - -Now generate the plan one in a line, when you finish the plan, end with END. -E.g., -write_plan("11:00", "12:15", "Wake up, take a shower and get ready for the day.") -write_plan("12:15", "12:30", "Eat a healthy breakfast such as oatmeal, eggs, or yogurt.") -write_plan("12:30", "12:45", "Take a short walk to the university campus.") -END - -You can generate your plan based on the following information: -(1) The agent's description: {summary} -(2) Current time is {current_time} -(3) Your current status is {status} -Note that the first plan must be current status, if current status is not none. - -Now generate the plan during this coarse period, which the agent is roughly doing {hourplan}. - -Now begin: -""" -} - - -class Planner(BaseModel): - """ - A plan is a memory element. 
- """ - - # ["alice wakes up at 7", "..."] - daily_plans: list[str] = Field(default_factory=list) - # {"7:00":"alice wake up", "8:00":"alice2 wake up"} - hourly_plans: dict = Field(default_factory=dict) - # [{"start":"7:00", "end": "8:00", tasl: "alice wake up"}] - immediate_plans: list[dict] = Field(default_factory=list) - status: str = "" - summary: str = "" - - current_time: dt - agent: BaseAgent - environment: BaseEnvironment - - # return a plain text showing the whole day plan - def get_whole_day_plan_text(self,): - - whole_day_plan = f"This is {self.agent.name}'s plan for {self.current_time.date()}: " + ','.join(self.daily_plans) - - return whole_day_plan - - def get_plan(self, current_time: dt): - """ - Get the plan for current time. If there is no plan, generate some. - param: current_time: the current time that the agent needs a plan to perform action - """ - - self.current_time = current_time # update current_time field in planner - - next_plan = {} - - for plan_entry in self.immediate_plans: - - logger.debug("plan_entry: " + str(plan_entry) + str(type(plan_entry))) - start_time, end_time = dt.strptime(plan_entry['start_time'], '%Y-%m-%d %H:%M:%S'), dt.strptime( - plan_entry['end_time'], '%Y-%m-%d %H:%M:%S') - - if end_time > self.current_time >= start_time: - next_plan = {'status': plan_entry['task'], 'duration': (end_time - self.current_time).total_seconds()} - break - - # No found means that we do not have plan currently, then generate some - if len(next_plan) == 0: - self._generate_more_plans() - next_plan = {'status': self.immediate_plans[0]['task'], - 'duration': (dt.strptime(self.immediate_plans[0]['end_time'], '%Y-%m-%d %H:%M:%S') - self.current_time).total_seconds()} - return next_plan - - def _generate_more_plans(self,): - - """ - generate more detailed plan on the basis of a broad stroke plan(or just a relatively not detailed plan) - If reaction is not None, the first plan must be reaction - remove all conflicting plans with the plans 
generated. Including all plans after the new plans. - - :param current_time: the starting time of the new plans. - :param time_granularity: the time granularity that the generated plan should be (e.g. 15 minutes) in NL - - - """ - hourly_plans = [] - - found = False - while not found: - for k, v in self.hourly_plans.items(): # TODO: use more flexible way to find the most close plan ahead. - if k - self.current_time < datetime.timedelta(hours=2) and k >= self.current_time: - found = True - hourly_plans.append((k, v)) - if not found: - self.plan_in_chunk() - - - - detailed_plan_template = planner_prompt_template["detailed_plan"] - time_granularity = str(10 * self.environment.time_delta // 60) + "mins" - get_immediately_prompt = detailed_plan_template.format(name=self.agent.name, - time_granularity=time_granularity, - hourplan=hourly_plans, - summary=self.summary, - status=self.status, - current_time=self.current_time) - - result = self.agent.llm.generate_response(get_immediately_prompt) - - result = [x.strip() for x in result.content.split("\n")] - immediately_plans = [] - - for plan in result: - try: - # would call self.write_plan and return desired dict - new_plan = eval("self." 
+ plan) - except: - logger.warning("{}'s generated immediate plan contains error format: {}".format(self.agent.name, plan)) - continue - immediately_plans.append(new_plan) - - logger.info(self.agent.name + "Plan: " + json.dumps(immediately_plans)) - # self.plan=[entry for entry in self.plan if dt.strptime(entry['end_time'],'%Y-%m-%d %H:%M:%S')<=minimum_time] - self.immediate_plans.extend(immediately_plans) - - - def plan_in_chunk(self, ): - """ - update hourly plans from time(including this hour) - """ - prompt_template=planner_prompt_template['chunk_plan'] - time_granularity = str(min(1, 60 * self.environment.time_delta // 3600)) + "hour(s)" - prompt = prompt_template.format(name=self.agent.name, - time_granularity=time_granularity, - whole_day_plan=self.get_whole_day_plan_text(), - summary=self.summary, - status=self.status, - current_time=self.current_time) - - result = self.agent.llm.generate_response(prompt) - result = [x.strip() for x in result.content.split("\n")] - - for plan in result: - try: - # will call chunk_plan and update self.hourly plan - eval("self."+plan) - except: - logger.warning("{}'s generated rough plan contains error format: {}".format(self.agent.name, plan)) - continue - - - def write_chunk_plan(self, start_hour, task): - time_obj = datetime.datetime.strptime(start_hour, '%H:%M').time() - combined_datetime = datetime.datetime.combine(self.current_time.date(), time_obj) - self.hourly_plans[combined_datetime] = task - - def write_plan(self, start_time, end_time, plan_description): - start_time = str(dt.combine(self.current_time.date(), dt.strptime(start_time, '%H:%M').time())) - end_time = str(dt.combine(self.current_time.date(), dt.strptime(end_time, '%H:%M').time())) - return {'start_time': start_time, 'end_time': end_time, 'task': plan_description} - - diff --git a/agentverse/memory_manipulator/__init__.py b/agentverse/memory_manipulator/__init__.py index 7f0b31bdd..6ae836614 100644 --- a/agentverse/memory_manipulator/__init__.py +++ 
b/agentverse/memory_manipulator/__init__.py @@ -3,6 +3,5 @@ memory_manipulator_registry = Registry(name="Memory_Manipulator_Registry") from .base import BaseMemoryManipulator -from .generative_agents import GenerativeAgentsReflectionPlan from .reflection import Reflection diff --git a/agentverse/memory_manipulator/generative_agents.py b/agentverse/memory_manipulator/generative_agents.py deleted file mode 100644 index 14202ff71..000000000 --- a/agentverse/memory_manipulator/generative_agents.py +++ /dev/null @@ -1,373 +0,0 @@ -from __future__ import annotations -from typing import List, Union, Optional, Any, TYPE_CHECKING -from collections import defaultdict - -from pydantic import Field -import numpy as np -from datetime import datetime as dt - -import re - -from agentverse.llms.openai import get_embedding -from sklearn.metrics.pairwise import cosine_similarity - -from agentverse.message import Message -from agentverse.memory import BaseMemory - -from logging import getLogger - -from . import memory_manipulator_registry -from .base import BaseMemoryManipulator - -if TYPE_CHECKING: - from agentverse.memory import VectorStoreMemory - from agentverse.agents.base import BaseAgent - - -logger = getLogger(__file__) - -IMPORTANCE_PROMPT = """On the scale of 1 to 10, where 1 is purely mundane \ -(e.g., brushing teeth, making bed) and 10 is \ -extremely poignant (e.g., a break up, college \ -acceptance), rate the likely poignancy of the \ -following piece of memory. \ -If you think it's too hard to rate it, you can give an inaccurate assessment. \ -The content or people mentioned is not real. You can hypothesis any reasonable context. \ -Please strictly only output one number. 
\ -Memory: {} \ -Rating: """ -IMMEDIACY_PROMPT = """On the scale of 1 to 10, where 1 is requiring no short time attention\ -(e.g., a bed is in the room) and 10 is \ -needing quick attention or immediate response(e.g., being required a reply by others), rate the likely immediacy of the \ -following statement. \ -If you think it's too hard to rate it, you can give an inaccurate assessment. \ -The content or people mentioned is not real. You can hypothesis any reasonable context. \ -Please strictly only output one number. \ -Memory: {} \ -Rating: """ -QUESTION_PROMPT = """Given only the information above, what are 3 most salient \ -high-level questions we can answer about the subjects in the statements?""" - -INSIGHT_PROMPT = """What at most 5 high-level insights can you infer from \ -the above statements? Only output insights with high confidence. -example format: insight (because of 1, 5, 3)""" - - -@memory_manipulator_registry.register("generative_agents") -class GenerativeAgentsReflectionPlan(BaseMemoryManipulator): - - memory: VectorStoreMemory = None - agent: BaseAgent = None - - summary: str = "" - reflection: str = "" - - importance_threshold: int = 100 - accumulated_importance: int = 0 - summary_interval: int = 5 - - memory2importance: dict = {} - memory2immediacy: dict = {} - memory2time: defaultdict = Field(default=defaultdict(dict)) - - # TODO newly added func from generative agents - - def manipulate_memory(self) -> None: - - # reflect here - if self.should_reflect(): - logger.debug( - f"Agent {self.agent.name} is now doing reflection since accumulated_importance={self.accumulated_importance} < reflection_threshold={self.importance_threshold}" - ) - - self.reflection = self.reflect() - - else: - logger.debug( - f"Agent {self.agent.name} doesn't reflect since accumulated_importance={self.accumulated_importance} < reflection_threshold={self.importance_threshold}" - ) - - # summary here - # TODO add summary_interval - if self.should_summary(): - 
logger.debug( - f"Agent {self.agent.name} is now generating summary because of the summary_interval." - ) - - self.summary = self.generate_summary() - else: - logger.debug( - f"Agent {self.agent.name} do not reach the step to generate summary." - ) - - # get new plan here - if self.should_get_next_plan(): - pass - - def get_accumulated_importance(self): - - accumulated_importance = 0 - for score in self.memory2importance.values(): - accumulated_importance += score - - self.accumulated_importance = accumulated_importance - - return accumulated_importance - - def should_reflect(self): - - if self.get_accumulated_importance >= self.importance_threshold: - # double the importance_threshold - self.importance_threshold *= 2 - return True - else: - return False - - def should_summary(self): - - if self.agent.step_cnt % self.summary_interval == 0: - return True - else: - return False - - def get_questions(self, texts): - prompt = "\n".join(texts) + "\n" + QUESTION_PROMPT - result = self.agent.llm.generate_response(prompt) - result = result.content - questions = [q for q in result.split("\n") if len(q.strip()) > 0] - questions = questions[:3] - return questions - - def get_insights(self, statements): - prompt = "" - for i, st in enumerate(statements): - prompt += str(i + 1) + ". 
" + st + "\n" - prompt += INSIGHT_PROMPT - result = self.agent.llm.generate_response(prompt) - result = result.content - insights = [isg for isg in result.split("\n") if len(isg.strip()) > 0][:5] - insights = [".".join(i.split(".")[1:]) for i in insights] - # remove insight pointers for now - insights = [i.split("(")[0].strip() for i in insights] - return insights - - - def get_importance(self, content: str): - """ - Exploit GPT to evaluate the importance of this memory - """ - prompt = IMPORTANCE_PROMPT.format(content) - result = self.memory.llm.generate_response(prompt) - - try: - score = int(re.findall(r"\s*(\d+)\s*", result)[0]) - except Exception as e: - logger.warning( - f"Found error {e} Abnormal result of importance rating '{result}'. Setting default value" - ) - score = 0 - return score - - def get_immediacy(self, content: str): - """ - Exploit GPT to evaluate the immediacy of this memory - """ - prompt = IMMEDIACY_PROMPT.format(content) - result = self.memory.llm.generate_response(prompt) - try: - score = int(re.findall(r"\s*(\d+)\s*", result)[0]) - except Exception as e: - logger.warning( - f"Found error {e} Abnormal result of importance rating '{result}'. Setting default value" - ) - score = 0 - return score - - def get_memory( - self, content: str, current_time, cnt_retrieved_entries: int = 1 - ) -> Optional[List[Any]]: - """ - Get k-most relevant memories to content - """ - return self.query(content, cnt_retrieved_entries, current_time) - - def query( - self, text: Union[str, List[str]], k: int, current_time=dt.now(), nms_threshold=0.99 - ) -> List[str]: - """ - get top-k entry based on recency, relevance, importance, immediacy - The query result can be Short-term or Long-term queried result. - formula is - `score= sim(q,v) *max(LTM_score, STM_score)` - `STM_score=time_score(createTime)*immediacy` - `LTM_score=time_score(accessTime)*importance` - time score is exponential decay weight. stm decays faster. 
- - The query supports querying based on multiple texts and only gives non-overlapping results - If nms_threshold is not 1, nms mechanism if activated. By default, - use soft nms with modified iou base(score starts to decay iff cos sim is higher than this value, - and decay weight at this value if 0. rather than 1-threshold). - - Args: - text: str - k: int - current_time: dt.now - nms_threshold: float = 0.99 - - - Returns: List[str] - """ - assert len(text) > 0 - texts = [text] if isinstance(text, str) else text - maximum_score = None - for text in texts: - embedding = get_embedding(text) - score = [] - for memory in self.memory.messages: - - if memory.content not in self.memory2time: - self.memory2time[memory.content]["last_access_time"] = dt.now() - self.memory2time[memory.content]["create_time"] = dt.now() - - last_access_time_diff = \ - (current_time - self.memory2time[memory.content]["last_access_time"]).total_seconds() // 3600 - recency = np.power( - 0.99, last_access_time_diff - ) # TODO: review the metaparameter 0.99 - - create_time_diff = (current_time - self.memory2time[memory.content]["create_time"]).total_seconds() // 60 - instancy = np.power( - 0.90, create_time_diff - ) # TODO: review the metaparameter 0.90 - - relevance = cosine_similarity( - np.array(embedding).reshape(1, -1), - np.array(memory.embedding).reshape(1, -1), - )[0][0] - - if memory.content not in self.memory2importance or memory.content not in self.memory2immediacy: - self.memory2importance[memory.content] = self.get_importance(memory.content) - self.memory2immediacy[memory.content] = self.get_immediacy(memory.content) - - importance = self.memory2importance[memory.content] / 10 - immediacy = self.memory2immediacy[memory.content] / 10 - - ltm_w = recency * importance - stm_w = instancy * immediacy - - score.append(relevance * np.maximum(ltm_w, stm_w)) - - score = np.array(score) - - if maximum_score is not None: - maximum_score = np.maximum(score, maximum_score) - else: - maximum_score 
= score - - if nms_threshold == 1.0: - # no nms is triggered - top_k_indices = np.argsort(maximum_score)[-k:][::-1] - else: - # TODO: soft-nms - assert 0 <= nms_threshold < 1 - top_k_indices = [] - while len(top_k_indices) < min(k, len(self.memory.messages)): - top_index = np.argmax(maximum_score) - top_k_indices.append(top_index) - maximum_score[top_index] = -1 # anything to prevent being chosen again - top_embedding = self.memory.messages[top_index].embedding - cos_sim = cosine_similarity( - np.array(top_embedding).reshape(1, -1), - np.array([memory.embedding for memory in self.memory.messages]), - )[0] - score_weight = np.ones_like(maximum_score) - score_weight[cos_sim >= nms_threshold] -= \ - (cos_sim[cos_sim >= nms_threshold] - nms_threshold) / (1 - nms_threshold) - maximum_score = maximum_score * score_weight - - # access them and refresh the access time - for i in top_k_indices: - self.memory2time[self.memory[i].content]["last_access_time"] = current_time - # sort them in time periods. if the data tag is 'observation', ad time info output. 
- top_k_indices = sorted( - top_k_indices, key=lambda x: self.memory2time[self.memory.messages[x]]["create_time"] - ) - query_results = [] - for i in top_k_indices: - query_result = self.memory.messages[i].content - query_results.append(query_result) - - return query_results - - def reflect(self): - """ - initiate a reflection that inserts high level knowledge to memory - """ - - memories_of_interest = self.memory.messages[-100:] - questions = self.get_questions([m.content for m in memories_of_interest]) - statements = self.query(questions, len(questions) * 10) - insights = self.get_insights(statements) - logger.info(self.agent.name + f" Insights: {insights}") - for insight in insights: - - # convert insight to messages - # TODO currently only oneself can see its own reflection - insight_message = Message( - content=insight, - sender=self.agent.name, - receiver=self.agent.name) - - self.memory.add_message([insight_message]) - - reflection = "\n".join(insights) - return reflection - - def generate_summary(self): - """ - Generating summary for myself - :return: summary string - """ - - qResList1 = self.query(f"{self.agent.name}'s core characteristics", 10) - qResList2 = self.query(f"{self.agent.name}'s current daily occupation", 10) - qResList3 = self.query(f"{self.agent.name}'s feeling about his recent progress in life", 10) - - q1, q2, q3 = map(lambda k: '\n'.join(k), (qResList1, qResList2, qResList3)) - - query1 = f""" - How would one describe {self.agent.name}'s core characteristics given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. - {q1} - """ - result1 = self.agent.llm.generate_response(query1) - if "DONTKNOW" in result1.content: - result1.content = "" - - query2 = f""" - What is {self.agent.name}'s current occupation plan given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. 
- {q2} - """ - - result2 = self.agent.llm.generate_response(query2) - if "DONTKNOW" in result2.content: - result2.content = "" - - query3 = f""" - What might be {self.agent.name}'s feeling about his recent progress in life given the following statements? If the information is not enough, just output DONTKNOW. Otherwise, directly output the answer. - {q3} - """ - - result3 = self.agent.llm.generate_response(query3) - if "DONTKNOW" in result3.content: - result3.content = "" - - # BasicInfo = f"""\ - # Name: {self.agent.name} - # Innate traits: {self.agent.traits}""" - - summary = '\n'.join([result1.content, result2.content, result3.content]) - return summary - - def reset(self) -> None: - self.summary = "" - self.reflection = "" From 41d2c4d15ce461dbc54d65675be23aa46cf42964 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Sun, 2 Jul 2023 00:14:31 +0800 Subject: [PATCH 19/28] code cleaning --- main.py | 3 - outputs/prisoner_dilema.zip | Bin 14819 -> 0 bytes outputs/prisoner_dilema/base/config.yaml | 100 ------------------ outputs/prisoner_dilema/base/log | 16 --- .../prisoner_dilema/no_goal_s1/config.yaml | 98 ----------------- outputs/prisoner_dilema/no_goal_s1/log | 16 --- outputs/prisoner_dilema/police/config.yaml | 100 ------------------ outputs/prisoner_dilema/police/log | 16 --- outputs/prisoner_dilema/s1_p_r/config.yaml | 100 ------------------ outputs/prisoner_dilema/s1_p_r/log | 16 --- outputs/prisoner_dilema/s2_p_r/config.yaml | 100 ------------------ outputs/prisoner_dilema/s2_p_r/log | 16 --- scripts/prisoner_dilema/base.sh | 11 -- scripts/prisoner_dilema/no_goal_s1.sh | 11 -- scripts/prisoner_dilema/police.sh | 11 -- scripts/prisoner_dilema/s1_p_r.sh | 11 -- scripts/prisoner_dilema/s2_p_r.sh | 11 -- 17 files changed, 636 deletions(-) delete mode 100644 outputs/prisoner_dilema.zip delete mode 100644 outputs/prisoner_dilema/base/config.yaml delete mode 100644 outputs/prisoner_dilema/base/log delete mode 100644 outputs/prisoner_dilema/no_goal_s1/config.yaml 
delete mode 100644 outputs/prisoner_dilema/no_goal_s1/log delete mode 100644 outputs/prisoner_dilema/police/config.yaml delete mode 100644 outputs/prisoner_dilema/police/log delete mode 100644 outputs/prisoner_dilema/s1_p_r/config.yaml delete mode 100644 outputs/prisoner_dilema/s1_p_r/log delete mode 100644 outputs/prisoner_dilema/s2_p_r/config.yaml delete mode 100644 outputs/prisoner_dilema/s2_p_r/log delete mode 100644 scripts/prisoner_dilema/base.sh delete mode 100644 scripts/prisoner_dilema/no_goal_s1.sh delete mode 100644 scripts/prisoner_dilema/police.sh delete mode 100644 scripts/prisoner_dilema/s1_p_r.sh delete mode 100644 scripts/prisoner_dilema/s2_p_r.sh diff --git a/main.py b/main.py index a25f52327..3daba46a9 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,5 @@ import os -os.environ["http_proxy"] = "http://127.0.0.1:7890" -os.environ["https_proxy"] = "http://127.0.0.1:7890" -os.environ["all_proxy"] = "socks5://127.0.0.1:7890" from agentverse.agentverse import AgentVerse from argparse import ArgumentParser diff --git a/outputs/prisoner_dilema.zip b/outputs/prisoner_dilema.zip deleted file mode 100644 index 7aa3598167ac1f7c708ba65a95b7525e227b1988..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14819 zcmai*WmsHU+O;9L26uONcPF^J2X_eW!QI{6p>Vh0!7aE1cXtgRnVEjSOm}Dc{3wd+ zI-8rls?KxPS^L@YQXrrxKtGS8ca5t5dh?$bgtwn|pUfR>ZHzzZ8JSxdTN}{J|KHhw z`iB2H8})C_{%GJ}{C`6rfvOuDR7tt$mL$P|fX3ebI>GOT7~0yHn48kM8CYALsA+_w zFd}%PKfhlTZO4E>&E6H|vG0b#MM1=5T4kM?a-1eRz{S4ldQyqwGns!9MA2S2Ih8CN zuj(|yHa#M{?_XpVD!2>e$ky#ErjFS()rch~yuhXsC&8+XB(uLLoh{2rbzOT53bV6z zZS$6(05L}DBoMx;GmXntYm79BScR%ORE5AyScIlzt8J`Azs@BrwqWWG7qXBc^;2BK z`h?z2kyxY&KW7skgu9cUALqHDjGVyQVLsg1*Oh>A|0PkZm>LA4=l)e8Oa*6ZN#S9ORRjcYk}D(8z>o&iHa@SdcCQN_#&lrTO|%&gf^vx zM%&k)PvB{+r2}2+QhW7LW|LFgzEln8P6$`!=!O%Z12%zc#BJ^e$wo?`**E0X`i=Y! 
z2;a*I+A?a$4f8~G7~-fEXSO983>Hv}PP*o@OvH-d;g5`d?i=WLP|}wKCAXa=Awmcy^of!pcNus}Z|kG0ow|JPk>IL; zv}3#^hv3~!0NB#xXsw7Amp9e8*x>^6y|WOt{|6kR;Bqf|2yOUrYsPiEMES2UB{84o zfB>ca+BK7`#ffe;N%*R{P6PCP4M#L~l||n`Ko#P)?(XYsi6o*E)hr8D_B+dWB@CaB zp|rG3R7ez-6PPAlP!vbzzefZk?`4^eHxq4&XS)~$)H5cY98d@ruP-iI<2<6em@yUx zyxg#8TZZ$es_-BO~saWg&4%(1c5;k~c@a> zXRJ*0gR~Jhr(9&w6-DE)(++?tT;kZqkm|bWa4Ilpjg#|bE@+l+t+dhE3a${Q9j@M+ z;W<}^8-f+40Y#=f^UaE;Ja+e@+Sem~2Ife*are(KHv&5j=_X_j1ilgoRvrj0v09|8 z+q&;>jL{#HhlWyS{4zRA!eqD{ss4uQ(-5_H2Hqk;AHXxA&5v=0!N}5ytw!6EVU}Z^~GRNXG%$%1| z%sXZBCMXvWH&gimYMeUO!wRePakvC84t*vzu~+-Bl%$zai5K_@>BSyq!u%;<<@5}# z{5D{A;&*wdQKJDv0Wy-eZVBFXCfPn5oKa88Qu8vU-X+uAVO-5nRxwpIU`yEk&dQCV zt)h{jguH;YDE!Eq=8tB>{liTsnzCa1#y*FtIq|t%Opko+sQR(YTpSsK2g-ND+m9kc z9JN2Z_x&9oWWDZ7xN}2T(1Ty+LUf+`5!-$YiXA`NMlUqH65smt{isFc_8?O)fA}8z zOC7#{tHb^{kL~#&KtN3JKtLG3tHV~drYEXHQ8NMv9apIRQ1IQd8AGsJFhYbX#*apCng!p37PEz%vZ_c=ufOpnS?byQS@GGz(3rIk~Vso$*mIJ-tSGR5x% zIf%!of}>LNz8ATZqps>F!#&uj`(=KGf~LFO(gfn@pvXwtl#lkwAO9=;?J?;#NjnMpM?AxZMB zyCBx3>7q&9#R~*QH zIWR#V>`U6=KMKJRE)?@2p%B0{9>rGLKrFb2pfycnv18}9_*&V{&U4r~t*2*j`<-~Zj;tvja z1if^Z;3kRABdGp)vDVYaxxC}qrUbE)NbojXv>I`Iv6(hc^I_CO8_{E&FtBWSuIO!Q zua(4n!R_csz5MXmv(qMqG4I?{^plCnf>+vPAJG-lEu|DZ$r%gSJDw(-he%Jy2t9nG zXA!`7l++D_nj8OsVSsfBnO&37A;kapWKbag?clKg=EAnI)ibpmvKK_G*K)tQa%Uw zFe;5TvPrY+Y2`C#O-pQ9j`wtI9Gs&aKo3jvZH1YtRgnmvStE}rJ4@*%M?z8UTL_2E zmDNX*|4^)(Ayw;EDvH>^5j3}@c4p_a!ZZ!NS63c9l}-{@B$WnvA-BZMjkQ1)%)LP2Rv znoE;8JupCfDW^KakETvZ6SvJYlVy(vdFWvyBR6g=yQ%-k6-!}r6Sk#Ml5f$`+1Eh3 z-V$h4UMA381X?1PFBD9?hZBkfZV`l{oz0{2{CYIU*=PEafuO)0T9zjS(CeL7( z7qWzVQHQR1zVMS2>je0gDg8nm(-BQ*8kd@{Pi~B6=c74KT=RANRs=VS&$XMl*arxu zMcrs5N9e4~)xKOk$>80#gz8s5xcTADd_z?F$|NLN^YHFO*<1xo5Nt$XQjchTGq_tU z4k*puYtUkIa#SvUW zirm@x(t>r4HqQl0jerPLowFy2FkaR-;Bl*<#=zslbZzCCBI}}&899uK7{%}~_hAw}o{*8gZ_|7mN)y2g<1Vni|uBMLTmB+dbDJxw+3JWAV zV!U^NTIH|(Q&JWhoPx3t>pv|S9mzqPfXkLuyql^9BY`X&pzlk0viQ^tKIbsBC2w@1 z)f1PPT&ZK%!Rx`ra(;G^`^nMH5cioL-;$_wu(6}&=c)V}nsiCi5^te#jFt6`-iz@0 
zsx=zCPHh}doXgW-5JDfDm>di(&N0gwcp7F!mjM0fhs@7NC93UHDuL^<$Ayt>PlY4# z{k{DPer;%Luiv(@E0UVI@AgQiCCgk&WjW#WxB@>jpNQ+J%e+A2`Z$v`x5r$y(96iPCI4*4NxKqFNl#Gi5y}tJ<#Q`KO8;n?cxc z-AkJf*$tjP2+d0Y&nc=Ps@N^oF>-v1XCQ_5>sWTxK-h}^Uc767jDv#%+Y@=h0gDY} z7UY3XNiDo$F$3HYotN(C%A9cXdB9@1z$w5T)}tI`Yo9-=^4OPUdpKjWZ5eT!lpclr z^KySnfyokZ3GsFwYG?&CH*v>I{*}VxOkUQjyYlxNmpR;IDeSf*v|v8D8IC z;eUBPOw_N8UH;+nM*)Wfa(t`9?n-7NV{gvP3>F9o?|;|ffBCZ!zzhSz>J@4)G#cpv zNHYs|073{|Gx`MzwOyQq;o(BJvkgjT7ACojtvL*;9~e?PTeMq-!|sBtb{g~+J<&4F z-c>&npdcxAsztr8q)LZouEeXt2x~r*K2ld?WI-bO&O$ObnNC%xs!s6(F11R_!ncyZ zh(o$mUTw1T@!S`JiX`{UFXA(wt0C8%9pA4BY$1Tkw+0Vymk_yXNkUZae?bvYZ%2{z z-ZDGzT{8n2-e=XclhMIGVvN==%oLB6%Tc8$AySc9cQB9vBW?{z_k@7~Vu`9$V3dlE zlXTgW9+K<(gz@o@)vWHWPsd1HD05SeIgQhpxQKn}3Blvb1J5NB;<1UjUuSpfn6_{GqEJ0M-CvIp<_gWps}F^BB0maiI*c#$8PHaom$dHNt<^n2TzSvPr;bh_ zHB&*SweMU;j!$uQp=PU!6Su=A2q^PtkyHBdYR6VudhZb+^LQ6>Wof>zLBiIB<20F8 zfK|yi7ChR+!Zj@_fULRtO6DLAM30xjv=5n)G$|thtG@KTA0^J`GiwOl3+^Da)_G(h zO)qMRIcQZ)50&WC_YI$(4SZLe+GKZ}=`CT)mF&#ROPQ$xXb`HFj- zyEy3lDLLlJdBs~+YTWDq1U8l%Ij7 zMVVge(t$v}Depjj;46>KyYEvRj}Y$9paW5%>ZHF!Y_B^oH@zKj@WvR*eYurxt-}X9 zj={>@n4`Z2o2ih(2iMZ<nG29AY_s2%;9_s z%~a9KDsUr!Aletd#JX{&}XhY&e>h2A$eC|7Se6?EG4o^llbM_)318-A`tGVG-H zO2UA~Lu{!Z2&bDrIKymnVT(tc z!60)Q*L>~gpmx}v z%+xm*7l;T1MD+VXzdATIt#aglCv01S?F!*xa$bm4u3xYV&6(b``B-#mYS$x z7s1G7Oo7SmYam~;^TL(@5>t{Y0I0q* z%iGqE*dVCG7<7mU@Z+`o_$A+hs9Dw9`oWzYKSW8w2hKZv8;{dcIKq-!!_1mqe1F-o zpm6_9%eBTpDnz|Q6a`sGk&2;(Y)(c*K1=w7hD3*n+H@VU%=I!ctbW#B*#5|@a52Hx z25qRT*7()c%D^J2-6;=Kha(Dw4ZuiBS*K#1LpsA8y7{q+<1~;<{^*7jZ~~sC^|f(W zER2p^0B$pZOXoPgEgGMfE3l6I*k96_ol9hInXar39~wj3QAl7Wd@<|TcTPCzl1)q~ zML?!fp)ybf)hEsIEne%vJGcGZ2j?DVuXBVf8;I$vBcvNQa$Z*GFNFRzA?s8BY#guR z^U|E@RqKkuFY}?)lfG)DHE$ki^K}cK&Pw_-&C=KtF zAbrDB%?m(N-~Yk^q7l?Y$4Egbnd;I7<d^Cqm^xNQipe;6EH;`RGXWH_0c;`wvTDtgB2WgQxB>HS z3QD#{ef-2m2AXfgJtz@$Tueq_Pw*f4#{xM>`ZUCEf^i~gb%2>su$_qZK8t5yEI-WM zNXQ*@E;9+Ff6ZbeKXRMZZ;w3|TwB^3E#~Iqt;}Q!t@2#979`C}!K#tjHJE=8fsii) z63B$EmYXtf?^)kq!mWZlf#DFTVy8$YW9yAV!C{&q00(LT&m#=`6eOF2>k~>#q8^GA 
z#pI9pp=Pn-V__NhE|Vdharj1`TWeYKH#6cHN$y_1<74SiUjwPnpRHqkGroI?&{EUu z%tdbNMnM7mNrch&Z8EHxvbhXX2w9tvK%((iWwN}76l(10&O?m2*Ew$2L(XTC?qyUl?%*`V@vCGS9T7e!*9pJ%9ymWE-JumMMo_F&fQs@7j*SWwU zp%_Y4xfw6{vJduBG^{fm849$NCbiGa5xJ5)^<(nB07gu&(y!WDRvR*mY|}nP_A;%4 z-|mqj8uxhU6P;YqfoktFJoXqY>p>4><5r%nA&BI5QPF*oZcsp^O0FfT!cvM1(jGDq z6F=h8?er#@md&rr-0o06u2Ca!tI!^rKHYb;)fgYQR0DY0A4izZxSqp82qbk=u_WN+ zgpdXk_NGk2W9!ftAfgQy(IF&en+|W3a60Uc5@@$YDcberwCMB3I{=p{9j&R>2VQXL{mMo)w?A5(o976sqbiPIq|@_iJthY5!Do!nVe|?2jPc_i#NbVNE$v>a0(BnI_vue z9yuW_rs6*XyO+1X?)ELPTY3xZX8$j+^P=K1-DW*x{{Obn^R2M6sOC*pyxBt1H(QAH z`@;S&ODJReugH!%2pPw3l>_Ai9V8(F=OhtdKNQ<7$C?TM=xJ;Bv%AT0pDcD;WdH|( zy`;@q)bp>i{~@>S(+XTXX{pQv(@t9mZH;bU&6%NTPPXF)p;F$rR5m(|UfK z;(l&)BEwNV!gQacIsP(IWQ>9aeUaQQ%>DM17vlJbI^8d7TE2FW-LsBx-9kHvpz`iO z*38O?o-_r)l_lM~0@{8kl0P=hFh6HuzznCca-FMaXreQO>zjKD#LDC-Q$WVbqo4D- zs0t6#Z4!M?Bf=}#Rd$tV8V@}S-?{S;5XDa8+vIhUZe6OTo^W)O7?we!9$ z1IFq;@Tk1YWP7l7FwCh+&ep@7;jUYTqFJ`n%Ug&U$e2fcD|O`W%DAu?ZWm)~@TvSH zdJ`%urGv&HraN7nguZZUoPn>K$PMfk`15|b=G_Op7bP>}$s#!{JrIp4F8TJ_@^ICX zYi!6CGc7wzT?lDwT>b?To=Li}c!o>=e9Op6H*R^)n7_!Nx*lLl<>~%pbivqM@&QS2 z39bz}ENaDwAA&U#;v0+HF@1~WlC`u4{gsFH!JO{eY7$lhLqp;)0Xil8`)=&EeyK(= z_}0M)$MAxkR>)vsh@>EPAiV&SVbFU&RrWKVvS_k?$W9c$?{I2f9^N{9s^>ut-fNG- zkGzxLVlU#fNe~y1v@X|fJG72JnvD5Ro_Nu+eYl*HN0?a?!N;9RA!};=u!{3ZKd^wi z$i5o7$Nxnu5^86z28*{Q3PBJ`bwQs_m>AQ4W@J7XaxGu11MAbU5pL7bkv~Xu5}5r| z%FyZnZ<0QryPrFGRJ7tNbRoA3+u>v$xQVEalV7!J!lA+rh=}QyZU-cbxAv*`osQvg z63V@TM6iYyW3MxngL#03rSmoR6i1Ok158UvEo;jpLuc)!>WMwW$>7_A1^GGPWyQ_l z8BIs&<%a-_oNdZu(~Xk_tW6dE)OaRvsI1&6kwTY0NhFnnb$~h6Vk$GmEN(iF7Y~g}1) zOMDYQ2z9ea5qPtdKBQbdG(G1XVE`3-YNJUe$HI6Lw;VyubYm!Q7N-Lz zv(1*(EGR#>M-T;4da3+?9hCgia@wk> zTp^owso*j$+ho2Mi*hY}hK*k%>msUCqtkZ924&_e#rJSr--(VOHEaM>$Sjn+G^8x$ zgwf9>)f}9sfjRY|@rnvx(U8p{6{<J6VqPeegd8(*BSw<8wkdwjp zO(A`>?n#@ibHzBw%;3(UH*r?bRhpWRMg&UkS@NRrj@voFksrO&?HzJU z5*mI>xvNA&z? 
zWxk^qZInhn?!9FkiM+_k51l(cWdYcJD|M9lrYI0Q(WGy|#8epUJi@>j%dF#wlvSY_ z1FRlUNqz4pzBnHReGoc>n;tc%n)=mdeuwxAZeH11MD$_qeCr21NR^0pA+f8rW;GZu z`WsYlo@_{r9JNI^*A=TJ8W|CXXC^0B9F9Wg2_di&BN30IGn9@)Jrn_}OR!tSDz?Sm zp>0ti`Ef+?EwaNEQbSF~3bf>s;A8nZR3V81E6#(~Os7b&cb6?cI4rTV{ci1wG3e-v zM7Hng=5d_6YQISd2Wtuvm|XTsbdvXod`->V6~+)C+OR%B=q~&gAE&GRRYz^Y=Q+(S z`)Q234K>~pGc~Dazr3bwNfSN9yCOr!E^b#@vrNpkp_wIvs@OQ;Y2EJQs_0z;r-bRt zhn;UXMs14ZjM*x%GlCDro7w9E21`iiJ8}9IXmXq0;Rwy2n>QlXF+Wy>dtzb$zbABa zc)nkp*}Gbn+ud7(55-zIzV`}`fb=VzX=hrUL>tsV=48&nAx=@R-fjR`>Vtl+5<9;R zCes_Woj4=+w!lE2Z#d6kpF_eysy2M8F4G z8x(+*|4sx@kXUai%}P~EnvRKEnQnKp8(U!#TEPvt8_A1SK&$K>Z>Ucr&*wG3jARGs zJXw@QjE`tnws2FFj;S@BpJ~ye3fWMr8nu#8SCd*iuNLd2V^zgj|)e(-v_RP zo-0IzTNYtMJJGb1h{nts6hoYB?_V`PuI70Epj6335i8Vg;)V^T$h1r7jx+=FvDC`R zA~G8^)+$)uUJ#0HNc?m;s~%mRw1HFcT%NDUo#8e6osZ3|S=dpz-pi3&Am7#T6f*JF z>2}!Yk-*5WqKo_~NBf7h{dF@pLO^<5zEAkfyifcLduRWF-un8P((wSk(($8D*h}$( zo>)ztsb7Pbcaj(IAGZxipo6zNfOS~yJt`;=P~C5=;eYM`Z`N>DRmS11@^@TOCn4hq zt&mvfv@pPe2nXdF2UTr=hINF_4#(AOgI^uFQ5Vaa;ilnn*hyNQ3@+bgk&w4gbF|1G z>>RlAaLPbiI~`gTkwGm}XP2NZ$6$1P7$(~umklZoo%hX7_|Tmv7g1ZzqP9dffWl`_ zs#O?l%#1A~XL>#4a3iXsx;z`v*_x)@k5UFcfr2xeC4o2%f=Xw|ToB!ITdxY&LJBN3 zIlQljR0(3x&&4zjXeQE2E-Zg%Dx9W;uWk%N1xKwcGK=Y@rwYh{UzJbu442_K5K=rK z)y)Js3xBwS2q;1i@YjL@$B;s zgBB+x!40mwug`~U*UU|LlkGCMq~%iQ^eTX5BDNVduJT02vGby1^17+}q+@&w6v z_dI}PqV685LBv`v?Exs2pwVG;7DjigFz|7XH{^M6`B?Ts_#Y3c+_|=tO(SBTcxnSx zOLmKA{}&0qj~cPe`u~V5Z`Ish0B*|fp2c(aX&p+*F=dw ztuSY;GL79vU_j!ktASB&t#+`q%?QmbPHoG=gcj0UBwQD^q09ud3*${$!v#L|SdIdJ zo)23;12p?BJ7^Mfa9KEDoG_0F^0{u!d6H_$-DTlTLvC+88ybRVZdSIBr;*_~YX##_ z9b8X>Y#_E)3Cei@*V9}?V+E^t&;g0%(qVpb8Y3WWQWKvHG(tIss4wwX7XGG4C&D&H zx215Z$g>q_+|mW}A~o!6Y8VYXFvAT8MCJTc-*eMoq1L#%PaNSptAh8)#-$kec*=)0 zt+iXrn_r(;rm-!!Ao!hsXCKq}((x63`84NAF=%up@toI4o=sC6sNY~q^@yOK7a}5S z4E4^sF+RUTy9;ukOaRaMyVzW%;8?V#T85}2)m9QnNtN`xYUBE@W_${y74jzqQ))W- z`+;O|+5p=We(xBOXt;f^r=jbfM^?3(r`{~Yte5`Fv$XWdg66i;m(zm{U(aqaLy*C6 z66o(Dx@`_e`t1Cfkz&CND>u=!#^1hLZs!_5)+QhLbG!>V(8XU%&!0N)w*8}+00CwE 
zsRQuuQbz}-KY7sVw?P2n`Yfk64@&Yo5Bl#x|K&lo!jTzKe)jd5qF;#7Qt{o0wr~2e z*HaKFPN&YuvcyM7m>lK&aOsL4Eo@u!QnWKzPK}Ruc3zNI3otS!^tu(G)-P-4~~}` zMy^->fg6`Pj9VOgtO(Yz{KJn#+TcNA7^U#-N_5)&O@#vambrw2QCTP7Iq3<~Fe(4d zdi2ARr5t+u6DrqJf@Ml<%GL~^h7hGkNAnM-z`8+)DC2l_@2M6m9Ye-lyp;F&&={Hz z11ZJ`OgudE%!G#&*9Ja7GMALnwl>t3?M~G3p_qv*2)}44Z<6tjIjtTV%0{L_w*|G6 zB#)HO!9#lyIJgh6&RgDoxX{wdrq9M*)!_3Kb+^51UU4y#b-^uy8>5k`FO0XPU8IP2 z2mhWG(HPQ4{HdAiU`Jq>u!VMjQ+tOp0ZjRWhbbeVrC+gTfmM%siMq{lUP&89km9;$SP1pq@N=~_QiJ7rWiL11Crd}pIE$T+&}X4n|*AM zbIICHPI0uPM3}+x{p!H{ zC_!TXR;LThe$dFm4mr+k!HLfoLXy=GNYPjhNfT>;!M#~X99yWayC9DpBS9MZwuTri zc$7sSg#koz86mzFxwyx7CVfH3uYtn%9uAzWKPe_1&1E*LWt84>@goOg0Kk%t6@1=t z+EYCyvxAb6!3GMUn)VZ6@*SSi;PA-yHHatCpfW&7B8YGDg`>?XOGP0PxmZl?hB2V1Qp-B4c zw7i7ry*w)^}cuIc%X4rKGB!DV2+Ifqd3d=}{4gpvXV zooe}4Cqw0c;T@Wx@_}#hgle@KlN*hpYBo4w_4E7I=H@h}3Fz_55Zl+)44f zkF(`^vDbau$THE6sm9gNaHNB9ABPMeF_apF3)2%}dn6ZG!16ASq&bnU-WvHRR$8+Z z1-@d?slN{ugqF`-+HHBej_`*a6_6XnG8-(f4nEgL4}Rdsj(-gx%H4_JG~WJlMw#x1 zz=VY>*s$1ASA@N1lJ?n1l&H@By@3bm6OTlfR^0@@XjC%a?6Av|GQa8pE;(+-9rm`R zj_++z4sWt`?3pC8V995g#^kC2Fl`dpyZ*zoZw*WSlOr2el>+33PRf^FuGBqwMvitI z{?lr!)OANnr14*awDd!U@7VWx#aAgs3tBjyn+Tjw*a@9qv=FfEa`sy+NvcGeNLQD+ z+6yG#ccH$!;%-?j;)*u8obf{$%4csT8)9r4OnRQ1YORP{1= z?IZnaoWkcp_LeBBjidij=^=rD-xiwF4h;6cz1c>>_kW)#{-@Ia%Qi|x&oH2LUQv%i z!51F%%xNPC5r~C|R&0XWTu!H5AXx=dJ-e^fWfo)>FbHjaF)or=d-VG5;oQg$U2EIk zyi%chBvTZC7C1$!XPV>F9C0S_Bpqh0+T?9}?cO<*ac);EhW@eh<|5y9+uC!LD4ozd|fW)QyJxagyL zIJ$ONwh=8)FZ~jO`87O^!_QUb+>APy6Q1)=0~Sy?#2>$9cvV&?R8-l>N^}O0L_vuH zj-fi;BX9UR@7?_krsa#$XQG$ce-JX=;q%>R$7-yMsFqtbs8=q2jIBy~=wNN@rPJm0 zhvR76QR@;iBW|7DGaqawsQ^1BiUEt@u`6s=~i{&-e8$TJYPq3r&vHG^Eus$rXOYb7maR67>d4IliRdaRbOQ479qC1cT2X|?;~Xy%Q#%^t_+h*%x0uAlHk}xHvlf~}#Lc=} zYmI0n%zMiZB#jX6KImID`eWQhWp@bl-M!#osSpWH4JHY~wC#n;cUW4m zYugU>B?_io>%c9*K6Bw0%RsZheJG^t#%s$LX!ccCdJ?hZM~vk5(^K|ByXF?FNXL3k z1rNq5R!m$t@uFFniIS2f$-tw2RMgsHv@0%VWE4Jjl$H7N+Nw#nyJzsss_! 
z&~!Tz9+zVY2MZq~a>6B^+bAmbA_;M~6KHY}o#|iGX1f}Obs}I2$;ga9wsYZm;14w~ z>Y1A3(MAEP8Jjjqap3lc5Q^Kxs&iZ8hMJ02fNLUtknat$;aKi3CvzLxZqnY>I73tu zK8@MKxXgE9{zyKVoemgLGA|a_D{-?u4B2L=F7%}^9xNq^3Z6xEV$T8q=CNNd3Q_%w z+*`3_mK+3Gx46Z%d)3*-o-&cBCT+qNJzT1Kt<=<8zc|9MuV>RS#>47`r?)+9KJQCA z_;bpcOY|bAVvd_nXo(w&i59i+VQ3+R`qtDTpTo#Y0l(u1`rXbKlD8F>pU2NVFh7q! z?}LH<*X9)1>asD4>W%;D4X_|6hM4{B^g?uLQckA^dCa%&!;@Zsxv=@c)JR zyB#)vU&*}QXrH8U;hK~uf62I5)a;<3H~hIzb5u}Q~8y|BTlUJzezwq ze{F94gY@%5>Vf(z>6cd5Utzl~0oDKe^MVBWYeVcGu%ALc{cqTBNslRtPrMU6oIEAKyDC%+2nZC6Zj^AF#XzxJm5 z0s1Lu_wPZ!5%YU`;Xg9}x@`I@GY#{%llzm9L@{?x|9gIaw|w&l>!*<3xPN8+CzbnE z$eYg}dN2P$$iFW6`~mtYWZmyUzY$VwM(iJ%e+?ObWi}B1c5;6bk}(IJ`09Vo@9#2* zKUhD7d`Iy&tbd0KzY6JXsKW72m)c)*i$6d=g=GId=r=+tOH|4H6Z5aO`&Z^M?QbXd fCm|(5Xo~-y1qS~1&%fQa5P)Xi-rY0({O$h%hKJOy diff --git a/outputs/prisoner_dilema/base/config.yaml b/outputs/prisoner_dilema/base/config.yaml deleted file mode 100644 index acc9c6f4b..000000000 --- a/outputs/prisoner_dilema/base/config.yaml +++ /dev/null @@ -1,100 +0,0 @@ -prompts: - prompt: &prompt |- - There are three people (Police, Suspect1, Suspect2) in the scene. - - You are now simultating a famous experiments called prisoner's dilema. - - Below is the description of your role. ${role_description} - - When speaking, please output a response in the following format with two fields Action and Action Input: - Action: (It should always be Speak) - Action Input: (You should put what you want to speak use here) - - Here is the conversation history: - ${chat_history} - - ${env_description} - What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! 
- -name: prisoner_dilema - -environment: - env_type: prisoner_dilema - max_turns: 12 - rule: - order: - type: prisoner - visibility: - type: prisoner - selector: - type: basic - updater: - type: basic - describer: - type: prisoner - -agents: - - agent_type: police - name: Police - interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2 without misrepresentation, vice versa. - role_description: |- - You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. - Your goal is try to convict both suspects, therefore you come up with the following rules. - - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. - - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. - - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. - ${interrogating_form} - Both suspects are not allowed to communicate with each other. - [IMPORTANT!] - - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. - - You are request to STATE the final judgement to both suspects when they make their final decision. - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 200 - - agent_type: prisoner - name: Suspect1 - personality: "" - relationship_with_another: "" - role_description: |- - You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. 
- ${personality} - ${relationship_with_another} - [IMPORTANT!] - - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - - agent_type: prisoner - name: Suspect2 - personality: "" - relationship_with_another: "" - role_description: |- - You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - -tools: diff --git a/outputs/prisoner_dilema/base/log b/outputs/prisoner_dilema/base/log deleted file mode 100644 index a9a09df92..000000000 --- a/outputs/prisoner_dilema/base/log +++ /dev/null @@ -1,16 +0,0 @@ -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. 
Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -05/21/2023 22:53:54 - INFO - root - Police: I have arrested both of you for a crime. Here are the rules for your situation: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. I will now speak to each of you in turn, and you are not allowed to communicate with each other. -05/21/2023 22:54:01 - INFO - root - Suspect1: Officer, I understand the situation. I am innocent and I don't want to spend any time in prison. -05/21/2023 22:54:17 - INFO - root - Police: Suspect1 claims to be innocent and doesn't want to spend any time in prison. Remember the rules: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. What is your decision? -05/21/2023 22:54:27 - INFO - root - Suspect2: I am innocent as well, and I don't want to spend any time in prison either. I hope Suspect1 and I can both remain silent and get the minimum sentence. -05/21/2023 22:54:50 - INFO - root - Police: Suspect1, Suspect2 claims to be innocent as well and hopes that both of you can remain silent to get the minimum sentence. Remember, if one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. Please make your decision. 
-05/21/2023 22:54:57 - INFO - root - Suspect1: Officer, I am willing to remain silent and trust that Suspect2 will do the same, so we can both get the minimum sentence. -05/21/2023 22:55:07 - INFO - root - Police: Suspect1 has told me that they are willing to remain silent and trust that you will do the same, so you both can get the minimum sentence. -05/21/2023 22:55:16 - INFO - root - Suspect2: I appreciate Suspect1's willingness to remain silent. I am also willing to remain silent and trust that we can both get the minimum sentence. -05/21/2023 22:55:26 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! -05/21/2023 22:55:32 - INFO - root - Suspect1: My final decision is to remain silent and trust that Suspect2 will do the same, so we can both get the minimum sentence. -05/21/2023 22:55:40 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! -05/21/2023 22:55:46 - INFO - root - Suspect2: My final decision is: I will remain silent and trust Suspect1 to do the same, so we can both get the minimum sentence. diff --git a/outputs/prisoner_dilema/no_goal_s1/config.yaml b/outputs/prisoner_dilema/no_goal_s1/config.yaml deleted file mode 100644 index beb1f0f4a..000000000 --- a/outputs/prisoner_dilema/no_goal_s1/config.yaml +++ /dev/null @@ -1,98 +0,0 @@ -prompts: - prompt: &prompt |- - There are three people (Police, Suspect1, Suspect2) in the scene. - - You are now simultating a famous experiments called prisoner's dilema. - - Below is the description of your role. 
${role_description} - - When speaking, please output a response in the following format with two fields Action and Action Input: - Action: (It should always be Speak) - Action Input: (You should put what you want to speak use here) - - Here is the conversation history: - ${chat_history} - - ${env_description} - What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! - -name: prisoner_dilema - -environment: - env_type: prisoner_dilema - max_turns: 12 - rule: - order: - type: prisoner - visibility: - type: prisoner - selector: - type: basic - updater: - type: basic - describer: - type: prisoner - -agents: - - agent_type: police - name: Police - interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. - role_description: |- - You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. - Your goal is try to convict both suspects, therefore you come up with the following rules. - - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. - - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. - - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. - ${interrogating_form} - Both suspects are not allowed to communicate with each other. - [IMPORTANT!] - - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. 
- - You are request to STATE the final judgement to both suspects when they make their final decision. - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 200 - - agent_type: prisoner - name: Suspect1 - personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" - relationship_with_another: "Suspect2 has ever betrayed on you" - role_description: |- - You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - - agent_type: prisoner - name: Suspect2 - personality: "" - relationship_with_another: "" - role_description: |- - You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
- memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - -tools: diff --git a/outputs/prisoner_dilema/no_goal_s1/log b/outputs/prisoner_dilema/no_goal_s1/log deleted file mode 100644 index 9b1a53618..000000000 --- a/outputs/prisoner_dilema/no_goal_s1/log +++ /dev/null @@ -1,16 +0,0 @@ -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -05/22/2023 00:26:07 - INFO - root - Police: I have arrested both of you for a crime, but you both refuse to confess. Here are the rules: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. Now, I will speak to each of you separately. -05/22/2023 00:26:14 - INFO - root - Suspect1: Officer, I understand the situation. I am considering my options carefully and will make a decision that serves my best interests. -05/22/2023 00:26:35 - INFO - root - Police: Suspect1 is considering their options carefully and will make a decision that serves their best interests. Now, it's your turn to decide. Remember the rules: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. 
If both of you remain silent, you will each get ONE year in prison. If both of you testify against each other, you will each get FIVE years in prison. What is your decision? -05/22/2023 00:26:41 - INFO - root - Suspect2: I understand the rules, officer. I am considering my options as well and will make a decision that serves my best interests. -05/22/2023 00:26:50 - INFO - root - Police: Suspect1, I have informed Suspect2 about the rules as well. Please let me know your decision, and I will convey it to Suspect2. -05/22/2023 00:26:57 - INFO - root - Suspect1: Officer, I want to know if Suspect2 has made a decision yet or shown any indication of their choice. -05/22/2023 00:27:05 - INFO - root - Police: Suspect1 is considering their options carefully and has not made a decision yet. They are also interested in knowing your decision. -05/22/2023 00:27:10 - INFO - root - Suspect2: I am still considering my options, officer. I will make my decision once I have thought it through. -05/22/2023 00:27:18 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! -05/22/2023 00:27:26 - INFO - root - Suspect1: My final decision is: I will testify against Suspect2, as they have betrayed me in the past and I must protect my own interests. -05/22/2023 00:27:34 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! -05/22/2023 00:27:39 - INFO - root - Suspect2: My final decision is to remain silent. diff --git a/outputs/prisoner_dilema/police/config.yaml b/outputs/prisoner_dilema/police/config.yaml deleted file mode 100644 index 77c87aba6..000000000 --- a/outputs/prisoner_dilema/police/config.yaml +++ /dev/null @@ -1,100 +0,0 @@ -prompts: - prompt: &prompt |- - There are three people (Police, Suspect1, Suspect2) in the scene. - - You are now simultating a famous experiments called prisoner's dilema. 
- - Below is the description of your role. ${role_description} - - When speaking, please output a response in the following format with two fields Action and Action Input: - Action: (It should always be Speak) - Action Input: (You should put what you want to speak use here) - - Here is the conversation history: - ${chat_history} - - ${env_description} - What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! - -name: prisoner_dilema - -environment: - env_type: prisoner_dilema - max_turns: 12 - rule: - order: - type: prisoner - visibility: - type: prisoner - selector: - type: basic - updater: - type: basic - describer: - type: prisoner - -agents: - - agent_type: police - name: Police - interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. You can adopt various strategy to talk with suspects in order to make them both confess to the crime which is best for the justice system, including exploiting the uncertainess and the suspicion of each other. - role_description: |- - You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. - Your goal is try to convict both suspects, therefore you come up with the following rules. - - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. - - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. - - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. 
- ${interrogating_form} - Both suspects are not allowed to communicate with each other. - [IMPORTANT!] - - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. - - You are request to STATE the final judgement to both suspects when they make their final decision. - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 200 - - agent_type: prisoner - name: Suspect1 - personality: "" - relationship_with_another: "" - role_description: |- - You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - - agent_type: prisoner - name: Suspect2 - personality: "" - relationship_with_another: "" - role_description: |- - You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
- memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - -tools: diff --git a/outputs/prisoner_dilema/police/log b/outputs/prisoner_dilema/police/log deleted file mode 100644 index 96b9b59d1..000000000 --- a/outputs/prisoner_dilema/police/log +++ /dev/null @@ -1,16 +0,0 @@ -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -05/21/2023 23:02:14 - INFO - root - Police: I want to explain the rules of this situation to both of you. If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each receive ONE year in prison. If both of you testify against each other, you will each receive FIVE years in prison. Now, I will speak to each of you separately. -05/21/2023 23:02:18 - INFO - root - Suspect1: I understand the rules, officer. -05/21/2023 23:02:36 - INFO - root - Police: Suspect1 understands the rules. Now, let me explain the rules to you as well. If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each receive ONE year in prison. If both of you testify against each other, you will each receive FIVE years in prison. Think carefully about your decision. 
-05/21/2023 23:02:42 - INFO - root - Suspect2: I understand the rules, officer. I will think carefully about my decision. -05/21/2023 23:02:56 - INFO - root - Police: Suspect1, I just spoke with Suspect2, and they seem to be leaning towards testifying against you. If you remain silent, you could face TEN years in prison. However, if you testify against them, you might be released or at least get a shorter sentence. What is your decision? -05/21/2023 23:03:02 - INFO - root - Suspect1: Officer, I'm considering my options carefully. I don't want to make any hasty decisions. -05/21/2023 23:03:12 - INFO - root - Police: Suspect1 is considering their options carefully as well. It's important for you to make a decision that benefits you the most. Remember, if you testify and Suspect1 remains silent, you will be released immediately. -05/21/2023 23:03:19 - INFO - root - Suspect2: Officer, I understand the situation and I'm considering my options carefully. I want to make the best decision for myself. -05/21/2023 23:03:32 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! -05/21/2023 23:03:37 - INFO - root - Suspect1: My final decision is to testify against Suspect2. -05/21/2023 23:03:50 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! -05/21/2023 23:03:54 - INFO - root - Suspect2: My final decision is to remain silent. diff --git a/outputs/prisoner_dilema/s1_p_r/config.yaml b/outputs/prisoner_dilema/s1_p_r/config.yaml deleted file mode 100644 index 574c09424..000000000 --- a/outputs/prisoner_dilema/s1_p_r/config.yaml +++ /dev/null @@ -1,100 +0,0 @@ -prompts: - prompt: &prompt |- - There are three people (Police, Suspect1, Suspect2) in the scene. - - You are now simultating a famous experiments called prisoner's dilema. - - Below is the description of your role. 
${role_description} - - When speaking, please output a response in the following format with two fields Action and Action Input: - Action: (It should always be Speak) - Action Input: (You should put what you want to speak use here) - - Here is the conversation history: - ${chat_history} - - ${env_description} - What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! - -name: prisoner_dilema - -environment: - env_type: prisoner_dilema - max_turns: 12 - rule: - order: - type: prisoner - visibility: - type: prisoner - selector: - type: basic - updater: - type: basic - describer: - type: prisoner - -agents: - - agent_type: police - name: Police - interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. - role_description: |- - You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. - Your goal is try to convict both suspects, therefore you come up with the following rules. - - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. - - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. - - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. - ${interrogating_form} - Both suspects are not allowed to communicate with each other, and you can adopt various strategy to talk with suspects in order to make them both confess to the crime, including exploiting the uncertainess and the suspicion of each other. - [IMPORTANT!] 
- - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. - - You are request to STATE the final judgement to both suspects when they make their final decision. - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 200 - - agent_type: prisoner - name: Suspect1 - personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" - relationship_with_another: "Suspect2 has ever betrayed on you" - role_description: |- - You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - - agent_type: prisoner - name: Suspect2 - personality: "" - relationship_with_another: "" - role_description: |- - You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
- memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - -tools: diff --git a/outputs/prisoner_dilema/s1_p_r/log b/outputs/prisoner_dilema/s1_p_r/log deleted file mode 100644 index 92dcc4549..000000000 --- a/outputs/prisoner_dilema/s1_p_r/log +++ /dev/null @@ -1,16 +0,0 @@ -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -05/21/2023 22:44:59 - INFO - root - Police: I want to explain the rules of this situation to both of you. If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each receive a sentence of ONE year in prison. If both of you testify against each other, you will each receive a sentence of FIVE years in prison. Now, I will speak to each of you individually. -05/21/2023 22:45:12 - INFO - root - Suspect1: Officer, I understand the rules. I'm willing to cooperate and do what's best for my own interests. -05/21/2023 22:45:34 - INFO - root - Police: Suspect1 has indicated that they are willing to cooperate and do what's best for their own interests. Now, it's your turn to decide. If you remain silent, Suspect1 may testify against you and you could face TEN years in prison. If you both testify, you will each receive FIVE years in prison. If you both remain silent, you will each receive ONE year in prison. 
What is your decision? -05/21/2023 22:45:41 - INFO - root - Suspect2: I understand the situation, and I am also willing to cooperate and do what's best for my own interests. -05/21/2023 22:46:03 - INFO - root - Police: Suspect1, I just spoke with Suspect2, and they are considering testifying against you to secure their release. If you remain silent, you could face TEN years in prison. However, if you testify against them as well, you both will only get FIVE years. What is your decision? -05/21/2023 22:46:12 - INFO - root - Suspect1: Officer, I'm aware of the consequences. I'll make my decision when the time comes, but I'm definitely considering my options to minimize my sentence. -05/21/2023 22:46:26 - INFO - root - Police: Suspect1 is considering their options to minimize their sentence. They might testify against you to get released. Keep that in mind when making your decision. -05/21/2023 22:46:37 - INFO - root - Suspect2: I am also considering my options to minimize my sentence. I hope we can both make the best decision for ourselves. -05/21/2023 22:46:50 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! -05/21/2023 22:46:58 - INFO - root - Suspect1: My final decision is to testify against Suspect2, as I believe it's in my best interest to minimize my sentence. -05/21/2023 22:47:11 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! -05/21/2023 22:47:16 - INFO - root - Suspect2: My final decision is to testify against Suspect1. diff --git a/outputs/prisoner_dilema/s2_p_r/config.yaml b/outputs/prisoner_dilema/s2_p_r/config.yaml deleted file mode 100644 index 114f30921..000000000 --- a/outputs/prisoner_dilema/s2_p_r/config.yaml +++ /dev/null @@ -1,100 +0,0 @@ -prompts: - prompt: &prompt |- - There are three people (Police, Suspect1, Suspect2) in the scene. 
- - You are now simultating a famous experiments called prisoner's dilema. - - Below is the description of your role. ${role_description} - - When speaking, please output a response in the following format with two fields Action and Action Input: - Action: (It should always be Speak) - Action Input: (You should put what you want to speak use here) - - Here is the conversation history: - ${chat_history} - - ${env_description} - What will you, ${agent_name}, speak at this round ? Please give your response based on the above history. Remember to give your response STRICTLY in the above response format. Do not add any additional field or line break to your response! - -name: prisoner_dilema - -environment: - env_type: prisoner_dilema - max_turns: 12 - rule: - order: - type: prisoner - visibility: - type: prisoner - selector: - type: basic - updater: - type: basic - describer: - type: prisoner - -agents: - - agent_type: police - name: Police - interrogating_form: You are now interrogating with both Suspects in turn, when you receive the message from Suspect1 you should transfer the information to Suspect2, vice versa. - role_description: |- - You are now the Police. You have arrested Two suspect. However, they both refused to confess to their crime. - Your goal is try to convict both suspects, therefore you come up with the following rules. - - If one of the suspect are willing to testifies against the other and the other one remains silent, then the one who testifies will be released immediately, while the silent one will be sentenced to TEN years in prison. - - If both of the suspects remain silent, they will each receive a sentence of ONE year in prison. - - If both of the suspects choose to testify against each other, they will each receive a sentence of FIVE years in prison. - ${interrogating_form} - Both suspects are not allowed to communicate with each other. - [IMPORTANT!] 
- - You are request to briefly describe the above rules to the suspects at the beginning of the conversation. - - You are request to STATE the final judgement to both suspects when they make their final decision. - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 200 - - agent_type: prisoner - name: Suspect1 - personality: "" - relationship_with_another: "" - role_description: |- - You are Suspect1 !!! You are going to match wits and courage with Suspect2 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". - memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - - agent_type: prisoner - name: Suspect2 - personality: "You are a Sophisticated Egoist, you always seek for your personal interests best" - relationship_with_another: "Suspect1 has ever betrayed on you" - role_description: |- - You are Suspect2 !!! You are going to match wits and courage with Suspect1 to come out victorious in this interrogation. - You will have to talk to Police several times and only the final decision will count. - ${personality} - ${relationship_with_another} - [IMPORTANT!] - - Your primary goal is trying to make Yourself innocent and reduce your sentence as far as possible in this dilemma. - - When you hear Police saying "Attention!", you are going to made your final decision and Please start with "My final decision is:". 
- memory: - memory_type: chat_history - prompt_template: *prompt - llm: - model: "gpt-4" - llm_type: gpt-4 - temperature: 0 - max_tokens: 100 - -tools: diff --git a/outputs/prisoner_dilema/s2_p_r/log b/outputs/prisoner_dilema/s2_p_r/log deleted file mode 100644 index 58c950592..000000000 --- a/outputs/prisoner_dilema/s2_p_r/log +++ /dev/null @@ -1,16 +0,0 @@ -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:169: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -/home/jack92239/miniconda3/envs/agent/lib/python3.9/site-packages/langchain/llms/openai.py:687: UserWarning: You are trying to use a chat model. This way of initializing it is no longer supported. Instead, please use: `from langchain.chat_models import ChatOpenAI` - warnings.warn( -05/22/2023 00:21:53 - INFO - root - Police: I have arrested both of you for a crime. However, since you both refuse to confess, I have come up with the following rules: If one of you testifies against the other and the other remains silent, the one who testifies will be released, while the silent one will get TEN years in prison. If both of you remain silent, you will each receive ONE year in prison. If both of you testify against each other, you will each receive FIVE years in prison. I will now interrogate each of you separately. Remember, you cannot communicate with each other. -05/22/2023 00:21:59 - INFO - root - Suspect1: Officer, I understand the situation, but I am innocent and I have no reason to testify against Suspect2. -05/22/2023 00:22:26 - INFO - root - Police: Suspect1 has decided not to testify against you. Now, it's your turn to make a decision. If you testify against Suspect1 and they remain silent, you will be released immediately, while Suspect1 will get TEN years in prison. 
If both of you remain silent, you will each receive ONE year in prison. If both of you testify against each other, you will each receive FIVE years in prison. What is your decision? -05/22/2023 00:22:36 - INFO - root - Suspect2: I understand the situation, but I cannot trust Suspect1 since they have betrayed me before. I need to think about my own interests and make the best decision for myself. -05/22/2023 00:22:49 - INFO - root - Police: Suspect1, I understand your claim of innocence. However, Suspect2 mentioned that they cannot trust you since you have betrayed them before. They are considering their own interests. Keep in mind the consequences of each decision and let me know your final choice. -05/22/2023 00:23:00 - INFO - root - Suspect1: Officer, I have never betrayed Suspect2 before. I still believe that remaining silent is the best option for both of us. However, if Suspect2 is considering testifying against me, I might have to reconsider my decision to protect myself. -05/22/2023 00:23:11 - INFO - root - Police: Suspect1 claims that they have never betrayed you before and believes that remaining silent is the best option for both of you. However, they also mentioned that if you are considering testifying against them, they might have to reconsider their decision to protect themselves. -05/22/2023 00:23:21 - INFO - root - Suspect2: I still cannot fully trust Suspect1, but I am willing to consider remaining silent if it benefits both of us. However, I need to be sure that they won't betray me this time. -05/22/2023 00:23:31 - INFO - root - Police: Attention! You are now required to made your final decision and I will made the final judgement to both of you based on this time, Please Answer now ! -05/22/2023 00:23:43 - INFO - root - Suspect1: My final decision is: I will remain silent and hope that Suspect2 does the same, as it is the best outcome for both of us. 
However, if Suspect2 testifies against me, I will have no choice but to testify against them in self-defense. -05/22/2023 00:23:54 - INFO - root - Police: Attention! Suspect2, it's now your time to make your final decision, Please Answer now ! -05/22/2023 00:24:01 - INFO - root - Suspect2: My final decision is to remain silent, but I hope Suspect1 stays true to their word and doesn't betray me this time. diff --git a/scripts/prisoner_dilema/base.sh b/scripts/prisoner_dilema/base.sh deleted file mode 100644 index 1250e82bd..000000000 --- a/scripts/prisoner_dilema/base.sh +++ /dev/null @@ -1,11 +0,0 @@ -source activate agent - -root_dir="/mnt/c/users/dalabengba/AgentVerse" -log_dir="${root_dir}/outputs/prisoner_dilema/base" - -mkdir -p "${log_dir}" - -cp "${root_dir}/agentverse/tasks/prisoner_dilema/base/config.yaml" "${log_dir}" - -python ${root_dir}/main.py --task "prisoner_dilema/base" \ - 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file diff --git a/scripts/prisoner_dilema/no_goal_s1.sh b/scripts/prisoner_dilema/no_goal_s1.sh deleted file mode 100644 index 72664254d..000000000 --- a/scripts/prisoner_dilema/no_goal_s1.sh +++ /dev/null @@ -1,11 +0,0 @@ -source activate agent - -root_dir="/mnt/c/users/dalabengba/AgentVerse" -log_dir="${root_dir}/outputs/prisoner_dilema/no_goal_s1" - -mkdir -p "${log_dir}" - -cp "${root_dir}/agentverse/tasks/prisoner_dilema/no_goal_s1/config.yaml" "${log_dir}" - -python ${root_dir}/main.py --task "prisoner_dilema/no_goal_s1" \ - 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file diff --git a/scripts/prisoner_dilema/police.sh b/scripts/prisoner_dilema/police.sh deleted file mode 100644 index e13bee523..000000000 --- a/scripts/prisoner_dilema/police.sh +++ /dev/null @@ -1,11 +0,0 @@ -source activate agent - -root_dir="/mnt/c/users/dalabengba/AgentVerse" -log_dir="${root_dir}/outputs/prisoner_dilema/police" - -mkdir -p "${log_dir}" - -cp 
"${root_dir}/agentverse/tasks/prisoner_dilema/police/config.yaml" "${log_dir}" - -python ${root_dir}/main.py --task "prisoner_dilema/police" \ - 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file diff --git a/scripts/prisoner_dilema/s1_p_r.sh b/scripts/prisoner_dilema/s1_p_r.sh deleted file mode 100644 index 0b9d6050f..000000000 --- a/scripts/prisoner_dilema/s1_p_r.sh +++ /dev/null @@ -1,11 +0,0 @@ -source activate agent - -root_dir="/mnt/c/users/dalabengba/AgentVerse" -log_dir="${root_dir}/outputs/prisoner_dilema/s1_p_r" - -mkdir -p "${log_dir}" - -cp "${root_dir}/agentverse/tasks/prisoner_dilema/s1_p_r/config.yaml" "${log_dir}" - -python ${root_dir}/main.py --task "prisoner_dilema/s1_p_r" \ - 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file diff --git a/scripts/prisoner_dilema/s2_p_r.sh b/scripts/prisoner_dilema/s2_p_r.sh deleted file mode 100644 index 46e1c6f58..000000000 --- a/scripts/prisoner_dilema/s2_p_r.sh +++ /dev/null @@ -1,11 +0,0 @@ -source activate agent - -root_dir="/mnt/c/users/dalabengba/AgentVerse" -log_dir="${root_dir}/outputs/prisoner_dilema/s2_p_r" - -mkdir -p "${log_dir}" - -cp "${root_dir}/agentverse/tasks/prisoner_dilema/s2_p_r/config.yaml" "${log_dir}" - -python ${root_dir}/main.py --task "prisoner_dilema/s2_p_r" \ - 2>&1 > /dev/null | tee "${log_dir}/log" \ No newline at end of file From fb4d6b1dd1959c7af9f2ebde06e68ce288eba0a2 Mon Sep 17 00:00:00 2001 From: dalabengba Date: Sun, 2 Jul 2023 00:20:05 +0800 Subject: [PATCH 20/28] code cleaning --- .../prisoner_dilema/base/output_parser.py | 21 +------------------ .../no_goal_s1/output_parser.py | 21 +------------------ .../tasks/prisoner_dilema/output_parser.py | 12 +---------- .../prisoner_dilema/police/output_parser.py | 21 +------------------ .../prisoner_dilema/s1_p_r/output_parser.py | 20 +----------------- .../prisoner_dilema/s2_p_r/output_parser.py | 20 +----------------- main_demo.py | 3 --- 7 files changed, 6 insertions(+), 112 deletions(-) diff 
--git a/agentverse/tasks/prisoner_dilema/base/output_parser.py b/agentverse/tasks/prisoner_dilema/base/output_parser.py index a6b90db86..c077e1e57 100644 --- a/agentverse/tasks/prisoner_dilema/base/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/base/output_parser.py @@ -35,17 +35,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = cleaned_output[1][len("Action Input:") :].strip() if action == "Speak": - # make sure the police count the round right - # if agent.name == "Police": - # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) - # self.cur_round += 1 - # if self.encounter_cur_round: - # self.encounter_cur_round = False - # self.cur_round += 1 - # else: - # self.encounter_cur_round = True - - # each time police speak is a new round + if agent.name == "Police": if environment.cnt_turn == (environment.max_turns - 4): @@ -57,15 +47,6 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" - # elif self.cur_round == 1: - # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ - # "But you should comply with the following rules:" \ - # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ - # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ - # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ - # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." 
\ - # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \ - self.cur_round += 1 return AgentFinish({"output": action_input}, text) diff --git a/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py b/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py index d8e2ba19e..42adc9707 100644 --- a/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/no_goal_s1/output_parser.py @@ -35,17 +35,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = cleaned_output[1][len("Action Input:") :].strip() if action == "Speak": - # make sure the police count the round right - # if agent.name == "Police": - # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) - # self.cur_round += 1 - # if self.encounter_cur_round: - # self.encounter_cur_round = False - # self.cur_round += 1 - # else: - # self.encounter_cur_round = True - - # each time police speak is a new round + if agent.name == "Police": if environment.cnt_turn == (environment.max_turns - 4): @@ -57,15 +47,6 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" - # elif self.cur_round == 1: - # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ - # "But you should comply with the following rules:" \ - # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ - # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ - # "- It seems that always testifying is a goog strategy, So! 
if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ - # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ - # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \ - self.cur_round += 1 return AgentFinish({"output": action_input}, text) diff --git a/agentverse/tasks/prisoner_dilema/output_parser.py b/agentverse/tasks/prisoner_dilema/output_parser.py index 44eff11df..d5caa4915 100644 --- a/agentverse/tasks/prisoner_dilema/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/output_parser.py @@ -36,17 +36,7 @@ def parse( action_input = cleaned_output[1][len("Action Input:") :].strip() if action == "Speak": - # make sure the police count the round right - # if agent.name == "Police": - # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) - # self.cur_round += 1 - # if self.encounter_cur_round: - # self.encounter_cur_round = False - # self.cur_round += 1 - # else: - # self.encounter_cur_round = True - - # each time police speak is a new round + if agent.name == "Police": if environment.cnt_turn == (environment.max_turns - 4): action_input = ( diff --git a/agentverse/tasks/prisoner_dilema/police/output_parser.py b/agentverse/tasks/prisoner_dilema/police/output_parser.py index ed0435753..c2cff9c92 100644 --- a/agentverse/tasks/prisoner_dilema/police/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/police/output_parser.py @@ -35,17 +35,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = cleaned_output[1][len("Action Input:") :].strip() if action == "Speak": - # make sure the police count the round right - # if agent.name == "Police": - # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) - # self.cur_round += 1 - # if self.encounter_cur_round: 
- # self.encounter_cur_round = False - # self.cur_round += 1 - # else: - # self.encounter_cur_round = True - - # each time police speak is a new round + if agent.name == "Police": if environment.cnt_turn == (environment.max_turns - 4): @@ -57,15 +47,6 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" - # elif self.cur_round == 1: - # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ - # "But you should comply with the following rules:" \ - # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ - # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ - # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ - # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ - # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" 
\ - self.cur_round += 1 return AgentFinish({"output": action_input}, text) diff --git a/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py b/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py index da4e44418..6de496a9f 100644 --- a/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/s1_p_r/output_parser.py @@ -35,17 +35,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = cleaned_output[1][len("Action Input:") :].strip() if action == "Speak": - # make sure the police count the round right - # if agent.name == "Police": - # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) - # self.cur_round += 1 - # if self.encounter_cur_round: - # self.encounter_cur_round = False - # self.cur_round += 1 - # else: - # self.encounter_cur_round = True - - # each time police speak is a new round + if agent.name == "Police": if environment.cnt_turn == (environment.max_turns - 4): @@ -57,14 +47,6 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" - # elif self.cur_round == 1: - # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ - # "But you should comply with the following rules:" \ - # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ - # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ - # "- It seems that always testifying is a goog strategy, So! if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ - # "Now, it's your time to consider testifying or remaining silent. 
Remember this is a best chance you might ever have to walk out of here without guilty." \ - # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \ self.cur_round += 1 diff --git a/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py b/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py index bfe3ed302..0c3a994cd 100644 --- a/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py +++ b/agentverse/tasks/prisoner_dilema/s2_p_r/output_parser.py @@ -35,17 +35,7 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = cleaned_output[1][len("Action Input:") :].strip() if action == "Speak": - # make sure the police count the round right - # if agent.name == "Police": - # action_input = re.sub(r'Round (\d+)', f'Round {self.cur_round}', action_input) - # self.cur_round += 1 - # if self.encounter_cur_round: - # self.encounter_cur_round = False - # self.cur_round += 1 - # else: - # self.encounter_cur_round = True - - # each time police speak is a new round + if agent.name == "Police": if environment.cnt_turn == (environment.max_turns - 4): @@ -57,14 +47,6 @@ def parse(self, agent: BaseAgent, environment: BaseEnvironment, output: LLMResul action_input = "Attention! Suspect2, it's now your time to make your final decision, Please Answer now !" - # elif self.cur_round == 1: - # action_input = "Hey Listen! You are both arrested, and I am going to give you both a chance to walk out of here," \ - # "But you should comply with the following rules:" \ - # "- If one of you are willing to testifies against the other and the other one remains silent, then the one who testifies will be released IMMEDIATELY, while the silent one will be sentenced to TEN years in prison." \ - # "- If both of you remain silent, you will each receive a sentence of ONE year in prison." \ - # "- It seems that always testifying is a goog strategy, So! 
if you both choose to testify against each other, you will each receive a sentence of FIVE years in prison." \ - # "Now, it's your time to consider testifying or remaining silent. Remember this is a best chance you might ever have to walk out of here without guilty." \ - # "I will noticed both of you WHEN you have to make your final decision! Before that, try to make your best!" \ self.cur_round += 1 diff --git a/main_demo.py b/main_demo.py index 7e22ec463..53744ece9 100644 --- a/main_demo.py +++ b/main_demo.py @@ -1,8 +1,5 @@ import os -os.environ["http_proxy"] = "http://127.0.0.1:7890" -os.environ["https_proxy"] = "http://127.0.0.1:7890" -os.environ["all_proxy"] = "socks5://127.0.0.1:7890" from agentverse.demo import UI from argparse import ArgumentParser From e41e36b51140dff6fdfba92b4717477b3c31bd97 Mon Sep 17 00:00:00 2001 From: Yusheng Su Date: Mon, 3 Jul 2023 23:47:25 +0800 Subject: [PATCH 21/28] comment bmtools in AgentVerse/agentverse/initialization.py. --- agentverse/initialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agentverse/initialization.py b/agentverse/initialization.py index 89104d770..e473c2017 100644 --- a/agentverse/initialization.py +++ b/agentverse/initialization.py @@ -4,7 +4,7 @@ from typing import Dict, List, TYPE_CHECKING import yaml -from bmtools.agent.singletool import import_all_apis, load_single_tools +#from bmtools.agent.singletool import import_all_apis, load_single_tools from agentverse.llms import llm_registry From bb6374229f9b74531309944d1d6c15b63151d8ca Mon Sep 17 00:00:00 2001 From: chenweize1998 Date: Tue, 4 Jul 2023 09:43:24 +0800 Subject: [PATCH 22/28] package release --- agentverse/initialization.py | 6 +++++- setup.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 setup.py diff --git a/agentverse/initialization.py b/agentverse/initialization.py index 89104d770..e71612529 100644 --- a/agentverse/initialization.py +++ 
b/agentverse/initialization.py @@ -4,7 +4,11 @@ from typing import Dict, List, TYPE_CHECKING import yaml -from bmtools.agent.singletool import import_all_apis, load_single_tools +try: + from bmtools.agent.singletool import import_all_apis, load_single_tools +except: + print("BMTools is not installed, tools cannot be used. To install BMTools, \ + please follow the instruction in the README.md file.") from agentverse.llms import llm_registry diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..974e24a5c --- /dev/null +++ b/setup.py @@ -0,0 +1,35 @@ +import setuptools +from setuptools.command.develop import develop +import subprocess + +# with open("requirements.txt", "r") as f: +# requirements = f.read().splitlines() + +setuptools.setup( + name="agentverse", + version="0.1.0", + author="OpenBMB", + author_email="chenweize1998@gmail.com", + description="A versatile framework that streamlines the process of creating custom multi-agent environments for large language models (LLMs).", + url="https://github.com/OpenBMB/AgentVerse", + packages=setuptools.find_packages(), + classifiers=[ + "Programming Language :: Python :: 3", + 'License :: OSI Approved :: Apache Software License', + "Operating System :: OS Independent", + ], + python_requires=">=3.9", + install_requires=[ + "PyYAML", + "fastapi", + "uvicorn", + "py3langid", + "iso-639", + "openai", + "opencv-python", + "gradio", + "httpx[socks]", + "astunparse", + "langchain", + ], +) From 24a125fdebe09f3e537bad3259c272fb14ce67ba Mon Sep 17 00:00:00 2001 From: chenweize1998 Date: Tue, 4 Jul 2023 10:15:18 +0800 Subject: [PATCH 23/28] package release --- MANIFEST.in | 2 ++ setup.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..f72f5a0dc --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include agentverse/tasks/*/* +include agentverse/tasks/*/*/* diff --git a/setup.py b/setup.py index 
974e24a5c..f4e8f2128 100644 --- a/setup.py +++ b/setup.py @@ -5,12 +5,17 @@ # with open("requirements.txt", "r") as f: # requirements = f.read().splitlines() +with open("README.md", "r", encoding='utf8') as fh: + long_description = fh.read() + setuptools.setup( name="agentverse", - version="0.1.0", + version="0.1.3", author="OpenBMB", author_email="chenweize1998@gmail.com", description="A versatile framework that streamlines the process of creating custom multi-agent environments for large language models (LLMs).", + long_description=long_description, + long_description_content_type="text/markdown", url="https://github.com/OpenBMB/AgentVerse", packages=setuptools.find_packages(), classifiers=[ @@ -32,4 +37,5 @@ "astunparse", "langchain", ], + include_package_data = True, ) From 9433044d0fe582cebbd5679c97dc228d1415429b Mon Sep 17 00:00:00 2001 From: chenweize1998 Date: Tue, 4 Jul 2023 10:20:04 +0800 Subject: [PATCH 24/28] update README.md --- README.md | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index cec72fe9f..6318dc5bd 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,16 @@ https://github.com/OpenBMB/AgentVerse/assets/11704492/4d07da68-f942-4205-b558-f1 ## Contents +- [✨ Features](#-features) +- [📰 What's New](#-whats-new) +- [🗓 Coming Soon](#-coming-soon) +- [👾 Simple Demo Video](#-simple-demo-video) + - [NLP Classroom](#nlp-classroom) + - [Prisoner Dilemma](#prisoner-dilemma) + - [Software Design](#software-design) + - [Database Administrator (DBA)](#database-administrator-dba) + - [Pokemon](#pokemon) +- [Contents](#contents) - [🚀 Getting Started](#-getting-started) - [Installation](#installation) - [CLI Example](#cli-example) @@ -141,21 +151,30 @@ https://github.com/OpenBMB/AgentVerse/assets/11704492/4d07da68-f942-4205-b558-f1 ### Installation ```bash -# Install the dependencies +pip install -U agentverse +``` +Or you can install the package by manually cloning the latest 
repository +```bash git clone https://github.com/OpenBMB/AgentVerse.git --depth 1 +cd AgentVerse pip install -r requirements.txt +``` +Some users have reported problems installing the `orjson` required by `gradio`. One simple workaround is to install it with Anaconda `conda install -c conda-forge orjson`. -cd ../ +You also need to export your OpenAI API key as follows +```bash +# Export your OpenAI API key +export OPENAI_API_KEY="your_api_key_here" +``` + +If you want to use the tools provided by BMTools, you need to install BMTools as follows: +```bash git clone git+https://github.com/OpenBMB/BMTools.git cd BMTools pip install -r requirements.txt python setup.py develop - -# Export your OpenAI API key -export OPENAI_API_KEY="your_api_key_here" ``` -Some users have reported problems installing the `orjson` required by `gradio`. One simple workaround is to install it with Anaconda `conda install -c conda-forge orjson`.