From d377c190c56f16af4f2e782514f241942a3844b8 Mon Sep 17 00:00:00 2001 From: cheeswafer Date: Wed, 18 Oct 2023 12:07:05 +0800 Subject: [PATCH 1/3] support local LLMs --- README.md | 2 +- agentverse/llms/openai.py | 8 +- .../commongen/llama-2-7b-chat-hf/config.yaml | 197 ++++++++++++++++++ dataloader/commongen.py | 1 + requirements.txt | 3 +- server/run.sh | 9 + 6 files changed, 217 insertions(+), 3 deletions(-) create mode 100644 agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml create mode 100644 server/run.sh diff --git a/README.md b/README.md index 4e448d6cb..892f3cf27 100644 --- a/README.md +++ b/README.md @@ -377,7 +377,7 @@ While we provide a basic framework for building environments with our five rule 1. **Customize the five rule components**. Each rule component has an interface, allowing you to customize its behavior to suit your specific needs. It's important to note that these components are not necessarily independent and can interact through the `rule_params` dictionary in the environment. You can create your own rule components and integrate them with the existing ones to build more complex interactions between agents. 2. **Customize the environment itself**. Our `basic` environment provides a default execution order for the five rule components that is suitable for most cases, but you can inherit the `BaseEnvironment` class and write your own `run` method to implement a more sophisticated execution order. 3. **Customize the agent**. Depending on your specific use case, you may also need to inherit the `BaseAgent` class. For example, you may want to use your local LLM as your agents or create agents with specialized knowledge or skills. - +4. **Using local LLMs as agents**. First, start the server by `bash server/run.sh` with your own `MODEL_PATH` and `MODEL_NAMES`. Then, write the configurations with `llm_type` set as `MODEL_NAMES`. (e.g. `agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml`). 
Then, register `dataloader` (`dataloader/commongen.py`) and `model` (`agentverse/llms/openai.py`). ## 🔎 Examples diff --git a/agentverse/llms/openai.py b/agentverse/llms/openai.py index 3b7409cf2..08547c300 100644 --- a/agentverse/llms/openai.py +++ b/agentverse/llms/openai.py @@ -92,10 +92,12 @@ class OpenAIChatArgs(BaseModelArgs): # total_tokens=response["usage"]["total_tokens"], # ) - +# To support your own local LLMs, register it here and add it into LOCAL_LLMS. +LOCAL_LLMS = ['llama-2-7b-chat-hf'] @llm_registry.register("gpt-35-turbo") @llm_registry.register("gpt-3.5-turbo") @llm_registry.register("gpt-4") +@llm_registry.register("llama-2-7b-chat-hf") class OpenAIChat(BaseChatModel): args: OpenAIChatArgs = Field(default_factory=OpenAIChatArgs) @@ -109,6 +111,8 @@ def __init__(self, max_retry: int = 3, **kwargs): args[k] = kwargs.pop(k, v) if len(kwargs) > 0: logging.warning(f"Unused arguments: {kwargs}") + if args['model'] in LOCAL_LLMS: + openai.api_base = "http://localhost:5000/v1" super().__init__(args=args, max_retry=max_retry) # def _construct_messages(self, history: List[Message]): @@ -301,6 +305,7 @@ def get_spend(self) -> int: "gpt-4": 0.03, "gpt-4-0613": 0.03, "gpt-4-32k": 0.06, + "llama-2-7b-chat-hf": 0.0, } output_cost_map = { @@ -311,6 +316,7 @@ def get_spend(self) -> int: "gpt-4": 0.06, "gpt-4-0613": 0.06, "gpt-4-32k": 0.12, + "llama-2-7b-chat-hf": 0.0, } model = self.args.model diff --git a/agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml b/agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml new file mode 100644 index 000000000..8514b1004 --- /dev/null +++ b/agentverse/tasks/tasksolving/commongen/llama-2-7b-chat-hf/config.yaml @@ -0,0 +1,197 @@ +cnt_agents: &cnt_agents 2 +max_turn: &max_turn 3 +max_inner_turns: &max_inner_turns 3 + +prompts: + role_assigner_prepend_prompt: &role_assigner_prepend_prompt |- + + role_assigner_append_prompt: &role_assigner_append_prompt |- + # Role Description + You are the 
leader of a group of experts, now you need to recruit a small group of experts with diverse identities to generate coherent and grammatically correct sentences containing the following given words: + ${task_description} + + You can recruit ${cnt_critic_agents} experts in different fields. What experts will you recruit? + + # Response Format Guidance + You should respond with a list of expert descriptions. For example: + 1. an electrical engineer specialized in the field of xxx. + 2. an economist who is good at xxx. + 3. a lawyer with a good knowledge of xxx. + ... + + Only respond with the description of each role. Do not include your reason. + + solver_prepend_prompt: &solver_prepend_prompt |- + You are ${role_description}. Generate a coherent and grammatically correct paragraph containing the following given words (or their variations): + WORDS: + ${task_description} + + solver_append_prompt: &solver_append_prompt |- + + critic_prepend_prompt: &critic_prepend_prompt |- + You are in a discussion group, aiming to generate coherent and grammatically correct sentences containing the following given words (or their variations): + WORDS: + ${task_description} + + Below is the chat history in your group. + + critic_append_prompt: &critic_append_prompt |- + You are ${role_description}. Based on your knowledge, can you check whether the latest provided paragraph contains all the given words or their variations? When responding, you should follow the following rules: + 1. If the above latest provided solution has covered all the given words or their variations, end your response with a special token "[Agree]". + 2. If not, double-check the above solutions, give your critics, and generate a better solution. 
+ + manager_prompt: &manager_prompt |- + + executor_prepend_prompt: &executor_prepend_prompt |- + + executor_append_prompt: &executor_append_prompt |- + + evaluator_prepend_prompt: &evaluator_prepend_prompt |- + + evaluator_append_prompt: &evaluator_append_prompt |- + You are a reviewer who checks whether a paragraph contains all the given words (including their variations). When some words are missing, you should patiently point out, and output a score of 0. When the paragraph contains all the words, you should output a score of 1. + + WORDS: + ${task_description} + + SOLUTION: + ``` + ${solution} + ``` + + TEST RESULT: + ${result} + + RESPONSE FORMAT: + You must respond in the following format: + Score: (0 or 1. 0 if there are some missing words, 1 if there is no missing words) + Advice: (point out all the missing words) + + +name: pipeline + + +environment: + env_type: task-basic + max_turn: *max_turn + rule: + role_assigner: + type: role_description + cnt_agents: *cnt_agents + decision_maker: + type: vertical-solver-first + max_inner_turns: *max_inner_turns + executor: + type: coverage-test + evaluator: + type: basic + +agents: + - #role_assigner_agent: + agent_type: role_assigner + name: role assigner + max_retry: 1000 + prepend_prompt_template: *role_assigner_prepend_prompt + append_prompt_template: *role_assigner_append_prompt + memory: + memory_type: chat_history + llm: + llm_type: llama-2-7b-chat-hf + model: "llama-2-7b-chat-hf" + temperature: 0 + max_tokens: 512 + output_parser: + type: role_assigner + + - #solver_agent: + agent_type: solver + name: Planner + max_retry: 1000 + max_history: 4 + prepend_prompt_template: *solver_prepend_prompt + append_prompt_template: *solver_append_prompt + memory: + memory_type: chat_history + llm: + llm_type: llama-2-7b-chat-hf + model: "llama-2-7b-chat-hf" + temperature: 0 + max_tokens: 1024 + output_parser: + type: commongen + # max_tokens: 1024 + # stop: + # - "\ndef " + # - "\nclass " + # - "\nif " + # - "\n\n#" + + 
- #critic_agents: + agent_type: critic + name: Critic 1 + max_retry: 1000 + max_history: 4 + role_description: |- + Waiting to be assigned. + prepend_prompt_template: *critic_prepend_prompt + append_prompt_template: *critic_append_prompt + memory: + memory_type: chat_history + llm: + llm_type: llama-2-7b-chat-hf + model: "llama-2-7b-chat-hf" + temperature: 0 + max_tokens: 1024 + output_parser: + type: mgsm-critic-agree + + - #executor_agent: + agent_type: executor + name: Executor + max_retry: 1000 + prepend_prompt_template: *executor_prepend_prompt + append_prompt_template: *executor_append_prompt + memory: + memory_type: chat_history + llm: + llm_type: llama-2-7b-chat-hf + model: llama-2-7b-chat-hf + temperature: 0 + max_tokens: 1024 + output_parser: + type: commongen + + - #evaluator_agent: + agent_type: evaluator + name: Evaluator + max_retry: 1000 + role_description: |- + Evaluator + prepend_prompt_template: *evaluator_prepend_prompt + append_prompt_template: *evaluator_append_prompt + memory: + memory_type: chat_history + llm: + llm_type: llama-2-7b-chat-hf + model: llama-2-7b-chat-hf + temperature: 0.3 + max_tokens: 1024 + output_parser: + type: humaneval-evaluator + dimensions: + - Score + + - #manager_agent: + agent_type: manager + name: Manager + max_retry: 1000 + prompt_template: *manager_prompt + memory: + memory_type: chat_history + llm: + llm_type: llama-2-7b-chat-hf + model: "llama-2-7b-chat-hf" + temperature: 0 + max_tokens: 1024 + output_parser: + type: humaneval-manager \ No newline at end of file diff --git a/dataloader/commongen.py b/dataloader/commongen.py index e7a5e75f9..6cb41385e 100644 --- a/dataloader/commongen.py +++ b/dataloader/commongen.py @@ -5,6 +5,7 @@ @dataloader_registry.register("tasksolving/commongen/gpt-4") @dataloader_registry.register("tasksolving/commongen/gpt-3.5") +@dataloader_registry.register("tasksolving/commongen/llama-2-7b-chat-hf") class CommongenLoader(DataLoader): def __init__(self, path: str): 
super().__init__(path) diff --git a/requirements.txt b/requirements.txt index dc4985600..c6b97ceec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ typing-inspect==0.8.0 colorlog rapidfuzz spacy -colorama==0.4.6 \ No newline at end of file +colorama==0.4.6 +fschat[model_worker,webui] \ No newline at end of file diff --git a/server/run.sh b/server/run.sh new file mode 100644 index 000000000..8c760200e --- /dev/null +++ b/server/run.sh @@ -0,0 +1,9 @@ +:< Date: Thu, 19 Oct 2023 10:26:45 +0800 Subject: [PATCH 2/3] remove personal info in run.sh --- server/run.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/server/run.sh b/server/run.sh index 8c760200e..0d16fb901 100644 --- a/server/run.sh +++ b/server/run.sh @@ -1,9 +1,11 @@ :< Date: Thu, 19 Oct 2023 10:30:34 +0800 Subject: [PATCH 3/3] move local model server script into scripts dir --- server/run.sh => scripts/run_local_model_server.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename server/run.sh => scripts/run_local_model_server.sh (100%) diff --git a/server/run.sh b/scripts/run_local_model_server.sh similarity index 100% rename from server/run.sh rename to scripts/run_local_model_server.sh