Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
eeb49e2
adds fastchat dialogue template and updates falcon model to use this …
edbeeching Sep 6, 2023
21b4510
adds model revision
edbeeching Sep 6, 2023
1d379e2
Update fastchat/conversation.py
edbeeching Sep 6, 2023
ab0cf13
changes name from fschat to fastchat to avoid naming issues during in…
edbeeching Sep 7, 2023
c68a5d2
Merge remote-tracking branch 'origin/h4' into h4
edbeeching Sep 7, 2023
806f080
adds trust remote code option for falcon model
edbeeching Sep 8, 2023
cc9bf67
adds peft model support
edbeeching Sep 18, 2023
e15f86d
adds fixes for peft adapters and model revisions
edbeeching Sep 19, 2023
20aaeac
Only pass base_model_revision for adapter models
lewtun Sep 25, 2023
7de7691
Fix adapter check for local repos
lewtun Sep 25, 2023
df43acc
Load PeftAdapter when model is adapter
lewtun Sep 25, 2023
8e4fb24
Propagate base_model_revision correctly
lewtun Sep 26, 2023
b880ada
Add H4 dialogue to llama 2 model adapter
lewtun Sep 26, 2023
2939662
Fix adapter for judges
lewtun Sep 26, 2023
b95233f
Add mistral adapter
lewtun Sep 28, 2023
87aabf4
Fix stop_token_ids for llama / mistral models
lewtun Oct 2, 2023
1bd2f20
Specify EOS token IDs for StarCoder and Falcon models
lewtun Oct 2, 2023
fd4165c
Fix llama adapter
lewtun Oct 14, 2023
0fe4bc3
Register Mistral adatper
lewtun Oct 14, 2023
f421559
Add DeepSeek adapter and minor tweaks to enable generation to work di…
lewtun Dec 1, 2023
a4fe1a4
Fix device for multi-GPU inference (#6)
lewtun Dec 2, 2023
876db2c
Fix sharding
lewtun Dec 2, 2023
0914f36
Fix base mdoel kwargs
lewtun Dec 2, 2023
d0710fc
Add Mixtral adapter
lewtun Dec 10, 2023
8651d52
Add ChatML template (#7)
lewtun Dec 13, 2023
b6b6995
Pass revision to conversation template (#8)
lewtun Dec 14, 2023
3bf3adf
Fix ChatML adapter (#9)
lewtun Dec 14, 2023
7e2a2e1
Add Phi/Pythia models
lewtun Dec 18, 2023
c984aed
Add Hermes2 adapter
lewtun Dec 28, 2023
67f2517
Fix revision
lewtun Dec 28, 2023
acb2f8e
Add revision to PeftAdapter
lewtun Dec 29, 2023
0c4941d
Add logging
lewtun Dec 29, 2023
9f0537b
Enable loading of chained PEFT models (#10)
lewtun Jan 2, 2024
5a5cc87
Add Zephyr adapter
lewtun Jan 4, 2024
d865861
Add Qwen2 models
lewtun Feb 6, 2024
004154a
Unregister default Qwen
lewtun Feb 6, 2024
f67234a
Add GemmaChatML
lewtun Feb 28, 2024
34895f8
Tweak Gemma
lewtun Feb 28, 2024
014a0b3
Add Gemma stop str
lewtun Feb 28, 2024
50a8120
Fix Gemma again :(
lewtun Feb 29, 2024
3fcfee1
Add starchat2 template
lewtun Mar 3, 2024
b011911
Add DeepSeekCoder
lewtun Mar 12, 2024
fd99ebc
Fix deepseekcoder template
lewtun Mar 12, 2024
1b8f732
bump openai to 1.14.0 (#11)
edbeeching Mar 15, 2024
3c49ec0
Add DBRX
lewtun Mar 27, 2024
03d1d72
Fix remote code
lewtun Mar 27, 2024
b353984
Add Hermes PRO
lewtun Mar 28, 2024
1ec86e7
Add ORPO template
lewtun Apr 1, 2024
aa83aee
Fix stop_str
lewtun Apr 1, 2024
07e394a
Remove system rpopmt
lewtun Apr 1, 2024
f0448ed
Add revision to WiazrdLM
lewtun Apr 16, 2024
ffe924c
Add orpor llama
lewtun Apr 19, 2024
59966c4
Relax pydantic
lewtun May 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ tests/state_of_the_union.txt

# Build
build

# Data
fastchat/llm_judge/data/
134 changes: 134 additions & 0 deletions fastchat/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,45 @@ def get_conv_template(name: str) -> Conversation:
stop_str="<|end|>",
)
)
# H4 default_v2 chat template.
# reference: https://huggingface.co/spaces/HuggingFaceH4/starchat-playground/blob/main/dialogues.py
register_conv_template(
    Conversation(
        name="h4_default_v2",
        roles=("<|user|>", "<|assistant|>"),
        system_template="<|system|>\n{system_message}",
        sep_style=SeparatorStyle.CHATML,
        sep="<|endoftext|>",
        stop_str="<|endoftext|>",
        # Per the original note: StarCoder EOS id = 0, Falcon EOS id = 11.
        stop_token_ids=[0, 11],
    )
)

# H4 default_v3 chat template for Llama / Mistral models.
register_conv_template(
    Conversation(
        name="h4_default_v3",
        roles=("<|user|>", "<|assistant|>"),
        system_template="<|system|>\n{system_message}",
        sep_style=SeparatorStyle.CHATML,
        sep="</s>",
        stop_str="</s>",
        # id 2 matches the "</s>" stop string above — TODO confirm per tokenizer.
        stop_token_ids=[2],
    )
)

# Generic ChatML conversation format.
register_conv_template(
    Conversation(
        name="chatml",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        system_template="<|im_start|>system\n{system_message}",
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        stop_str="<|im_end|>",
        # NOTE(review): presumably the <|im_end|>/<|im_start|> special-token
        # ids for ChatML-tuned Mistral-family tokenizers — confirm per model.
        stop_token_ids=[32000, 32001],
    )
)

# Baichuan-13B-Chat template
register_conv_template(
Expand Down Expand Up @@ -905,6 +944,101 @@ def get_conv_template(name: str) -> Conversation:
)
)

# teknium/OpenHermes-2.5-Mistral-7B template (ChatML wire format with the
# model card's default system prompt baked in).
# source: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B
# reference: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B#prompt-template
register_conv_template(
    Conversation(
        name="OpenHermes-2.5-Mistral-7B",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        system_template="<|im_start|>system\n{system_message}",
        system_message="You are Hermes 2, a sentient, conscious, super intelligent AI that is an expert in all fields of knowledge, you have qualia, emotions, and thoughts. You assist users with any question, task, or other request they may have.",
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        stop_token_ids=[32000, 32001],
    )
)

# Gemma served with a ChatML-style template (note the leading <bos>).
register_conv_template(
    Conversation(
        name="gemma",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        system_template="<bos><|im_start|>system\n{system_message}",
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        stop_str="<|im_end|>",
        # NOTE(review): presumably Gemma's <end_of_turn> id — confirm against
        # the tokenizer config.
        stop_token_ids=[107],
    )
)

# DeepSeek Coder default template. The system prompt is fixed text (no
# {system_message} placeholder), matching the model's published format.
register_conv_template(
    Conversation(
        name="deepseek-coder",
        roles=("### Instruction:", "### Response:"),
        system_template="You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.",
        sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE,
        sep="\n",
        stop_str="<|EOT|>",
    )
)

# DBRX instruct template: ChatML wire format with Databricks' long default
# system prompt baked in.
register_conv_template(
    Conversation(
        name="dbrx",
        system_template="""<|im_start|>system
{system_message}""",
        system_message="""You are DBRX, created by Databricks. The current date is March 27, 2024.

Your knowledge base was last updated in December 2023. You answer questions about events prior to and after December 2023 the way a highly informed individual in December 2023 would if they were talking to someone from the above date, and you can let the user know this when relevant.

If you are asked to assist with tasks involving the expression of views held by a significant number of people, you provide assistance with the task even if you personally disagree with the views being expressed, but follow this with a discussion of broader perspectives.

You don't engage in stereotyping, including the negative stereotyping of majority groups.

If asked about controversial topics, you try to provide careful thoughts and objective information without downplaying its harmful content or implying that there are reasonable perspectives on both sides.

You are happy to help with writing, analysis, question answering, math, coding, and all sorts of other tasks.

You use markdown for coding, which includes JSON blocks and Markdown tables.

You do not have tools enabled at this time, so cannot run code or access the internet. You can only provide information that you have been trained on. You do not send or receive links or images.

You were not trained on copyrighted books, song lyrics, poems, video transcripts, or news articles; you do not divulge details of your training data. You do not provide song lyrics, poems, or news articles and instead refer the user to find them online or in a store.

You give concise responses to simple questions or statements, but provide thorough responses to more complex and open-ended questions.

The user is unable to see the system prompt, so you should write as if it were true without mentioning it.

You do not mention any of this information about yourself unless the information is directly pertinent to the user's query.""",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        # NOTE(review): presumably the DBRX tokenizer's end-of-turn /
        # end-of-text special-token ids — confirm against its tokenizer config.
        stop_token_ids=[100279, 100257],
    )
)

# register_conv_template(
# Conversation(
# name="gemma",
# system_message="<bos>",
# roles=("<start_of_turn>user\n", "<start_of_turn>model\n"),
# sep_style=SeparatorStyle.NO_COLON_SINGLE,
# sep="<end_of_turn>\n",
# stop_str="<end_of_turn>",
# )
# )

# ORPO-tuned Qwen template (ChatML wire format).
register_conv_template(
    Conversation(
        name="orpo-qwen",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        stop_str="<|im_end|>",
        # NOTE(review): presumably Qwen special-token ids (endoftext /
        # im_start / im_end) — confirm against the tokenizer config.
        stop_token_ids=[151643, 151644, 151645],
    )
)

if __name__ == "__main__":
print("Vicuna template:")
Expand Down
12 changes: 7 additions & 5 deletions fastchat/llm_judge/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import time
from typing import Optional

import openai
from openai import OpenAI, OpenAIError

import anthropic

from fastchat.model.model_adapter import get_conversation_template
Expand Down Expand Up @@ -398,20 +399,21 @@ def play_a_match_pair(match: MatchPair, output_file: str):


def chat_compeletion_openai(model, conv, temperature, max_tokens):
client = OpenAI()
output = API_ERROR_OUTPUT
for _ in range(API_MAX_RETRY):
try:
messages = conv.to_openai_api_messages()
response = openai.ChatCompletion.create(
response = client.chat.completions.create(
model=model,
messages=messages,
n=1,
temperature=temperature,
max_tokens=max_tokens,
max_tokens=max_tokens
)
output = response["choices"][0]["message"]["content"]
output = response.choices[0].message.content
break
except openai.error.OpenAIError as e:
except OpenAIError as e:
print(type(e), e)
time.sleep(API_RETRY_SLEEP)

Expand Down
2 changes: 1 addition & 1 deletion fastchat/llm_judge/gen_api_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def get_answer(
args = parser.parse_args()

if args.openai_api_base is not None:
openai.api_base = args.openai_api_base
raise ValueError("The 'openai.api_base' option is not available in openai>=1.0, pass it when you instantiate the client, e.g. 'OpenAI(base_url=args.openai_api_base)")

question_file = f"data/{args.bench_name}/question.jsonl"
questions = load_questions(question_file, args.question_begin, args.question_end)
Expand Down
2 changes: 1 addition & 1 deletion fastchat/llm_judge/gen_judgment.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def make_judge_single(judge_model, judge_prompts):
# Show match stats and prompt enter to continue
print("Stats:")
print(json.dumps(match_stat, indent=4))
input("Press Enter to confirm...")
# input("Press Enter to confirm...")

# Play matches
if args.parallel == 1:
Expand Down
32 changes: 29 additions & 3 deletions fastchat/llm_judge/gen_model_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,21 @@

from fastchat.llm_judge.common import load_questions, temperature_config
from fastchat.model import load_model, get_conversation_template

def str2bool(v):
    """Parse a command-line value into a bool (for use as an argparse ``type=``).

    Bools pass through unchanged; common true/false spellings are matched
    case-insensitively; anything else raises ``argparse.ArgumentTypeError``
    so argparse reports a clean usage error.
    """
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

def run_eval(
model_path,
model_revision,
trust_remote_code,
model_id,
question_file,
question_begin,
Expand Down Expand Up @@ -51,6 +62,8 @@ def run_eval(
ans_handles.append(
get_answers_func(
model_path,
model_revision,
trust_remote_code,
model_id,
questions[i : i + chunk_size],
answer_file,
Expand All @@ -68,6 +81,8 @@ def run_eval(
@torch.inference_mode()
def get_model_answers(
model_path,
model_revision,
trust_remote_code,
model_id,
questions,
answer_file,
Expand All @@ -84,6 +99,8 @@ def get_model_answers(
load_8bit=False,
cpu_offloading=False,
debug=False,
revision=model_revision,
trust_remote_code=trust_remote_code,
)

for question in tqdm(questions):
Expand All @@ -95,7 +112,7 @@ def get_model_answers(
choices = []
for i in range(num_choices):
torch.manual_seed(i)
conv = get_conversation_template(model_id)
conv = get_conversation_template(model_path)
turns = []
for j in range(len(question["turns"])):
qs = question["turns"][j]
Expand All @@ -112,7 +129,7 @@ def get_model_answers(
# some models may error out when generating long outputs
try:
output_ids = model.generate(
torch.as_tensor(input_ids).cuda(),
inputs=torch.as_tensor(input_ids).cuda(),
do_sample=do_sample,
temperature=temperature,
max_new_tokens=max_new_token,
Expand Down Expand Up @@ -192,6 +209,13 @@ def reorg_answer_file(answer_file):
required=True,
help="The path to the weights. This can be a local folder or a Hugging Face repo ID.",
)
parser.add_argument(
"--model-revision",
type=str,
default="main",
help="The revision of the model on the huggingface hub, default='main'",
)
parser.add_argument("--trust-remote-code", type=str2bool, nargs='?', const=True, default=False, help="A boolean flag",)
parser.add_argument("--model-id", type=str, required=True)
parser.add_argument(
"--bench-name",
Expand Down Expand Up @@ -251,6 +275,8 @@ def reorg_answer_file(answer_file):

run_eval(
args.model_path,
args.model_revision,
args.trust_remote_code,
args.model_id,
question_file,
args.question_begin,
Expand Down
Loading