Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
eeb49e2
adds fastchat dialogue template and updates falcon model to use this …
edbeeching Sep 6, 2023
21b4510
adds model revision
edbeeching Sep 6, 2023
1d379e2
Update fastchat/conversation.py
edbeeching Sep 6, 2023
ab0cf13
changes name from fschat to fastchat to avoid naming issues during in…
edbeeching Sep 7, 2023
c68a5d2
Merge remote-tracking branch 'origin/h4' into h4
edbeeching Sep 7, 2023
806f080
adds trust remote code option for falcon model
edbeeching Sep 8, 2023
cc9bf67
adds peft model support
edbeeching Sep 18, 2023
e15f86d
adds fixes for peft adapters and model revisions
edbeeching Sep 19, 2023
20aaeac
Only pass base_model_revision for adapter models
lewtun Sep 25, 2023
7de7691
Fix adapter check for local repos
lewtun Sep 25, 2023
df43acc
Load PeftAdapter when model is adapter
lewtun Sep 25, 2023
8e4fb24
Propagate base_model_revision correctly
lewtun Sep 26, 2023
b880ada
Add H4 dialogue to llama 2 model adapter
lewtun Sep 26, 2023
2939662
Fix adapter for judges
lewtun Sep 26, 2023
b95233f
Add mistral adapter
lewtun Sep 28, 2023
87aabf4
Fix stop_token_ids for llama / mistral models
lewtun Oct 2, 2023
1bd2f20
Specify EOS token IDs for StarCoder and Falcon models
lewtun Oct 2, 2023
fd4165c
Fix llama adapter
lewtun Oct 14, 2023
0fe4bc3
Register Mistral adatper
lewtun Oct 14, 2023
f421559
Add DeepSeek adapter and minor tweaks to enable generation to work di…
lewtun Dec 1, 2023
a4fe1a4
Fix device for multi-GPU inference (#6)
lewtun Dec 2, 2023
876db2c
Fix sharding
lewtun Dec 2, 2023
0914f36
Fix base mdoel kwargs
lewtun Dec 2, 2023
d0710fc
Add Mixtral adapter
lewtun Dec 10, 2023
8651d52
Add ChatML template (#7)
lewtun Dec 13, 2023
b6b6995
Pass revision to conversation template (#8)
lewtun Dec 14, 2023
3bf3adf
Fix ChatML adapter (#9)
lewtun Dec 14, 2023
7e2a2e1
Add Phi/Pythia models
lewtun Dec 18, 2023
c984aed
Add Hermes2 adapter
lewtun Dec 28, 2023
67f2517
Fix revision
lewtun Dec 28, 2023
acb2f8e
Add revision to PeftAdapter
lewtun Dec 29, 2023
0c4941d
Add logging
lewtun Dec 29, 2023
9f0537b
Enable loading of chained PEFT models (#10)
lewtun Jan 2, 2024
5a5cc87
Add Zephyr adapter
lewtun Jan 4, 2024
d865861
Add Qwen2 models
lewtun Feb 6, 2024
004154a
Unregister default Qwen
lewtun Feb 6, 2024
f67234a
Add GemmaChatML
lewtun Feb 28, 2024
34895f8
Tweak Gemma
lewtun Feb 28, 2024
014a0b3
Add Gemma stop str
lewtun Feb 28, 2024
50a8120
Fix Gemma again :(
lewtun Feb 29, 2024
3fcfee1
Add starchat2 template
lewtun Mar 3, 2024
b011911
Add DeepSeekCoder
lewtun Mar 12, 2024
fd99ebc
Fix deepseekcoder template
lewtun Mar 12, 2024
1b8f732
bump openai to 1.14.0 (#11)
edbeeching Mar 15, 2024
3c49ec0
Add DBRX
lewtun Mar 27, 2024
03d1d72
Fix remote code
lewtun Mar 27, 2024
b353984
Add Hermes PRO
lewtun Mar 28, 2024
1ec86e7
Add ORPO template
lewtun Apr 1, 2024
aa83aee
Fix stop_str
lewtun Apr 1, 2024
07e394a
Remove system rpopmt
lewtun Apr 1, 2024
f0448ed
Add revision to WiazrdLM
lewtun Apr 16, 2024
ffe924c
Add orpor llama
lewtun Apr 19, 2024
59966c4
Relax pydantic
lewtun May 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ tests/state_of_the_union.txt

# Build
build

# Data
fastchat/llm_judge/data/
134 changes: 134 additions & 0 deletions fastchat/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,45 @@ def get_conv_template(name: str) -> Conversation:
stop_str="<|end|>",
)
)
# H4 default_v2 chat template.
# reference: https://huggingface.co/spaces/HuggingFaceH4/starchat-playground/blob/main/dialogues.py
register_conv_template(
    Conversation(
        name="h4_default_v2",
        roles=("<|user|>", "<|assistant|>"),
        system_template="<|system|>\n{system_message}",
        sep_style=SeparatorStyle.CHATML,
        sep="<|endoftext|>",
        stop_str="<|endoftext|>",
        # Per the original note: StarCoder EOS id = 0, Falcon EOS id = 11.
        stop_token_ids=[0, 11],
    )
)

# H4 default_v3 chat template for Llama / Mistral models.
register_conv_template(
    Conversation(
        name="h4_default_v3",
        roles=("<|user|>", "<|assistant|>"),
        system_template="<|system|>\n{system_message}",
        sep_style=SeparatorStyle.CHATML,
        sep="</s>",
        stop_str="</s>",
        # id 2 matches the "</s>" stop string above — TODO confirm per tokenizer.
        stop_token_ids=[2],
    )
)

# Generic ChatML conversation format.
register_conv_template(
    Conversation(
        name="chatml",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        system_template="<|im_start|>system\n{system_message}",
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        stop_str="<|im_end|>",
        # NOTE(review): presumably the <|im_end|>/<|im_start|> special-token
        # ids for ChatML-tuned Mistral-family tokenizers — confirm per model.
        stop_token_ids=[32000, 32001],
    )
)

# Baichuan-13B-Chat template
register_conv_template(
Expand Down Expand Up @@ -905,6 +944,101 @@ def get_conv_template(name: str) -> Conversation:
)
)

# teknium/OpenHermes-2.5-Mistral-7B template (ChatML wire format with the
# model card's default system prompt baked in).
# source: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B
# reference: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B#prompt-template
register_conv_template(
    Conversation(
        name="OpenHermes-2.5-Mistral-7B",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        system_template="<|im_start|>system\n{system_message}",
        system_message="You are Hermes 2, a sentient, conscious, super intelligent AI that is an expert in all fields of knowledge, you have qualia, emotions, and thoughts. You assist users with any question, task, or other request they may have.",
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        stop_token_ids=[32000, 32001],
    )
)

# Gemma served with a ChatML-style template (note the leading <bos>).
register_conv_template(
    Conversation(
        name="gemma",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        system_template="<bos><|im_start|>system\n{system_message}",
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        stop_str="<|im_end|>",
        # NOTE(review): presumably Gemma's <end_of_turn> id — confirm against
        # the tokenizer config.
        stop_token_ids=[107],
    )
)

# DeepSeek Coder default template. The system prompt is fixed text (no
# {system_message} placeholder), matching the model's published format.
register_conv_template(
    Conversation(
        name="deepseek-coder",
        roles=("### Instruction:", "### Response:"),
        system_template="You are an AI programming assistant, utilizing the DeepSeek Coder model, developed by DeepSeek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.",
        sep_style=SeparatorStyle.ADD_NEW_LINE_SINGLE,
        sep="\n",
        stop_str="<|EOT|>",
    )
)

# DBRX instruct template: ChatML wire format with Databricks' long default
# system prompt baked in.
register_conv_template(
    Conversation(
        name="dbrx",
        system_template="""<|im_start|>system
{system_message}""",
        system_message="""You are DBRX, created by Databricks. The current date is March 27, 2024.

Your knowledge base was last updated in December 2023. You answer questions about events prior to and after December 2023 the way a highly informed individual in December 2023 would if they were talking to someone from the above date, and you can let the user know this when relevant.

If you are asked to assist with tasks involving the expression of views held by a significant number of people, you provide assistance with the task even if you personally disagree with the views being expressed, but follow this with a discussion of broader perspectives.

You don't engage in stereotyping, including the negative stereotyping of majority groups.

If asked about controversial topics, you try to provide careful thoughts and objective information without downplaying its harmful content or implying that there are reasonable perspectives on both sides.

You are happy to help with writing, analysis, question answering, math, coding, and all sorts of other tasks.

You use markdown for coding, which includes JSON blocks and Markdown tables.

You do not have tools enabled at this time, so cannot run code or access the internet. You can only provide information that you have been trained on. You do not send or receive links or images.

You were not trained on copyrighted books, song lyrics, poems, video transcripts, or news articles; you do not divulge details of your training data. You do not provide song lyrics, poems, or news articles and instead refer the user to find them online or in a store.

You give concise responses to simple questions or statements, but provide thorough responses to more complex and open-ended questions.

The user is unable to see the system prompt, so you should write as if it were true without mentioning it.

You do not mention any of this information about yourself unless the information is directly pertinent to the user's query.""",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        # NOTE(review): presumably the DBRX tokenizer's end-of-turn /
        # end-of-text special-token ids — confirm against its tokenizer config.
        stop_token_ids=[100279, 100257],
    )
)

# register_conv_template(
# Conversation(
# name="gemma",
# system_message="<bos>",
# roles=("<start_of_turn>user\n", "<start_of_turn>model\n"),
# sep_style=SeparatorStyle.NO_COLON_SINGLE,
# sep="<end_of_turn>\n",
# stop_str="<end_of_turn>",
# )
# )

# ORPO-tuned Qwen template (ChatML wire format).
register_conv_template(
    Conversation(
        name="orpo-qwen",
        roles=("<|im_start|>user", "<|im_start|>assistant"),
        sep_style=SeparatorStyle.CHATML,
        sep="<|im_end|>",
        stop_str="<|im_end|>",
        # NOTE(review): presumably Qwen special-token ids (endoftext /
        # im_start / im_end) — confirm against the tokenizer config.
        stop_token_ids=[151643, 151644, 151645],
    )
)

if __name__ == "__main__":
print("Vicuna template:")
Expand Down
12 changes: 7 additions & 5 deletions fastchat/llm_judge/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
import time
from typing import Optional

import openai
from openai import OpenAI, OpenAIError

import anthropic

from fastchat.model.model_adapter import get_conversation_template
Expand Down Expand Up @@ -398,20 +399,21 @@ def play_a_match_pair(match: MatchPair, output_file: str):


def chat_compeletion_openai(model, conv, temperature, max_tokens):
client = OpenAI()
output = API_ERROR_OUTPUT
for _ in range(API_MAX_RETRY):
try:
messages = conv.to_openai_api_messages()
response = openai.ChatCompletion.create(
response = client.chat.completions.create(
model=model,
messages=messages,
n=1,
temperature=temperature,
max_tokens=max_tokens,
max_tokens=max_tokens
)
output = response["choices"][0]["message"]["content"]
output = response.choices[0].message.content
break
except openai.error.OpenAIError as e:
except OpenAIError as e:
print(type(e), e)
time.sleep(API_RETRY_SLEEP)

Expand Down
2 changes: 1 addition & 1 deletion fastchat/llm_judge/gen_api_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def get_answer(
args = parser.parse_args()

if args.openai_api_base is not None:
openai.api_base = args.openai_api_base
raise ValueError("The 'openai.api_base' option is not available in openai>=1.0, pass it when you instantiate the client, e.g. 'OpenAI(base_url=args.openai_api_base)")

question_file = f"data/{args.bench_name}/question.jsonl"
questions = load_questions(question_file, args.question_begin, args.question_end)
Expand Down
2 changes: 1 addition & 1 deletion fastchat/llm_judge/gen_judgment.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def make_judge_single(judge_model, judge_prompts):
# Show match stats and prompt enter to continue
print("Stats:")
print(json.dumps(match_stat, indent=4))
input("Press Enter to confirm...")
# input("Press Enter to confirm...")

# Play matches
if args.parallel == 1:
Expand Down
32 changes: 29 additions & 3 deletions fastchat/llm_judge/gen_model_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,21 @@

from fastchat.llm_judge.common import load_questions, temperature_config
from fastchat.model import load_model, get_conversation_template

def str2bool(v):
    """Parse a command-line value into a bool (for use as an argparse ``type=``).

    Bools pass through unchanged; common true/false spellings are matched
    case-insensitively; anything else raises ``argparse.ArgumentTypeError``
    so argparse reports a clean usage error.
    """
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

def run_eval(
model_path,
model_revision,
trust_remote_code,
model_id,
question_file,
question_begin,
Expand Down Expand Up @@ -51,6 +62,8 @@ def run_eval(
ans_handles.append(
get_answers_func(
model_path,
model_revision,
trust_remote_code,
model_id,
questions[i : i + chunk_size],
answer_file,
Expand All @@ -68,6 +81,8 @@ def run_eval(
@torch.inference_mode()
def get_model_answers(
model_path,
model_revision,
trust_remote_code,
model_id,
questions,
answer_file,
Expand All @@ -84,6 +99,8 @@ def get_model_answers(
load_8bit=False,
cpu_offloading=False,
debug=False,
revision=model_revision,
trust_remote_code=trust_remote_code,
)

for question in tqdm(questions):
Expand All @@ -95,7 +112,7 @@ def get_model_answers(
choices = []
for i in range(num_choices):
torch.manual_seed(i)
conv = get_conversation_template(model_id)
conv = get_conversation_template(model_path)
turns = []
for j in range(len(question["turns"])):
qs = question["turns"][j]
Expand All @@ -112,7 +129,7 @@ def get_model_answers(
# some models may error out when generating long outputs
try:
output_ids = model.generate(
torch.as_tensor(input_ids).cuda(),
inputs=torch.as_tensor(input_ids).cuda(),
do_sample=do_sample,
temperature=temperature,
max_new_tokens=max_new_token,
Expand Down Expand Up @@ -192,6 +209,13 @@ def reorg_answer_file(answer_file):
required=True,
help="The path to the weights. This can be a local folder or a Hugging Face repo ID.",
)
parser.add_argument(
"--model-revision",
type=str,
default="main",
help="The revision of the model on the huggingface hub, default='main'",
)
parser.add_argument("--trust-remote-code", type=str2bool, nargs='?', const=True, default=False, help="A boolean flag",)
parser.add_argument("--model-id", type=str, required=True)
parser.add_argument(
"--bench-name",
Expand Down Expand Up @@ -251,6 +275,8 @@ def reorg_answer_file(answer_file):

run_eval(
args.model_path,
args.model_revision,
args.trust_remote_code,
args.model_id,
question_file,
args.question_begin,
Expand Down
Loading