From 0e17884c49f0d7de8ed0506deff2a100751d2b23 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 16:05:25 +0900 Subject: [PATCH 01/15] PoC: add chat template heuristics The fallback chat template adapter of Vicuna is not ideal in some cases (e.g. a test against a sub-portion of the BBC news classification task on Kaggle gave 82% accuracy with Vicuna and 88% with the official ChatML format for a q4_k_m Qwen 2.5 3B-Instruct gguf). This PR adds a simple proof-of-concept heuristic that looks at the chat template and upgrades the adapter when it can identify the format. --- koboldcpp.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/koboldcpp.py b/koboldcpp.py index 5793098c753..ee12d473cb3 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4648,6 +4648,26 @@ def main(launch_args,start_server=True): exitcounter = 999 exit_with_error(3,"Could not load text model: " + modelname) + if chatcompl_adapter is None: + # Try to derive chat completions adapter from chat template, now that we have the model loaded + ctbytes = handle.get_chat_template() + chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore") + if chat_template != "": + # "Better than nothing" simple heuristics + if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template: + print("Chat completion heuristic: ChatML (Qwen 2.5 based).") + # ChatML + chatcompl_adapter = { + "system_start": "<|im_start|>system\n\n", + "system_end": "<|im_end|>\n\n", + "user_start": "<|im_start|>user\n\n", + "user_end": "<|im_end|>\n\n", + "assistant_start": "<|im_start|>assistant\n\n", + "assistant_end": "<|im_end|>\n\n", + "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out + "tools_end": "\n\n\nFor each function call, return a json object with function name and arguments within XML 
tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n", + } + #handle loading image model if args.sdmodel and args.sdmodel!="": imgmodel = args.sdmodel From 7b7150fbf46cf0f4ff49817b3e56c689f717967b Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 16:19:12 +0900 Subject: [PATCH 02/15] gemma 2 heuristic --- koboldcpp.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/koboldcpp.py b/koboldcpp.py index ee12d473cb3..23bbd37d413 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4667,6 +4667,16 @@ def main(launch_args,start_server=True): "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out "tools_end": "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n", } + elif "System role not supported" in chat_template and "" in chat_template: + print("Chat completion heuristic: Google Gemma 2.") + # Google Gemma 2 + chatcompl_adapter = { + "user_start": "user\n", + "user_end": "\n", + "assistant_start": "model\n", + "assistant_end": "\n", + } + #handle loading image model if args.sdmodel and args.sdmodel!="": From 8c2f83a57818669b29c1010222c0ecabaf858953 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 16:45:53 +0900 Subject: [PATCH 03/15] Phi 4, Llama 3.x heuristics --- koboldcpp.py | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 23bbd37d413..bcd33464566 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4655,18 +4655,30 @@ def main(launch_args,start_server=True): if chat_template != "": # "Better than nothing" simple heuristics if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template: - print("Chat completion heuristic: ChatML (Qwen 
2.5 based).") - # ChatML - chatcompl_adapter = { - "system_start": "<|im_start|>system\n\n", - "system_end": "<|im_end|>\n\n", - "user_start": "<|im_start|>user\n\n", - "user_end": "<|im_end|>\n\n", - "assistant_start": "<|im_start|>assistant\n\n", - "assistant_end": "<|im_end|>\n\n", - "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out - "tools_end": "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n", - } + if "<|im_sep|>" in chat_template: + print("Chat completion heuristic: Phi 4") + # Phi 4 ChatML + chatcompl_adapter = { + "system_start": "<|im_start|>system<|im_sep|>", + "system_end": "<|im_end|>", + "user_start": "<|im_start|>user<|im_sep|>", + "user_end": "<|im_end|>", + "assistant_start": "<|im_start|>assistant<|im_sep|>", + "assistant_end": "<|im_end|>", + } + else: + print("Chat completion heuristic: ChatML (Qwen 2.5 based).") + # Qwen 2.5 ChatML + chatcompl_adapter = { + "system_start": "<|im_start|>system\n\n", + "system_end": "<|im_end|>\n\n", + "user_start": "<|im_start|>user\n\n", + "user_end": "<|im_end|>\n\n", + "assistant_start": "<|im_start|>assistant\n\n", + "assistant_end": "<|im_end|>\n\n", + "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out + "tools_end": "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n", + } elif "System role not supported" in chat_template and "" in chat_template: print("Chat completion heuristic: Google Gemma 2.") # Google Gemma 2 @@ -4676,6 +4688,18 @@ def 
main(launch_args,start_server=True): "assistant_start": "model\n", "assistant_end": "\n", } + elif "<|start_header_id|>system" in chat_template: + # Llama 3.x + print("Chat completion heuristic: Llama 3.x.") + chatcompl_adapter = { + "system_start": "<|start_header_id|>system<|end_header_id|>\n\n", + "system_end": "<|eot_id|>\n\n", + "user_start": "<|start_header_id|>user<|end_header_id|>\n\n", + "user_end": "<|eot_id|>\n\n", + "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n", + "assistant_end": "<|eot_id|>\n\n", + } + #handle loading image model From 203c4be9ef10d2c3f2605b729ffc94060de065c0 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 17:08:15 +0900 Subject: [PATCH 04/15] better qwen vs generic heuristic --- koboldcpp.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index bcd33464566..b1fc3f7efb6 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4656,7 +4656,7 @@ def main(launch_args,start_server=True): # "Better than nothing" simple heuristics if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template: if "<|im_sep|>" in chat_template: - print("Chat completion heuristic: Phi 4") + print("Chat completion heuristic: ChatML (Phi 4)") # Phi 4 ChatML chatcompl_adapter = { "system_start": "<|im_start|>system<|im_sep|>", @@ -4666,7 +4666,7 @@ def main(launch_args,start_server=True): "assistant_start": "<|im_start|>assistant<|im_sep|>", "assistant_end": "<|im_end|>", } - else: + elif "You are provided with function signatures within " in chat_template: print("Chat completion heuristic: ChatML (Qwen 2.5 based).") # Qwen 2.5 ChatML chatcompl_adapter = { @@ -4679,6 +4679,17 @@ def main(launch_args,start_server=True): "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split 
out "tools_end": "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n", } + else: + print("Chat completion heuristic: ChatML (Generic).") + chatcompl_adapter = { + "system_start": "<|im_start|>system\n\n", + "system_end": "<|im_end|>\n\n", + "user_start": "<|im_start|>user\n\n", + "user_end": "<|im_end|>\n\n", + "assistant_start": "<|im_start|>assistant\n\n", + "assistant_end": "<|im_end|>\n\n", + } + elif "System role not supported" in chat_template and "" in chat_template: print("Chat completion heuristic: Google Gemma 2.") # Google Gemma 2 From 3eb7712cb0f2db0d3904edcccbd015731d04d110 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 17:09:54 +0900 Subject: [PATCH 05/15] cleanup --- koboldcpp.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index b1fc3f7efb6..8e0b16973e1 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4657,7 +4657,6 @@ def main(launch_args,start_server=True): if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template: if "<|im_sep|>" in chat_template: print("Chat completion heuristic: ChatML (Phi 4)") - # Phi 4 ChatML chatcompl_adapter = { "system_start": "<|im_start|>system<|im_sep|>", "system_end": "<|im_end|>", @@ -4668,7 +4667,6 @@ def main(launch_args,start_server=True): } elif "You are provided with function signatures within " in chat_template: print("Chat completion heuristic: ChatML (Qwen 2.5 based).") - # Qwen 2.5 ChatML chatcompl_adapter = { "system_start": "<|im_start|>system\n\n", "system_end": "<|im_end|>\n\n", @@ -4692,7 +4690,6 @@ def main(launch_args,start_server=True): elif "System role not supported" in chat_template and "" in chat_template: print("Chat completion heuristic: Google Gemma 2.") - # Google Gemma 2 chatcompl_adapter = { "user_start": "user\n", "user_end": "\n", @@ -4700,7 +4697,6 @@ def main(launch_args,start_server=True): "assistant_end": "\n", } 
elif "<|start_header_id|>system" in chat_template: - # Llama 3.x print("Chat completion heuristic: Llama 3.x.") chatcompl_adapter = { "system_start": "<|start_header_id|>system<|end_header_id|>\n\n", From e980dca9de2384ba54d2905e364c68cee13b3175 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 17:27:51 +0900 Subject: [PATCH 06/15] mistral (generic) heuristic --- koboldcpp.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/koboldcpp.py b/koboldcpp.py index 8e0b16973e1..57b2ad3d17f 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4706,7 +4706,14 @@ def main(launch_args,start_server=True): "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n", "assistant_end": "<|eot_id|>\n\n", } - + elif "[/INST]" in chat_template: + print("Chat completion heuristic: Mistral (Generic)") + chatcompl_adapter = { + "user_start": "[INST]", + "user_end": "[/INST]\n", + "assistant_start": "", + "assistant_end": "", + } #handle loading image model From fea7766ba0ff258b3191823e911bc474bc4efc3d Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 21:58:34 +0900 Subject: [PATCH 07/15] fix sys msg for mistral --- koboldcpp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/koboldcpp.py b/koboldcpp.py index 57b2ad3d17f..8e284570e8d 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4709,6 +4709,8 @@ def main(launch_args,start_server=True): elif "[/INST]" in chat_template: print("Chat completion heuristic: Mistral (Generic)") chatcompl_adapter = { + "system_start": "[INST]", + "system_end": "[/INST]\n", "user_start": "[INST]", "user_end": "[/INST]\n", "assistant_start": "", From 4090105c46bdaa89e01382e3afbf9574e47688ec Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 22:21:10 +0900 Subject: [PATCH 08/15] phi 3.5 --- koboldcpp.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/koboldcpp.py b/koboldcpp.py index 8e284570e8d..e2e69301512 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ 
-4716,6 +4716,16 @@ def main(launch_args,start_server=True): "assistant_start": "", "assistant_end": "", } + elif "<|system|>" in chat_template and "<|user|>" in chat_template: + print("Chat completion heuristic: Phi 3.5") + chatcompl_adapter = { + "system_start": "<|system|>\n", + "system_end": "<|end|>\n", + "user_start": "<|user|>\n", + "user_end": "<|end|>\n", + "assistant_start": "<|assistant|>\n", + "assistant_end": "<|end|>\n", + } #handle loading image model From f90a238fda01f4b6fbdf1a77826fc56055259543 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Tue, 24 Dec 2024 23:27:58 +0900 Subject: [PATCH 09/15] mistral v3 --- koboldcpp.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index e2e69301512..39d7ba3445b 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4707,15 +4707,26 @@ def main(launch_args,start_server=True): "assistant_end": "<|eot_id|>\n\n", } elif "[/INST]" in chat_template: - print("Chat completion heuristic: Mistral (Generic)") - chatcompl_adapter = { - "system_start": "[INST]", - "system_end": "[/INST]\n", - "user_start": "[INST]", - "user_end": "[/INST]\n", - "assistant_start": "", - "assistant_end": "", - } + if "\"[INST] \" + system_message" in chat_template: + print("Chat completion heuristic: Mistral V3") + chatcompl_adapter = { + "system_start": "[INST] ", + "system_end": "[/INST] ", + "user_start": "[INST] ", + "user_end": "[/INST] ", + "assistant_start": "", + "assistant_end": "", + } + else: + print("Chat completion heuristic: Mistral (Generic)") + chatcompl_adapter = { + "system_start": "[INST]", + "system_end": "[/INST]\n", + "user_start": "[INST]", + "user_end": "[/INST]\n", + "assistant_start": "", + "assistant_end": "", + } elif "<|system|>" in chat_template and "<|user|>" in chat_template: print("Chat completion heuristic: Phi 3.5") chatcompl_adapter = { From eef1a204ee0cd438ae0532039f2d081e7dfaa20c Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm 
Date: Wed, 25 Dec 2024 11:42:10 +0900 Subject: [PATCH 10/15] cohere (aya expanse 32b based) --- koboldcpp.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/koboldcpp.py b/koboldcpp.py index 39d7ba3445b..9cfe0f90612 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4737,6 +4737,17 @@ def main(launch_args,start_server=True): "assistant_start": "<|assistant|>\n", "assistant_end": "<|end|>\n", } + elif "<|START_OF_TURN_TOKEN|>" in chat_template: + print("Chat completion heuristic: Cohere (Aya Expanse 32B based)") + chatcompl_adapter = { + "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>", + "system_end": "<|END_OF_TURN_TOKEN|>", + "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>", + "user_end": "<|END_OF_TURN_TOKEN|>", + "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", + "assistant_end": "<|END_OF_TURN_TOKEN|>", + } + #handle loading image model From df120e87b77f6caf559b1c622473fed207f68f13 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Wed, 25 Dec 2024 13:44:56 +0900 Subject: [PATCH 11/15] only derive from chat template if AutoGuess --- koboldcpp.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 9cfe0f90612..970318c2533 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4413,7 +4413,7 @@ def main(launch_args,start_server=True): print("Warning: Saved story file invalid or not found. 
No story will be preloaded into server.") # try to read chat completions adapter - if args.chatcompletionsadapter: + if args.chatcompletionsadapter and "autoguess" not in args.chatcompletionsadapter.lower(): global chatcompl_adapter ccadapter_path = None canload = False @@ -4648,7 +4648,11 @@ def main(launch_args,start_server=True): exitcounter = 999 exit_with_error(3,"Could not load text model: " + modelname) - if chatcompl_adapter is None: + if ( + chatcompl_adapter is None + and args.chatcompletionsadapter + and "autoguess" in args.chatcompletionsadapter.lower() + ): # Try to derive chat completions adapter from chat template, now that we have the model loaded ctbytes = handle.get_chat_template() chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore") From d1c273d8dee19ee1d4ea9038b897bc88ada73b48 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Wed, 25 Dec 2024 13:55:24 +0900 Subject: [PATCH 12/15] add notes about alpaca fallbacks --- koboldcpp.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/koboldcpp.py b/koboldcpp.py index 970318c2533..2de2d34b35c 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4751,7 +4751,11 @@ def main(launch_args,start_server=True): "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", "assistant_end": "<|END_OF_TURN_TOKEN|>", } + if chatcompl_adapter is None: + print("Chat template heuristics failed to identify chat completions format. Alpaca will be used.") + if chatcompl_adapter is None and not args.chatcompletionsadapter: + print("Note: Alpaca format will be used for OpenAI Compatible API chat completions. 
Use --chatcompletionsadapter=AutoGuess to use chat template heuristics.") #handle loading image model From b45380f39cc696068f7c180e308739834575a563 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Wed, 25 Dec 2024 13:59:30 +0900 Subject: [PATCH 13/15] added AutoGuess.json dummy --- kcpp_adapters/AutoGuess.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 kcpp_adapters/AutoGuess.json diff --git a/kcpp_adapters/AutoGuess.json b/kcpp_adapters/AutoGuess.json new file mode 100644 index 00000000000..e69de29bb2d From 267d6cb540e26224546b846ab1b392ae4cb4c086 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Wed, 25 Dec 2024 14:05:11 +0900 Subject: [PATCH 14/15] add mistral v7 --- koboldcpp.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/koboldcpp.py b/koboldcpp.py index 2de2d34b35c..567efcb6da7 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4711,7 +4711,17 @@ def main(launch_args,start_server=True): "assistant_end": "<|eot_id|>\n\n", } elif "[/INST]" in chat_template: - if "\"[INST] \" + system_message" in chat_template: + if "[SYSTEM_PROMPT]" in chat_template: + print("Chat completion heuristic: Mistral V7 (with system prompt)") + chatcompl_adapter = { + "system_start": "[SYSTEM_PROMPT] ", + "system_end": "[/SYSTEM_PROMPT]", + "user_start": "[INST] ", + "user_end": "[/INST]", + "assistant_start": " ", + "assistant_end": "", + } + elif "\"[INST] \" + system_message" in chat_template: print("Chat completion heuristic: Mistral V3") chatcompl_adapter = { "system_start": "[INST] ", From 92f33c14d0e61c9211fd9b800a1ffbe198b97376 Mon Sep 17 00:00:00 2001 From: Karl-Johan Alm Date: Wed, 25 Dec 2024 21:49:41 +0900 Subject: [PATCH 15/15] switch to using a json list with search strings --- kcpp_adapters/AutoGuess.json | 113 +++++++++++++++++++++++++++++++++ koboldcpp.py | 119 ++++------------------------------- 2 files changed, 124 insertions(+), 108 deletions(-) diff --git a/kcpp_adapters/AutoGuess.json 
b/kcpp_adapters/AutoGuess.json index e69de29bb2d..a577753dbcc 100644 --- a/kcpp_adapters/AutoGuess.json +++ b/kcpp_adapters/AutoGuess.json @@ -0,0 +1,113 @@ +[ +{ + "search": ["<|im_start|>assistant", "<|im_end|>", "<|im_sep|>"], + "name": "ChatML (Phi 4)", + "adapter": { + "system_start": "<|im_start|>system<|im_sep|>", + "system_end": "<|im_end|>", + "user_start": "<|im_start|>user<|im_sep|>", + "user_end": "<|im_end|>", + "assistant_start": "<|im_start|>assistant<|im_sep|>", + "assistant_end": "<|im_end|>" + } +}, { + "search": ["<|im_start|>assistant", "<|im_end|>", "You are provided with function signatures within "], + "name": "ChatML (Qwen 2.5 based).", + "adapter": { + "system_start": "<|im_start|>system\n\n", + "system_end": "<|im_end|>\n\n", + "user_start": "<|im_start|>user\n\n", + "user_end": "<|im_end|>\n\n", + "assistant_start": "<|im_start|>assistant\n\n", + "assistant_end": "<|im_end|>\n\n", + "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n\n\n", + "tools_end": "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" + } +}, { + "search": ["<|im_start|>assistant", "<|im_end|>"], + "name": "ChatML (Generic).", + "adapter": { + "system_start": "<|im_start|>system\n\n", + "system_end": "<|im_end|>\n\n", + "user_start": "<|im_start|>user\n\n", + "user_end": "<|im_end|>\n\n", + "assistant_start": "<|im_start|>assistant\n\n", + "assistant_end": "<|im_end|>\n\n" + } +}, { + "search": ["System role not supported", ""], + "name": "Google Gemma 2.", + "adapter": { + "user_start": "user\n", + "user_end": "\n", + "assistant_start": "model\n", + "assistant_end": "\n" + } +}, { + "search": ["<|start_header_id|>system"], + "name": "Llama 3.x.", + "adapter": { + "system_start": "<|start_header_id|>system<|end_header_id|>\n\n", + "system_end": "<|eot_id|>\n\n", + 
"user_start": "<|start_header_id|>user<|end_header_id|>\n\n", + "user_end": "<|eot_id|>\n\n", + "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n", + "assistant_end": "<|eot_id|>\n\n" + } +}, { + "search": ["[/INST]", "[SYSTEM_PROMPT]"], + "name": "Mistral V7 (with system prompt)", + "adapter": { + "system_start": "[SYSTEM_PROMPT] ", + "system_end": "[/SYSTEM_PROMPT]", + "user_start": "[INST] ", + "user_end": "[/INST]", + "assistant_start": " ", + "assistant_end": "" + } +}, { + "search": ["[/INST]", "\"[INST] \" + system_message"], + "name": "Mistral V3", + "adapter": { + "system_start": "[INST] ", + "system_end": "[/INST] ", + "user_start": "[INST] ", + "user_end": "[/INST] ", + "assistant_start": "", + "assistant_end": "" + } +}, { + "search": ["[/INST]"], + "name": "Mistral (Generic)", + "adapter": { + "system_start": "[INST]", + "system_end": "[/INST]\n", + "user_start": "[INST]", + "user_end": "[/INST]\n", + "assistant_start": "", + "assistant_end": "" + } +}, { + "search": ["<|system|>", "<|user|>"], + "name": "Phi 3.5", + "adapter": { + "system_start": "<|system|>\n", + "system_end": "<|end|>\n", + "user_start": "<|user|>\n", + "user_end": "<|end|>\n", + "assistant_start": "<|assistant|>\n", + "assistant_end": "<|end|>\n" + } +}, { + "search": ["<|START_OF_TURN_TOKEN|>"], + "name": "Cohere (Aya Expanse 32B based)", + "adapter": { + "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>", + "system_end": "<|END_OF_TURN_TOKEN|>", + "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>", + "user_end": "<|END_OF_TURN_TOKEN|>", + "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", + "assistant_end": "<|END_OF_TURN_TOKEN|>" + } +} +] diff --git a/koboldcpp.py b/koboldcpp.py index 567efcb6da7..119178558f2 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -4413,7 +4413,7 @@ def main(launch_args,start_server=True): print("Warning: Saved story file invalid or not found. 
No story will be preloaded into server.") # try to read chat completions adapter - if args.chatcompletionsadapter and "autoguess" not in args.chatcompletionsadapter.lower(): + if args.chatcompletionsadapter: global chatcompl_adapter ccadapter_path = None canload = False @@ -4649,118 +4649,21 @@ def main(launch_args,start_server=True): exit_with_error(3,"Could not load text model: " + modelname) if ( - chatcompl_adapter is None - and args.chatcompletionsadapter - and "autoguess" in args.chatcompletionsadapter.lower() + chatcompl_adapter is not None + and isinstance(chatcompl_adapter, list) ): + # The chat completions adapter is a list that needs derivation from chat templates # Try to derive chat completions adapter from chat template, now that we have the model loaded ctbytes = handle.get_chat_template() chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore") + candidates = chatcompl_adapter + chatcompl_adapter = None if chat_template != "": - # "Better than nothing" simple heuristics - if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template: - if "<|im_sep|>" in chat_template: - print("Chat completion heuristic: ChatML (Phi 4)") - chatcompl_adapter = { - "system_start": "<|im_start|>system<|im_sep|>", - "system_end": "<|im_end|>", - "user_start": "<|im_start|>user<|im_sep|>", - "user_end": "<|im_end|>", - "assistant_start": "<|im_start|>assistant<|im_sep|>", - "assistant_end": "<|im_end|>", - } - elif "You are provided with function signatures within " in chat_template: - print("Chat completion heuristic: ChatML (Qwen 2.5 based).") - chatcompl_adapter = { - "system_start": "<|im_start|>system\n\n", - "system_end": "<|im_end|>\n\n", - "user_start": "<|im_start|>user\n\n", - "user_end": "<|im_end|>\n\n", - "assistant_start": "<|im_start|>assistant\n\n", - "assistant_end": "<|im_end|>\n\n", - "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures 
within XML tags:\n\n\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out - "tools_end": "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n", - } - else: - print("Chat completion heuristic: ChatML (Generic).") - chatcompl_adapter = { - "system_start": "<|im_start|>system\n\n", - "system_end": "<|im_end|>\n\n", - "user_start": "<|im_start|>user\n\n", - "user_end": "<|im_end|>\n\n", - "assistant_start": "<|im_start|>assistant\n\n", - "assistant_end": "<|im_end|>\n\n", - } - - elif "System role not supported" in chat_template and "" in chat_template: - print("Chat completion heuristic: Google Gemma 2.") - chatcompl_adapter = { - "user_start": "user\n", - "user_end": "\n", - "assistant_start": "model\n", - "assistant_end": "\n", - } - elif "<|start_header_id|>system" in chat_template: - print("Chat completion heuristic: Llama 3.x.") - chatcompl_adapter = { - "system_start": "<|start_header_id|>system<|end_header_id|>\n\n", - "system_end": "<|eot_id|>\n\n", - "user_start": "<|start_header_id|>user<|end_header_id|>\n\n", - "user_end": "<|eot_id|>\n\n", - "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n", - "assistant_end": "<|eot_id|>\n\n", - } - elif "[/INST]" in chat_template: - if "[SYSTEM_PROMPT]" in chat_template: - print("Chat completion heuristic: Mistral V7 (with system prompt)") - chatcompl_adapter = { - "system_start": "[SYSTEM_PROMPT] ", - "system_end": "[/SYSTEM_PROMPT]", - "user_start": "[INST] ", - "user_end": "[/INST]", - "assistant_start": " ", - "assistant_end": "", - } - elif "\"[INST] \" + system_message" in chat_template: - print("Chat completion heuristic: Mistral V3") - chatcompl_adapter = { - "system_start": "[INST] ", - "system_end": "[/INST] ", - "user_start": "[INST] ", - "user_end": "[/INST] ", - "assistant_start": "", - "assistant_end": "", - } - else: - print("Chat completion heuristic: Mistral 
(Generic)") - chatcompl_adapter = { - "system_start": "[INST]", - "system_end": "[/INST]\n", - "user_start": "[INST]", - "user_end": "[/INST]\n", - "assistant_start": "", - "assistant_end": "", - } - elif "<|system|>" in chat_template and "<|user|>" in chat_template: - print("Chat completion heuristic: Phi 3.5") - chatcompl_adapter = { - "system_start": "<|system|>\n", - "system_end": "<|end|>\n", - "user_start": "<|user|>\n", - "user_end": "<|end|>\n", - "assistant_start": "<|assistant|>\n", - "assistant_end": "<|end|>\n", - } - elif "<|START_OF_TURN_TOKEN|>" in chat_template: - print("Chat completion heuristic: Cohere (Aya Expanse 32B based)") - chatcompl_adapter = { - "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>", - "system_end": "<|END_OF_TURN_TOKEN|>", - "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>", - "user_end": "<|END_OF_TURN_TOKEN|>", - "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", - "assistant_end": "<|END_OF_TURN_TOKEN|>", - } + for entry in candidates: + if all(s in chat_template for s in entry['search']): + print(f"Chat completion heuristic: {entry['name']}") + chatcompl_adapter = entry['adapter'] + break if chatcompl_adapter is None: print("Chat template heuristics failed to identify chat completions format. Alpaca will be used.")