From 0e17884c49f0d7de8ed0506deff2a100751d2b23 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 16:05:25 +0900
Subject: [PATCH 01/15] PoC: add chat template heuristics

The Vicuna fallback chat completions adapter is not ideal in some cases (e.g. on a subset of the BBC news classification task on Kaggle, a q4_k_m Qwen 2.5 3B-Instruct GGUF reached 82% accuracy with Vicuna versus 88% with the official ChatML format).

This PR adds a simple proof-of-concept heuristic that inspects the model's chat template and upgrades the adapter whenever it recognizes the format.
---
 koboldcpp.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
diff --git a/koboldcpp.py b/koboldcpp.py
index 5793098c753..ee12d473cb3 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4648,6 +4648,26 @@ def main(launch_args,start_server=True):
             exitcounter = 999
             exit_with_error(3,"Could not load text model: " + modelname)
 
+    if chatcompl_adapter is None:
+        # Try to derive chat completions adapter from chat template, now that we have the model loaded
+        ctbytes = handle.get_chat_template()
+        chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore")
+        if chat_template != "":
+            # "Better than nothing" simple heuristics
+            if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template:
+                print("Chat completion heuristic: ChatML (Qwen 2.5 based).")
+                # ChatML
+                chatcompl_adapter = {
+                    "system_start": "<|im_start|>system\n\n",
+                    "system_end": "<|im_end|>\n\n",
+                    "user_start": "<|im_start|>user\n\n",
+                    "user_end": "<|im_end|>\n\n",
+                    "assistant_start": "<|im_start|>assistant\n\n",
+                    "assistant_end": "<|im_end|>\n\n",
+                    "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out
+                    "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n",
+                }
+
     #handle loading image model
     if args.sdmodel and args.sdmodel!="":
         imgmodel = args.sdmodel

From 7b7150fbf46cf0f4ff49817b3e56c689f717967b Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 16:19:12 +0900
Subject: [PATCH 02/15] gemma 2 heuristic

---
 koboldcpp.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/koboldcpp.py b/koboldcpp.py
index ee12d473cb3..23bbd37d413 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4667,6 +4667,16 @@ def main(launch_args,start_server=True):
                     "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out
                     "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n",
                 }
+            elif "System role not supported" in chat_template and "<start_of_turn>" in chat_template:
+                print("Chat completion heuristic: Google Gemma 2.")
+                # Google Gemma 2
+                chatcompl_adapter = {
+                    "user_start": "<start_of_turn>user\n",
+                    "user_end": "<end_of_turn>\n",
+                    "assistant_start": "<start_of_turn>model\n",
+                    "assistant_end": "<end_of_turn>\n",
+                }
+
 
     #handle loading image model
     if args.sdmodel and args.sdmodel!="":

From 8c2f83a57818669b29c1010222c0ecabaf858953 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 16:45:53 +0900
Subject: [PATCH 03/15] Phi 4, Llama 3.x heuristics

---
 koboldcpp.py | 48 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 23bbd37d413..bcd33464566 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4655,18 +4655,30 @@ def main(launch_args,start_server=True):
         if chat_template != "":
             # "Better than nothing" simple heuristics
             if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template:
-                print("Chat completion heuristic: ChatML (Qwen 2.5 based).")
-                # ChatML
-                chatcompl_adapter = {
-                    "system_start": "<|im_start|>system\n\n",
-                    "system_end": "<|im_end|>\n\n",
-                    "user_start": "<|im_start|>user\n\n",
-                    "user_end": "<|im_end|>\n\n",
-                    "assistant_start": "<|im_start|>assistant\n\n",
-                    "assistant_end": "<|im_end|>\n\n",
-                    "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out
-                    "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n",
-                }
+                if "<|im_sep|>" in chat_template:
+                    print("Chat completion heuristic: Phi 4")
+                    # Phi 4 ChatML
+                    chatcompl_adapter = {
+                        "system_start": "<|im_start|>system<|im_sep|>",
+                        "system_end": "<|im_end|>",
+                        "user_start": "<|im_start|>user<|im_sep|>",
+                        "user_end": "<|im_end|>",
+                        "assistant_start": "<|im_start|>assistant<|im_sep|>",
+                        "assistant_end": "<|im_end|>",
+                    }
+                else:
+                    print("Chat completion heuristic: ChatML (Qwen 2.5 based).")
+                    # Qwen 2.5 ChatML
+                    chatcompl_adapter = {
+                        "system_start": "<|im_start|>system\n\n",
+                        "system_end": "<|im_end|>\n\n",
+                        "user_start": "<|im_start|>user\n\n",
+                        "user_end": "<|im_end|>\n\n",
+                        "assistant_start": "<|im_start|>assistant\n\n",
+                        "assistant_end": "<|im_end|>\n\n",
+                        "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out
+                        "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n",
+                    }
             elif "System role not supported" in chat_template and "<start_of_turn>" in chat_template:
                 print("Chat completion heuristic: Google Gemma 2.")
                 # Google Gemma 2
@@ -4676,6 +4688,18 @@ def main(launch_args,start_server=True):
                     "assistant_start": "<start_of_turn>model\n",
                     "assistant_end": "<end_of_turn>\n",
                 }
+            elif "<|start_header_id|>system" in chat_template:
+                # Llama 3.x
+                print("Chat completion heuristic: Llama 3.x.")
+                chatcompl_adapter = {
+                    "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
+                    "system_end": "<|eot_id|>\n\n",
+                    "user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
+                    "user_end": "<|eot_id|>\n\n",
+                    "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+                    "assistant_end": "<|eot_id|>\n\n",
+                }
+
 
 
     #handle loading image model

From 203c4be9ef10d2c3f2605b729ffc94060de065c0 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 17:08:15 +0900
Subject: [PATCH 04/15] better qwen vs generic heuristic

---
 koboldcpp.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index bcd33464566..b1fc3f7efb6 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4656,7 +4656,7 @@ def main(launch_args,start_server=True):
             # "Better than nothing" simple heuristics
             if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template:
                 if "<|im_sep|>" in chat_template:
-                    print("Chat completion heuristic: Phi 4")
+                    print("Chat completion heuristic: ChatML (Phi 4)")
                     # Phi 4 ChatML
                     chatcompl_adapter = {
                         "system_start": "<|im_start|>system<|im_sep|>",
@@ -4666,7 +4666,7 @@ def main(launch_args,start_server=True):
                         "assistant_start": "<|im_start|>assistant<|im_sep|>",
                         "assistant_end": "<|im_end|>",
                     }
-                else:
+                elif "You are provided with function signatures within <tools>" in chat_template:
                     print("Chat completion heuristic: ChatML (Qwen 2.5 based).")
                     # Qwen 2.5 ChatML
                     chatcompl_adapter = {
@@ -4679,6 +4679,17 @@ def main(launch_args,start_server=True):
                         "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out
                         "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n",
                     }
+                else:
+                    print("Chat completion heuristic: ChatML (Generic).")
+                    chatcompl_adapter = {
+                        "system_start": "<|im_start|>system\n\n",
+                        "system_end": "<|im_end|>\n\n",
+                        "user_start": "<|im_start|>user\n\n",
+                        "user_end": "<|im_end|>\n\n",
+                        "assistant_start": "<|im_start|>assistant\n\n",
+                        "assistant_end": "<|im_end|>\n\n",
+                    }
+
             elif "System role not supported" in chat_template and "<start_of_turn>" in chat_template:
                 print("Chat completion heuristic: Google Gemma 2.")
                 # Google Gemma 2

From 3eb7712cb0f2db0d3904edcccbd015731d04d110 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 17:09:54 +0900
Subject: [PATCH 05/15] cleanup

---
 koboldcpp.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index b1fc3f7efb6..8e0b16973e1 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4657,7 +4657,6 @@ def main(launch_args,start_server=True):
             if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template:
                 if "<|im_sep|>" in chat_template:
                     print("Chat completion heuristic: ChatML (Phi 4)")
-                    # Phi 4 ChatML
                     chatcompl_adapter = {
                         "system_start": "<|im_start|>system<|im_sep|>",
                         "system_end": "<|im_end|>",
@@ -4668,7 +4667,6 @@ def main(launch_args,start_server=True):
                     }
                 elif "You are provided with function signatures within <tools>" in chat_template:
                     print("Chat completion heuristic: ChatML (Qwen 2.5 based).")
-                    # Qwen 2.5 ChatML
                     chatcompl_adapter = {
                         "system_start": "<|im_start|>system\n\n",
                         "system_end": "<|im_end|>\n\n",
@@ -4692,7 +4690,6 @@ def main(launch_args,start_server=True):
 
             elif "System role not supported" in chat_template and "<start_of_turn>" in chat_template:
                 print("Chat completion heuristic: Google Gemma 2.")
-                # Google Gemma 2
                 chatcompl_adapter = {
                     "user_start": "<start_of_turn>user\n",
                     "user_end": "<end_of_turn>\n",
@@ -4700,7 +4697,6 @@ def main(launch_args,start_server=True):
                     "assistant_end": "<end_of_turn>\n",
                 }
             elif "<|start_header_id|>system" in chat_template:
-                # Llama 3.x
                 print("Chat completion heuristic: Llama 3.x.")
                 chatcompl_adapter = {
                     "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",

From e980dca9de2384ba54d2905e364c68cee13b3175 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 17:27:51 +0900
Subject: [PATCH 06/15] mistral (generic) heuristic

---
 koboldcpp.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 8e0b16973e1..57b2ad3d17f 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4706,7 +4706,14 @@ def main(launch_args,start_server=True):
                     "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
                     "assistant_end": "<|eot_id|>\n\n",
                 }
-
+            elif "[/INST]" in chat_template:
+                print("Chat completion heuristic: Mistral (Generic)")
+                chatcompl_adapter = {
+                    "user_start": "[INST]",
+                    "user_end": "[/INST]\n",
+                    "assistant_start": "",
+                    "assistant_end": "</s>",
+                }
 
 
     #handle loading image model

From fea7766ba0ff258b3191823e911bc474bc4efc3d Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 21:58:34 +0900
Subject: [PATCH 07/15] fix sys msg for mistral

---
 koboldcpp.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/koboldcpp.py b/koboldcpp.py
index 57b2ad3d17f..8e284570e8d 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4709,6 +4709,8 @@ def main(launch_args,start_server=True):
             elif "[/INST]" in chat_template:
                 print("Chat completion heuristic: Mistral (Generic)")
                 chatcompl_adapter = {
+                    "system_start": "[INST]",
+                    "system_end": "[/INST]\n",
                     "user_start": "[INST]",
                     "user_end": "[/INST]\n",
                     "assistant_start": "",

From 4090105c46bdaa89e01382e3afbf9574e47688ec Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 22:21:10 +0900
Subject: [PATCH 08/15] phi 3.5

---
 koboldcpp.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/koboldcpp.py b/koboldcpp.py
index 8e284570e8d..e2e69301512 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4716,6 +4716,16 @@ def main(launch_args,start_server=True):
                     "assistant_start": "",
                     "assistant_end": "</s>",
                 }
+            elif "<|system|>" in chat_template and "<|user|>" in chat_template:
+                print("Chat completion heuristic: Phi 3.5")
+                chatcompl_adapter = {
+                    "system_start": "<|system|>\n",
+                    "system_end": "<|end|>\n",
+                    "user_start": "<|user|>\n",
+                    "user_end": "<|end|>\n",
+                    "assistant_start": "<|assistant|>\n",
+                    "assistant_end": "<|end|>\n",
+                }
 
 
     #handle loading image model

From f90a238fda01f4b6fbdf1a77826fc56055259543 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Tue, 24 Dec 2024 23:27:58 +0900
Subject: [PATCH 09/15] mistral v3

---
 koboldcpp.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index e2e69301512..39d7ba3445b 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4707,15 +4707,26 @@ def main(launch_args,start_server=True):
                     "assistant_end": "<|eot_id|>\n\n",
                 }
             elif "[/INST]" in chat_template:
-                print("Chat completion heuristic: Mistral (Generic)")
-                chatcompl_adapter = {
-                    "system_start": "[INST]",
-                    "system_end": "[/INST]\n",
-                    "user_start": "[INST]",
-                    "user_end": "[/INST]\n",
-                    "assistant_start": "",
-                    "assistant_end": "</s>",
-                }
+                if "\"[INST] \" + system_message" in chat_template:
+                    print("Chat completion heuristic: Mistral V3")
+                    chatcompl_adapter = {
+                        "system_start": "[INST] ",
+                        "system_end": "[/INST] ",
+                        "user_start": "[INST] ",
+                        "user_end": "[/INST] ",
+                        "assistant_start": "",
+                        "assistant_end": "</s>",
+                    }
+                else:
+                    print("Chat completion heuristic: Mistral (Generic)")
+                    chatcompl_adapter = {
+                        "system_start": "[INST]",
+                        "system_end": "[/INST]\n",
+                        "user_start": "[INST]",
+                        "user_end": "[/INST]\n",
+                        "assistant_start": "",
+                        "assistant_end": "</s>",
+                    }
             elif "<|system|>" in chat_template and "<|user|>" in chat_template:
                 print("Chat completion heuristic: Phi 3.5")
                 chatcompl_adapter = {

From eef1a204ee0cd438ae0532039f2d081e7dfaa20c Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Wed, 25 Dec 2024 11:42:10 +0900
Subject: [PATCH 10/15] cohere (aya expanse 32b based)

---
 koboldcpp.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/koboldcpp.py b/koboldcpp.py
index 39d7ba3445b..9cfe0f90612 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4737,6 +4737,17 @@ def main(launch_args,start_server=True):
                     "assistant_start": "<|assistant|>\n",
                     "assistant_end": "<|end|>\n",
                 }
+            elif "<|START_OF_TURN_TOKEN|>" in chat_template:
+                print("Chat completion heuristic: Cohere (Aya Expanse 32B based)")
+                chatcompl_adapter = {
+                    "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
+                    "system_end": "<|END_OF_TURN_TOKEN|>",
+                    "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
+                    "user_end": "<|END_OF_TURN_TOKEN|>",
+                    "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+                    "assistant_end": "<|END_OF_TURN_TOKEN|>",
+                }
+
 
 
     #handle loading image model

From df120e87b77f6caf559b1c622473fed207f68f13 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Wed, 25 Dec 2024 13:44:56 +0900
Subject: [PATCH 11/15] only derive from chat template if AutoGuess

---
 koboldcpp.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 9cfe0f90612..970318c2533 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4413,7 +4413,7 @@ def main(launch_args,start_server=True):
             print("Warning: Saved story file invalid or not found. No story will be preloaded into server.")
 
     # try to read chat completions adapter
-    if args.chatcompletionsadapter:
+    if args.chatcompletionsadapter and "autoguess" not in args.chatcompletionsadapter.lower():
         global chatcompl_adapter
         ccadapter_path = None
         canload = False
@@ -4648,7 +4648,11 @@ def main(launch_args,start_server=True):
             exitcounter = 999
             exit_with_error(3,"Could not load text model: " + modelname)
 
-    if chatcompl_adapter is None:
+    if (
+        chatcompl_adapter is None
+        and args.chatcompletionsadapter
+        and "autoguess" in args.chatcompletionsadapter.lower()
+    ):
         # Try to derive chat completions adapter from chat template, now that we have the model loaded
         ctbytes = handle.get_chat_template()
         chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore")

From d1c273d8dee19ee1d4ea9038b897bc88ada73b48 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Wed, 25 Dec 2024 13:55:24 +0900
Subject: [PATCH 12/15] add notes about alpaca fallbacks

---
 koboldcpp.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/koboldcpp.py b/koboldcpp.py
index 970318c2533..2de2d34b35c 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4751,7 +4751,11 @@ def main(launch_args,start_server=True):
                     "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
                     "assistant_end": "<|END_OF_TURN_TOKEN|>",
                 }
+        if chatcompl_adapter is None:
+            print("Chat template heuristics failed to identify chat completions format. Alpaca will be used.")
 
+    if chatcompl_adapter is None and not args.chatcompletionsadapter:
+        print("Note: Alpaca format will be used for OpenAI Compatible API chat completions. Use --chatcompletionsadapter=AutoGuess to use chat template heuristics.")
 
 
     #handle loading image model

From b45380f39cc696068f7c180e308739834575a563 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Wed, 25 Dec 2024 13:59:30 +0900
Subject: [PATCH 13/15] added AutoGuess.json dummy

---
 kcpp_adapters/AutoGuess.json | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 kcpp_adapters/AutoGuess.json

diff --git a/kcpp_adapters/AutoGuess.json b/kcpp_adapters/AutoGuess.json
new file mode 100644
index 00000000000..e69de29bb2d

From 267d6cb540e26224546b846ab1b392ae4cb4c086 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Wed, 25 Dec 2024 14:05:11 +0900
Subject: [PATCH 14/15] add mistral v7

---
 koboldcpp.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 2de2d34b35c..567efcb6da7 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4711,7 +4711,17 @@ def main(launch_args,start_server=True):
                     "assistant_end": "<|eot_id|>\n\n",
                 }
             elif "[/INST]" in chat_template:
-                if "\"[INST] \" + system_message" in chat_template:
+                if "[SYSTEM_PROMPT]" in chat_template:
+                    print("Chat completion heuristic: Mistral V7 (with system prompt)")
+                    chatcompl_adapter = {
+                        "system_start": "[SYSTEM_PROMPT] ",
+                        "system_end": "[/SYSTEM_PROMPT]",
+                        "user_start": "[INST] ",
+                        "user_end": "[/INST]",
+                        "assistant_start": " ",
+                        "assistant_end": "</s>",
+                    }
+                elif "\"[INST] \" + system_message" in chat_template:
                     print("Chat completion heuristic: Mistral V3")
                     chatcompl_adapter = {
                         "system_start": "[INST] ",

From 92f33c14d0e61c9211fd9b800a1ffbe198b97376 Mon Sep 17 00:00:00 2001
From: Karl-Johan Alm <karljohan-alm@garage.co.jp>
Date: Wed, 25 Dec 2024 21:49:41 +0900
Subject: [PATCH 15/15] switch to using a json list with search strings

---
 kcpp_adapters/AutoGuess.json | 113 +++++++++++++++++++++++++++++++++
 koboldcpp.py                 | 119 ++++-------------------------------
 2 files changed, 124 insertions(+), 108 deletions(-)

diff --git a/kcpp_adapters/AutoGuess.json b/kcpp_adapters/AutoGuess.json
index e69de29bb2d..a577753dbcc 100644
--- a/kcpp_adapters/AutoGuess.json
+++ b/kcpp_adapters/AutoGuess.json
@@ -0,0 +1,113 @@
+[
+{
+    "search": ["<|im_start|>assistant", "<|im_end|>", "<|im_sep|>"],
+    "name": "ChatML (Phi 4)",
+    "adapter": {
+        "system_start": "<|im_start|>system<|im_sep|>",
+        "system_end": "<|im_end|>",
+        "user_start": "<|im_start|>user<|im_sep|>",
+        "user_end": "<|im_end|>",
+        "assistant_start": "<|im_start|>assistant<|im_sep|>",
+        "assistant_end": "<|im_end|>"
+    }
+}, {
+    "search": ["<|im_start|>assistant", "<|im_end|>", "You are provided with function signatures within <tools>"],
+    "name": "ChatML (Qwen 2.5 based).",
+    "adapter": {
+        "system_start": "<|im_start|>system\n\n",
+        "system_end": "<|im_end|>\n\n",
+        "user_start": "<|im_start|>user\n\n",
+        "user_end": "<|im_end|>\n\n",
+        "assistant_start": "<|im_start|>assistant\n\n",
+        "assistant_end": "<|im_end|>\n\n",
+        "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n",
+        "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n"
+    }
+}, {
+    "search": ["<|im_start|>assistant", "<|im_end|>"],
+    "name": "ChatML (Generic).",
+    "adapter": {
+        "system_start": "<|im_start|>system\n\n",
+        "system_end": "<|im_end|>\n\n",
+        "user_start": "<|im_start|>user\n\n",
+        "user_end": "<|im_end|>\n\n",
+        "assistant_start": "<|im_start|>assistant\n\n",
+        "assistant_end": "<|im_end|>\n\n"
+    }
+}, {
+    "search": ["System role not supported", "<start_of_turn>"],
+    "name": "Google Gemma 2.",
+    "adapter": {
+        "user_start": "<start_of_turn>user\n",
+        "user_end": "<end_of_turn>\n",
+        "assistant_start": "<start_of_turn>model\n",
+        "assistant_end": "<end_of_turn>\n"
+    }
+}, {
+    "search": ["<|start_header_id|>system"],
+    "name": "Llama 3.x.",
+    "adapter": {
+        "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
+        "system_end": "<|eot_id|>\n\n",
+        "user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
+        "user_end": "<|eot_id|>\n\n",
+        "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+        "assistant_end": "<|eot_id|>\n\n"
+    }
+}, {
+    "search": ["[/INST]", "[SYSTEM_PROMPT]"],
+    "name": "Mistral V7 (with system prompt)",
+    "adapter": {
+        "system_start": "[SYSTEM_PROMPT] ",
+        "system_end": "[/SYSTEM_PROMPT]",
+        "user_start": "[INST] ",
+        "user_end": "[/INST]",
+        "assistant_start": " ",
+        "assistant_end": "</s>"
+    }
+}, {
+    "search": ["[/INST]", "\"[INST] \" + system_message"],
+    "name": "Mistral V3",
+    "adapter": {
+        "system_start": "[INST] ",
+        "system_end": "[/INST] ",
+        "user_start": "[INST] ",
+        "user_end": "[/INST] ",
+        "assistant_start": "",
+        "assistant_end": "</s>"
+    }
+}, {
+    "search": ["[/INST]"],
+    "name": "Mistral (Generic)",
+    "adapter": {
+        "system_start": "[INST]",
+        "system_end": "[/INST]\n",
+        "user_start": "[INST]",
+        "user_end": "[/INST]\n",
+        "assistant_start": "",
+        "assistant_end": "</s>"
+    }
+}, {
+    "search": ["<|system|>", "<|user|>"],
+    "name": "Phi 3.5",
+    "adapter": {
+        "system_start": "<|system|>\n",
+        "system_end": "<|end|>\n",
+        "user_start": "<|user|>\n",
+        "user_end": "<|end|>\n",
+        "assistant_start": "<|assistant|>\n",
+        "assistant_end": "<|end|>\n"
+    }
+}, {
+    "search": ["<|START_OF_TURN_TOKEN|>"],
+    "name": "Cohere (Aya Expanse 32B based)",
+    "adapter": {
+        "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
+        "system_end": "<|END_OF_TURN_TOKEN|>",
+        "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
+        "user_end": "<|END_OF_TURN_TOKEN|>",
+        "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+        "assistant_end": "<|END_OF_TURN_TOKEN|>"
+    }
+}
+]
diff --git a/koboldcpp.py b/koboldcpp.py
index 567efcb6da7..119178558f2 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -4413,7 +4413,7 @@ def main(launch_args,start_server=True):
             print("Warning: Saved story file invalid or not found. No story will be preloaded into server.")
 
     # try to read chat completions adapter
-    if args.chatcompletionsadapter and "autoguess" not in args.chatcompletionsadapter.lower():
+    if args.chatcompletionsadapter:
         global chatcompl_adapter
         ccadapter_path = None
         canload = False
@@ -4649,118 +4649,21 @@ def main(launch_args,start_server=True):
             exit_with_error(3,"Could not load text model: " + modelname)
 
     if (
-        chatcompl_adapter is None
-        and args.chatcompletionsadapter
-        and "autoguess" in args.chatcompletionsadapter.lower()
+        chatcompl_adapter is not None
+        and isinstance(chatcompl_adapter, list)
     ):
+        # The chat completions adapter is a list that needs derivation from chat templates
         # Try to derive chat completions adapter from chat template, now that we have the model loaded
         ctbytes = handle.get_chat_template()
         chat_template = ctypes.string_at(ctbytes).decode("UTF-8","ignore")
+        candidates = chatcompl_adapter
+        chatcompl_adapter = None
         if chat_template != "":
-            # "Better than nothing" simple heuristics
-            if "<|im_start|>assistant" in chat_template and "<|im_end|>" in chat_template:
-                if "<|im_sep|>" in chat_template:
-                    print("Chat completion heuristic: ChatML (Phi 4)")
-                    chatcompl_adapter = {
-                        "system_start": "<|im_start|>system<|im_sep|>",
-                        "system_end": "<|im_end|>",
-                        "user_start": "<|im_start|>user<|im_sep|>",
-                        "user_end": "<|im_end|>",
-                        "assistant_start": "<|im_start|>assistant<|im_sep|>",
-                        "assistant_end": "<|im_end|>",
-                    }
-                elif "You are provided with function signatures within <tools>" in chat_template:
-                    print("Chat completion heuristic: ChatML (Qwen 2.5 based).")
-                    chatcompl_adapter = {
-                        "system_start": "<|im_start|>system\n\n",
-                        "system_end": "<|im_end|>\n\n",
-                        "user_start": "<|im_start|>user\n\n",
-                        "user_end": "<|im_end|>\n\n",
-                        "assistant_start": "<|im_start|>assistant\n\n",
-                        "assistant_end": "<|im_end|>\n\n",
-                        "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n", # Qwen 2.5 -- if ambiguous & worth it, use this string to ID/split out
-                        "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n",
-                    }
-                else:
-                    print("Chat completion heuristic: ChatML (Generic).")
-                    chatcompl_adapter = {
-                        "system_start": "<|im_start|>system\n\n",
-                        "system_end": "<|im_end|>\n\n",
-                        "user_start": "<|im_start|>user\n\n",
-                        "user_end": "<|im_end|>\n\n",
-                        "assistant_start": "<|im_start|>assistant\n\n",
-                        "assistant_end": "<|im_end|>\n\n",
-                    }
-
-            elif "System role not supported" in chat_template and "<start_of_turn>" in chat_template:
-                print("Chat completion heuristic: Google Gemma 2.")
-                chatcompl_adapter = {
-                    "user_start": "<start_of_turn>user\n",
-                    "user_end": "<end_of_turn>\n",
-                    "assistant_start": "<start_of_turn>model\n",
-                    "assistant_end": "<end_of_turn>\n",
-                }
-            elif "<|start_header_id|>system" in chat_template:
-                print("Chat completion heuristic: Llama 3.x.")
-                chatcompl_adapter = {
-                    "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
-                    "system_end": "<|eot_id|>\n\n",
-                    "user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
-                    "user_end": "<|eot_id|>\n\n",
-                    "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
-                    "assistant_end": "<|eot_id|>\n\n",
-                }
-            elif "[/INST]" in chat_template:
-                if "[SYSTEM_PROMPT]" in chat_template:
-                    print("Chat completion heuristic: Mistral V7 (with system prompt)")
-                    chatcompl_adapter = {
-                        "system_start": "[SYSTEM_PROMPT] ",
-                        "system_end": "[/SYSTEM_PROMPT]",
-                        "user_start": "[INST] ",
-                        "user_end": "[/INST]",
-                        "assistant_start": " ",
-                        "assistant_end": "</s>",
-                    }
-                elif "\"[INST] \" + system_message" in chat_template:
-                    print("Chat completion heuristic: Mistral V3")
-                    chatcompl_adapter = {
-                        "system_start": "[INST] ",
-                        "system_end": "[/INST] ",
-                        "user_start": "[INST] ",
-                        "user_end": "[/INST] ",
-                        "assistant_start": "",
-                        "assistant_end": "</s>",
-                    }
-                else:
-                    print("Chat completion heuristic: Mistral (Generic)")
-                    chatcompl_adapter = {
-                        "system_start": "[INST]",
-                        "system_end": "[/INST]\n",
-                        "user_start": "[INST]",
-                        "user_end": "[/INST]\n",
-                        "assistant_start": "",
-                        "assistant_end": "</s>",
-                    }
-            elif "<|system|>" in chat_template and "<|user|>" in chat_template:
-                print("Chat completion heuristic: Phi 3.5")
-                chatcompl_adapter = {
-                    "system_start": "<|system|>\n",
-                    "system_end": "<|end|>\n",
-                    "user_start": "<|user|>\n",
-                    "user_end": "<|end|>\n",
-                    "assistant_start": "<|assistant|>\n",
-                    "assistant_end": "<|end|>\n",
-                }
-            elif "<|START_OF_TURN_TOKEN|>" in chat_template:
-                print("Chat completion heuristic: Cohere (Aya Expanse 32B based)")
-                chatcompl_adapter = {
-                    "system_start": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>",
-                    "system_end": "<|END_OF_TURN_TOKEN|>",
-                    "user_start": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>",
-                    "user_end": "<|END_OF_TURN_TOKEN|>",
-                    "assistant_start": "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
-                    "assistant_end": "<|END_OF_TURN_TOKEN|>",
-                }
+            for entry in candidates:
+                if all(s in chat_template for s in entry['search']):
+                    print(f"Chat completion heuristic: {entry['name']}")
+                    chatcompl_adapter = entry['adapter']
+                    break
         if chatcompl_adapter is None:
             print("Chat template heuristics failed to identify chat completions format. Alpaca will be used.")