From f3721e78296c5230b9ae7ad21f232a3aea602943 Mon Sep 17 00:00:00 2001
From: teddybear082 <87204721+teddybear082@users.noreply.github.com>
Date: Mon, 8 Jul 2024 22:53:29 -0400
Subject: [PATCH 1/8] Rudimentary support for OpenAI chat completions tool
 calls

- Most small models are not smart enough to do this, especially a combined tool call + role-play response, but at least this allows experimentation along these lines with koboldcpp.
---
 koboldcpp.py | 105 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 84 insertions(+), 21 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 91cf13fd63f..1b9f03c9291 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -804,8 +804,10 @@ def string_contains_sequence_substring(inputstr,sequences):
 currfinishreason = "null"
 using_gui_launcher = False
 using_outdated_flags = False
+using_openai_tools = False
 
 def transform_genparams(genparams, api_format):
+    global using_openai_tools
     #api format 1=basic,2=kai,3=oai,4=oai-chat,5=interrogate
     #alias all nonstandard alternative names for rep pen.
     rp1 = genparams.get('repeat_penalty', 1.0)
@@ -851,6 +853,7 @@ def transform_genparams(genparams, api_format):
             assistant_message_end = adapter_obj.get("assistant_end", "")
             images_added = []
 
+
             for message in messages_array:
                 if message['role'] == "system":
                     messages_string += system_message_start
@@ -878,6 +881,59 @@ def transform_genparams(genparams, api_format):
                 elif message['role'] == "assistant":
                     messages_string += assistant_message_end
 
+            # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt
+            tools_array = genparams.get('tools', [])
+            if tools_array:
+                tools_string = json.dumps(tools_array, indent=2) + "Respond only in JSON." #TBD: add Tools notation like \n### Tools: \n or just stick the tools at the end of the prompt? And formatting?
+                messages_string += user_message_end + tools_string
+                using_openai_tools = True
+                # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create
+                open_ai_tools_grammar = r"""
+root ::= array
+
+array ::= "[" object "]"
+
+object ::= "{" pairid "," pairtype "," pairfunction "}"
+
+pairid ::= " \"id\" : " string
+
+pairtype ::= " \"type\" : " "\"function\""
+
+pairfunction ::= " \"function\" : " functionobject
+
+functionobject ::= "{" pairname "," pairarguments "}"
+
+pairname ::= " \"name\" : " string
+
+pairarguments ::= " \"arguments\" : " "{" arguments "}"
+
+arguments ::= pair ( "," pair)*
+
+pair ::= string ":" value
+
+value ::= string | number | "true" | "false" | "null"
+
+number ::= int frac? exp?
+
+int ::= "-"? ("0" | [1-9] [0-9]*)
+
+frac ::= "." [0-9]+
+
+exp ::= ("e" | "E") ("+" | "-")? [0-9]+
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt"] | "u" hex hex hex hex) # escapes
+  )* "\""
+
+hex ::= [0-9a-fA-F]
+"""
+
+                genparams["grammar"] = open_ai_tools_grammar
+            else:
+                using_openai_tools = False
+
             messages_string += assistant_message_start
             genparams["prompt"] = messages_string
             if len(images_added)>0:
@@ -889,6 +945,7 @@ def transform_genparams(genparams, api_format):
                 genparams["stop_sequence"].append(assistant_message_start.strip())
             genparams["trim_stop"] = True
 
+
     elif api_format==5:
         firstimg = genparams.get('image', "")
         genparams["images"] = [firstimg]
@@ -935,13 +992,12 @@ def extract_b64string_from_file_upload(self, body):
             return None
 
     async def generate_text(self, genparams, api_format, stream_flag):
-        global friendlymodelname, chatcompl_adapter, currfinishreason
+        global friendlymodelname, chatcompl_adapter, currfinishreason, using_openai_tools
         is_quiet = args.quiet
         currfinishreason = "null"
-
-        def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat
-
-            #flag instance as non-idle for a while
+        
+        def run_blocking():  # api format 1=basic,2=kai,3=oai,4=oai-chat
+            # flag instance as non-idle for a while
             washordereq = genparams.get('genkey', '').startswith('HORDEREQ_')
             if not washordereq:
                 global last_non_horde_req_time
@@ -984,10 +1040,10 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat
                 render_special=genparams.get('render_special', False),
                 banned_tokens=genparams.get('banned_tokens', []),
                 bypass_eos_token=genparams.get('bypass_eos', False),
-                )
+            )
 
-        genout = {"text":"","status":-1,"stopreason":-1}
-        if stream_flag:
+        genout = {"text": "", "status": -1, "stopreason": -1}
+        if stream_flag and not using_openai_tools:
             loop = asyncio.get_event_loop()
             executor = ThreadPoolExecutor()
             genout = await loop.run_in_executor(executor, run_blocking)
@@ -995,9 +1051,9 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat
             genout = run_blocking()
 
         recvtxt = genout['text']
-        currfinishreason = ("length" if (genout['stopreason']!=1) else "stop")
+        currfinishreason = ("length" if (genout['stopreason'] != 1) else "stop")
 
-        #flag instance as non-idle for a while
+        # flag instance as non-idle for a while
         washordereq = genparams.get('genkey', '').startswith('HORDEREQ_')
         if not washordereq:
             global last_non_horde_req_time
@@ -1006,27 +1062,34 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat
         if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1:
             utfprint("\nOutput: " + recvtxt)
 
-        if api_format==1:
-            res = {"data": {"seqs":[recvtxt]}}
-        elif api_format==3:
+        if api_format == 1:
+            res = {"data": {"seqs": [recvtxt]}}
+        elif api_format == 3:
             res = {"id": "cmpl-1", "object": "text_completion", "created": 1, "model": friendlymodelname,
-            "usage": {"prompt_tokens": 100,"completion_tokens": 100,"total_tokens": 200},
-            "choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]}
-        elif api_format==4:
+                   "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200},
+                   "choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]}
+        elif api_format == 4:
+            tool_calls = []
+            if using_openai_tools:
+                try:
+                    tool_calls = json.loads(recvtxt)
+                    recvtxt = None
+                except json.JSONDecodeError as e:
+                    print(f"Error parsing tool calls: {e}, omitting tool calls from response, and just passing generated content as message content")
+
             res = {"id": "chatcmpl-1", "object": "chat.completion", "created": 1, "model": friendlymodelname,
-            "usage": {"prompt_tokens": 100,"completion_tokens": 100,"total_tokens": 200},
-            "choices": [{"index": 0, "message":{"role": "assistant", "content": recvtxt,}, "finish_reason": currfinishreason}]}
-        elif api_format==5:
+                   "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200},
+                   "choices": [{"index": 0, "message": {"role": "assistant", "content": recvtxt, "tool_calls": tool_calls}, "finish_reason": currfinishreason}]}
+        elif api_format == 5:
             res = {"caption": end_trim_to_sentence(recvtxt)}
         else:
-            res = {"results": [{"text": recvtxt, "finish_reason":currfinishreason}]}
+            res = {"results": [{"text": recvtxt, "finish_reason": currfinishreason}]}
 
         try:
             return res
         except Exception as e:
             print(f"Generate: Error while generating: {e}")
 
-
     async def send_oai_sse_event(self, data):
         if data=="[DONE]":
             self.wfile.write(f'data: {data}'.encode())

From 5af664c0dbe8fd5ef8f6b43c8dc5a904586f109e Mon Sep 17 00:00:00 2001
From: teddybear082 <87204721+teddybear082@users.noreply.github.com>
Date: Tue, 9 Jul 2024 07:06:54 -0400
Subject: [PATCH 2/8] try to also support specified function and tool choice
 set to none

Allow the tool message start and end markers to be configured in the adapter ("tools_start" / "tools_end").

Try to force the grammar to a specific function call if one is specified via tool_choice (untested).
---
 koboldcpp.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 1b9f03c9291..93835153c64 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -851,6 +851,8 @@ def transform_genparams(genparams, api_format):
             user_message_end = adapter_obj.get("user_end", "")
             assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n")
             assistant_message_end = adapter_obj.get("assistant_end", "")
+            tools_message_start = adapter_obj.get("tools_start", "")
+            tools_message_end = adapter_obj.get("tools_end", "")
             images_added = []
 
 
@@ -861,6 +863,8 @@ def transform_genparams(genparams, api_format):
                     messages_string += user_message_start
                 elif message['role'] == "assistant":
                     messages_string += assistant_message_start
+                elif message['role'] == "tool":
+                    messages_string += tools_message_start
 
                 # content can be a string or an array of objects
                 curr_content = message['content']
@@ -880,13 +884,22 @@ def transform_genparams(genparams, api_format):
                     messages_string += user_message_end
                 elif message['role'] == "assistant":
                     messages_string += assistant_message_end
+                elif message['role'] == "tool":
+                    messages_string += tools_message_end
 
-            # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt
+            # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
             tools_array = genparams.get('tools', [])
-            if tools_array:
-                tools_string = json.dumps(tools_array, indent=2) + "Respond only in JSON." #TBD: add Tools notation like \n### Tools: \n or just stick the tools at the end of the prompt? And formatting?
+            if tools_array and not genparams.get('tool_choice') == None:
+                tools_string = json.dumps(tools_array, indent=2) + "Respond only in JSON." #TBD: add Tools notation like \n### Tools: \n or just stick the tools at the end of the prompt? And formatting? Add the "Respond only in JSON?" or not?
                 messages_string += user_message_end + tools_string
                 using_openai_tools = True
+                specified_function = None
+                if isinstance(genparams.get('tool_choice'), dict):
+                     try:
+                        specified_function = genparams.get('tool_choice').get('function').get('name')
+                     except:
+                        # In case of any issues, just revert back to no specified function
+                        specified_function = None
                 # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create
                 open_ai_tools_grammar = r"""
 root ::= array
@@ -929,8 +942,52 @@ def transform_genparams(genparams, api_format):
 
 hex ::= [0-9a-fA-F]
 """
+                open_ai_tools_grammar_forced_tool_choice = fr"""
+root ::= array
+
+array ::= "[" object "]"
+
+object ::= "{" pairid "," pairtype "," pairfunction "}"
+
+pairid ::= " \"id\" : " string
+
+pairtype ::= " \"type\" : " "\"function\""
+
+pairfunction ::= " \"function\" : " functionobject
+
+functionobject ::= "{" pairname "," pairarguments "}"
+
+pairname ::= " \"name\" : " "\"{specified_function}\""
+
+pairarguments ::= " \"arguments\" : " "{" arguments "}"
+
+arguments ::= pair ( "," pair)*
+
+pair ::= string ":" value
 
-                genparams["grammar"] = open_ai_tools_grammar
+value ::= string | number | "true" | "false" | "null"
+
+number ::= int frac? exp?
+
+int ::= "-"? ("0" | [1-9] [0-9]*)
+
+frac ::= "." [0-9]+
+
+exp ::= ("e" | "E") ("+" | "-")? [0-9]+
+
+string ::=
+  "\"" (
+    [^"\\] |
+    "\\" (["\\/bfnrt"] | "u" hex hex hex hex) # escapes
+  )* "\""
+
+hex ::= [0-9a-fA-F]
+"""
+
+                if specified_function:
+                    genparams["grammar"] = open_ai_tools_grammar_forced_tool_choice
+                else:
+                    genparams["grammar"] = open_ai_tools_grammar
             else:
                 using_openai_tools = False
 

From 4d71178d140b4839366e3c4021381d31897c717d Mon Sep 17 00:00:00 2001
From: teddybear082 <87204721+teddybear082@users.noreply.github.com>
Date: Tue, 9 Jul 2024 20:48:29 -0400
Subject: [PATCH 3/8] ensure tools get listed right after user content and
 before end of user message content

---
 koboldcpp.py | 68 +++++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 93835153c64..ede92dc1aa8 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -855,8 +855,9 @@ def transform_genparams(genparams, api_format):
             tools_message_end = adapter_obj.get("tools_end", "")
             images_added = []
 
-
+            message_index = 0
             for message in messages_array:
+                message_index += 1
                 if message['role'] == "system":
                     messages_string += system_message_start
                 elif message['role'] == "user":
@@ -877,31 +878,23 @@ def transform_genparams(genparams, api_format):
                         elif item['type']=="image_url":
                             if item['image_url'] and item['image_url']['url'] and item['image_url']['url'].startswith("data:image"):
                                 images_added.append(item['image_url']['url'].split(",", 1)[1])
-
-                if message['role'] == "system":
-                    messages_string += system_message_end
-                elif message['role'] == "user":
-                    messages_string += user_message_end
-                elif message['role'] == "assistant":
-                    messages_string += assistant_message_end
-                elif message['role'] == "tool":
-                    messages_string += tools_message_end
-
-            # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
-            tools_array = genparams.get('tools', [])
-            if tools_array and not genparams.get('tool_choice') == None:
-                tools_string = json.dumps(tools_array, indent=2) + "Respond only in JSON." #TBD: add Tools notation like \n### Tools: \n or just stick the tools at the end of the prompt? And formatting? Add the "Respond only in JSON?" or not?
-                messages_string += user_message_end + tools_string
-                using_openai_tools = True
-                specified_function = None
-                if isinstance(genparams.get('tool_choice'), dict):
-                     try:
-                        specified_function = genparams.get('tool_choice').get('function').get('name')
-                     except:
-                        # In case of any issues, just revert back to no specified function
+                # If last message, add any tools calls after message content and before message end token if any
+                if message['role'] == "user" and message_index == len(messages_array):
+                    # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
+                    tools_array = genparams.get('tools', [])
+                    if tools_array and not genparams.get('tool_choice') == None:
+                        tools_string = json.dumps(tools_array, indent=2)
+                        messages_string += tools_string
+                        using_openai_tools = True
                         specified_function = None
-                # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create
-                open_ai_tools_grammar = r"""
+                        if isinstance(genparams.get('tool_choice'), dict):
+                             try:
+                                specified_function = genparams.get('tool_choice').get('function').get('name')
+                             except:
+                                # In case of any issues, just revert back to no specified function
+                                specified_function = None
+                        # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create
+                        open_ai_tools_grammar = r"""
 root ::= array
 
 array ::= "[" object "]"
@@ -942,7 +935,7 @@ def transform_genparams(genparams, api_format):
 
 hex ::= [0-9a-fA-F]
 """
-                open_ai_tools_grammar_forced_tool_choice = fr"""
+                        open_ai_tools_grammar_forced_tool_choice = fr"""
 root ::= array
 
 array ::= "[" object "]"
@@ -984,12 +977,22 @@ def transform_genparams(genparams, api_format):
 hex ::= [0-9a-fA-F]
 """
 
-                if specified_function:
-                    genparams["grammar"] = open_ai_tools_grammar_forced_tool_choice
-                else:
-                    genparams["grammar"] = open_ai_tools_grammar
-            else:
-                using_openai_tools = False
+                        if specified_function:
+                            genparams["grammar"] = open_ai_tools_grammar_forced_tool_choice
+                        else:
+                            genparams["grammar"] = open_ai_tools_grammar
+                        # Set temperature low automatically if function calling
+                        genparams["temperature"] = 0.3
+                    else:
+                        using_openai_tools = False
+                if message['role'] == "system":
+                    messages_string += system_message_end
+                elif message['role'] == "user":
+                    messages_string += user_message_end
+                elif message['role'] == "assistant":
+                    messages_string += assistant_message_end
+                elif message['role'] == "tool":
+                    messages_string += tools_message_end
 
             messages_string += assistant_message_start
             genparams["prompt"] = messages_string
@@ -1053,6 +1056,7 @@ async def generate_text(self, genparams, api_format, stream_flag):
         is_quiet = args.quiet
         currfinishreason = "null"
         
+        
         def run_blocking():  # api format 1=basic,2=kai,3=oai,4=oai-chat
             # flag instance as non-idle for a while
             washordereq = genparams.get('genkey', '').startswith('HORDEREQ_')

From 0409f989275e609af69fc4e9bb3953091f7f12e5 Mon Sep 17 00:00:00 2001
From: teddybear082 <87204721+teddybear082@users.noreply.github.com>
Date: Wed, 10 Jul 2024 16:01:27 -0400
Subject: [PATCH 4/8] omit grammars approach try prompting instead

- Use more extensive JSON parsing and direct instructions to the model to try to obtain the desired result.

- Seems to work relatively well with Mistral-7B-Instruct-v.0.3.Q4_K_M.gguf and neuralhermes-2.5-mistral-7b.Q4_K_M.gguf.

- Open question: is this approach too opinionated? Should the instructions instead be something that can be passed in with the prompt template?
---
 koboldcpp.py | 139 +++++++++++++++++----------------------------------
 1 file changed, 46 insertions(+), 93 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index ede92dc1aa8..e78467d1c0e 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -806,6 +806,41 @@ def string_contains_sequence_substring(inputstr,sequences):
 using_outdated_flags = False
 using_openai_tools = False
 
+# Used to parse json for openai tool calls
+def extract_json_from_string(input_string):
+    parsed_json = None
+
+    # First check if model exported perfect json
+    try:
+        parsed_json = json.loads(input_string)
+        return parsed_json
+    except:
+        pass
+
+    # Next check if all we need is to add brackets to make it perfect json
+    try:
+        parsed_json = json.loads(f"[{input_string}]")
+        return parsed_json
+    except:
+        pass
+
+    # Now use regular expression to match JSON objects or arrays in case part is valid json and part is not
+    json_pattern = r'(\{.*?\}|\[.*?\])'  # was json_pattern = r'(\{.*\}|\[.*\])'  
+
+    # Find all potential JSON parts
+    potential_jsons = re.findall(json_pattern, input_string, re.DOTALL)
+
+    for potential_json in potential_jsons:
+        try:
+            # Attempt to parse the potential JSON part
+            parsed_json = json.loads(potential_json)
+            return parsed_json
+        except json.JSONDecodeError:
+            # If not valid JSON, continue to the next match
+            continue
+
+    return []
+
 def transform_genparams(genparams, api_format):
     global using_openai_tools
     #api format 1=basic,2=kai,3=oai,4=oai-chat,5=interrogate
@@ -883,106 +918,23 @@ def transform_genparams(genparams, api_format):
                     # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
                     tools_array = genparams.get('tools', [])
                     if tools_array and not genparams.get('tool_choice') == None:
-                        tools_string = json.dumps(tools_array, indent=2)
+                        response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
+                        json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
+                        tools_string = json.dumps(tools_array, indent=0)
                         messages_string += tools_string
                         using_openai_tools = True
                         specified_function = None
                         if isinstance(genparams.get('tool_choice'), dict):
                              try:
                                 specified_function = genparams.get('tool_choice').get('function').get('name')
+                                json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0)
                              except:
                                 # In case of any issues, just revert back to no specified function
                                 specified_function = None
-                        # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create
-                        open_ai_tools_grammar = r"""
-root ::= array
-
-array ::= "[" object "]"
-
-object ::= "{" pairid "," pairtype "," pairfunction "}"
-
-pairid ::= " \"id\" : " string
-
-pairtype ::= " \"type\" : " "\"function\""
-
-pairfunction ::= " \"function\" : " functionobject
-
-functionobject ::= "{" pairname "," pairarguments "}"
-
-pairname ::= " \"name\" : " string
-
-pairarguments ::= " \"arguments\" : " "{" arguments "}"
-
-arguments ::= pair ( "," pair)*
-
-pair ::= string ":" value
-
-value ::= string | number | "true" | "false" | "null"
-
-number ::= int frac? exp?
+                        messages_string += json_formatting_instruction
 
-int ::= "-"? ("0" | [1-9] [0-9]*)
-
-frac ::= "." [0-9]+
-
-exp ::= ("e" | "E") ("+" | "-")? [0-9]+
-
-string ::=
-  "\"" (
-    [^"\\] |
-    "\\" (["\\/bfnrt"] | "u" hex hex hex hex) # escapes
-  )* "\""
-
-hex ::= [0-9a-fA-F]
-"""
-                        open_ai_tools_grammar_forced_tool_choice = fr"""
-root ::= array
-
-array ::= "[" object "]"
-
-object ::= "{" pairid "," pairtype "," pairfunction "}"
-
-pairid ::= " \"id\" : " string
-
-pairtype ::= " \"type\" : " "\"function\""
-
-pairfunction ::= " \"function\" : " functionobject
-
-functionobject ::= "{" pairname "," pairarguments "}"
-
-pairname ::= " \"name\" : " "\"{specified_function}\""
-
-pairarguments ::= " \"arguments\" : " "{" arguments "}"
-
-arguments ::= pair ( "," pair)*
-
-pair ::= string ":" value
-
-value ::= string | number | "true" | "false" | "null"
-
-number ::= int frac? exp?
-
-int ::= "-"? ("0" | [1-9] [0-9]*)
-
-frac ::= "." [0-9]+
-
-exp ::= ("e" | "E") ("+" | "-")? [0-9]+
-
-string ::=
-  "\"" (
-    [^"\\] |
-    "\\" (["\\/bfnrt"] | "u" hex hex hex hex) # escapes
-  )* "\""
-
-hex ::= [0-9a-fA-F]
-"""
-
-                        if specified_function:
-                            genparams["grammar"] = open_ai_tools_grammar_forced_tool_choice
-                        else:
-                            genparams["grammar"] = open_ai_tools_grammar
                         # Set temperature low automatically if function calling
-                        genparams["temperature"] = 0.3
+                        genparams["temperature"] = 0.2
                     else:
                         using_openai_tools = False
                 if message['role'] == "system":
@@ -1133,10 +1085,11 @@ def run_blocking():  # api format 1=basic,2=kai,3=oai,4=oai-chat
             tool_calls = []
             if using_openai_tools:
                 try:
-                    tool_calls = json.loads(recvtxt)
-                    recvtxt = None
-                except json.JSONDecodeError as e:
-                    print(f"Error parsing tool calls: {e}, omitting tool calls from response, and just passing generated content as message content")
+                    tool_calls = extract_json_from_string(recvtxt)
+                    if tool_calls:
+                        recvtxt = None
+                except Exception as e:
+                    print(f"Error parsing or finding tool calls: {e}, omitting tool calls from response, and just passing generated content as message content")
 
             res = {"id": "chatcmpl-1", "object": "chat.completion", "created": 1, "model": friendlymodelname,
                    "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200},

From 9ce88dc7446cf6d9a763ff227d202aa3d7adb1c1 Mon Sep 17 00:00:00 2001
From: teddybear082 <87204721+teddybear082@users.noreply.github.com>
Date: Thu, 11 Jul 2024 20:59:34 -0400
Subject: [PATCH 5/8] add back llamacpp recommended json grammar

Go back to adding a grammar, but use only the "official" llama.cpp JSON array grammar rather than a custom one written just for OpenAI tool calls.
---
 koboldcpp.py | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index e78467d1c0e..13eb683bbd0 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -930,11 +930,45 @@ def transform_genparams(genparams, api_format):
                                 json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0)
                              except:
                                 # In case of any issues, just revert back to no specified function
-                                specified_function = None
+                                pass
                         messages_string += json_formatting_instruction
 
                         # Set temperature low automatically if function calling
                         genparams["temperature"] = 0.2
+
+                        # Set grammar to llamacpp example grammar to force json response (see https://github.com/ggerganov/llama.cpp/blob/master/grammars/json_arr.gbnf)
+                        genparams["grammar"] = r"""
+root   ::= arr
+value  ::= object | array | string | number | ("true" | "false" | "null") ws
+
+arr  ::=
+  "[\n" ws (
+            value
+    (",\n" ws value)*
+  )? "]"
+
+object ::=
+  "{" ws (
+            string ":" ws value
+    ("," ws string ":" ws value)*
+  )? "}" ws
+
+array  ::=
+  "[" ws (
+            value
+    ("," ws value)*
+  )? "]" ws
+
+string ::=
+  "\"" (
+    [^"\\\x7F\x00-\x1F] |
+    "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+  )* "\"" ws
+
+number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws
+
+ws ::= | " " | "\n" [ \t]{0,20}
+"""
                     else:
                         using_openai_tools = False
                 if message['role'] == "system":

From 057aeb23e7e932dddf22689ba2fc1491419420ad Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 13 Jul 2024 17:14:28 +0800
Subject: [PATCH 6/8] Tidy up, remove unnecessary globals

---
 koboldcpp.py | 75 ++++++++++++++++++----------------------------------
 1 file changed, 26 insertions(+), 49 deletions(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 1658bb3fe72..13bfa9eae95 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -804,45 +804,35 @@ def string_contains_sequence_substring(inputstr,sequences):
 currfinishreason = "null"
 using_gui_launcher = False
 using_outdated_flags = False
-using_openai_tools = False
 
 # Used to parse json for openai tool calls
 def extract_json_from_string(input_string):
     parsed_json = None
-
-    # First check if model exported perfect json
-    try:
+    try: # First check if model exported perfect json
         parsed_json = json.loads(input_string)
         return parsed_json
-    except:
+    except Exception as e:
         pass
-
-    # Next check if all we need is to add brackets to make it perfect json
-    try:
+    try: # Next check if all we need is to add brackets to make it perfect json
         parsed_json = json.loads(f"[{input_string}]")
         return parsed_json
-    except:
+    except Exception as e:
+        pass
+    try:
+        # Now use regular expression to match JSON objects or arrays in case part is valid json and part is not
+        json_pattern = r'(\{.*?\}|\[.*?\])'  # was json_pattern = r'(\{.*\}|\[.*\])'
+        potential_jsons = re.findall(json_pattern, input_string, re.DOTALL)
+        for potential_json in potential_jsons:
+            try:
+                parsed_json = json.loads(potential_json)
+                return parsed_json
+            except Exception as e:
+                continue
+    except Exception as e:
         pass
-
-    # Now use regular expression to match JSON objects or arrays in case part is valid json and part is not
-    json_pattern = r'(\{.*?\}|\[.*?\])'  # was json_pattern = r'(\{.*\}|\[.*\])'  
-
-    # Find all potential JSON parts
-    potential_jsons = re.findall(json_pattern, input_string, re.DOTALL)
-
-    for potential_json in potential_jsons:
-        try:
-            # Attempt to parse the potential JSON part
-            parsed_json = json.loads(potential_json)
-            return parsed_json
-        except json.JSONDecodeError:
-            # If not valid JSON, continue to the next match
-            continue
-
     return []
 
 def transform_genparams(genparams, api_format):
-    global using_openai_tools
     #api format 1=basic,2=kai,3=oai,4=oai-chat,5=interrogate
     #alias all nonstandard alternative names for rep pen.
     rp1 = genparams.get('repeat_penalty', 1.0)
@@ -917,60 +907,52 @@ def transform_genparams(genparams, api_format):
                 if message['role'] == "user" and message_index == len(messages_array):
                     # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
                     tools_array = genparams.get('tools', [])
-                    if tools_array and not genparams.get('tool_choice') == None:
+                    if tools_array and len(tools_array)>0 and not genparams.get('tool_choice',None) == None:
                         response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
                         json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
                         tools_string = json.dumps(tools_array, indent=0)
                         messages_string += tools_string
-                        using_openai_tools = True
                         specified_function = None
                         if isinstance(genparams.get('tool_choice'), dict):
                              try:
                                 specified_function = genparams.get('tool_choice').get('function').get('name')
                                 json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0)
-                             except:
+                             except Exception as e:
                                 # In case of any issues, just revert back to no specified function
                                 pass
                         messages_string += json_formatting_instruction
 
                         # Set temperature low automatically if function calling
                         genparams["temperature"] = 0.2
+                        genparams["using_openai_tools"] = True
 
                         # Set grammar to llamacpp example grammar to force json response (see https://github.com/ggerganov/llama.cpp/blob/master/grammars/json_arr.gbnf)
                         genparams["grammar"] = r"""
 root   ::= arr
 value  ::= object | array | string | number | ("true" | "false" | "null") ws
-
 arr  ::=
   "[\n" ws (
             value
     (",\n" ws value)*
   )? "]"
-
 object ::=
   "{" ws (
             string ":" ws value
     ("," ws string ":" ws value)*
   )? "}" ws
-
 array  ::=
   "[" ws (
             value
     ("," ws value)*
   )? "]" ws
-
 string ::=
   "\"" (
     [^"\\\x7F\x00-\x1F] |
     "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4})
   )* "\"" ws
-
 number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws
-
 ws ::= | " " | "\n" [ \t]{0,20}
 """
-                    else:
-                        using_openai_tools = False
                 if message['role'] == "system":
                     messages_string += system_message_end
                 elif message['role'] == "user":
@@ -1038,11 +1020,10 @@ def extract_b64string_from_file_upload(self, body):
             return None
 
     async def generate_text(self, genparams, api_format, stream_flag):
-        global friendlymodelname, chatcompl_adapter, currfinishreason, using_openai_tools
+        global friendlymodelname, chatcompl_adapter, currfinishreason
         is_quiet = args.quiet
         currfinishreason = "null"
-        
-        
+
         def run_blocking():  # api format 1=basic,2=kai,3=oai,4=oai-chat
             # flag instance as non-idle for a while
             washordereq = genparams.get('genkey', '').startswith('HORDEREQ_')
@@ -1090,7 +1071,7 @@ def run_blocking():  # api format 1=basic,2=kai,3=oai,4=oai-chat
             )
 
         genout = {"text": "", "status": -1, "stopreason": -1}
-        if stream_flag and not using_openai_tools:
+        if stream_flag:
             loop = asyncio.get_event_loop()
             executor = ThreadPoolExecutor()
             genout = await loop.run_in_executor(executor, run_blocking)
@@ -1116,15 +1097,11 @@ def run_blocking():  # api format 1=basic,2=kai,3=oai,4=oai-chat
                    "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200},
                    "choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]}
         elif api_format == 4:
-            tool_calls = []
+            using_openai_tools = genparams.get('using_openai_tools', False)
             if using_openai_tools:
-                try:
-                    tool_calls = extract_json_from_string(recvtxt)
-                    if tool_calls:
-                        recvtxt = None
-                except Exception as e:
-                    print(f"Error parsing or finding tool calls: {e}, omitting tool calls from response, and just passing generated content as message content")
-
+                tool_calls = extract_json_from_string(recvtxt)
+                if tool_calls and len(tool_calls)>0:
+                    recvtxt = None
             res = {"id": "chatcmpl-1", "object": "chat.completion", "created": 1, "model": friendlymodelname,
                    "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200},
                    "choices": [{"index": 0, "message": {"role": "assistant", "content": recvtxt, "tool_calls": tool_calls}, "finish_reason": currfinishreason}]}

From 1c1eb604b94c4a57067c63e69d0c1e679fd5835e Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 13 Jul 2024 17:17:45 +0800
Subject: [PATCH 7/8] clarity

---
 koboldcpp.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/koboldcpp.py b/koboldcpp.py
index 13bfa9eae95..2046104dea1 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -907,7 +907,7 @@ def transform_genparams(genparams, api_format):
                 if message['role'] == "user" and message_index == len(messages_array):
                     # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None
                     tools_array = genparams.get('tools', [])
-                    if tools_array and len(tools_array)>0 and not genparams.get('tool_choice',None) == None:
+                    if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) != None:
                         response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}]
                         json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0)
                         tools_string = json.dumps(tools_array, indent=0)

From 2d3991bde32e63ee9bff44d35d1b592d7d8398ee Mon Sep 17 00:00:00 2001
From: teddybear082 <87204721+teddybear082@users.noreply.github.com>
Date: Sat, 13 Jul 2024 10:34:09 -0400
Subject: [PATCH 8/8] fix missing local variable error

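Initialize tool_calls to an empty list before the using_openai_tools check,
so it is always defined when the chat completion response is built.
This fixes the error I mentioned in my last comment.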
---
 koboldcpp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/koboldcpp.py b/koboldcpp.py
index 2046104dea1..e03e42f18c4 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1098,6 +1098,7 @@ def run_blocking():  # api format 1=basic,2=kai,3=oai,4=oai-chat
                    "choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]}
         elif api_format == 4:
             using_openai_tools = genparams.get('using_openai_tools', False)
+            tool_calls = []
             if using_openai_tools:
                 tool_calls = extract_json_from_string(recvtxt)
                 if tool_calls and len(tool_calls)>0: