From f3721e78296c5230b9ae7ad21f232a3aea602943 Mon Sep 17 00:00:00 2001 From: teddybear082 <87204721+teddybear082@users.noreply.github.com> Date: Mon, 8 Jul 2024 22:53:29 -0400 Subject: [PATCH 1/8] Rudimentary support of openai chat completions tools calls -Most small models are not smart enough to do this, especially a combined tool call + role play response, but at least this allows experimentation along these lines with koboldcpp --- koboldcpp.py | 105 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 21 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 91cf13fd63f..1b9f03c9291 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -804,8 +804,10 @@ def string_contains_sequence_substring(inputstr,sequences): currfinishreason = "null" using_gui_launcher = False using_outdated_flags = False +using_openai_tools = False def transform_genparams(genparams, api_format): + global using_openai_tools #api format 1=basic,2=kai,3=oai,4=oai-chat,5=interrogate #alias all nonstandard alternative names for rep pen. rp1 = genparams.get('repeat_penalty', 1.0) @@ -851,6 +853,7 @@ def transform_genparams(genparams, api_format): assistant_message_end = adapter_obj.get("assistant_end", "") images_added = [] + for message in messages_array: if message['role'] == "system": messages_string += system_message_start @@ -878,6 +881,59 @@ def transform_genparams(genparams, api_format): elif message['role'] == "assistant": messages_string += assistant_message_end + # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt + tools_array = genparams.get('tools', []) + if tools_array: + tools_string = json.dumps(tools_array, indent=2) + "Respond only in JSON." #TBD: add Tools notation like \n### Tools: \n or just stick the tools at the end of the prompt? And formatting? + messages_string += user_message_end + tools_string + using_openai_tools = True + # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create + open_ai_tools_grammar = r""" +root ::= array + +array ::= "[" object "]" + +object ::= "{" pairid "," pairtype "," pairfunction "}" + +pairid ::= " \"id\" : " string + +pairtype ::= " \"type\" : " "\"function\"" + +pairfunction ::= " \"function\" : " functionobject + +functionobject ::= "{" pairname "," pairarguments "}" + +pairname ::= " \"name\" : " string + +pairarguments ::= " \"arguments\" : " "{" arguments "}" + +arguments ::= pair ( "," pair)* + +pair ::= string ":" value + +value ::= string | number | "true" | "false" | "null" + +number ::= int frac? exp? + +int ::= "-"? ("0" | [1-9] [0-9]*) + +frac ::= "." [0-9]+ + +exp ::= ("e" | "E") ("+" | "-")? [0-9]+ + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt"] | "u" hex hex hex hex) # escapes + )* "\"" + +hex ::= [0-9a-fA-F] +""" + + genparams["grammar"] = open_ai_tools_grammar + else: + using_openai_tools = False + messages_string += assistant_message_start genparams["prompt"] = messages_string if len(images_added)>0: @@ -889,6 +945,7 @@ def transform_genparams(genparams, api_format): genparams["stop_sequence"].append(assistant_message_start.strip()) genparams["trim_stop"] = True + elif api_format==5: firstimg = genparams.get('image', "") genparams["images"] = [firstimg] @@ -935,13 +992,12 @@ def extract_b64string_from_file_upload(self, body): return None async def generate_text(self, genparams, api_format, stream_flag): - global friendlymodelname, chatcompl_adapter, currfinishreason + global friendlymodelname, chatcompl_adapter, currfinishreason, using_openai_tools is_quiet = args.quiet currfinishreason = "null" - - def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat - - #flag instance as non-idle for a while + + def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat + # flag instance as non-idle for a while washordereq = genparams.get('genkey', '').startswith('HORDEREQ_') if not washordereq: global last_non_horde_req_time @@ -984,10 +1040,10 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat render_special=genparams.get('render_special', False), banned_tokens=genparams.get('banned_tokens', []), bypass_eos_token=genparams.get('bypass_eos', False), - ) + ) - genout = {"text":"","status":-1,"stopreason":-1} - if stream_flag: + genout = {"text": "", "status": -1, "stopreason": -1} + if stream_flag and not using_openai_tools: loop = asyncio.get_event_loop() executor = ThreadPoolExecutor() genout = await loop.run_in_executor(executor, run_blocking) @@ -995,9 +1051,9 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat genout = run_blocking() recvtxt = genout['text'] - currfinishreason = ("length" if (genout['stopreason']!=1) else "stop") + currfinishreason = ("length" if (genout['stopreason'] != 1) else "stop") - #flag instance as non-idle for a while + # flag instance as non-idle for a while washordereq = genparams.get('genkey', '').startswith('HORDEREQ_') if not washordereq: global last_non_horde_req_time @@ -1006,27 +1062,34 @@ def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat if (args.debugmode != -1 and not is_quiet) or args.debugmode >= 1: utfprint("\nOutput: " + recvtxt) - if api_format==1: - res = {"data": {"seqs":[recvtxt]}} - elif api_format==3: + if api_format == 1: + res = {"data": {"seqs": [recvtxt]}} + elif api_format == 3: res = {"id": "cmpl-1", "object": "text_completion", "created": 1, "model": friendlymodelname, - "usage": {"prompt_tokens": 100,"completion_tokens": 100,"total_tokens": 200}, - "choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]} - elif api_format==4: + "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200}, + "choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]} + elif api_format == 4: + tool_calls = [] + if using_openai_tools: + try: + tool_calls = json.loads(recvtxt) + recvtxt = None + except json.JSONDecodeError as e: + print(f"Error parsing tool calls: {e}, omitting tool calls from response, and just passing generated content as message content") + res = {"id": "chatcmpl-1", "object": "chat.completion", "created": 1, "model": friendlymodelname, - "usage": {"prompt_tokens": 100,"completion_tokens": 100,"total_tokens": 200}, - "choices": [{"index": 0, "message":{"role": "assistant", "content": recvtxt,}, "finish_reason": currfinishreason}]} - elif api_format==5: + "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200}, + "choices": [{"index": 0, "message": {"role": "assistant", "content": recvtxt, "tool_calls": tool_calls}, "finish_reason": currfinishreason}]} + elif api_format == 5: res = {"caption": end_trim_to_sentence(recvtxt)} else: - res = {"results": [{"text": recvtxt, "finish_reason":currfinishreason}]} + res = {"results": [{"text": recvtxt, "finish_reason": currfinishreason}]} try: return res except Exception as e: print(f"Generate: Error while generating: {e}") - async def send_oai_sse_event(self, data): if data=="[DONE]": self.wfile.write(f'data: {data}'.encode()) From 5af664c0dbe8fd5ef8f6b43c8dc5a904586f109e Mon Sep 17 00:00:00 2001 From: teddybear082 <87204721+teddybear082@users.noreply.github.com> Date: Tue, 9 Jul 2024 07:06:54 -0400 Subject: [PATCH 2/8] try to also support specified function and tool choice set to none Allow tools start and end messages to be configured in adapter Try to force grammar to specific function call if specified (untested) --- koboldcpp.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 1b9f03c9291..93835153c64 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -851,6 +851,8 @@ def transform_genparams(genparams, api_format): user_message_end = adapter_obj.get("user_end", "") assistant_message_start = adapter_obj.get("assistant_start", "\n### Response:\n") assistant_message_end = adapter_obj.get("assistant_end", "") + tools_message_start = adapter_obj.get("tools_start", "") + tools_message_end = adapter_obj.get("tools_end", "") images_added = [] @@ -861,6 +863,8 @@ def transform_genparams(genparams, api_format): messages_string += user_message_start elif message['role'] == "assistant": messages_string += assistant_message_start + elif message['role'] == "tool": + messages_string += tools_message_start # content can be a string or an array of objects curr_content = message['content'] @@ -880,13 +884,22 @@ def transform_genparams(genparams, api_format): messages_string += user_message_end elif message['role'] == "assistant": messages_string += assistant_message_end + elif message['role'] == "tool": + messages_string += tools_message_end - # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt + # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None tools_array = genparams.get('tools', []) - if tools_array: - tools_string = json.dumps(tools_array, indent=2) + "Respond only in JSON." #TBD: add Tools notation like \n### Tools: \n or just stick the tools at the end of the prompt? And formatting? + if tools_array and not genparams.get('tool_choice') == None: + tools_string = json.dumps(tools_array, indent=2) + "Respond only in JSON." #TBD: add Tools notation like \n### Tools: \n or just stick the tools at the end of the prompt? And formatting? Add the "Respond only in JSON?" or not? messages_string += user_message_end + tools_string using_openai_tools = True + specified_function = None + if isinstance(genparams.get('tool_choice'), dict): + try: + specified_function = genparams.get('tool_choice').get('function').get('name') + except: + # In case of any issues, just revert back to no specified function + specified_function = None # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create open_ai_tools_grammar = r""" root ::= array @@ -929,8 +942,52 @@ def transform_genparams(genparams, api_format): hex ::= [0-9a-fA-F] """ + open_ai_tools_grammar_forced_tool_choice = fr""" +root ::= array + +array ::= "[" object "]" + +object ::= "{" pairid "," pairtype "," pairfunction "}" + +pairid ::= " \"id\" : " string + +pairtype ::= " \"type\" : " "\"function\"" + +pairfunction ::= " \"function\" : " functionobject + +functionobject ::= "{" pairname "," pairarguments "}" + +pairname ::= " \"name\" : " "\"{specified_function}\"" + +pairarguments ::= " \"arguments\" : " "{" arguments "}" + +arguments ::= pair ( "," pair)* + +pair ::= string ":" value - genparams["grammar"] = open_ai_tools_grammar +value ::= string | number | "true" | "false" | "null" + +number ::= int frac? exp? + +int ::= "-"? ("0" | [1-9] [0-9]*) + +frac ::= "." [0-9]+ + +exp ::= ("e" | "E") ("+" | "-")? [0-9]+ + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt"] | "u" hex hex hex hex) # escapes + )* "\"" + +hex ::= [0-9a-fA-F] +""" + + if specified_function: + genparams["grammar"] = open_ai_tools_grammar_forced_tool_choice + else: + genparams["grammar"] = open_ai_tools_grammar else: using_openai_tools = False From 4d71178d140b4839366e3c4021381d31897c717d Mon Sep 17 00:00:00 2001 From: teddybear082 <87204721+teddybear082@users.noreply.github.com> Date: Tue, 9 Jul 2024 20:48:29 -0400 Subject: [PATCH 3/8] ensure tools get listed right after user content and before end of user message content --- koboldcpp.py | 68 +++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 93835153c64..ede92dc1aa8 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -855,8 +855,9 @@ def transform_genparams(genparams, api_format): tools_message_end = adapter_obj.get("tools_end", "") images_added = [] - + message_index = 0 for message in messages_array: + message_index += 1 if message['role'] == "system": messages_string += system_message_start elif message['role'] == "user": @@ -877,31 +878,23 @@ def transform_genparams(genparams, api_format): elif item['type']=="image_url": if item['image_url'] and item['image_url']['url'] and item['image_url']['url'].startswith("data:image"): images_added.append(item['image_url']['url'].split(",", 1)[1]) - - if message['role'] == "system": - messages_string += system_message_end - elif message['role'] == "user": - messages_string += user_message_end - elif message['role'] == "assistant": - messages_string += assistant_message_end - elif message['role'] == "tool": - messages_string += tools_message_end - - # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None - tools_array = genparams.get('tools', []) - if tools_array and not genparams.get('tool_choice') == None: - tools_string = json.dumps(tools_array, indent=2) + "Respond only in JSON." #TBD: add Tools notation like \n### Tools: \n or just stick the tools at the end of the prompt? And formatting? Add the "Respond only in JSON?" or not? - messages_string += user_message_end + tools_string - using_openai_tools = True - specified_function = None - if isinstance(genparams.get('tool_choice'), dict): - try: - specified_function = genparams.get('tool_choice').get('function').get('name') - except: - # In case of any issues, just revert back to no specified function + # If last message, add any tools calls after message content and before message end token if any + if message['role'] == "user" and message_index == len(messages_array): + # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None + tools_array = genparams.get('tools', []) + if tools_array and not genparams.get('tool_choice') == None: + tools_string = json.dumps(tools_array, indent=2) + messages_string += tools_string + using_openai_tools = True specified_function = None - # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create - open_ai_tools_grammar = r""" + if isinstance(genparams.get('tool_choice'), dict): + try: + specified_function = genparams.get('tool_choice').get('function').get('name') + except: + # In case of any issues, just revert back to no specified function + specified_function = None + # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create + open_ai_tools_grammar = r""" root ::= array array ::= "[" object "]" @@ -942,7 +935,7 @@ def transform_genparams(genparams, api_format): hex ::= [0-9a-fA-F] """ - open_ai_tools_grammar_forced_tool_choice = fr""" + open_ai_tools_grammar_forced_tool_choice = fr""" root ::= array array ::= "[" object "]" @@ -984,12 +977,22 @@ def transform_genparams(genparams, api_format): hex ::= [0-9a-fA-F] """ - if specified_function: - genparams["grammar"] = open_ai_tools_grammar_forced_tool_choice - else: - genparams["grammar"] = open_ai_tools_grammar - else: - using_openai_tools = False + if specified_function: + genparams["grammar"] = open_ai_tools_grammar_forced_tool_choice + else: + genparams["grammar"] = open_ai_tools_grammar + # Set temperature low automatically if function calling + genparams["temperature"] = 0.3 + else: + using_openai_tools = False + if message['role'] == "system": + messages_string += system_message_end + elif message['role'] == "user": + messages_string += user_message_end + elif message['role'] == "assistant": + messages_string += assistant_message_end + elif message['role'] == "tool": + messages_string += tools_message_end messages_string += assistant_message_start genparams["prompt"] = messages_string @@ -1053,6 +1056,7 @@ async def generate_text(self, genparams, api_format, stream_flag): is_quiet = args.quiet currfinishreason = "null" + def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat # flag instance as non-idle for a while washordereq = genparams.get('genkey', '').startswith('HORDEREQ_') From 0409f989275e609af69fc4e9bb3953091f7f12e5 Mon Sep 17 00:00:00 2001 From: teddybear082 <87204721+teddybear082@users.noreply.github.com> Date: Wed, 10 Jul 2024 16:01:27 -0400 Subject: [PATCH 4/8] omit grammars approach try prompting instead -use more extensive json parsing and direct instructions to models to try to obtain the desired result -seems to work relatively well with Mistral-7B-Instruct-v.0.3.Q4_K_M.gguf and neuralhermes-2.5-mistral-7b.Q4_K_M.gguf -question of whether this is too opinionated of an approach, should the instructions be things that can be passed with the prompt template? --- koboldcpp.py | 139 +++++++++++++++++---------------------------------- 1 file changed, 46 insertions(+), 93 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index ede92dc1aa8..e78467d1c0e 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -806,6 +806,41 @@ def string_contains_sequence_substring(inputstr,sequences): using_outdated_flags = False using_openai_tools = False +# Used to parse json for openai tool calls +def extract_json_from_string(input_string): + parsed_json = None + + # First check if model exported perfect json + try: + parsed_json = json.loads(input_string) + return parsed_json + except: + pass + + # Next check if all we need is to add brackets to make it perfect json + try: + parsed_json = json.loads(f"[{input_string}]") + return parsed_json + except: + pass + + # Now use regular expression to match JSON objects or arrays in case part is valid json and part is not + json_pattern = r'(\{.*?\}|\[.*?\])' # was json_pattern = r'(\{.*\}|\[.*\])' + + # Find all potential JSON parts + potential_jsons = re.findall(json_pattern, input_string, re.DOTALL) + + for potential_json in potential_jsons: + try: + # Attempt to parse the potential JSON part + parsed_json = json.loads(potential_json) + return parsed_json + except json.JSONDecodeError: + # If not valid JSON, continue to the next match + continue + + return [] + def transform_genparams(genparams, api_format): global using_openai_tools #api format 1=basic,2=kai,3=oai,4=oai-chat,5=interrogate @@ -883,106 +918,23 @@ def transform_genparams(genparams, api_format): # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None tools_array = genparams.get('tools', []) if tools_array and not genparams.get('tool_choice') == None: - tools_string = json.dumps(tools_array, indent=2) + response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}] + json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0) + tools_string = json.dumps(tools_array, indent=0) messages_string += tools_string using_openai_tools = True specified_function = None if isinstance(genparams.get('tool_choice'), dict): try: specified_function = genparams.get('tool_choice').get('function').get('name') + json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0) except: # In case of any issues, just revert back to no specified function specified_function = None - # Use grammar to try to constrain output to openai tools format: https://platform.openai.com/docs/api-reference/chat/create - open_ai_tools_grammar = r""" -root ::= array - -array ::= "[" object "]" - -object ::= "{" pairid "," pairtype "," pairfunction "}" - -pairid ::= " \"id\" : " string - -pairtype ::= " \"type\" : " "\"function\"" - -pairfunction ::= " \"function\" : " functionobject - -functionobject ::= "{" pairname "," pairarguments "}" - -pairname ::= " \"name\" : " string - -pairarguments ::= " \"arguments\" : " "{" arguments "}" - -arguments ::= pair ( "," pair)* - -pair ::= string ":" value - -value ::= string | number | "true" | "false" | "null" - -number ::= int frac? exp? + messages_string += json_formatting_instruction -int ::= "-"? ("0" | [1-9] [0-9]*) - -frac ::= "." [0-9]+ - -exp ::= ("e" | "E") ("+" | "-")? [0-9]+ - -string ::= - "\"" ( - [^"\\] | - "\\" (["\\/bfnrt"] | "u" hex hex hex hex) # escapes - )* "\"" - -hex ::= [0-9a-fA-F] -""" - open_ai_tools_grammar_forced_tool_choice = fr""" -root ::= array - -array ::= "[" object "]" - -object ::= "{" pairid "," pairtype "," pairfunction "}" - -pairid ::= " \"id\" : " string - -pairtype ::= " \"type\" : " "\"function\"" - -pairfunction ::= " \"function\" : " functionobject - -functionobject ::= "{" pairname "," pairarguments "}" - -pairname ::= " \"name\" : " "\"{specified_function}\"" - -pairarguments ::= " \"arguments\" : " "{" arguments "}" - -arguments ::= pair ( "," pair)* - -pair ::= string ":" value - -value ::= string | number | "true" | "false" | "null" - -number ::= int frac? exp? - -int ::= "-"? ("0" | [1-9] [0-9]*) - -frac ::= "." [0-9]+ - -exp ::= ("e" | "E") ("+" | "-")? [0-9]+ - -string ::= - "\"" ( - [^"\\] | - "\\" (["\\/bfnrt"] | "u" hex hex hex hex) # escapes - )* "\"" - -hex ::= [0-9a-fA-F] -""" - - if specified_function: - genparams["grammar"] = open_ai_tools_grammar_forced_tool_choice - else: - genparams["grammar"] = open_ai_tools_grammar # Set temperature low automatically if function calling - genparams["temperature"] = 0.3 + genparams["temperature"] = 0.2 else: using_openai_tools = False if message['role'] == "system": @@ -1133,10 +1085,11 @@ def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat tool_calls = [] if using_openai_tools: try: - tool_calls = json.loads(recvtxt) - recvtxt = None - except json.JSONDecodeError as e: - print(f"Error parsing tool calls: {e}, omitting tool calls from response, and just passing generated content as message content") + tool_calls = extract_json_from_string(recvtxt) + if tool_calls: + recvtxt = None + except Exception as e: + print(f"Error parsing or finding tool calls: {e}, omitting tool calls from response, and just passing generated content as message content") res = {"id": "chatcmpl-1", "object": "chat.completion", "created": 1, "model": friendlymodelname, "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200}, From 9ce88dc7446cf6d9a763ff227d202aa3d7adb1c1 Mon Sep 17 00:00:00 2001 From: teddybear082 <87204721+teddybear082@users.noreply.github.com> Date: Thu, 11 Jul 2024 20:59:34 -0400 Subject: [PATCH 5/8] add back llamacpp recommended json grammar Go back to adding grammar but use "official" llamacpp grammar only not a custom one just for openai --- koboldcpp.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/koboldcpp.py b/koboldcpp.py index e78467d1c0e..13eb683bbd0 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -930,11 +930,45 @@ def transform_genparams(genparams, api_format): json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0) except: # In case of any issues, just revert back to no specified function - specified_function = None + pass messages_string += json_formatting_instruction # Set temperature low automatically if function calling genparams["temperature"] = 0.2 + + # Set grammar to llamacpp example grammar to force json response (see https://github.com/ggerganov/llama.cpp/blob/master/grammars/json_arr.gbnf) + genparams["grammar"] = r""" +root ::= arr +value ::= object | array | string | number | ("true" | "false" | "null") ws + +arr ::= + "[\n" ws ( + value + (",\n" ws value)* + )? "]" + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" ws + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" ws + +string ::= + "\"" ( + [^"\\\x7F\x00-\x1F] | + "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) + )* "\"" ws + +number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws + +ws ::= | " " | "\n" [ \t]{0,20} +""" else: using_openai_tools = False if message['role'] == "system": From 057aeb23e7e932dddf22689ba2fc1491419420ad Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 13 Jul 2024 17:14:28 +0800 Subject: [PATCH 6/8] Tidy up, remove unnecessary globals --- koboldcpp.py | 75 ++++++++++++++++++---------------------------------- 1 file changed, 26 insertions(+), 49 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 1658bb3fe72..13bfa9eae95 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -804,45 +804,35 @@ def string_contains_sequence_substring(inputstr,sequences): currfinishreason = "null" using_gui_launcher = False using_outdated_flags = False -using_openai_tools = False # Used to parse json for openai tool calls def extract_json_from_string(input_string): parsed_json = None - - # First check if model exported perfect json - try: + try: # First check if model exported perfect json parsed_json = json.loads(input_string) return parsed_json - except: + except Exception as e: pass - - # Next check if all we need is to add brackets to make it perfect json - try: + try: # Next check if all we need is to add brackets to make it perfect json parsed_json = json.loads(f"[{input_string}]") return parsed_json - except: + except Exception as e: + pass + try: + # Now use regular expression to match JSON objects or arrays in case part is valid json and part is not + json_pattern = r'(\{.*?\}|\[.*?\])' # was json_pattern = r'(\{.*\}|\[.*\])' + potential_jsons = re.findall(json_pattern, input_string, re.DOTALL) + for potential_json in potential_jsons: + try: + parsed_json = json.loads(potential_json) + return parsed_json + except Exception as e: + continue + except Exception as e: pass - - # Now use regular expression to match JSON objects or arrays in case part is valid json and part is not - json_pattern = r'(\{.*?\}|\[.*?\])' # was json_pattern = r'(\{.*\}|\[.*\])' - - # Find all potential JSON parts - potential_jsons = re.findall(json_pattern, input_string, re.DOTALL) - - for potential_json in potential_jsons: - try: - # Attempt to parse the potential JSON part - parsed_json = json.loads(potential_json) - return parsed_json - except json.JSONDecodeError: - # If not valid JSON, continue to the next match - continue - return [] def transform_genparams(genparams, api_format): - global using_openai_tools #api format 1=basic,2=kai,3=oai,4=oai-chat,5=interrogate #alias all nonstandard alternative names for rep pen. rp1 = genparams.get('repeat_penalty', 1.0) @@ -917,60 +907,52 @@ def transform_genparams(genparams, api_format): if message['role'] == "user" and message_index == len(messages_array): # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None tools_array = genparams.get('tools', []) - if tools_array and not genparams.get('tool_choice') == None: + if tools_array and len(tools_array)>0 and not genparams.get('tool_choice',None) == None: response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}] json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0) tools_string = json.dumps(tools_array, indent=0) messages_string += tools_string - using_openai_tools = True specified_function = None if isinstance(genparams.get('tool_choice'), dict): try: specified_function = genparams.get('tool_choice').get('function').get('name') json_formatting_instruction = f"The user is asking you to use the style of this JSON object formatting to complete the parameters for the specific function named {specified_function} in the following format: " + json.dumps([{"id": "insert an id for the response", "type": "function", "function": {"name": f"{specified_function}", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}], indent=0) - except: + except Exception as e: # In case of any issues, just revert back to no specified function pass messages_string += json_formatting_instruction # Set temperature low automatically if function calling genparams["temperature"] = 0.2 + genparams["using_openai_tools"] = True # Set grammar to llamacpp example grammar to force json response (see https://github.com/ggerganov/llama.cpp/blob/master/grammars/json_arr.gbnf) genparams["grammar"] = r""" root ::= arr value ::= object | array | string | number | ("true" | "false" | "null") ws - arr ::= "[\n" ws ( value (",\n" ws value)* )? "]" - object ::= "{" ws ( string ":" ws value ("," ws string ":" ws value)* )? "}" ws - array ::= "[" ws ( value ("," ws value)* )? "]" ws - string ::= "\"" ( [^"\\\x7F\x00-\x1F] | "\\" (["\\bfnrt] | "u" [0-9a-fA-F]{4}) )* "\"" ws - number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws - ws ::= | " " | "\n" [ \t]{0,20} """ - else: - using_openai_tools = False if message['role'] == "system": messages_string += system_message_end elif message['role'] == "user": @@ -1038,11 +1020,10 @@ def extract_b64string_from_file_upload(self, body): return None async def generate_text(self, genparams, api_format, stream_flag): - global friendlymodelname, chatcompl_adapter, currfinishreason, using_openai_tools + global friendlymodelname, chatcompl_adapter, currfinishreason is_quiet = args.quiet currfinishreason = "null" - - + def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat # flag instance as non-idle for a while washordereq = genparams.get('genkey', '').startswith('HORDEREQ_') @@ -1090,7 +1071,7 @@ def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat ) genout = {"text": "", "status": -1, "stopreason": -1} - if stream_flag and not using_openai_tools: + if stream_flag: loop = asyncio.get_event_loop() executor = ThreadPoolExecutor() genout = await loop.run_in_executor(executor, run_blocking) @@ -1116,15 +1097,11 @@ def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200}, "choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]} elif api_format == 4: - tool_calls = [] + using_openai_tools = genparams.get('using_openai_tools', False) if using_openai_tools: - try: - tool_calls = extract_json_from_string(recvtxt) - if tool_calls: - recvtxt = None - except Exception as e: - print(f"Error parsing or finding tool calls: {e}, omitting tool calls from response, and just passing generated content as message content") - + tool_calls = extract_json_from_string(recvtxt) + if tool_calls and len(tool_calls)>0: + recvtxt = None res = {"id": "chatcmpl-1", "object": "chat.completion", "created": 1, "model": friendlymodelname, "usage": {"prompt_tokens": 100, "completion_tokens": 100, "total_tokens": 200}, "choices": [{"index": 0, "message": {"role": "assistant", "content": recvtxt, "tool_calls": tool_calls}, "finish_reason": currfinishreason}]} From 1c1eb604b94c4a57067c63e69d0c1e679fd5835e Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 13 Jul 2024 17:17:45 +0800 Subject: [PATCH 7/8] clarity --- koboldcpp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koboldcpp.py b/koboldcpp.py index 13bfa9eae95..2046104dea1 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -907,7 +907,7 @@ def transform_genparams(genparams, api_format): if message['role'] == "user" and message_index == len(messages_array): # Check if user is passing a openai tools array, if so add to end of prompt before assistant prompt unless tool_choice has been set to None tools_array = genparams.get('tools', []) - if tools_array and len(tools_array)>0 and not genparams.get('tool_choice',None) == None: + if tools_array and len(tools_array) > 0 and genparams.get('tool_choice',None) != None: response_array = [{"id": "insert an id for the response", "type": "function", "function": {"name": "insert the name of the function you want to call", "arguments": {"first property key": "first property value", "second property key": "second property value"}}}] json_formatting_instruction = " Use this style of JSON object formatting to give your answer if you think the user is asking you to perform an action: " + json.dumps(response_array, indent=0) tools_string = json.dumps(tools_array, indent=0) From 2d3991bde32e63ee9bff44d35d1b592d7d8398ee Mon Sep 17 00:00:00 2001 From: teddybear082 <87204721+teddybear082@users.noreply.github.com> Date: Sat, 13 Jul 2024 10:34:09 -0400 Subject: [PATCH 8/8] fix missing local variable error This worked to fix the error I mentioned on my last comment --- koboldcpp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/koboldcpp.py b/koboldcpp.py index 2046104dea1..e03e42f18c4 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1098,6 +1098,7 @@ def run_blocking(): # api format 1=basic,2=kai,3=oai,4=oai-chat "choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]} elif api_format == 4: using_openai_tools = genparams.get('using_openai_tools', False) + tool_calls = [] if using_openai_tools: tool_calls = extract_json_from_string(recvtxt) if tool_calls and len(tool_calls)>0: