From 144da1254ce1e798b39217df0a159feb9f20a77e Mon Sep 17 00:00:00 2001 From: James O'Leary Date: Thu, 19 Mar 2026 13:54:25 -0700 Subject: [PATCH] chat : recover partial AST on non-partial parse failure --- common/chat.cpp | 21 +-- models/templates/Qwen-Qwen3.5-0.8B.jinja | 154 +++++++++++++++++++++ tests/test-chat.cpp | 162 +++++++++++++++++++---- 3 files changed, 297 insertions(+), 40 deletions(-) create mode 100644 models/templates/Qwen-Qwen3.5-0.8B.jinja diff --git a/common/chat.cpp b/common/chat.cpp index e129581fd2f..a2ef6a22ce2 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1758,23 +1758,12 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars auto result = parser.parse(ctx); if (result.fail()) { - // During partial parsing, return partial results if any AST nodes were captured - // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK - if (is_partial && result.end > 0) { - // Try to extract any partial results from what was successfully parsed - common_chat_msg msg; - msg.role = "assistant"; - auto mapper = common_chat_peg_mapper(msg); - mapper.from_ast(ctx.ast, result); - - if (ctx.is_debug()) { - fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str()); - fflush(stderr); - } - return msg; + if (result.end == 0) { + throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " + + effective_input.substr(result.end)); } - throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " + - input.substr(result.end)); + LOG_WRN("%s: parser failed at pos %zu, returning partial result. Unparsed: %s\n", + __func__, result.end, effective_input.substr(result.end).c_str()); } common_chat_msg msg; diff --git a/models/templates/Qwen-Qwen3.5-0.8B.jinja b/models/templates/Qwen-Qwen3.5-0.8B.jinja new file mode 100644 index 00000000000..0ef09f214ea --- /dev/null +++ b/models/templates/Qwen-Qwen3.5-0.8B.jinja @@ -0,0 +1,154 @@ +{%- set image_count = namespace(value=0) %} +{%- set video_count = namespace(value=0) %} +{%- macro render_content(content, do_vision_count, is_system_content=false) %} + {%- if content is string %} + {{- content }} + {%- elif content is iterable and content is not mapping %} + {%- for item in content %} + {%- if 'image' in item or 'image_url' in item or item.type == 'image' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain images.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set image_count.value = image_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Picture ' ~ image_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|image_pad|><|vision_end|>' }} + {%- elif 'video' in item or item.type == 'video' %} + {%- if is_system_content %} + {{- raise_exception('System message cannot contain videos.') }} + {%- endif %} + {%- if do_vision_count %} + {%- set video_count.value = video_count.value + 1 %} + {%- endif %} + {%- if add_vision_id %} + {{- 'Video ' ~ video_count.value ~ ': ' }} + {%- endif %} + {{- '<|vision_start|><|video_pad|><|vision_end|>' }} + {%- elif 'text' in item %} + {{- item.text }} + {%- else %} + {{- raise_exception('Unexpected item type in content.') }} + {%- endif %} + {%- endfor %} + {%- elif content is none or content is undefined %} + {{- '' }} + {%- else %} + {{- raise_exception('Unexpected content type.') }} + {%- endif %} +{%- endmacro %} +{%- if not messages %} + {{- raise_exception('No messages provided.') }} +{%- endif %} +{%- if tools and tools is iterable and tools is not mapping %} + {{- '<|im_start|>system\n' }} + {{- "# Tools\n\nYou have access to the following functions:\n\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n" }} + {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {%- if content %} + {{- '\n\n' + content }} + {%- endif %} + {%- endif %} + {{- '<|im_end|>\n' }} +{%- else %} + {%- if messages[0].role == 'system' %} + {%- set content = render_content(messages[0].content, false, true)|trim %} + {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for message in messages[::-1] %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- if ns.multi_step_tool and message.role == "user" %} + {%- set content = render_content(message.content, false)|trim %} + {%- if not(content.startswith('') and content.endswith('')) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if ns.multi_step_tool %} + {{- raise_exception('No user query found in messages.') }} +{%- endif %} +{%- for message in messages %} + {%- set content = render_content(message.content, true)|trim %} + {%- if message.role == "system" %} + {%- if not loop.first %} + {{- raise_exception('System message must be at the beginning.') }} + {%- endif %} + {%- elif message.role == "user" %} + {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- set reasoning_content = reasoning_content|trim %} + {%- if loop.index0 > ns.last_query_index %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n' + content }} + {%- endif %} + {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {%- if loop.first %} + {%- if content|trim %} + {{- '\n\n\n\n' }} + {%- else %} + {{- '\n\n' }} + {%- endif %} + {%- else %} + {{- '\n\n\n' }} + {%- endif %} + {%- if tool_call.arguments is defined %} + {%- for args_name, args_value in tool_call.arguments|items %} + {{- '\n' }} + {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} + {{- args_value }} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.previtem and loop.previtem.role != "tool" %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- content }} + {{- '\n' }} + {%- if not loop.last and loop.nextitem.role != "tool" %} + {{- '<|im_end|>\n' }} + {%- elif loop.last %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- else %} + {{- raise_exception('Unexpected message role.') }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is true %} + {{- '\n' }} + {%- else %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 58fef8e99c7..5067f28808d 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -806,6 +806,7 @@ struct peg_test_case { std::string input; common_chat_msg expect; bool is_partial = false; + bool expect_partial_rollback = false; }; struct make_peg_parser { @@ -889,41 +890,45 @@ static void test_peg_parser(common_chat_templates * tmpls, std::string prefix = tc.input.substr(0, safe_len); common_chat_msg msg_current = parser.parse(prefix, is_partial); - for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) { - if (!diff.reasoning_content_delta.empty()) { - msg_accum.reasoning_content += diff.reasoning_content_delta; - } - if (!diff.content_delta.empty()) { - msg_accum.content += diff.content_delta; - } - if (diff.tool_call_index != std::string::npos) { - // During partial parsing, a new tool call may appear with empty name initially - // The name gets filled in as more input is parsed - while (msg_accum.tool_calls.size() <= diff.tool_call_index) { - msg_accum.tool_calls.push_back({ "", "", "" }); - } - // Always update name and id from diff (may change during incremental parsing), but only if the delta - // actually contains them - if (!diff.tool_call_delta.name.empty()) { - msg_accum.tool_calls[diff.tool_call_index].name = diff.tool_call_delta.name; + try { + for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) { + if (!diff.reasoning_content_delta.empty()) { + msg_accum.reasoning_content += diff.reasoning_content_delta; } - if (!diff.tool_call_delta.id.empty()) { - msg_accum.tool_calls[diff.tool_call_index].id = diff.tool_call_delta.id; + if (!diff.content_delta.empty()) { + msg_accum.content += diff.content_delta; } - if (!diff.tool_call_delta.arguments.empty()) { - msg_accum.tool_calls[diff.tool_call_index].arguments += diff.tool_call_delta.arguments; + if (diff.tool_call_index != std::string::npos) { + while (msg_accum.tool_calls.size() <= diff.tool_call_index) { + msg_accum.tool_calls.push_back({ "", "", "" }); + } + if (!diff.tool_call_delta.name.empty()) { + msg_accum.tool_calls[diff.tool_call_index].name = diff.tool_call_delta.name; + } + if (!diff.tool_call_delta.id.empty()) { + msg_accum.tool_calls[diff.tool_call_index].id = diff.tool_call_delta.id; + } + if (!diff.tool_call_delta.arguments.empty()) { + msg_accum.tool_calls[diff.tool_call_index].arguments += diff.tool_call_delta.arguments; + } } } - } - try { assert_msg_equals(msg_current, msg_accum, true); } catch (std::exception & e) { - throw std::runtime_error((std::string("Error comparing accumulated message to current: ") + e.what()).c_str()); + if (tc.expect_partial_rollback) { + return; + } + throw std::runtime_error((std::string("Error in incremental parse: ") + e.what()).c_str()); } msg_prev = msg_current; } + if (tc.expect_partial_rollback) { + throw std::runtime_error("Partial rollback was expected but did not occur. " + "If grammar coverage improved, update this test to expect the extracted content instead."); + } + if (!tc.is_partial) { assert_msg_equals(tc.expect, parser.parse(tc.input, false), true); } @@ -1098,6 +1103,10 @@ class peg_test_builder { tc_.is_partial = val; return *this; } + peg_test_builder & expect_partial_rollback(bool val = true) { + tc_.expect_partial_rollback = val; + return *this; + } // Expect setters peg_test_builder & expect(const common_chat_msg & msg) { @@ -1807,6 +1816,78 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); } + + // Qwen3.5-0.8B: template + basic tool calling + malformed JSON regression + { + auto tst = peg_tester("models/templates/Qwen-Qwen3.5-0.8B.jinja", /*detailed_debug=*/false); + + static common_chat_tool flashcards_tool{ + "flashcards", "Flashcards for studying", + R"({"type":"object","properties":{"flashcards":{"type":"array","items":{"type":"object","properties":{"query":{"type":"string"},"recall":{"type":"string"}},"required":["recall","query"]}}},"required":["flashcards"]})", + }; + + tst.test("Hello, world!\nWhat's up?") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .expect(message_assist) + .run(); + + tst.test( + "" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "") + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK) + .tools({ special_function_tool }) + .expect(message_assist_call) + .run(); + + tst.test( + "The user wants flashcards about California.\n" + "\n" + "\n" + "\n" + "\n" + "\n" + "1\n" + "\n" + "\n" + "") + .tools({ special_function_tool }) + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect_reasoning("The user wants flashcards about California.\n") + .expect_tool_calls({ + { "special_function", R"({"arg1": 1})", {} }, + }) + .run(); + + // Well-formed JSON: no regression + tst.test( + "Making cards.\n" + "\n\n" + "\n" + "\n" + "\n" + "[{\"query\": \"CA\", \"recall\": \"LA\"}]\n" + "\n" + "\n" + "") + .tools({ flashcards_tool }) + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect_reasoning("Making cards.\n") + .expect_tool_calls({ + { "flashcards", R"({"flashcards": [{"query": "CA", "recall": "LA"}]})", {} }, + }) + .run(); + } + { auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug); tst.test( @@ -1954,6 +2035,39 @@ static void test_template_output_peg_parsers(bool detailed_debug) { } } + // Qwen3.5 TAG_WITH_TAGGED: trailing newline after . + // + // This is not canonical inference output. It was observed during inference, but + // grammar constraints and/or parallel_tool_calls settings now prevent the model + // from generating this exact output at runtime. It exists solely as a regression + // test: the trailing \n causes result.fail() with result.end > 0 on the full + // parse. On unpatched code this throws std::runtime_error and discards all parsed + // content. With the fix, reasoning and tool calls are recovered. + { + auto tst = peg_tester("models/templates/Qwen-Qwen3.5-0.8B.jinja", /*detailed_debug=*/false); + + static common_chat_tool flashcards_tool{ + "flashcards", "Flashcards for studying", + R"({"type":"object","properties":{"flashcards":{"type":"array","items":{"type":"object","properties":{"query":{"type":"string"},"recall":{"type":"string"}},"required":["recall","query"]}}},"required":["flashcards"]})", + }; + + tst.test( + "Making cards.\n" + "\n\n" + "\n" + "\n" + "\n" + "[{\"query\": \"CA\", \"recall\": \"LA\"}]\n" + "\n" + "\n" + "\n") + .tools({ flashcards_tool }) + .enable_thinking(true) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .expect_partial_rollback() + .run(); + } + // Kimi-K2-Thinking tests - custom parser // Unique feature: tool call ID embeds function name as functions.: {