Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
213c4a0
[SYCL] support Flash Attention for fp32/fp16/Q4/Q5/Q8 (#20190)
arthw Mar 8, 2026
ff52ee9
server : correct index on finish in OAI completion streams (#20226)
decahedron1 Mar 8, 2026
b283f6d
Revert to OAI-compatible args (#20213)
pwilkin Mar 8, 2026
a950479
readme : update infra list (#20212)
Defilan Mar 8, 2026
a976ff0
llama: end-to-end tests (#19802)
JohannesGaessler Mar 8, 2026
cd18a50
vulkan: Fix data races in coopmat1 mul_mat(_id) (#20084)
jeffbolznv Mar 8, 2026
d088d5b
ggml-vulkan: Add ELU op support (#20183)
GiantPrince Mar 8, 2026
62b8143
Fix structured outputs (#20223)
pwilkin Mar 8, 2026
9b24886
Fix compile bug (#20203)
pwilkin Mar 8, 2026
451ef08
common : gracefully handle incomplete output (#20191)
aldehir Mar 8, 2026
35bee03
graph : remove redundant scale_w parameter (#20235)
CISC Mar 8, 2026
d417bc4
server : do not create checkpoints right after mtmd chunks (#20232)
ggerganov Mar 8, 2026
97c64fb
PEG parser for LFM2 (#20251)
pwilkin Mar 9, 2026
ae87863
llama-bench: introduce `-hf` and `-hff` flags & use `--mmap 1` by def…
taronaeo Mar 9, 2026
5f4cdac
cuda : display total and free VRAM capacity during device initializat…
tehsiuhuang Mar 9, 2026
b2f460b
vulkan: skip zero size tensors in backend copies (#20233)
0cc4m Mar 9, 2026
0beb8db
ggml-vulkan: add SGN operator, auto-generate Vulkan.csv and ops.md (#…
bertaye Mar 9, 2026
e2763a6
contributing: limit open PRs for new contributors to 1 (#20036)
am17an Mar 9, 2026
b518195
llama-quant : left-align tensor names in output (#20117)
ddh0 Mar 9, 2026
e8bbc73
ggml-cuda: disable gdn for musa (#20278)
am17an Mar 9, 2026
107d599
server : add kill switch when server is stuck (#20277)
ggerganov Mar 9, 2026
43e1cbd
models : fix assert in mamba2 graph (#20270)
ggerganov Mar 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ jobs:
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
ctest -L main -E "test-llama-archs" --verbose --timeout 900

macOS-latest-cmake-x64:
runs-on: macos-15-intel
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ Before submitting your PR:
- For intricate features, consider opening a feature request first to discuss and align expectations
- When adding support for a new model or feature, focus on **CPU support only** in the initial PR unless you have a good reason not to. Add support for other backends like CUDA in follow-up PRs
- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
- If you are a new contributor, limit your open PRs to 1.

After submitting your PR:
- Expect requests for modifications to ensure the code meets llama.cpp's standards for quality and long-term maintainability
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
- [llama-swap](https://github.com/mostlygeek/llama-swap) - transparent proxy that adds automatic model switching with llama-server
- [Kalavai](https://github.com/kalavai-net/kalavai-client) - Crowdsource end to end LLM deployment at any scale
- [llmaz](https://github.com/InftyAI/llmaz) - ☸️ Easy, advanced inference platform for large language models on Kubernetes.
- [LLMKube](https://github.com/defilantech/llmkube) - Kubernetes operator for llama.cpp with multi-GPU and Apple Silicon Metal support
</details>

<details>
Expand Down
9 changes: 8 additions & 1 deletion common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2666,7 +2666,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, const std::string & value) {
params.out_file = value;
}
).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_FINETUNE}));
).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_FINETUNE, LLAMA_EXAMPLE_RESULTS}));
add_opt(common_arg(
{"-ofreq", "--output-frequency"}, "N",
string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq),
Expand Down Expand Up @@ -3607,6 +3607,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
}
).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
add_opt(common_arg(
{"--check"},
string_format("check rather than generate results (default: %s)", params.check ? "true" : "false"),
[](common_params & params) {
params.check = true;
}
).set_examples({LLAMA_EXAMPLE_RESULTS}));
add_opt(common_arg(
{"--save-logits"},
string_format("save final logits to files for verification (default: %s)", params.save_logits ? "true" : "false"),
Expand Down
22 changes: 14 additions & 8 deletions common/chat-auto-parser-generator.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "chat-auto-parser.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "common.h"
#include "json-schema-to-grammar.h"
#include "nlohmann/json.hpp"

Expand Down Expand Up @@ -51,13 +52,15 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
bool has_tools =
autoparser.tools.format.mode != tool_format::NONE && inputs.tools.is_array() && !inputs.tools.empty();
std::string trigger_marker = !autoparser.tools.format.section_start.empty() ? autoparser.tools.format.section_start :
autoparser.tools.format.per_call_start;
bool include_grammar =
has_tools && ((inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO && !trigger_marker.empty()) ||
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED);
autoparser.tools.format.per_call_start;

bool has_response_format = !inputs.json_schema.empty() && inputs.json_schema.is_object();
bool include_grammar = has_response_format || (has_tools &&
((inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO && !trigger_marker.empty()) ||
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));

if (include_grammar) {
data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
data.grammar_lazy = !has_response_format && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
foreach_function(inputs.tools, [&](const json & tool) {
const auto & function = tool.at("function");
Expand All @@ -68,7 +71,7 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
});

// Set grammar triggers based on tool section markers (fall back to per-call markers)
if (data.grammar_lazy) { // only do triggers on lazy grammar
if (data.grammar_lazy) {
data.grammar_triggers = {
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
};
Expand Down Expand Up @@ -104,8 +107,11 @@ common_peg_arena autoparser::build_parser(const templates_params & inputs) const
bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();

if (has_response_format) {
return ctx.reasoning_parser + p.space() +
p.content(p.schema(p.json(), "response-format", inputs.json_schema)) + p.end();
auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
return ctx.reasoning_parser + p.space() + p.choice({
p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
response_format
}) + p.end();
}

if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
Expand Down
2 changes: 1 addition & 1 deletion common/chat-auto-parser-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ diff_split calculate_diff_split(const std::string & left, const std::string & ri
right_fully_consumed = true;
}

auto eat_segment = [](std::string & str, segment & seg) -> std::string { return str.append(seg.value); };
auto eat_segment = [](std::string str, const segment & seg) -> std::string { return std::move(str) + seg.value; };

bool can_have_text_suffix = left_end->type == segment_type::TEXT && right_end->type == segment_type::TEXT;
bool can_have_text_prefix = right_start->type == segment_type::TEXT && left_start->type == segment_type::TEXT;
Expand Down
77 changes: 72 additions & 5 deletions common/chat-peg-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ void tag_based_peg_mapper::from_ast(const common_peg_ast_arena & arena, const co
});
}

tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, bool is_partial) const {
common_peg_parse_context ctx(input, is_partial);
tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, common_peg_parse_flags extra_flags) const {
common_peg_parse_context ctx(input, flags | extra_flags);
auto parse_result = arena.parse(ctx);

tag_based_peg_mapper mapper;
Expand All @@ -179,11 +179,10 @@ tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & inp

tagged_parse_result tagged_peg_parser::parse_anywhere_and_extract(const std::string & input) const {
if (input.empty()) {
return parse_and_extract(input, false);
return parse_and_extract(input);
}
for (size_t i = 0; i < input.size(); i++) {
common_peg_parse_context ctx(input, false);
ctx.debug = debug;
common_peg_parse_context ctx(input, flags);
auto parse_result = arena.parse(ctx, i);
if (parse_result.success() || i == input.size() - 1) {
tag_based_peg_mapper mapper;
Expand Down Expand Up @@ -477,6 +476,74 @@ common_peg_parser common_chat_peg_builder::standard_constructed_tools(
return force_tool_calls ? section : optional(section);
}

// Python-style tool calls: name(arg1="value1", arg2=123)
// Used only by LFM2 for now, so we don't merge it into autoparser
//
// Builds a parser for a bracketed list of Python-style function calls,
// e.g. [get_weather(city="Paris"), add(a=1, b=2)], from the OAI-style
// `tools` array. One named rule ("tool-<name>") is emitted per tool.
common_peg_parser common_chat_peg_builder::python_style_tool_calls(
    const nlohmann::json & tools,
    bool parallel_tool_calls) {
    // No tools declared: return epsilon so callers can compose unconditionally.
    if (!tools.is_array() || tools.empty()) {
        return eps();
    }

    // One alternative per declared tool; the result accepts any of them.
    auto tool_choices = choice();

    for (const auto & tool_def : tools) {
        // Skip malformed entries that lack the OAI "function" wrapper.
        if (!tool_def.contains("function")) {
            continue;
        }
        const auto & function = tool_def.at("function");
        std::string name = function.at("name");
        nlohmann::json params = function.contains("parameters") ? function.at("parameters") : nlohmann::json::object();

        // Argument-list parser; stays epsilon when the schema declares no properties.
        auto args = eps();
        if (params.contains("properties") && !params["properties"].empty()) {
            auto arg_choice = choice();
            for (const auto & el : params["properties"].items()) {
                const std::string & prop_name = el.key();
                const auto & prop_def = el.value();
                bool is_string_type = (prop_def.contains("type") && prop_def["type"] == "string");

                auto arg_name_parser = literal(prop_name);

                common_peg_parser arg_value_parser = eps();
                // String values may be double- or single-quoted (Python style).
                auto string_value_parser = choice({
                    literal("\"") + tool_arg_string_value(json_string_content()) + literal("\""),
                    literal("'") + tool_arg_string_value(json_string_content()) + literal("'")
                });

                if (is_string_type) {
                    arg_value_parser = string_value_parser;
                } else {
                    // Non-string schema types fall back to a generic Python literal.
                    arg_value_parser = tool_arg_value(python_value());
                }

                // Full argument: name="value" or name=value
                auto arg_rule = tool_arg(
                    tool_arg_open(eps()) +
                    tool_arg_name(arg_name_parser) +
                    literal("=") +
                    arg_value_parser +
                    tool_arg_close(eps())
                );
                arg_choice |= arg_rule;
            }

            // NOTE(review): when properties exist this requires at least one
            // argument — a call with all arguments omitted, e.g. name(), would
            // not match. Confirm all-optional-argument tools are not expected.
            args = arg_choice + zero_or_more("," + space() + arg_choice);
        }

        // A single call: name( <args> )
        auto tool_parser = tool(tool_open(tool_name(literal(name)) + literal("(")) +
            space() + tool_args(args) + space() + tool_close(literal(")"))
        );

        tool_choices |= rule("tool-" + name, tool_parser);
    }

    // The call list is always bracketed; with parallel calls enabled it
    // additionally accepts comma-separated calls: [call, call, ...].
    if (parallel_tool_calls) {
        return "[" + space() + tool_choices + zero_or_more("," + space() + tool_choices) + space() + "]";
    }
    return "[" + space() + tool_choices + space() + "]";
}

// Helper: Parse dot notation key into prefix and field name
static std::pair<std::string, std::string> parse_key_spec(const std::string & key) {
auto dot_pos = key.find('.');
Expand Down
13 changes: 9 additions & 4 deletions common/chat-peg-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,11 @@ class common_chat_peg_builder : public common_peg_parser_builder {
bool parallel_tool_calls,
bool force_tool_calls);

// Helper for Python-style function call format: name(arg1="value1", arg2=123)
// Used by LFM2 and similar templates
common_peg_parser python_style_tool_calls(const nlohmann::json & tools,
bool parallel_tool_calls);

private:
// Implementation helpers for standard_json_tools — one per JSON tool call layout mode
common_peg_parser build_json_tools_function_is_key(const nlohmann::json & tools,
Expand Down Expand Up @@ -155,19 +160,19 @@ struct tagged_parse_result {

// PEG parser bundle: a compiled grammar arena plus baseline parse flags.
// The with*/without* setters return *this so calls can be chained, e.g.
// parser.withDebug().parse_and_extract(input).
struct tagged_peg_parser {
    // Compiled PEG grammar used for every parse call.
    common_peg_arena arena;
    // Baseline flags applied to every parse; per-call extras are OR'd in.
    common_peg_parse_flags flags = COMMON_PEG_PARSE_FLAG_NONE;

    // Enable debug output for subsequent parses (chainable).
    tagged_peg_parser & withDebug() {
        flags |= COMMON_PEG_PARSE_FLAG_DEBUG;
        return *this;
    }

    // Disable debug output for subsequent parses (chainable).
    tagged_peg_parser & withoutDebug() {
        flags = flags & ~COMMON_PEG_PARSE_FLAG_DEBUG;
        return *this;
    }

    // Parse `input` from the start using `flags | extra_flags`, then map the
    // resulting AST into a tagged_parse_result.
    tagged_parse_result parse_and_extract(const std::string & input, common_peg_parse_flags extra_flags = COMMON_PEG_PARSE_FLAG_NONE) const;
    // Retry the parse at each successive start offset of `input` until one
    // succeeds (or input is exhausted), then extract from that attempt.
    tagged_parse_result parse_anywhere_and_extract(const std::string & input) const;
};

Expand Down
100 changes: 95 additions & 5 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
{"type", "function"},
{"function", {
{"name", tool_call.name},
{"arguments", json::parse(tool_call.arguments)},
{"arguments", json(tool_call.arguments)},
}},
};
if (!tool_call.id.empty()) {
Expand Down Expand Up @@ -1274,6 +1274,82 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
return data;
}

// LFM2 format:
// - Reasoning: <think>{reasoning}</think> (optional, only if enable_thinking is true)
// - Content: text after reasoning (optional)
// - Tool calls: <|tool_call_start|>[function_name(arg1="value1", arg2="value2")]<|tool_call_end|>
// Tool calls can appear multiple times (parallel tool calls)
static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl,
                                                       const autoparser::templates_params & inputs) {
    common_chat_params data;

    data.prompt = common_chat_template_direct_apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
    data.supports_thinking = true;
    // Keep the special markers as single tokens so the PEG parser below can
    // match them literally in the model output.
    data.preserved_tokens = {
        "<|tool_list_start|>",
        "<|tool_list_end|>",
        "<|tool_call_start|>",
        "<|tool_call_end|>",
        "<think>",
        "</think>",
    };

    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
    // Only split out <think> blocks when the caller requested reasoning extraction.
    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
    auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;


    const std::string TOOL_CALL_START = "<|tool_call_start|>";
    const std::string TOOL_CALL_END = "<|tool_call_end|>";
    const std::string THINK_START = "<think>";
    const std::string THINK_END = "</think>";
    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {

        auto end = p.end();

        // Optional leading <think>...</think> block, captured as reasoning.
        auto reasoning = p.eps();
        if (extract_reasoning && inputs.enable_thinking) {
            reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
        }

        // Without usable tools, everything after the reasoning is plain content.
        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
            return reasoning + p.content(p.rest()) + end;
        }

        // <|tool_call_start|>[name(args), ...]<|tool_call_end|>; registered as a
        // trigger rule so it can anchor the lazy grammar below.
        auto tool_calls = p.rule("tool-calls",
            p.trigger_rule("tool-call", p.literal(TOOL_CALL_START) +
                p.python_style_tool_calls(inputs.tools, inputs.parallel_tool_calls) +
                p.literal(TOOL_CALL_END)
            )
        );

        // Free-form content runs up to the first tool-call marker.
        auto content = p.content(p.until(TOOL_CALL_START));

        return reasoning + content + tool_calls + end;
    });

    data.parser = parser.save();

    if (include_grammar) {
        // Lazy grammar when tool choice is AUTO: constrain output only after
        // the trigger word below is seen. REQUIRED constrains from the start.
        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
            // Resolve $ref pointers in each tool's parameter schema before the
            // parser-derived grammar is emitted.
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
                auto schema = function.at("parameters");
                builder.resolve_refs(schema);
            });
            parser.build_grammar(builder, data.grammar_lazy);
        });

        data.grammar_triggers = {
            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, TOOL_CALL_START }
        };
    }

    return data;
}

namespace workaround {

// if first message is system and template does not support it, merge it with next message
Expand Down Expand Up @@ -1353,6 +1429,8 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
params.add_bos = tmpls->add_bos;
params.add_eos = tmpls->add_eos;

workaround::func_args_not_string(params.messages);

if (!tmpl.original_caps().supports_system_role) {
workaround::system_message_not_supported(params.messages);
}
Expand Down Expand Up @@ -1420,6 +1498,14 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
return common_chat_params_init_kimi_k2(tmpl, params);
}

// LFM2 - uses <|tool_list_start|>/<|tool_list_end|> markers and <|tool_call_start|>[name(args)]<|tool_call_end|> format
// Detection: template has "<|tool_list_start|>" and "<|tool_list_end|>" markers
if (src.find("<|tool_list_start|>") != std::string::npos &&
src.find("<|tool_list_end|>") != std::string::npos) {
LOG_DBG("Using specialized template: LFM2\n");
return common_chat_params_init_lfm2(tmpl, params);
}

try {
LOG_DBG("Using differential autoparser\n");
struct autoparser::autoparser autoparser;
Expand Down Expand Up @@ -1525,8 +1611,12 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars

LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), input.c_str());

common_peg_parse_context ctx(input, is_partial);
ctx.debug = params.debug;
common_peg_parse_flags flags = COMMON_PEG_PARSE_FLAG_LENIENT;
if (params.debug) {
flags |= COMMON_PEG_PARSE_FLAG_DEBUG;
}

common_peg_parse_context ctx(input, flags);
auto result = parser.parse(ctx);

if (result.fail()) {
Expand All @@ -1539,7 +1629,7 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars
auto mapper = common_chat_peg_mapper(msg);
mapper.from_ast(ctx.ast, result);

if (ctx.debug) {
if (ctx.is_debug()) {
fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str());
fflush(stderr);
}
Expand All @@ -1555,7 +1645,7 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars
auto mapper = common_chat_peg_mapper(msg);
mapper.from_ast(ctx.ast, result);

if (ctx.debug) {
if (ctx.is_debug()) {
fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str());
fflush(stderr);
}
Expand Down
Loading